Files
AI-TTS/main.py
David Scriver 9a5e776dab Add audio, scripts, and captcha generation tools
Added new media files (video, audio, subtitles), two scripts for audio combination and captcha audio generation, and updated main.py and server.py for voice and speed settings. Script files were revised for clarity and additional instructions, supporting new workflow for proctor scheduling system documentation and audio generation.
2025-11-21 10:09:24 -05:00

546 lines
19 KiB
Python

# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# import os
# import sys
# from pydub import AudioSegment
# # need to install pytorch https://pytorch.org/get-started/locally/
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# pipeline = KPipeline(lang_code='a')
# content = ''
# print('please input path')
# file_path = input()
# with open(file_path, 'r') as file:
# content = file.read()
# print(content)
# generator = pipeline(
# content, voice='af_bella', # <= change voice here
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # i => index
# print(gs) # gs => graphemes/text
# print(ps) # ps => phonemes
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # save each audio file
# count = count + 1
# directory = script_directory
# # Merge the per-segment WAV files 0.wav through (count - 1).wav
# combined = AudioSegment.empty()
# for i in range(count): # 0 to count - 1 inclusive
# file_path = f"{directory}/{i}.wav"
# sound = AudioSegment.from_wav(file_path)
# combined += sound
# # Export the merged WAV file
# output_path = f"{directory}/message.wav"
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# import torch
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# pipeline.model.to(device)
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# # For plain text files, just read the content
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# # For PDFs, open the file in binary mode and extract text from each page
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Display the extracted content (optional)
# print(content)
# # Use the pipeline to generate speech from the text content
# generator = pipeline(
# content, voice='af_bella', # Change the voice here if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio file
# count += 1
# # Merge all individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Export the merged WAV file
# output_path = os.path.join(script_directory, "output.wav")
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# import re
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# from ebooklib.epub import EpubHtml # Import the document class
# for item in book.get_items():
# # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
# if isinstance(item, EpubHtml):
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Combine all text into one large string by removing newline characters.
# # This helps ensure the model receives larger chunks to work with.
# content = ' '.join(content.splitlines())
# # Option 1: Let the model handle its own splitting by not providing a split pattern:
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1
# )
# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# # generator = pipeline(
# # content, voice='af_bella', # Change voice if desired
# # speed=1, split_pattern=r'(?<=[.!?])\s+'
# # )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
import os
import sys
import random
import string
import re
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
from pydub import AudioSegment
# Libraries for PDF and EPUB extraction
import PyPDF2
from ebooklib import epub
from bs4 import BeautifulSoup
# Build an 8-character tag (lowercase letters and digits) for this run and echo it
tag_alphabet = string.ascii_lowercase + string.digits
random_str = ''.join(random.choices(tag_alphabet, k=8))
print(f"Random string for this process: {random_str}")
# Resolve the folder that holds the invoked script, based on argv[0]
script_path = os.path.abspath(sys.argv[0])
script_directory = os.path.dirname(script_path)
# Create the kokoro pipeline (kokoro must already be installed and set up)
pipeline = KPipeline(lang_code='a')
# Prompt on its own line, then read the path and trim surrounding whitespace
print('Please input the file path (txt, pdf, or epub):')
file_path = input().strip()
# Pick the extractor from the lower-cased file suffix and load the text into `content`
file_ext = os.path.splitext(file_path)[1].lower()
content = ''
# Reject anything that is not one of the three handled suffixes up front
if file_ext not in ('.txt', '.pdf', '.epub'):
    print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
    sys.exit(1)
if file_ext == '.txt':
    # Plain text: read the whole file as UTF-8
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
elif file_ext == '.pdf':
    # PDF: take the text of every page, skipping pages that yield nothing,
    # and terminate each kept page with a newline
    with open(file_path, 'rb') as file:
        page_texts = (page.extract_text() for page in PyPDF2.PdfReader(file).pages)
        content = ''.join(page_text + '\n' for page_text in page_texts if page_text)
else:
    # EPUB: keep only EpubHtml items and strip their markup with BeautifulSoup
    book = epub.read_epub(file_path)
    from ebooklib.epub import EpubHtml  # Use EpubHtml for document-type items
    content = ''.join(
        BeautifulSoup(entry.get_content(), 'html.parser').get_text() + '\n'
        for entry in book.get_items()
        if isinstance(entry, EpubHtml)
    )
# Echo the raw extracted text before any clean-up
print(content)
# Drop every "OceanofPDF.com" occurrence, then flatten the text onto a single
# line by joining its lines with spaces, so the pipeline is handed one long
# string rather than many short lines.
content = ' '.join(content.replace("OceanofPDF.com", "").splitlines())
# Option 1: no split_pattern is passed, so splitting is left to the pipeline.
# Change the voice or speed here if desired.
generator = pipeline(content, voice='af_heart', speed=0.8)
# Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'(?<=[.!?])\s+'
# )
# Write each generated segment to its own temporary WAV file, merge the parts
# into "<source name>.wav" in the script directory, and then delete the parts.
# The work runs inside try/finally so that the temporary files are removed
# even when generation, writing, merging, or exporting raises.
count = 0
part_files = []  # every part path we attempted to write, in segment order
try:
    # Save each segment with the random string in the filename
    for i, (gs, ps, audio) in enumerate(generator):
        part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
        # Register the path before writing so that a partially written file
        # is still cleaned up if sf.write fails midway.
        part_files.append(part_filename)
        print(i)  # index of the segment
        print(gs)  # graphemes/text for this segment
        print(ps)  # phonemes for this segment
        display(Audio(data=audio, rate=24000, autoplay=i == 0))
        sf.write(part_filename, audio, 24000)  # Save each audio segment
        count += 1

    # Merge the individual WAV files into one, in segment order
    combined = AudioSegment.empty()
    for part_file in part_files:
        combined += AudioSegment.from_wav(part_file)

    # Generate the output file name based on the source file name
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_path = os.path.join(script_directory, f"{base_name}.wav")

    # Export the merged WAV file
    combined.export(output_path, format="wav")
    print(f"Merged WAV file saved as {output_path}")
finally:
    # Cleanup: remove the individual part files whether or not the merge
    # succeeded. A part that was never created (or is already gone) is
    # skipped silently instead of masking the original error.
    for part_file in part_files:
        try:
            os.remove(part_file)
        except FileNotFoundError:
            continue
        print(f"Removed {part_file}")