Files
AI-TTS/main.py

546 lines
19 KiB
Python

# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# import os
# import sys
# from pydub import AudioSegment
# # need to install pytorch https://pytorch.org/get-started/locally/
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# pipeline = KPipeline(lang_code='a')
# content = ''
# print('please input path')
# file_path = input()
# with open(file_path, 'r') as file:
# content = file.read()
# print(content)
# generator = pipeline(
# content, voice='af_bella', # <= change voice here
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # i => index
# print(gs) # gs => graphemes/text
# print(ps) # ps => phonemes
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # save each audio file
# count = count + 1
# directory = script_directory
# # Merge WAV files from 0.wav to 334.wav
# combined = AudioSegment.empty()
# for i in range(count): # 0 to 334 inclusive
# file_path = f"{directory}/{i}.wav"
# sound = AudioSegment.from_wav(file_path)
# combined += sound
# # Export the merged WAV file
# output_path = f"{directory}/message.wav"
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# import torch
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# pipeline.model.to(device)
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# # For plain text files, just read the content
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# # For PDFs, open the file in binary mode and extract text from each page
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Display the extracted content (optional)
# print(content)
# # Use the pipeline to generate speech from the text content
# generator = pipeline(
# content, voice='af_bella', # Change the voice here if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio file
# count += 1
# # Merge all individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Export the merged WAV file
# output_path = os.path.join(script_directory, "output.wav")
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# import re
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# from ebooklib.epub import EpubHtml # Import the document class
# for item in book.get_items():
# # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
# if isinstance(item, EpubHtml):
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Combine all text into one large string by removing newline characters.
# # This helps ensure the model receives larger chunks to work with.
# content = ' '.join(content.splitlines())
# # Option 1: Let the model handle its own splitting by not providing a split pattern:
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1
# )
# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# # generator = pipeline(
# # content, voice='af_bella', # Change voice if desired
# # speed=1, split_pattern=r'(?<=[.!?])\s+'
# # )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
import os
import sys
import random
import string
import re
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
from pydub import AudioSegment
# Libraries for PDF and EPUB extraction
import PyPDF2
from ebooklib import epub
from bs4 import BeautifulSoup
# ---------------------------------------------------------------------------
# Document-to-speech script.
#
# Reads a .txt, .pdf or .epub file, extracts its text, feeds it to a Kokoro
# KPipeline, writes every generated audio segment to a temporary WAV file next
# to this script, merges the segments into "<source name>.wav" and finally
# removes the temporary segment files (also when an error interrupts the run).
# ---------------------------------------------------------------------------

# Sample rate handed to every Audio()/sf.write() call below.
SAMPLE_RATE = 24000
# Voice and speaking speed passed to the pipeline; change them here if desired.
VOICE = 'af_heart'
SPEED = 0.85
# Watermark text that is stripped from the extracted content before synthesis.
WATERMARK = "OceanofPDF.com"


def _read_txt(path):
    """Return the full contents of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()


def _read_pdf(path):
    """Return the text of every page of the PDF at *path*.

    Pages for which ``extract_text()`` yields nothing are skipped; each
    remaining page's text is followed by a newline.
    """
    with open(path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        page_texts = (page.extract_text() for page in reader.pages)
        return ''.join(f"{text}\n" for text in page_texts if text)


def _read_epub(path):
    """Return the text of every HTML document item of the EPUB at *path*.

    Items are selected with ``isinstance(item, epub.EpubHtml)``; their markup
    is removed with BeautifulSoup and each item's text is followed by a
    newline.
    """
    book = epub.read_epub(path)
    return ''.join(
        BeautifulSoup(item.get_content(), 'html.parser').get_text() + '\n'
        for item in book.get_items()
        if isinstance(item, epub.EpubHtml)
    )


# Maps a lower-cased file extension to the function that extracts its text.
_EXTRACTORS = {
    '.txt': _read_txt,
    '.pdf': _read_pdf,
    '.epub': _read_epub,
}

# Generate a random string for this process instance; it prefixes the
# temporary segment files so that concurrent runs do not overwrite each other.
random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
print(f"Random string for this process: {random_str}")

# Get the directory of the current script; all audio files are written there.
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# Initialize the pipeline (ensure you have installed and set up kokoro correctly)
pipeline = KPipeline(lang_code='a')

# Ask the user for the file path. Surrounding quotes are stripped because
# paths pasted or dragged into a terminal are frequently quoted.
print('Please input the file path (txt, pdf, or epub):')
file_path = input().strip().strip('"\'')

# Determine the file extension and pick the matching extractor.
file_ext = os.path.splitext(file_path)[1].lower()
extract_text = _EXTRACTORS.get(file_ext)
if extract_text is None:
    print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
    sys.exit(1)
if not os.path.isfile(file_path):
    # Fail with a readable message instead of a traceback from open()/read_epub().
    print(f"File not found: {file_path}")
    sys.exit(1)

content = extract_text(file_path)

# Optionally display the extracted content
print(content)

# Remove all instances of the watermark from the content
content = content.replace(WATERMARK, "")

# Combine all text into one large string by removing newline characters.
# This helps ensure the model receives larger chunks to work with.
content = ' '.join(content.splitlines())

if not content.strip():
    # Nothing to speak: stop here rather than exporting an empty WAV file.
    print("No readable text was extracted from the file; nothing to synthesize.")
    sys.exit(1)

# No split pattern is passed, so splitting is left to the pipeline. To split
# on sentence boundaries instead, pass split_pattern=r'(?<=[.!?])\s+'.
generator = pipeline(content, voice=VOICE, speed=SPEED)

# Paths of the segment files actually written, in playback order. Tracking the
# paths (rather than a counter) keeps merge and cleanup correct even if a
# segment is skipped or the run is interrupted half-way.
part_files = []
try:
    # Save each segment with the random string in the filename
    for i, (gs, ps, audio) in enumerate(generator):
        print(i)   # index of the segment
        print(gs)  # graphemes/text for this segment
        print(ps)  # phonemes for this segment
        if audio is None:
            # NOTE(review): presumably only happens when the pipeline yields a
            # result without synthesized audio - confirm against kokoro.
            continue
        part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
        display(Audio(data=audio, rate=SAMPLE_RATE, autoplay=i == 0))
        # Register the path before writing so a partially written file is
        # still removed by the cleanup below.
        part_files.append(part_filename)
        sf.write(part_filename, audio, SAMPLE_RATE)

    if not part_files:
        print("The TTS pipeline produced no audio; no output file was written.")
        sys.exit(1)

    # Merge the individual WAV files into one
    combined = AudioSegment.empty()
    for part_file in part_files:
        combined += AudioSegment.from_wav(part_file)

    # Generate the output file name based on the source file name
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_path = os.path.join(script_directory, f"{base_name}.wav")

    # Export the merged WAV file
    combined.export(output_path, format="wav")
    print(f"Merged WAV file saved as {output_path}")
finally:
    # Cleanup: remove the individual part files, whether or not merging
    # succeeded, so failed runs do not leave segment files behind.
    for part_file in part_files:
        try:
            os.remove(part_file)
        except FileNotFoundError:
            continue
        print(f"Removed {part_file}")