# AI-TTS/main.py

# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# import os
# import sys
# from pydub import AudioSegment

# # need to install pytorch https://pytorch.org/get-started/locally/

# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# pipeline = KPipeline(lang_code='a')
# content = ''
# print('please input path')
# file_path = input()

# with open(file_path, 'r') as file:
#     content = file.read()
# print(content)


# generator = pipeline(
#     content, voice='af_bella', # <= change voice here
#     speed=1, split_pattern=r'\n+'
# )

# count = 0

# for i, (gs, ps, audio) in enumerate(generator):
#     print(i)  # i => index
#     print(gs) # gs => graphemes/text
#     print(ps) # ps => phonemes
#     display(Audio(data=audio, rate=24000, autoplay=i==0))
#     sf.write(f'{i}.wav', audio, 24000) # save each audio file
#     count = count + 1

# directory = script_directory

# # Merge WAV files from 0.wav to 334.wav
# combined = AudioSegment.empty()

# for i in range(count):  # 0 to 334 inclusive
#     file_path = f"{directory}/{i}.wav"
#     sound = AudioSegment.from_wav(file_path)
#     combined += sound

# # Export the merged WAV file
# output_path = f"{directory}/message.wav"
# combined.export(output_path, format="wav")

# print(f"Merged WAV file saved as {output_path}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# import torch

# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup

# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# pipeline.model.to(device)
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()

# # Determine the file extension
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''

# if file_ext == '.txt':
#     # For plain text files, just read the content
#     with open(file_path, 'r', encoding='utf-8') as file:
#         content = file.read()
# elif file_ext == '.pdf':
#     # For PDFs, open the file in binary mode and extract text from each page
#     with open(file_path, 'rb') as file:
#         reader = PyPDF2.PdfReader(file)
#         for page in reader.pages:
#             text = page.extract_text()
#             if text:
#                 content += text + '\n'
# elif file_ext == '.epub':
#     # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
#     book = epub.read_epub(file_path)
#     for item in book.get_items():
#         if item.get_type() == epub.ITEM_DOCUMENT:
#             soup = BeautifulSoup(item.get_content(), 'html.parser')
#             text = soup.get_text()
#             content += text + '\n'
# else:
#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
#     sys.exit(1)

# # Display the extracted content (optional)
# print(content)

# # Use the pipeline to generate speech from the text content
# generator = pipeline(
#     content, voice='af_bella',  # Change the voice here if desired
#     speed=1, split_pattern=r'\n+'
# )

# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
#     print(i)   # index of the segment
#     print(gs)  # graphemes/text for this segment
#     print(ps)  # phonemes for this segment
#     display(Audio(data=audio, rate=24000, autoplay=i==0))
#     sf.write(f'{i}.wav', audio, 24000)  # Save each audio file
#     count += 1

# # Merge all individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
#     wav_path = os.path.join(script_directory, f"{i}.wav")
#     sound = AudioSegment.from_wav(wav_path)
#     combined += sound

# # Export the merged WAV file
# output_path = os.path.join(script_directory, "output.wav")
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")

# # Cleanup: Remove individual part files after merging
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{i}.wav")
#     if os.path.exists(part_file):
#         os.remove(part_file)
#         print(f"Removed {part_file}")

# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment

# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup

# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')

# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()

# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''

# if file_ext == '.txt':
#     with open(file_path, 'r', encoding='utf-8') as file:
#         content = file.read()
# elif file_ext == '.pdf':
#     with open(file_path, 'rb') as file:
#         reader = PyPDF2.PdfReader(file)
#         for page in reader.pages:
#             text = page.extract_text()
#             if text:
#                 content += text + '\n'
# elif file_ext == '.epub':
#     book = epub.read_epub(file_path)
#     for item in book.get_items():
#         if item.get_type() == epub.ITEM_DOCUMENT:
#             soup = BeautifulSoup(item.get_content(), 'html.parser')
#             text = soup.get_text()
#             content += text + '\n'
# else:
#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
#     sys.exit(1)

# # Optionally display the extracted content
# print(content)

# # Generate the audio segments using the pipeline
# generator = pipeline(
#     content, voice='af_bella',  # Change voice if desired
#     speed=1, split_pattern=r'\n+'
# )

# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
#     print(i)   # index of the segment
#     print(gs)  # graphemes/text for this segment
#     print(ps)  # phonemes for this segment
#     display(Audio(data=audio, rate=24000, autoplay=i==0))
#     sf.write(f'{i}.wav', audio, 24000)  # Save each audio segment
#     count += 1

# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
#     wav_path = os.path.join(script_directory, f"{i}.wav")
#     sound = AudioSegment.from_wav(wav_path)
#     combined += sound

# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")

# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")

# # Cleanup: Remove individual part files after merging
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{i}.wav")
#     if os.path.exists(part_file):
#         os.remove(part_file)
#         print(f"Removed {part_file}")


# import os
# import sys
# import random
# import string
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment

# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup

# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")

# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')

# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()

# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''

# if file_ext == '.txt':
#     with open(file_path, 'r', encoding='utf-8') as file:
#         content = file.read()
# elif file_ext == '.pdf':
#     with open(file_path, 'rb') as file:
#         reader = PyPDF2.PdfReader(file)
#         for page in reader.pages:
#             text = page.extract_text()
#             if text:
#                 content += text + '\n'
# elif file_ext == '.epub':
#     book = epub.read_epub(file_path)
#     for item in book.get_items():
#         if item.get_type() == epub.ITEM_DOCUMENT:
#             soup = BeautifulSoup(item.get_content(), 'html.parser')
#             text = soup.get_text()
#             content += text + '\n'
# else:
#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
#     sys.exit(1)

# # Optionally display the extracted content
# print(content)

# # Generate the audio segments using the pipeline
# generator = pipeline(
#     content, voice='af_bella',  # Change voice if desired
#     speed=1, split_pattern=r'\n+'
# )

# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
#     part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     print(i)   # index of the segment
#     print(gs)  # graphemes/text for this segment
#     print(ps)  # phonemes for this segment
#     display(Audio(data=audio, rate=24000, autoplay=i==0))
#     sf.write(part_filename, audio, 24000)  # Save each audio segment
#     count += 1

# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     sound = AudioSegment.from_wav(part_file)
#     combined += sound

# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")

# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")

# # Cleanup: Remove individual part files after merging
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     if os.path.exists(part_file):
#         os.remove(part_file)
#         print(f"Removed {part_file}")


# import os
# import sys
# import random
# import string
# import re
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment

# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup

# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")

# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')

# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()

# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''

# if file_ext == '.txt':
#     with open(file_path, 'r', encoding='utf-8') as file:
#         content = file.read()
# elif file_ext == '.pdf':
#     with open(file_path, 'rb') as file:
#         reader = PyPDF2.PdfReader(file)
#         for page in reader.pages:
#             text = page.extract_text()
#             if text:
#                 content += text + '\n'
# elif file_ext == '.epub':
#     book = epub.read_epub(file_path)
#     from ebooklib.epub import EpubHtml  # Import the document class
#     for item in book.get_items():
#         # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
#         if isinstance(item, EpubHtml):
#             soup = BeautifulSoup(item.get_content(), 'html.parser')
#             text = soup.get_text()
#             content += text + '\n'

# else:
#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
#     sys.exit(1)

# # Optionally display the extracted content
# print(content)

# # Combine all text into one large string by removing newline characters.
# # This helps ensure the model receives larger chunks to work with.
# content = ' '.join(content.splitlines())

# # Option 1: Let the model handle its own splitting by not providing a split pattern:
# generator = pipeline(
#     content, voice='af_bella',  # Change voice if desired
#     speed=1
# )

# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# # generator = pipeline(
# #     content, voice='af_bella',  # Change voice if desired
# #     speed=1, split_pattern=r'(?<=[.!?])\s+'
# # )

# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
#     part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     print(i)   # index of the segment
#     print(gs)  # graphemes/text for this segment
#     print(ps)  # phonemes for this segment
#     display(Audio(data=audio, rate=24000, autoplay=i==0))
#     sf.write(part_filename, audio, 24000)  # Save each audio segment
#     count += 1

# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     sound = AudioSegment.from_wav(part_file)
#     combined += sound

# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")

# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")

# # Cleanup: Remove individual part files after merging
# for i in range(count):
#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
#     if os.path.exists(part_file):
#         os.remove(part_file)
#         print(f"Removed {part_file}")


import os
import sys
import random
import string
import re
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
from pydub import AudioSegment

# Libraries for PDF and EPUB extraction
import PyPDF2
from ebooklib import epub
from bs4 import BeautifulSoup

# --- Configuration ---------------------------------------------------------
SAMPLE_RATE = 24000  # rate handed to both the IPython preview and sf.write
VOICE = 'af_heart'   # Change voice if desired
SPEED = 0.8          # speed value forwarded to the pipeline call
WATERMARK = "OceanofPDF.com"  # removed from the text before synthesis
SUPPORTED_EXTENSIONS = ('.txt', '.pdf', '.epub')


def clean_path(raw_path):
    """Return the typed path without surrounding whitespace or quotes.

    Quote stripping is a convenience for paths pasted in quoted form
    (for example after a drag-and-drop into a terminal); an unquoted path
    is returned unchanged apart from whitespace trimming.
    """
    return raw_path.strip().strip('"\'').strip()


def extract_text(file_path):
    """Read the text content of a .txt, .pdf or .epub file.

    Args:
        file_path: Path of the document; the extension (case-insensitive)
            selects the extraction method.

    Returns:
        The extracted text. For PDF and EPUB input every page/document
        that yields text is followed by a newline.

    Raises:
        ValueError: If the extension is not in SUPPORTED_EXTENSIONS.
    """
    file_ext = os.path.splitext(file_path)[1].lower()

    if file_ext == '.txt':
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()

    if file_ext == '.pdf':
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            page_texts = (page.extract_text() for page in reader.pages)
            # Joined while the file is still open; pages without text are skipped.
            return ''.join(text + '\n' for text in page_texts if text)

    if file_ext == '.epub':
        book = epub.read_epub(file_path)
        # Only EpubHtml items are treated as document content.
        return ''.join(
            BeautifulSoup(item.get_content(), 'html.parser').get_text() + '\n'
            for item in book.get_items()
            if isinstance(item, epub.EpubHtml)
        )

    raise ValueError(f"Unsupported file format: {file_ext!r}")


def prepare_text(content):
    """Remove the watermark and flatten the text onto a single line.

    Line breaks are replaced by single spaces so the pipeline receives one
    large string and can do its own chunking.
    """
    without_watermark = content.replace(WATERMARK, "")
    return ' '.join(without_watermark.splitlines())


def main():
    """Prompt for a document, synthesize it, and save one merged WAV file.

    The merged file is written next to this script and named after the
    source document. Each generated segment is first saved as
    ``<random>_<index>.wav`` in the script directory; those part files are
    removed afterwards, even when synthesis or merging fails.

    Exits with status 1 when the format is unsupported, the file does not
    exist, or no text/audio could be produced.
    """
    # Generate a random string for this process instance so that part files
    # of concurrent runs do not share names.
    random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
    print(f"Random string for this process: {random_str}")

    # Get the directory of the current script
    script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

    # Ask the user for the file path
    print('Please input the file path (txt, pdf, or epub):')
    file_path = clean_path(input())

    # Validate the input before building the pipeline so that a bad path is
    # reported immediately.
    file_ext = os.path.splitext(file_path)[1].lower()
    if file_ext not in SUPPORTED_EXTENSIONS:
        print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
        sys.exit(1)
    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        sys.exit(1)

    content = extract_text(file_path)

    # Optionally display the extracted content
    print(content)

    content = prepare_text(content)
    if not content.strip():
        # E.g. a PDF whose pages yield no extractable text.
        print("No readable text was found in the file; nothing to synthesize.")
        sys.exit(1)

    # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
    pipeline = KPipeline(lang_code='a')

    # No split pattern is passed, so the pipeline handles its own splitting.
    # Alternative: pass split_pattern=r'(?<=[.!?])\s+' to split on sentence
    # boundaries instead.
    generator = pipeline(content, voice=VOICE, speed=SPEED)

    part_files = []
    try:
        # Save each segment with the random string in the filename
        for i, (gs, ps, audio) in enumerate(generator):
            part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
            print(i)   # index of the segment
            print(gs)  # graphemes/text for this segment
            print(ps)  # phonemes for this segment
            display(Audio(data=audio, rate=SAMPLE_RATE, autoplay=i == 0))
            # Registered before writing so a partially written file is
            # still cleaned up if the write fails.
            part_files.append(part_filename)
            sf.write(part_filename, audio, SAMPLE_RATE)

        if not part_files:
            print("The pipeline produced no audio segments; nothing to save.")
            sys.exit(1)

        # Merge the individual WAV files into one
        combined = AudioSegment.empty()
        for part_file in part_files:
            combined += AudioSegment.from_wav(part_file)

        # Generate the output file name based on the source file name
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        output_path = os.path.join(script_directory, f"{base_name}.wav")

        # Export the merged WAV file
        combined.export(output_path, format="wav")
        print(f"Merged WAV file saved as {output_path}")
    finally:
        # Cleanup: remove individual part files, whether or not merging succeeded
        for part_file in part_files:
            if os.path.exists(part_file):
                os.remove(part_file)
                print(f"Removed {part_file}")


if __name__ == "__main__":
    main()