Initial commit of a basically finished project; I may or may not take it further.

2025-11-18 17:02:39 -05:00
commit 68b286913d
5 changed files with 866 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,545 @@
+# from kokoro import KPipeline
+# from IPython.display import display, Audio
+# import soundfile as sf
+# import os
+# import sys 
+# from pydub import AudioSegment
+
+# # need to install pytorch https://pytorch.org/get-started/locally/
+
+# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+# pipeline = KPipeline(lang_code='a')
+# content = ''
+# print('please input path')
+# file_path = input()
+
+# with open(file_path, 'r') as file:
+#     content = file.read()
+# print(content)
+
+
+# generator = pipeline(
+#     content, voice='af_bella', # <= change voice here
+#     speed=1, split_pattern=r'\n+'
+# )
+
+# count = 0
+
+# for i, (gs, ps, audio) in enumerate(generator):
+#     print(i)  # i => index
+#     print(gs) # gs => graphemes/text
+#     print(ps) # ps => phonemes
+#     display(Audio(data=audio, rate=24000, autoplay=i==0))
+#     sf.write(f'{i}.wav', audio, 24000) # save each audio file
+#     count = count + 1
+
+# directory = script_directory
+
+# # Merge the generated WAV files, 0.wav through {count-1}.wav
+# combined = AudioSegment.empty()
+
+# for i in range(count):  # 0 to count-1 inclusive
+#     file_path = f"{directory}/{i}.wav"
+#     sound = AudioSegment.from_wav(file_path)
+#     combined += sound
+
+# # Export the merged WAV file
+# output_path = f"{directory}/message.wav"
+# combined.export(output_path, format="wav")
+
+# print(f"Merged WAV file saved as {output_path}")
+# import os
+# import sys
+# from kokoro import KPipeline
+# from IPython.display import display, Audio
+# import soundfile as sf
+# from pydub import AudioSegment
+# import torch
+
+# # Libraries for PDF and EPUB extraction
+# import PyPDF2
+# from ebooklib import epub
+# from bs4 import BeautifulSoup
+
+# # Get the directory of the current script
+# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
+# pipeline = KPipeline(lang_code='a')
+# pipeline.model.to(device)
+# # Ask the user for the file path
+# print('Please input the file path (txt, pdf, or epub):')
+# file_path = input().strip()
+
+# # Determine the file extension
+# file_ext = os.path.splitext(file_path)[1].lower()
+# content = ''
+
+# if file_ext == '.txt':
+#     # For plain text files, just read the content
+#     with open(file_path, 'r', encoding='utf-8') as file:
+#         content = file.read()
+# elif file_ext == '.pdf':
+#     # For PDFs, open the file in binary mode and extract text from each page
+#     with open(file_path, 'rb') as file:
+#         reader = PyPDF2.PdfReader(file)
+#         for page in reader.pages:
+#             text = page.extract_text()
+#             if text:
+#                 content += text + '\n'
+# elif file_ext == '.epub':
+#     # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
+#     book = epub.read_epub(file_path)
+#     for item in book.get_items():
+#         if item.get_type() == epub.ITEM_DOCUMENT:
+#             soup = BeautifulSoup(item.get_content(), 'html.parser')
+#             text = soup.get_text()
+#             content += text + '\n'
+# else:
+#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
+#     sys.exit(1)
+
+# # Display the extracted content (optional)
+# print(content)
+
+# # Use the pipeline to generate speech from the text content
+# generator = pipeline(
+#     content, voice='af_bella',  # Change the voice here if desired
+#     speed=1, split_pattern=r'\n+'
+# )
+
+# count = 0
+# for i, (gs, ps, audio) in enumerate(generator):
+#     print(i)   # index of the segment
+#     print(gs)  # graphemes/text for this segment
+#     print(ps)  # phonemes for this segment
+#     display(Audio(data=audio, rate=24000, autoplay=i==0))
+#     sf.write(f'{i}.wav', audio, 24000)  # Save each audio file
+#     count += 1
+
+# # Merge all individual WAV files into one
+# combined = AudioSegment.empty()
+# for i in range(count):
+#     wav_path = os.path.join(script_directory, f"{i}.wav")
+#     sound = AudioSegment.from_wav(wav_path)
+#     combined += sound
+
+# # Export the merged WAV file
+# output_path = os.path.join(script_directory, "output.wav")
+# combined.export(output_path, format="wav")
+# print(f"Merged WAV file saved as {output_path}")
+
+# # Cleanup: Remove individual part files after merging
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{i}.wav")
+#     if os.path.exists(part_file):
+#         os.remove(part_file)
+#         print(f"Removed {part_file}")
+
+# import os
+# import sys
+# from kokoro import KPipeline
+# from IPython.display import display, Audio
+# import soundfile as sf
+# from pydub import AudioSegment
+
+# # Libraries for PDF and EPUB extraction
+# import PyPDF2
+# from ebooklib import epub
+# from bs4 import BeautifulSoup
+
+# # Get the directory of the current script
+# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
+# pipeline = KPipeline(lang_code='a')
+
+# # Ask the user for the file path
+# print('Please input the file path (txt, pdf, or epub):')
+# file_path = input().strip()
+
+# # Determine the file extension and prepare content
+# file_ext = os.path.splitext(file_path)[1].lower()
+# content = ''
+
+# if file_ext == '.txt':
+#     with open(file_path, 'r', encoding='utf-8') as file:
+#         content = file.read()
+# elif file_ext == '.pdf':
+#     with open(file_path, 'rb') as file:
+#         reader = PyPDF2.PdfReader(file)
+#         for page in reader.pages:
+#             text = page.extract_text()
+#             if text:
+#                 content += text + '\n'
+# elif file_ext == '.epub':
+#     book = epub.read_epub(file_path)
+#     for item in book.get_items():
+#         if item.get_type() == epub.ITEM_DOCUMENT:
+#             soup = BeautifulSoup(item.get_content(), 'html.parser')
+#             text = soup.get_text()
+#             content += text + '\n'
+# else:
+#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
+#     sys.exit(1)
+
+# # Optionally display the extracted content
+# print(content)
+
+# # Generate the audio segments using the pipeline
+# generator = pipeline(
+#     content, voice='af_bella',  # Change voice if desired
+#     speed=1, split_pattern=r'\n+'
+# )
+
+# count = 0
+# for i, (gs, ps, audio) in enumerate(generator):
+#     print(i)   # index of the segment
+#     print(gs)  # graphemes/text for this segment
+#     print(ps)  # phonemes for this segment
+#     display(Audio(data=audio, rate=24000, autoplay=i==0))
+#     sf.write(f'{i}.wav', audio, 24000)  # Save each audio segment
+#     count += 1
+
+# # Merge the individual WAV files into one
+# combined = AudioSegment.empty()
+# for i in range(count):
+#     wav_path = os.path.join(script_directory, f"{i}.wav")
+#     sound = AudioSegment.from_wav(wav_path)
+#     combined += sound
+
+# # Generate the output file name based on the source file name
+# base_name = os.path.splitext(os.path.basename(file_path))[0]
+# output_path = os.path.join(script_directory, f"{base_name}.wav")
+
+# # Export the merged WAV file
+# combined.export(output_path, format="wav")
+# print(f"Merged WAV file saved as {output_path}")
+
+# # Cleanup: Remove individual part files after merging
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{i}.wav")
+#     if os.path.exists(part_file):
+#         os.remove(part_file)
+#         print(f"Removed {part_file}")
+
+
+
+
+# import os
+# import sys
+# import random
+# import string
+# from kokoro import KPipeline
+# from IPython.display import display, Audio
+# import soundfile as sf
+# from pydub import AudioSegment
+
+# # Libraries for PDF and EPUB extraction
+# import PyPDF2
+# from ebooklib import epub
+# from bs4 import BeautifulSoup
+
+# # Generate a random string for this process instance
+# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
+# print(f"Random string for this process: {random_str}")
+
+# # Get the directory of the current script
+# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
+# pipeline = KPipeline(lang_code='a')
+
+# # Ask the user for the file path
+# print('Please input the file path (txt, pdf, or epub):')
+# file_path = input().strip()
+
+# # Determine the file extension and prepare content
+# file_ext = os.path.splitext(file_path)[1].lower()
+# content = ''
+
+# if file_ext == '.txt':
+#     with open(file_path, 'r', encoding='utf-8') as file:
+#         content = file.read()
+# elif file_ext == '.pdf':
+#     with open(file_path, 'rb') as file:
+#         reader = PyPDF2.PdfReader(file)
+#         for page in reader.pages:
+#             text = page.extract_text()
+#             if text:
+#                 content += text + '\n'
+# elif file_ext == '.epub':
+#     book = epub.read_epub(file_path)
+#     for item in book.get_items():
+#         if item.get_type() == epub.ITEM_DOCUMENT:
+#             soup = BeautifulSoup(item.get_content(), 'html.parser')
+#             text = soup.get_text()
+#             content += text + '\n'
+# else:
+#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
+#     sys.exit(1)
+
+# # Optionally display the extracted content
+# print(content)
+
+# # Generate the audio segments using the pipeline
+# generator = pipeline(
+#     content, voice='af_bella',  # Change voice if desired
+#     speed=1, split_pattern=r'\n+'
+# )
+
+# count = 0
+# # Save each segment with the random string in the filename
+# for i, (gs, ps, audio) in enumerate(generator):
+#     part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     print(i)   # index of the segment
+#     print(gs)  # graphemes/text for this segment
+#     print(ps)  # phonemes for this segment
+#     display(Audio(data=audio, rate=24000, autoplay=i==0))
+#     sf.write(part_filename, audio, 24000)  # Save each audio segment
+#     count += 1
+
+# # Merge the individual WAV files into one
+# combined = AudioSegment.empty()
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     sound = AudioSegment.from_wav(part_file)
+#     combined += sound
+
+# # Generate the output file name based on the source file name
+# base_name = os.path.splitext(os.path.basename(file_path))[0]
+# output_path = os.path.join(script_directory, f"{base_name}.wav")
+
+# # Export the merged WAV file
+# combined.export(output_path, format="wav")
+# print(f"Merged WAV file saved as {output_path}")
+
+# # Cleanup: Remove individual part files after merging
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     if os.path.exists(part_file):
+#         os.remove(part_file)
+#         print(f"Removed {part_file}")
+
+
+# import os
+# import sys
+# import random
+# import string
+# import re
+# from kokoro import KPipeline
+# from IPython.display import display, Audio
+# import soundfile as sf
+# from pydub import AudioSegment
+
+# # Libraries for PDF and EPUB extraction
+# import PyPDF2
+# from ebooklib import epub
+# from bs4 import BeautifulSoup
+
+# # Generate a random string for this process instance
+# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
+# print(f"Random string for this process: {random_str}")
+
+# # Get the directory of the current script
+# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
+# pipeline = KPipeline(lang_code='a')
+
+# # Ask the user for the file path
+# print('Please input the file path (txt, pdf, or epub):')
+# file_path = input().strip()
+
+# # Determine the file extension and prepare content
+# file_ext = os.path.splitext(file_path)[1].lower()
+# content = ''
+
+# if file_ext == '.txt':
+#     with open(file_path, 'r', encoding='utf-8') as file:
+#         content = file.read()
+# elif file_ext == '.pdf':
+#     with open(file_path, 'rb') as file:
+#         reader = PyPDF2.PdfReader(file)
+#         for page in reader.pages:
+#             text = page.extract_text()
+#             if text:
+#                 content += text + '\n'
+# elif file_ext == '.epub':
+#     book = epub.read_epub(file_path)
+#     from ebooklib.epub import EpubHtml  # Import the document class
+#     for item in book.get_items():
+#         # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
+#         if isinstance(item, EpubHtml):
+#             soup = BeautifulSoup(item.get_content(), 'html.parser')
+#             text = soup.get_text()
+#             content += text + '\n'
+
+# else:
+#     print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
+#     sys.exit(1)
+
+# # Optionally display the extracted content
+# print(content)
+
+# # Combine all text into one large string by removing newline characters.
+# # This helps ensure the model receives larger chunks to work with.
+# content = ' '.join(content.splitlines())
+
+# # Option 1: Let the model handle its own splitting by not providing a split pattern:
+# generator = pipeline(
+#     content, voice='af_bella',  # Change voice if desired
+#     speed=1
+# )
+
+# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
+# # generator = pipeline(
+# #     content, voice='af_bella',  # Change voice if desired
+# #     speed=1, split_pattern=r'(?<=[.!?])\s+'
+# # )
+
+# count = 0
+# # Save each segment with the random string in the filename
+# for i, (gs, ps, audio) in enumerate(generator):
+#     part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     print(i)   # index of the segment
+#     print(gs)  # graphemes/text for this segment
+#     print(ps)  # phonemes for this segment
+#     display(Audio(data=audio, rate=24000, autoplay=i==0))
+#     sf.write(part_filename, audio, 24000)  # Save each audio segment
+#     count += 1
+
+# # Merge the individual WAV files into one
+# combined = AudioSegment.empty()
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     sound = AudioSegment.from_wav(part_file)
+#     combined += sound
+
+# # Generate the output file name based on the source file name
+# base_name = os.path.splitext(os.path.basename(file_path))[0]
+# output_path = os.path.join(script_directory, f"{base_name}.wav")
+
+# # Export the merged WAV file
+# combined.export(output_path, format="wav")
+# print(f"Merged WAV file saved as {output_path}")
+
+# # Cleanup: Remove individual part files after merging
+# for i in range(count):
+#     part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
+#     if os.path.exists(part_file):
+#         os.remove(part_file)
+#         print(f"Removed {part_file}")
+
+
+import os
+import sys
+import random
+import string
+import re
+from kokoro import KPipeline
+from IPython.display import display, Audio
+import soundfile as sf
+from pydub import AudioSegment
+
+# Libraries for PDF and EPUB extraction
+import PyPDF2
+from ebooklib import epub
+from bs4 import BeautifulSoup
+
+# Per-run tag: eight characters drawn from lowercase letters and digits.
+# It is echoed here and embedded in the temporary part-file names further down.
+tag_alphabet = string.ascii_lowercase + string.digits
+random_str = ''.join(random.choices(tag_alphabet, k=8))
+print("Random string for this process: " + random_str)
+# Folder containing the launched script (sys.argv[0], made absolute).
+script_directory = os.path.split(os.path.abspath(sys.argv[0]))[0]
+# KPipeline instance used below to synthesize the text (kokoro must be installed).
+pipeline = KPipeline(lang_code='a')
+
+# Ask the user for the file path
+print('Please input the file path (txt, pdf, or epub):')
+# Paths pasted from a file manager or shell often arrive wrapped in quotes;
+# strip them so the extension check and open() see the real path.
+file_path = input().strip().strip('"\'')
+if not os.path.isfile(file_path):
+    print(f"File not found: {file_path}")
+    sys.exit(1)
+
+# The (case-insensitive) extension selects the text extractor.
+file_ext = os.path.splitext(file_path)[1].lower()
+if file_ext == '.txt':
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+elif file_ext == '.pdf':
+    with open(file_path, 'rb') as file:
+        # Pages for which extract_text() returns nothing are skipped.
+        pages = (page.extract_text() for page in PyPDF2.PdfReader(file).pages)
+        content = ''.join(f"{text}\n" for text in pages if text)
+elif file_ext == '.epub':
+    # Only items that are EpubHtml instances contribute text.
+    content = ''.join(
+        BeautifulSoup(item.get_content(), 'html.parser').get_text() + '\n'
+        for item in epub.read_epub(file_path).get_items()
+        if isinstance(item, epub.EpubHtml)
+    )
+else:
+    print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
+    sys.exit(1)
+
+# Echo the extracted text as-is (before any cleanup) so it can be inspected.
+print(content)
+
+# Text cleanup, applied in this order:
+#   1. drop every literal "OceanofPDF.com" occurrence;
+#   2. replace line breaks with single spaces (one long string for the pipeline).
+content = ' '.join(content.replace("OceanofPDF.com", "").splitlines())
+
+# Option 1 (active): call the pipeline without a split_pattern, leaving
+# segmentation to the pipeline itself. Voice and speed can be changed here.
+generator = pipeline(
+    content,
+    speed=0.85,
+    voice='af_heart',
+)
+
+# Option 2 (disabled): split on sentence boundaries instead; uncomment to use.
+# generator = pipeline(
+#     content, voice='af_bella',  # Change voice if desired
+#     speed=1, split_pattern=r'(?<=[.!?])\s+'
+# )
+
+# Each segment goes to its own WAV part file (tagged with the per-run random
+# string); the parts are merged afterwards and always removed at the end.
+part_files = []
+try:
+    for i, (gs, ps, audio) in enumerate(generator):
+        part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
+        print(i)   # index of the segment
+        print(gs)  # graphemes/text for this segment
+        print(ps)  # phonemes for this segment
+        display(Audio(data=audio, rate=24000, autoplay=i == 0))
+        part_files.append(part_file)  # tracked before writing so a partial file is removed too
+        sf.write(part_file, audio, 24000)
+    if not part_files:
+        # Nothing was synthesized (e.g. empty input): don't export an empty WAV.
+        print("No audio was generated; nothing to merge.")
+        sys.exit(1)
+
+    # Merge the parts in generation order.
+    combined = AudioSegment.empty()
+    for part_file in part_files:
+        combined += AudioSegment.from_wav(part_file)
+    # Name the output after the source file and write it next to the script.
+    base_name = os.path.splitext(os.path.basename(file_path))[0]
+    output_path = os.path.join(script_directory, f"{base_name}.wav")
+    combined.export(output_path, format="wav")
+    print(f"Merged WAV file saved as {output_path}")
+finally:
+    # Also runs on errors and Ctrl-C, so part files are not left behind.
+    for part_file in part_files:
+        if os.path.exists(part_file):
+            os.remove(part_file)
+            print(f"Removed {part_file}")
+