Initial commit of a basically finished project; I may or may not take it further.
This commit is contained in:
545
main.py
Normal file
545
main.py
Normal file
@@ -0,0 +1,545 @@
|
||||
# from kokoro import KPipeline
|
||||
# from IPython.display import display, Audio
|
||||
# import soundfile as sf
|
||||
# import os
|
||||
# import sys
|
||||
# from pydub import AudioSegment
|
||||
|
||||
# # need to install pytorch https://pytorch.org/get-started/locally/
|
||||
|
||||
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
# pipeline = KPipeline(lang_code='a')
|
||||
# content = ''
|
||||
# print('please input path')
|
||||
# file_path = input()
|
||||
|
||||
# with open(file_path, 'r') as file:
|
||||
# content = file.read()
|
||||
# print(content)
|
||||
|
||||
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # <= change voice here
|
||||
# speed=1, split_pattern=r'\n+'
|
||||
# )
|
||||
|
||||
# count = 0
|
||||
|
||||
# for i, (gs, ps, audio) in enumerate(generator):
|
||||
# print(i) # i => index
|
||||
# print(gs) # gs => graphemes/text
|
||||
# print(ps) # ps => phonemes
|
||||
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
# sf.write(f'{i}.wav', audio, 24000) # save each audio file
|
||||
# count = count + 1
|
||||
|
||||
# directory = script_directory
|
||||
|
||||
# # Merge the generated WAV files, 0.wav through {count - 1}.wav
|
||||
# combined = AudioSegment.empty()
|
||||
|
||||
# for i in range(count): # 0 to count - 1 inclusive
|
||||
# file_path = f"{directory}/{i}.wav"
|
||||
# sound = AudioSegment.from_wav(file_path)
|
||||
# combined += sound
|
||||
|
||||
# # Export the merged WAV file
|
||||
# output_path = f"{directory}/message.wav"
|
||||
# combined.export(output_path, format="wav")
|
||||
|
||||
# print(f"Merged WAV file saved as {output_path}")
|
||||
# import os
|
||||
# import sys
|
||||
# from kokoro import KPipeline
|
||||
# from IPython.display import display, Audio
|
||||
# import soundfile as sf
|
||||
# from pydub import AudioSegment
|
||||
# import torch
|
||||
|
||||
# # Libraries for PDF and EPUB extraction
|
||||
# import PyPDF2
|
||||
# from ebooklib import epub
|
||||
# from bs4 import BeautifulSoup
|
||||
|
||||
# # Get the directory of the current script
|
||||
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
||||
# pipeline = KPipeline(lang_code='a')
|
||||
# pipeline.model.to(device)
|
||||
# # Ask the user for the file path
|
||||
# print('Please input the file path (txt, pdf, or epub):')
|
||||
# file_path = input().strip()
|
||||
|
||||
# # Determine the file extension
|
||||
# file_ext = os.path.splitext(file_path)[1].lower()
|
||||
# content = ''
|
||||
|
||||
# if file_ext == '.txt':
|
||||
# # For plain text files, just read the content
|
||||
# with open(file_path, 'r', encoding='utf-8') as file:
|
||||
# content = file.read()
|
||||
# elif file_ext == '.pdf':
|
||||
# # For PDFs, open the file in binary mode and extract text from each page
|
||||
# with open(file_path, 'rb') as file:
|
||||
# reader = PyPDF2.PdfReader(file)
|
||||
# for page in reader.pages:
|
||||
# text = page.extract_text()
|
||||
# if text:
|
||||
# content += text + '\n'
|
||||
# elif file_ext == '.epub':
|
||||
# # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
|
||||
# book = epub.read_epub(file_path)
|
||||
# for item in book.get_items():
|
||||
# if item.get_type() == epub.ITEM_DOCUMENT:
|
||||
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
||||
# text = soup.get_text()
|
||||
# content += text + '\n'
|
||||
# else:
|
||||
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
||||
# sys.exit(1)
|
||||
|
||||
# # Display the extracted content (optional)
|
||||
# print(content)
|
||||
|
||||
# # Use the pipeline to generate speech from the text content
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # Change the voice here if desired
|
||||
# speed=1, split_pattern=r'\n+'
|
||||
# )
|
||||
|
||||
# count = 0
|
||||
# for i, (gs, ps, audio) in enumerate(generator):
|
||||
# print(i) # index of the segment
|
||||
# print(gs) # graphemes/text for this segment
|
||||
# print(ps) # phonemes for this segment
|
||||
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
# sf.write(f'{i}.wav', audio, 24000) # Save each audio file
|
||||
# count += 1
|
||||
|
||||
# # Merge all individual WAV files into one
|
||||
# combined = AudioSegment.empty()
|
||||
# for i in range(count):
|
||||
# wav_path = os.path.join(script_directory, f"{i}.wav")
|
||||
# sound = AudioSegment.from_wav(wav_path)
|
||||
# combined += sound
|
||||
|
||||
# # Export the merged WAV file
|
||||
# output_path = os.path.join(script_directory, "output.wav")
|
||||
# combined.export(output_path, format="wav")
|
||||
# print(f"Merged WAV file saved as {output_path}")
|
||||
|
||||
# # Cleanup: Remove individual part files after merging
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{i}.wav")
|
||||
# if os.path.exists(part_file):
|
||||
# os.remove(part_file)
|
||||
# print(f"Removed {part_file}")
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
# from kokoro import KPipeline
|
||||
# from IPython.display import display, Audio
|
||||
# import soundfile as sf
|
||||
# from pydub import AudioSegment
|
||||
|
||||
# # Libraries for PDF and EPUB extraction
|
||||
# import PyPDF2
|
||||
# from ebooklib import epub
|
||||
# from bs4 import BeautifulSoup
|
||||
|
||||
# # Get the directory of the current script
|
||||
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
||||
# pipeline = KPipeline(lang_code='a')
|
||||
|
||||
# # Ask the user for the file path
|
||||
# print('Please input the file path (txt, pdf, or epub):')
|
||||
# file_path = input().strip()
|
||||
|
||||
# # Determine the file extension and prepare content
|
||||
# file_ext = os.path.splitext(file_path)[1].lower()
|
||||
# content = ''
|
||||
|
||||
# if file_ext == '.txt':
|
||||
# with open(file_path, 'r', encoding='utf-8') as file:
|
||||
# content = file.read()
|
||||
# elif file_ext == '.pdf':
|
||||
# with open(file_path, 'rb') as file:
|
||||
# reader = PyPDF2.PdfReader(file)
|
||||
# for page in reader.pages:
|
||||
# text = page.extract_text()
|
||||
# if text:
|
||||
# content += text + '\n'
|
||||
# elif file_ext == '.epub':
|
||||
# book = epub.read_epub(file_path)
|
||||
# for item in book.get_items():
|
||||
# if item.get_type() == epub.ITEM_DOCUMENT:
|
||||
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
||||
# text = soup.get_text()
|
||||
# content += text + '\n'
|
||||
# else:
|
||||
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
||||
# sys.exit(1)
|
||||
|
||||
# # Optionally display the extracted content
|
||||
# print(content)
|
||||
|
||||
# # Generate the audio segments using the pipeline
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # Change voice if desired
|
||||
# speed=1, split_pattern=r'\n+'
|
||||
# )
|
||||
|
||||
# count = 0
|
||||
# for i, (gs, ps, audio) in enumerate(generator):
|
||||
# print(i) # index of the segment
|
||||
# print(gs) # graphemes/text for this segment
|
||||
# print(ps) # phonemes for this segment
|
||||
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
# sf.write(f'{i}.wav', audio, 24000) # Save each audio segment
|
||||
# count += 1
|
||||
|
||||
# # Merge the individual WAV files into one
|
||||
# combined = AudioSegment.empty()
|
||||
# for i in range(count):
|
||||
# wav_path = os.path.join(script_directory, f"{i}.wav")
|
||||
# sound = AudioSegment.from_wav(wav_path)
|
||||
# combined += sound
|
||||
|
||||
# # Generate the output file name based on the source file name
|
||||
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
||||
|
||||
# # Export the merged WAV file
|
||||
# combined.export(output_path, format="wav")
|
||||
# print(f"Merged WAV file saved as {output_path}")
|
||||
|
||||
# # Cleanup: Remove individual part files after merging
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{i}.wav")
|
||||
# if os.path.exists(part_file):
|
||||
# os.remove(part_file)
|
||||
# print(f"Removed {part_file}")
|
||||
|
||||
|
||||
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
# import random
|
||||
# import string
|
||||
# from kokoro import KPipeline
|
||||
# from IPython.display import display, Audio
|
||||
# import soundfile as sf
|
||||
# from pydub import AudioSegment
|
||||
|
||||
# # Libraries for PDF and EPUB extraction
|
||||
# import PyPDF2
|
||||
# from ebooklib import epub
|
||||
# from bs4 import BeautifulSoup
|
||||
|
||||
# # Generate a random string for this process instance
|
||||
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
|
||||
# print(f"Random string for this process: {random_str}")
|
||||
|
||||
# # Get the directory of the current script
|
||||
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
||||
# pipeline = KPipeline(lang_code='a')
|
||||
|
||||
# # Ask the user for the file path
|
||||
# print('Please input the file path (txt, pdf, or epub):')
|
||||
# file_path = input().strip()
|
||||
|
||||
# # Determine the file extension and prepare content
|
||||
# file_ext = os.path.splitext(file_path)[1].lower()
|
||||
# content = ''
|
||||
|
||||
# if file_ext == '.txt':
|
||||
# with open(file_path, 'r', encoding='utf-8') as file:
|
||||
# content = file.read()
|
||||
# elif file_ext == '.pdf':
|
||||
# with open(file_path, 'rb') as file:
|
||||
# reader = PyPDF2.PdfReader(file)
|
||||
# for page in reader.pages:
|
||||
# text = page.extract_text()
|
||||
# if text:
|
||||
# content += text + '\n'
|
||||
# elif file_ext == '.epub':
|
||||
# book = epub.read_epub(file_path)
|
||||
# for item in book.get_items():
|
||||
# if item.get_type() == epub.ITEM_DOCUMENT:
|
||||
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
||||
# text = soup.get_text()
|
||||
# content += text + '\n'
|
||||
# else:
|
||||
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
||||
# sys.exit(1)
|
||||
|
||||
# # Optionally display the extracted content
|
||||
# print(content)
|
||||
|
||||
# # Generate the audio segments using the pipeline
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # Change voice if desired
|
||||
# speed=1, split_pattern=r'\n+'
|
||||
# )
|
||||
|
||||
# count = 0
|
||||
# # Save each segment with the random string in the filename
|
||||
# for i, (gs, ps, audio) in enumerate(generator):
|
||||
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# print(i) # index of the segment
|
||||
# print(gs) # graphemes/text for this segment
|
||||
# print(ps) # phonemes for this segment
|
||||
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
# sf.write(part_filename, audio, 24000) # Save each audio segment
|
||||
# count += 1
|
||||
|
||||
# # Merge the individual WAV files into one
|
||||
# combined = AudioSegment.empty()
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# sound = AudioSegment.from_wav(part_file)
|
||||
# combined += sound
|
||||
|
||||
# # Generate the output file name based on the source file name
|
||||
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
||||
|
||||
# # Export the merged WAV file
|
||||
# combined.export(output_path, format="wav")
|
||||
# print(f"Merged WAV file saved as {output_path}")
|
||||
|
||||
# # Cleanup: Remove individual part files after merging
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# if os.path.exists(part_file):
|
||||
# os.remove(part_file)
|
||||
# print(f"Removed {part_file}")
|
||||
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
# import random
|
||||
# import string
|
||||
# import re
|
||||
# from kokoro import KPipeline
|
||||
# from IPython.display import display, Audio
|
||||
# import soundfile as sf
|
||||
# from pydub import AudioSegment
|
||||
|
||||
# # Libraries for PDF and EPUB extraction
|
||||
# import PyPDF2
|
||||
# from ebooklib import epub
|
||||
# from bs4 import BeautifulSoup
|
||||
|
||||
# # Generate a random string for this process instance
|
||||
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
|
||||
# print(f"Random string for this process: {random_str}")
|
||||
|
||||
# # Get the directory of the current script
|
||||
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
||||
# pipeline = KPipeline(lang_code='a')
|
||||
|
||||
# # Ask the user for the file path
|
||||
# print('Please input the file path (txt, pdf, or epub):')
|
||||
# file_path = input().strip()
|
||||
|
||||
# # Determine the file extension and prepare content
|
||||
# file_ext = os.path.splitext(file_path)[1].lower()
|
||||
# content = ''
|
||||
|
||||
# if file_ext == '.txt':
|
||||
# with open(file_path, 'r', encoding='utf-8') as file:
|
||||
# content = file.read()
|
||||
# elif file_ext == '.pdf':
|
||||
# with open(file_path, 'rb') as file:
|
||||
# reader = PyPDF2.PdfReader(file)
|
||||
# for page in reader.pages:
|
||||
# text = page.extract_text()
|
||||
# if text:
|
||||
# content += text + '\n'
|
||||
# elif file_ext == '.epub':
|
||||
# book = epub.read_epub(file_path)
|
||||
# from ebooklib.epub import EpubHtml # Import the document class
|
||||
# for item in book.get_items():
|
||||
# # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
|
||||
# if isinstance(item, EpubHtml):
|
||||
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
||||
# text = soup.get_text()
|
||||
# content += text + '\n'
|
||||
|
||||
# else:
|
||||
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
||||
# sys.exit(1)
|
||||
|
||||
# # Optionally display the extracted content
|
||||
# print(content)
|
||||
|
||||
# # Combine all text into one large string by removing newline characters.
|
||||
# # This helps ensure the model receives larger chunks to work with.
|
||||
# content = ' '.join(content.splitlines())
|
||||
|
||||
# # Option 1: Let the model handle its own splitting by not providing a split pattern:
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # Change voice if desired
|
||||
# speed=1
|
||||
# )
|
||||
|
||||
# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
|
||||
# # generator = pipeline(
|
||||
# # content, voice='af_bella', # Change voice if desired
|
||||
# # speed=1, split_pattern=r'(?<=[.!?])\s+'
|
||||
# # )
|
||||
|
||||
# count = 0
|
||||
# # Save each segment with the random string in the filename
|
||||
# for i, (gs, ps, audio) in enumerate(generator):
|
||||
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# print(i) # index of the segment
|
||||
# print(gs) # graphemes/text for this segment
|
||||
# print(ps) # phonemes for this segment
|
||||
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
# sf.write(part_filename, audio, 24000) # Save each audio segment
|
||||
# count += 1
|
||||
|
||||
# # Merge the individual WAV files into one
|
||||
# combined = AudioSegment.empty()
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# sound = AudioSegment.from_wav(part_file)
|
||||
# combined += sound
|
||||
|
||||
# # Generate the output file name based on the source file name
|
||||
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
||||
|
||||
# # Export the merged WAV file
|
||||
# combined.export(output_path, format="wav")
|
||||
# print(f"Merged WAV file saved as {output_path}")
|
||||
|
||||
# # Cleanup: Remove individual part files after merging
|
||||
# for i in range(count):
|
||||
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
# if os.path.exists(part_file):
|
||||
# os.remove(part_file)
|
||||
# print(f"Removed {part_file}")
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import string
|
||||
import re
|
||||
from kokoro import KPipeline
|
||||
from IPython.display import display, Audio
|
||||
import soundfile as sf
|
||||
from pydub import AudioSegment
|
||||
|
||||
# Libraries for PDF and EPUB extraction
|
||||
import PyPDF2
|
||||
from ebooklib import epub
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Generate a random string for this process instance
|
||||
random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
|
||||
print(f"Random string for this process: {random_str}")
|
||||
|
||||
# Get the directory of the current script
|
||||
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
# Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
||||
pipeline = KPipeline(lang_code='a')
|
||||
|
||||
# Ask the user for the file path
|
||||
print('Please input the file path (txt, pdf, or epub):')
|
||||
file_path = input().strip()
|
||||
|
||||
# Determine the file extension and prepare content
|
||||
file_ext = os.path.splitext(file_path)[1].lower()
|
||||
content = ''
|
||||
|
||||
if file_ext == '.txt':
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
elif file_ext == '.pdf':
|
||||
with open(file_path, 'rb') as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
for page in reader.pages:
|
||||
text = page.extract_text()
|
||||
if text:
|
||||
content += text + '\n'
|
||||
elif file_ext == '.epub':
|
||||
book = epub.read_epub(file_path)
|
||||
from ebooklib.epub import EpubHtml # Use EpubHtml for document-type items
|
||||
for item in book.get_items():
|
||||
if isinstance(item, EpubHtml):
|
||||
soup = BeautifulSoup(item.get_content(), 'html.parser')
|
||||
text = soup.get_text()
|
||||
content += text + '\n'
|
||||
else:
|
||||
print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
||||
sys.exit(1)
|
||||
|
||||
# Optionally display the extracted content
|
||||
print(content)
|
||||
|
||||
# Remove all instances of "OceanofPDF.com" from the content
|
||||
content = content.replace("OceanofPDF.com", "")
|
||||
|
||||
# Combine all text into one large string by removing newline characters.
|
||||
# This helps ensure the model receives larger chunks to work with.
|
||||
content = ' '.join(content.splitlines())
|
||||
|
||||
# Option 1: Let the model handle its own splitting by not providing a split pattern:
|
||||
generator = pipeline(
|
||||
content, voice='af_heart', # Change voice if desired
|
||||
speed=0.85
|
||||
)
|
||||
|
||||
# Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
|
||||
# generator = pipeline(
|
||||
# content, voice='af_bella', # Change voice if desired
|
||||
# speed=1, split_pattern=r'(?<=[.!?])\s+'
|
||||
# )
|
||||
|
||||
count = 0
|
||||
# Save each segment with the random string in the filename
|
||||
for i, (gs, ps, audio) in enumerate(generator):
|
||||
part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
print(i) # index of the segment
|
||||
print(gs) # graphemes/text for this segment
|
||||
print(ps) # phonemes for this segment
|
||||
display(Audio(data=audio, rate=24000, autoplay=i==0))
|
||||
sf.write(part_filename, audio, 24000) # Save each audio segment
|
||||
count += 1
|
||||
|
||||
# Merge the individual WAV files into one
|
||||
combined = AudioSegment.empty()
|
||||
for i in range(count):
|
||||
part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
sound = AudioSegment.from_wav(part_file)
|
||||
combined += sound
|
||||
|
||||
# Generate the output file name based on the source file name
|
||||
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
output_path = os.path.join(script_directory, f"{base_name}.wav")
|
||||
|
||||
# Export the merged WAV file
|
||||
combined.export(output_path, format="wav")
|
||||
print(f"Merged WAV file saved as {output_path}")
|
||||
|
||||
# Cleanup: Remove individual part files after merging
|
||||
for i in range(count):
|
||||
part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
||||
if os.path.exists(part_file):
|
||||
os.remove(part_file)
|
||||
print(f"Removed {part_file}")
|
||||
|
||||
Reference in New Issue
Block a user