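# Text-to-speech script: reads a .txt, .pdf, or .epub file and writes a single
# merged WAV file using the Kokoro pipeline.
# Earlier iterations of the script are kept below as commented-out history.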
# from kokoro import KPipeline
|
|
# from IPython.display import display, Audio
|
|
# import soundfile as sf
|
|
# import os
|
|
# import sys
|
|
# from pydub import AudioSegment
|
|
|
|
# # need to install pytorch https://pytorch.org/get-started/locally/
|
|
|
|
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
|
|
|
# pipeline = KPipeline(lang_code='a')
|
|
# content = ''
|
|
# print('please input path')
|
|
# file_path = input()
|
|
|
|
# with open(file_path, 'r') as file:
|
|
# content = file.read()
|
|
# print(content)
|
|
|
|
|
|
# generator = pipeline(
|
|
# content, voice='af_bella', # <= change voice here
|
|
# speed=1, split_pattern=r'\n+'
|
|
# )
|
|
|
|
# count = 0
|
|
|
|
# for i, (gs, ps, audio) in enumerate(generator):
|
|
# print(i) # i => index
|
|
# print(gs) # gs => graphemes/text
|
|
# print(ps) # ps => phonemes
|
|
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
|
# sf.write(f'{i}.wav', audio, 24000) # save each audio file
|
|
# count = count + 1
|
|
|
|
# directory = script_directory
|
|
|
|
# # Merge WAV files from 0.wav to 334.wav
|
|
# combined = AudioSegment.empty()
|
|
|
|
# for i in range(count): # 0 to 334 inclusive
|
|
# file_path = f"{directory}/{i}.wav"
|
|
# sound = AudioSegment.from_wav(file_path)
|
|
# combined += sound
|
|
|
|
# # Export the merged WAV file
|
|
# output_path = f"{directory}/message.wav"
|
|
# combined.export(output_path, format="wav")
|
|
|
|
# print(f"Merged WAV file saved as {output_path}")
|
|
# import os
|
|
# import sys
|
|
# from kokoro import KPipeline
|
|
# from IPython.display import display, Audio
|
|
# import soundfile as sf
|
|
# from pydub import AudioSegment
|
|
# import torch
|
|
|
|
# # Libraries for PDF and EPUB extraction
|
|
# import PyPDF2
|
|
# from ebooklib import epub
|
|
# from bs4 import BeautifulSoup
|
|
|
|
# # Get the directory of the current script
|
|
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
|
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
|
# pipeline = KPipeline(lang_code='a')
|
|
# pipeline.model.to(device)
|
|
# # Ask the user for the file path
|
|
# print('Please input the file path (txt, pdf, or epub):')
|
|
# file_path = input().strip()
|
|
|
|
# # Determine the file extension
|
|
# file_ext = os.path.splitext(file_path)[1].lower()
|
|
# content = ''
|
|
|
|
# if file_ext == '.txt':
|
|
# # For plain text files, just read the content
|
|
# with open(file_path, 'r', encoding='utf-8') as file:
|
|
# content = file.read()
|
|
# elif file_ext == '.pdf':
|
|
# # For PDFs, open the file in binary mode and extract text from each page
|
|
# with open(file_path, 'rb') as file:
|
|
# reader = PyPDF2.PdfReader(file)
|
|
# for page in reader.pages:
|
|
# text = page.extract_text()
|
|
# if text:
|
|
# content += text + '\n'
|
|
# elif file_ext == '.epub':
|
|
# # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
|
|
# book = epub.read_epub(file_path)
|
|
# for item in book.get_items():
|
|
# if item.get_type() == epub.ITEM_DOCUMENT:
|
|
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
|
# text = soup.get_text()
|
|
# content += text + '\n'
|
|
# else:
|
|
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
|
# sys.exit(1)
|
|
|
|
# # Display the extracted content (optional)
|
|
# print(content)
|
|
|
|
# # Use the pipeline to generate speech from the text content
|
|
# generator = pipeline(
|
|
# content, voice='af_bella', # Change the voice here if desired
|
|
# speed=1, split_pattern=r'\n+'
|
|
# )
|
|
|
|
# count = 0
|
|
# for i, (gs, ps, audio) in enumerate(generator):
|
|
# print(i) # index of the segment
|
|
# print(gs) # graphemes/text for this segment
|
|
# print(ps) # phonemes for this segment
|
|
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
|
# sf.write(f'{i}.wav', audio, 24000) # Save each audio file
|
|
# count += 1
|
|
|
|
# # Merge all individual WAV files into one
|
|
# combined = AudioSegment.empty()
|
|
# for i in range(count):
|
|
# wav_path = os.path.join(script_directory, f"{i}.wav")
|
|
# sound = AudioSegment.from_wav(wav_path)
|
|
# combined += sound
|
|
|
|
# # Export the merged WAV file
|
|
# output_path = os.path.join(script_directory, "output.wav")
|
|
# combined.export(output_path, format="wav")
|
|
# print(f"Merged WAV file saved as {output_path}")
|
|
|
|
# # Cleanup: Remove individual part files after merging
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{i}.wav")
|
|
# if os.path.exists(part_file):
|
|
# os.remove(part_file)
|
|
# print(f"Removed {part_file}")
|
|
|
|
# import os
|
|
# import sys
|
|
# from kokoro import KPipeline
|
|
# from IPython.display import display, Audio
|
|
# import soundfile as sf
|
|
# from pydub import AudioSegment
|
|
|
|
# # Libraries for PDF and EPUB extraction
|
|
# import PyPDF2
|
|
# from ebooklib import epub
|
|
# from bs4 import BeautifulSoup
|
|
|
|
# # Get the directory of the current script
|
|
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
|
|
|
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
|
# pipeline = KPipeline(lang_code='a')
|
|
|
|
# # Ask the user for the file path
|
|
# print('Please input the file path (txt, pdf, or epub):')
|
|
# file_path = input().strip()
|
|
|
|
# # Determine the file extension and prepare content
|
|
# file_ext = os.path.splitext(file_path)[1].lower()
|
|
# content = ''
|
|
|
|
# if file_ext == '.txt':
|
|
# with open(file_path, 'r', encoding='utf-8') as file:
|
|
# content = file.read()
|
|
# elif file_ext == '.pdf':
|
|
# with open(file_path, 'rb') as file:
|
|
# reader = PyPDF2.PdfReader(file)
|
|
# for page in reader.pages:
|
|
# text = page.extract_text()
|
|
# if text:
|
|
# content += text + '\n'
|
|
# elif file_ext == '.epub':
|
|
# book = epub.read_epub(file_path)
|
|
# for item in book.get_items():
|
|
# if item.get_type() == epub.ITEM_DOCUMENT:
|
|
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
|
# text = soup.get_text()
|
|
# content += text + '\n'
|
|
# else:
|
|
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
|
# sys.exit(1)
|
|
|
|
# # Optionally display the extracted content
|
|
# print(content)
|
|
|
|
# # Generate the audio segments using the pipeline
|
|
# generator = pipeline(
|
|
# content, voice='af_bella', # Change voice if desired
|
|
# speed=1, split_pattern=r'\n+'
|
|
# )
|
|
|
|
# count = 0
|
|
# for i, (gs, ps, audio) in enumerate(generator):
|
|
# print(i) # index of the segment
|
|
# print(gs) # graphemes/text for this segment
|
|
# print(ps) # phonemes for this segment
|
|
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
|
# sf.write(f'{i}.wav', audio, 24000) # Save each audio segment
|
|
# count += 1
|
|
|
|
# # Merge the individual WAV files into one
|
|
# combined = AudioSegment.empty()
|
|
# for i in range(count):
|
|
# wav_path = os.path.join(script_directory, f"{i}.wav")
|
|
# sound = AudioSegment.from_wav(wav_path)
|
|
# combined += sound
|
|
|
|
# # Generate the output file name based on the source file name
|
|
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
|
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
|
|
|
# # Export the merged WAV file
|
|
# combined.export(output_path, format="wav")
|
|
# print(f"Merged WAV file saved as {output_path}")
|
|
|
|
# # Cleanup: Remove individual part files after merging
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{i}.wav")
|
|
# if os.path.exists(part_file):
|
|
# os.remove(part_file)
|
|
# print(f"Removed {part_file}")
|
|
|
|
|
|
|
|
|
|
# import os
|
|
# import sys
|
|
# import random
|
|
# import string
|
|
# from kokoro import KPipeline
|
|
# from IPython.display import display, Audio
|
|
# import soundfile as sf
|
|
# from pydub import AudioSegment
|
|
|
|
# # Libraries for PDF and EPUB extraction
|
|
# import PyPDF2
|
|
# from ebooklib import epub
|
|
# from bs4 import BeautifulSoup
|
|
|
|
# # Generate a random string for this process instance
|
|
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
|
|
# print(f"Random string for this process: {random_str}")
|
|
|
|
# # Get the directory of the current script
|
|
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
|
|
|
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
|
# pipeline = KPipeline(lang_code='a')
|
|
|
|
# # Ask the user for the file path
|
|
# print('Please input the file path (txt, pdf, or epub):')
|
|
# file_path = input().strip()
|
|
|
|
# # Determine the file extension and prepare content
|
|
# file_ext = os.path.splitext(file_path)[1].lower()
|
|
# content = ''
|
|
|
|
# if file_ext == '.txt':
|
|
# with open(file_path, 'r', encoding='utf-8') as file:
|
|
# content = file.read()
|
|
# elif file_ext == '.pdf':
|
|
# with open(file_path, 'rb') as file:
|
|
# reader = PyPDF2.PdfReader(file)
|
|
# for page in reader.pages:
|
|
# text = page.extract_text()
|
|
# if text:
|
|
# content += text + '\n'
|
|
# elif file_ext == '.epub':
|
|
# book = epub.read_epub(file_path)
|
|
# for item in book.get_items():
|
|
# if item.get_type() == epub.ITEM_DOCUMENT:
|
|
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
|
# text = soup.get_text()
|
|
# content += text + '\n'
|
|
# else:
|
|
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
|
# sys.exit(1)
|
|
|
|
# # Optionally display the extracted content
|
|
# print(content)
|
|
|
|
# # Generate the audio segments using the pipeline
|
|
# generator = pipeline(
|
|
# content, voice='af_bella', # Change voice if desired
|
|
# speed=1, split_pattern=r'\n+'
|
|
# )
|
|
|
|
# count = 0
|
|
# # Save each segment with the random string in the filename
|
|
# for i, (gs, ps, audio) in enumerate(generator):
|
|
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# print(i) # index of the segment
|
|
# print(gs) # graphemes/text for this segment
|
|
# print(ps) # phonemes for this segment
|
|
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
|
# sf.write(part_filename, audio, 24000) # Save each audio segment
|
|
# count += 1
|
|
|
|
# # Merge the individual WAV files into one
|
|
# combined = AudioSegment.empty()
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# sound = AudioSegment.from_wav(part_file)
|
|
# combined += sound
|
|
|
|
# # Generate the output file name based on the source file name
|
|
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
|
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
|
|
|
# # Export the merged WAV file
|
|
# combined.export(output_path, format="wav")
|
|
# print(f"Merged WAV file saved as {output_path}")
|
|
|
|
# # Cleanup: Remove individual part files after merging
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# if os.path.exists(part_file):
|
|
# os.remove(part_file)
|
|
# print(f"Removed {part_file}")
|
|
|
|
|
|
# import os
|
|
# import sys
|
|
# import random
|
|
# import string
|
|
# import re
|
|
# from kokoro import KPipeline
|
|
# from IPython.display import display, Audio
|
|
# import soundfile as sf
|
|
# from pydub import AudioSegment
|
|
|
|
# # Libraries for PDF and EPUB extraction
|
|
# import PyPDF2
|
|
# from ebooklib import epub
|
|
# from bs4 import BeautifulSoup
|
|
|
|
# # Generate a random string for this process instance
|
|
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
|
|
# print(f"Random string for this process: {random_str}")
|
|
|
|
# # Get the directory of the current script
|
|
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
|
|
|
|
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
|
|
# pipeline = KPipeline(lang_code='a')
|
|
|
|
# # Ask the user for the file path
|
|
# print('Please input the file path (txt, pdf, or epub):')
|
|
# file_path = input().strip()
|
|
|
|
# # Determine the file extension and prepare content
|
|
# file_ext = os.path.splitext(file_path)[1].lower()
|
|
# content = ''
|
|
|
|
# if file_ext == '.txt':
|
|
# with open(file_path, 'r', encoding='utf-8') as file:
|
|
# content = file.read()
|
|
# elif file_ext == '.pdf':
|
|
# with open(file_path, 'rb') as file:
|
|
# reader = PyPDF2.PdfReader(file)
|
|
# for page in reader.pages:
|
|
# text = page.extract_text()
|
|
# if text:
|
|
# content += text + '\n'
|
|
# elif file_ext == '.epub':
|
|
# book = epub.read_epub(file_path)
|
|
# from ebooklib.epub import EpubHtml # Import the document class
|
|
# for item in book.get_items():
|
|
# # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
|
|
# if isinstance(item, EpubHtml):
|
|
# soup = BeautifulSoup(item.get_content(), 'html.parser')
|
|
# text = soup.get_text()
|
|
# content += text + '\n'
|
|
|
|
# else:
|
|
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
|
|
# sys.exit(1)
|
|
|
|
# # Optionally display the extracted content
|
|
# print(content)
|
|
|
|
# # Combine all text into one large string by removing newline characters.
|
|
# # This helps ensure the model receives larger chunks to work with.
|
|
# content = ' '.join(content.splitlines())
|
|
|
|
# # Option 1: Let the model handle its own splitting by not providing a split pattern:
|
|
# generator = pipeline(
|
|
# content, voice='af_bella', # Change voice if desired
|
|
# speed=1
|
|
# )
|
|
|
|
# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
|
|
# # generator = pipeline(
|
|
# # content, voice='af_bella', # Change voice if desired
|
|
# # speed=1, split_pattern=r'(?<=[.!?])\s+'
|
|
# # )
|
|
|
|
# count = 0
|
|
# # Save each segment with the random string in the filename
|
|
# for i, (gs, ps, audio) in enumerate(generator):
|
|
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# print(i) # index of the segment
|
|
# print(gs) # graphemes/text for this segment
|
|
# print(ps) # phonemes for this segment
|
|
# display(Audio(data=audio, rate=24000, autoplay=i==0))
|
|
# sf.write(part_filename, audio, 24000) # Save each audio segment
|
|
# count += 1
|
|
|
|
# # Merge the individual WAV files into one
|
|
# combined = AudioSegment.empty()
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# sound = AudioSegment.from_wav(part_file)
|
|
# combined += sound
|
|
|
|
# # Generate the output file name based on the source file name
|
|
# base_name = os.path.splitext(os.path.basename(file_path))[0]
|
|
# output_path = os.path.join(script_directory, f"{base_name}.wav")
|
|
|
|
# # Export the merged WAV file
|
|
# combined.export(output_path, format="wav")
|
|
# print(f"Merged WAV file saved as {output_path}")
|
|
|
|
# # Cleanup: Remove individual part files after merging
|
|
# for i in range(count):
|
|
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
|
|
# if os.path.exists(part_file):
|
|
# os.remove(part_file)
|
|
# print(f"Removed {part_file}")
|
|
|
|
|
|
import os
import random
import re
import string
import sys

from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
from pydub import AudioSegment

# Libraries for PDF and EPUB extraction
import PyPDF2
from ebooklib import epub
from ebooklib.epub import EpubHtml
from bs4 import BeautifulSoup

# Generate a random string for this process instance.  It prefixes the
# temporary per-segment WAV files so that part files from different runs
# in the same directory do not collide.
random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
print(f"Random string for this process: {random_str}")

# Get the directory of the current script; part files and the merged output
# are written here.
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# Initialize the pipeline (ensure you have installed and set up kokoro correctly).
# NOTE(review): lang_code='a' is American English per the Kokoro README -- confirm
# against the installed version.
pipeline = KPipeline(lang_code='a')

# Ask the user for the file path
print('Please input the file path (txt, pdf, or epub):')
# Paths pasted or dragged into a terminal are often wrapped in quotes;
# strip them so the extension check and open() see the real path.
file_path = input().strip().strip('"\'').strip()

# Determine the file extension and prepare content
file_ext = os.path.splitext(file_path)[1].lower()
content = ''

if file_ext not in ('.txt', '.pdf', '.epub'):
    print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
    sys.exit(1)

# Fail with a readable message instead of a traceback when the path is wrong.
if not os.path.isfile(file_path):
    print(f"File not found: {file_path}")
    sys.exit(1)

if file_ext == '.txt':
    # Plain text: read the whole file as UTF-8.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
elif file_ext == '.pdf':
    # PDF: extract text page by page while the file is still open,
    # skipping pages that yield no text.
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        page_texts = [page.extract_text() for page in reader.pages]
    content = ''.join(text + '\n' for text in page_texts if text)
else:
    # EPUB: take the text of every document-type item (EpubHtml instances),
    # each followed by a newline.
    book = epub.read_epub(file_path)
    content = ''.join(
        BeautifulSoup(item.get_content(), 'html.parser').get_text() + '\n'
        for item in book.get_items()
        if isinstance(item, EpubHtml)
    )

# Optionally display the extracted content
print(content)

# Remove all instances of "OceanofPDF.com" from the content
content = content.replace("OceanofPDF.com", "")

# Combine all text into one large string by replacing line breaks with spaces.
# This helps ensure the model receives larger chunks to work with.
content = ' '.join(content.splitlines())

# Stop early when nothing readable was extracted (for example an image-only
# PDF); otherwise the run would end with an empty output file.
if not content.strip():
    print("No readable text was extracted from the file; nothing to synthesize.")
    sys.exit(1)

# Option 1: Let the model handle its own splitting by not providing a split pattern:
generator = pipeline(
    content,
    voice='af_heart',  # Change voice if desired
    speed=0.85,
)

# Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# generator = pipeline(
#     content, voice='af_bella',  # Change voice if desired
#     speed=1, split_pattern=r'(?<=[.!?])\s+'
# )

# Paths of the temporary per-segment WAV files, in playback order.
part_files = []

try:
    # Save each segment with the random string in the filename
    for i, (gs, ps, audio) in enumerate(generator):
        part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
        # Record the path before writing so a partially written file is
        # still removed by the cleanup below.
        part_files.append(part_filename)
        print(i)   # index of the segment
        print(gs)  # graphemes/text for this segment
        print(ps)  # phonemes for this segment
        display(Audio(data=audio, rate=24000, autoplay=i == 0))
        sf.write(part_filename, audio, 24000)  # Save each audio segment

    # Without any segment there is nothing to merge; do not write an empty file.
    if not part_files:
        print("No audio segments were generated; nothing to merge.")
        sys.exit(1)

    # Merge the individual WAV files into one
    combined = AudioSegment.empty()
    for part_file in part_files:
        combined += AudioSegment.from_wav(part_file)

    # Generate the output file name based on the source file name
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_path = os.path.join(script_directory, f"{base_name}.wav")

    # Export the merged WAV file
    combined.export(output_path, format="wav")
    print(f"Merged WAV file saved as {output_path}")
finally:
    # Cleanup: remove the individual part files whether or not merging
    # succeeded, so a failed run does not leave stray WAV files behind.
    for part_file in part_files:
        if os.path.exists(part_file):
            os.remove(part_file)
            print(f"Removed {part_file}")