Initial commit of a basically finished project; I may or may not take it further

2025-11-18 17:02:39 -05:00
commit 68b286913d
5 changed files with 866 additions and 0 deletions

545
main.py Normal file

@@ -0,0 +1,545 @@
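# --- Commented-out version 1: plain-text input only; per-segment WAVs merged into message.wav ---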
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# import os
# import sys
# from pydub import AudioSegment
# # need to install pytorch https://pytorch.org/get-started/locally/
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# pipeline = KPipeline(lang_code='a')
# content = ''
# print('please input path')
# file_path = input()
# with open(file_path, 'r') as file:
# content = file.read()
# print(content)
# generator = pipeline(
# content, voice='af_bella', # <= change voice here
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # i => index
# print(gs) # gs => graphemes/text
# print(ps) # ps => phonemes
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # save each audio file
# count = count + 1
# directory = script_directory
# # Merge WAV files from 0.wav to 334.wav
# combined = AudioSegment.empty()
# for i in range(count): # 0 to 334 inclusive
# file_path = f"{directory}/{i}.wav"
# sound = AudioSegment.from_wav(file_path)
# combined += sound
# # Export the merged WAV file
# output_path = f"{directory}/message.wav"
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# import torch
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# pipeline.model.to(device)
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# # For plain text files, just read the content
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# # For PDFs, open the file in binary mode and extract text from each page
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# # For EPUBs, use ebooklib to read the book and BeautifulSoup to extract text
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Display the extracted content (optional)
# print(content)
# # Use the pipeline to generate speech from the text content
# generator = pipeline(
# content, voice='af_bella', # Change the voice here if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio file
# count += 1
# # Merge all individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Export the merged WAV file
# output_path = os.path.join(script_directory, "output.wav")
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# for i, (gs, ps, audio) in enumerate(generator):
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(f'{i}.wav', audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# wav_path = os.path.join(script_directory, f"{i}.wav")
# sound = AudioSegment.from_wav(wav_path)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# for item in book.get_items():
# if item.get_type() == epub.ITEM_DOCUMENT:
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Generate the audio segments using the pipeline
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'\n+'
# )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
# import os
# import sys
# import random
# import string
# import re
# from kokoro import KPipeline
# from IPython.display import display, Audio
# import soundfile as sf
# from pydub import AudioSegment
# # Libraries for PDF and EPUB extraction
# import PyPDF2
# from ebooklib import epub
# from bs4 import BeautifulSoup
# # Generate a random string for this process instance
# random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
# print(f"Random string for this process: {random_str}")
# # Get the directory of the current script
# script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# # Initialize the pipeline (ensure you have installed and set up kokoro correctly)
# pipeline = KPipeline(lang_code='a')
# # Ask the user for the file path
# print('Please input the file path (txt, pdf, or epub):')
# file_path = input().strip()
# # Determine the file extension and prepare content
# file_ext = os.path.splitext(file_path)[1].lower()
# content = ''
# if file_ext == '.txt':
# with open(file_path, 'r', encoding='utf-8') as file:
# content = file.read()
# elif file_ext == '.pdf':
# with open(file_path, 'rb') as file:
# reader = PyPDF2.PdfReader(file)
# for page in reader.pages:
# text = page.extract_text()
# if text:
# content += text + '\n'
# elif file_ext == '.epub':
# book = epub.read_epub(file_path)
# from ebooklib.epub import EpubHtml # Import the document class
# for item in book.get_items():
# # Instead of checking for epub.ITEM_DOCUMENT, check if item is an instance of EpubHtml
# if isinstance(item, EpubHtml):
# soup = BeautifulSoup(item.get_content(), 'html.parser')
# text = soup.get_text()
# content += text + '\n'
# else:
# print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
# sys.exit(1)
# # Optionally display the extracted content
# print(content)
# # Combine all text into one large string by removing newline characters.
# # This helps ensure the model receives larger chunks to work with.
# content = ' '.join(content.splitlines())
# # Option 1: Let the model handle its own splitting by not providing a split pattern:
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1
# )
# # Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# # generator = pipeline(
# # content, voice='af_bella', # Change voice if desired
# # speed=1, split_pattern=r'(?<=[.!?])\s+'
# # )
# count = 0
# # Save each segment with the random string in the filename
# for i, (gs, ps, audio) in enumerate(generator):
# part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
# print(i) # index of the segment
# print(gs) # graphemes/text for this segment
# print(ps) # phonemes for this segment
# display(Audio(data=audio, rate=24000, autoplay=i==0))
# sf.write(part_filename, audio, 24000) # Save each audio segment
# count += 1
# # Merge the individual WAV files into one
# combined = AudioSegment.empty()
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# sound = AudioSegment.from_wav(part_file)
# combined += sound
# # Generate the output file name based on the source file name
# base_name = os.path.splitext(os.path.basename(file_path))[0]
# output_path = os.path.join(script_directory, f"{base_name}.wav")
# # Export the merged WAV file
# combined.export(output_path, format="wav")
# print(f"Merged WAV file saved as {output_path}")
# # Cleanup: Remove individual part files after merging
# for i in range(count):
# part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
# if os.path.exists(part_file):
# os.remove(part_file)
# print(f"Removed {part_file}")
import os
import sys
import random
import string
import re
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
from pydub import AudioSegment
# Libraries for PDF and EPUB extraction
import PyPDF2
from ebooklib import epub
from bs4 import BeautifulSoup
# Generate a random string for this process instance
random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
print(f"Random string for this process: {random_str}")
# Get the directory of the current script
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# Initialize the pipeline (ensure you have installed and set up kokoro correctly)
pipeline = KPipeline(lang_code='a')
# Ask the user for the file path
print('Please input the file path (txt, pdf, or epub):')
file_path = input().strip()
# Determine the file extension and prepare content
file_ext = os.path.splitext(file_path)[1].lower()
content = ''
if file_ext == '.txt':
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
elif file_ext == '.pdf':
with open(file_path, 'rb') as file:
reader = PyPDF2.PdfReader(file)
for page in reader.pages:
text = page.extract_text()
if text:
content += text + '\n'
elif file_ext == '.epub':
book = epub.read_epub(file_path)
from ebooklib.epub import EpubHtml # Use EpubHtml for document-type items
for item in book.get_items():
if isinstance(item, EpubHtml):
soup = BeautifulSoup(item.get_content(), 'html.parser')
text = soup.get_text()
content += text + '\n'
else:
print("Unsupported file format. Please provide a .txt, .pdf, or .epub file.")
sys.exit(1)
# Optionally display the extracted content
print(content)
# Remove all instances of "OceanofPDF.com" from the content
content = content.replace("OceanofPDF.com", "")
# Combine all text into one large string by removing newline characters.
# This helps ensure the model receives larger chunks to work with.
content = ' '.join(content.splitlines())
# Option 1: Let the model handle its own splitting by not providing a split pattern:
generator = pipeline(
content, voice='af_heart', # Change voice if desired
speed=0.85
)
# Option 2: Alternatively, use a regex that splits on sentence boundaries (uncomment to use):
# generator = pipeline(
# content, voice='af_bella', # Change voice if desired
# speed=1, split_pattern=r'(?<=[.!?])\s+'
# )
count = 0
# Save each segment with the random string in the filename
for i, (gs, ps, audio) in enumerate(generator):
part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
print(i) # index of the segment
print(gs) # graphemes/text for this segment
print(ps) # phonemes for this segment
display(Audio(data=audio, rate=24000, autoplay=i==0))
sf.write(part_filename, audio, 24000) # Save each audio segment
count += 1
# Merge the individual WAV files into one
combined = AudioSegment.empty()
for i in range(count):
part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
sound = AudioSegment.from_wav(part_file)
combined += sound
# Generate the output file name based on the source file name
base_name = os.path.splitext(os.path.basename(file_path))[0]
output_path = os.path.join(script_directory, f"{base_name}.wav")
# Export the merged WAV file
combined.export(output_path, format="wav")
print(f"Merged WAV file saved as {output_path}")
# Cleanup: Remove individual part files after merging
for i in range(count):
part_file = os.path.join(script_directory, f"{random_str}_{i}.wav")
if os.path.exists(part_file):
os.remove(part_file)
print(f"Removed {part_file}")

145
requirements.txt Normal file

@@ -0,0 +1,145 @@
annotated-types==0.7.0
anyio==4.6.2.post1
asgiref==3.8.1
asttokens==3.0.0
attrs==25.1.0
babel==2.17.0
beautifulsoup4==4.12.3
blinker==1.9.0
blis==1.2.0
bs4==0.0.2
catalogue==2.0.10
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0
click==8.1.8
cloudpathlib==0.20.0
colorama==0.4.6
comm==0.2.2
confection==0.1.5
contourpy==1.3.1
csvw==3.5.1
curated-tokenizers==0.0.9
curated-transformers==0.1.1
cycler==0.12.1
cymem==2.0.11
decorator==5.1.1
distro==1.9.0
Django==5.1.2
dlinfo==2.0.0
docopt==0.6.2
EbookLib==0.18
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
espeakng-loader==0.2.4
et_xmlfile==2.0.0
executing==2.1.0
filelock==3.17.0
Flask==3.1.0
fonttools==4.55.0
fsspec==2025.2.0
graphviz==0.20.3
h11==0.14.0
httpcore==1.0.6
httpx==0.27.2
huggingface-hub==0.29.1
idna==3.10
ipython==8.30.0
ipywidgets==8.1.5
isodate==0.7.2
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.5
jiter==0.6.1
joblib==1.4.2
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyterlab_widgets==3.0.13
kiwisolver==1.4.7
kokoro==0.7.16
langcodes==3.5.0
language-tags==1.2.0
language_data==1.3.0
loguru==0.7.3
lxml==5.3.1
marisa-trie==1.2.1
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.9.2
matplotlib-inline==0.1.7
mdurl==0.1.2
misaki==0.7.17
mpmath==1.3.0
murmurhash==1.0.12
networkx==3.4.2
num2words==0.5.14
numpy==1.26.4
openai==1.52.0
openpyxl==3.1.5
packaging==24.2
pandas==2.2.3
parso==0.8.4
phonemizer-fork==3.3.2
pillow==11.0.0
preshed==3.0.9
prompt_toolkit==3.0.48
pure_eval==0.2.3
PyAudio==0.2.14
pycparser==2.22
pydantic==2.9.2
pydantic_core==2.23.4
pydub==0.25.1
Pygments==2.18.0
pyodbc==5.2.0
pyparsing==3.2.0
PyPDF2==3.0.1
python-dateutil==2.9.0.post0
pytz==2024.2
PyYAML==6.0.2
rdflib==7.1.3
referencing==0.36.2
regex==2024.11.6
requests==2.32.3
rfc3986==1.5.0
rich==13.9.4
rpds-py==0.23.1
safetensors==0.5.2
scikit-learn==1.5.2
scipy==1.14.1
segments==2.3.0
setuptools==75.8.0
shellingham==1.5.4
simpleaudio==1.0.4
six==1.16.0
smart-open==7.1.0
sniffio==1.3.1
soundfile==0.13.1
soupsieve==2.6
spacy==3.8.4
spacy-curated-transformers==0.3.0
spacy-legacy==3.0.12
spacy-loggers==1.0.5
sqlparse==0.5.1
srsly==2.5.1
stack-data==0.6.3
sympy==1.13.1
thinc==8.3.4
threadpoolctl==3.5.0
tokenizers==0.21.0
torch==2.6.0
torchaudio==2.6.0
torchvision==0.21.0
tqdm==4.66.5
traitlets==5.14.3
transformers==4.49.0
typer==0.15.1
typing_extensions==4.12.2
tzdata==2024.2
uritemplate==4.1.1
urllib3==2.2.3
wasabi==1.1.3
wcwidth==0.2.13
weasel==0.4.1
Werkzeug==3.1.3
widgetsnbextension==4.0.13
win32_setctime==1.2.0
wrapt==1.17.2

42
script.txt Normal file

@@ -0,0 +1,42 @@
How to Sign Up for a Shift in the New Faculty Proctor Scheduling Application
First, log into the portal.
Once you've logged in, navigate to the Available Assessments page by selecting the link in the navigation.
On the Available Assessments page, you'll see a table showing all assessments currently available to you. This includes assessments within the organization you have access to, as well as any assessments that have been shared with all proctors.
In this example, I am set up as a proctor for the Department of Management and Organizational Studies under Social Science. I can see assessments under that organization, and I can also see that one assessment from Economics has been shared with all proctors.
When signing up for an assessment, you can see whether there are notes associated with it by checking the Notes Present column in the table. If a note exists, the column will say “Yes.” You can view the note by clicking the magnifying-glass icon.
After clicking the magnifying-glass icon, you'll see assessment details and any notes associated with that assessment. The note may have been added by your faculty administrator or imported from our Exam Central system, where it would have been visible to students.
From this page, you can return to the Available Assessments page or add the assessment to your schedule by clicking the purple Add to My Schedule button.
I'll add this one to my schedule, and then I'll show you how to add assessments directly from the Available Assessments page.
When adding an assessment from the magnifying-glass view, you can return to the Available Assessments page by clicking the blue button, or go to your schedule by clicking the purple button. I'll click the blue button to go back.
Now that we're back on the Available Assessments page, you can quickly add assessments to your schedule by clicking the blue Sign Up button on the right side of the row.
I'll add one of these assessments now.
When an assessment is added to your schedule, any other assessments that overlap with that time block are automatically removed from the Available Assessments page. The system prevents overlapping bookings to help ensure you don't overbook yourself.
Next, I'll navigate to the Assessment Schedule. You can open this page using the link in the navigation.
On the Assessment Schedule page, you'll see your upcoming assessments ordered by their start date. You can also open the magnifying-glass view from the upcoming assessments table, the past assessments table, and the archived assessments table to review details at any time.
If you are no longer able to work an assessment you signed up for, you can click the purple Cancel button in the upcoming assessments table to cancel the shift. This option is not available within the 72-hour window before the shift. If you need to cancel during that 72-hour window, contact your faculty administrator and they will be able to remove you.
After completing a shift, you will need to complete a Shift and Report. You can do this from the Past Assessments table. In the right-hand column, you'll see a Shift and Report button. Click this to complete the report.
In the Shift and Report form, you can review the assessment details, then record the attendance, the actual start time, and the actual end time, and add any notes.
If you arrived early because it was required, adjust the actual start time accordingly. If you had to stay late, adjust the actual end time as well. This information is included in the report sent to your faculty administrator.
If you did not attend, if you showed up but were sent home, or if you worked your shift normally, select the appropriate attendance option. You may add notes to give more context to your faculty administrator.
When you're finished, click Submit Report. Your report will be available to your faculty administrator for review, and your hours will be accessible to them.
This concludes how to sign up for a shift as a proctor in the new Faculty Proctor Scheduling system.

BIN
script.wav Normal file

Binary file not shown.

134
server.py Normal file

@@ -0,0 +1,134 @@
import os
import time
import random
import string
import wave
import pyaudio
import soundfile as sf
from flask import Flask, request, jsonify
from kokoro import KPipeline # Assuming you have this library available
app = Flask(__name__)
selected_voice = "am_adam"
# Initialize the pipeline at app startup so it doesn't re-initialize for every request
try:
print("[DEBUG] Initializing Kokoro pipeline.")
pipeline = KPipeline(lang_code='a')
except Exception as e:
print("[DEBUG] Failed to create pipeline:", e)
pipeline = None
def play_audio_pyaudio(filename):
"""
Plays a .wav file to the default audio output using PyAudio.
"""
wf = wave.open(filename, 'rb')
p = pyaudio.PyAudio()
# Open a stream with the correct settings
stream = p.open(
format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True
)
data = wf.readframes(1024)
while data:
stream.write(data)
data = wf.readframes(1024)
stream.stop_stream()
stream.close()
wf.close()
p.terminate()
print("[DEBUG] Done playing via PyAudio.")
@app.route('/tts', methods=['POST'])
def tts():
"""
POST JSON:
{
"text": "Hello world!"
}
"""
if pipeline is None:
return jsonify({"error": "Pipeline not initialized."}), 500
data = request.get_json()
if not data or 'text' not in data:
return jsonify({"error": "JSON body must include 'text' key."}), 400
user_input = data['text'].strip()
if not user_input:
return jsonify({"error": "Empty text provided."}), 400
print(f"[DEBUG] Received text: {user_input}")
# Generate a random string for filename uniqueness
random_str = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
print(f"[DEBUG] random_str is {random_str}")
# Attempt generating speech audio
try:
generator = pipeline(user_input, voice=selected_voice, speed=1)
print("[DEBUG] Pipeline generator created.")
except Exception as e:
print("[DEBUG] Pipeline generation failed:", e)
return jsonify({"error": str(e)}), 500
script_directory = os.path.dirname(os.path.abspath(__file__))
segment_count = 0
for i, (gs, ps, audio) in enumerate(generator):
print(f"[DEBUG] Got segment {i} from pipeline.")
try:
part_filename = os.path.join(script_directory, f"{random_str}_{i}.wav")
sf.write(part_filename, audio, 24000)
print(f"[DEBUG] Wrote file: {part_filename}")
# Play the generated audio via PyAudio
play_audio_pyaudio(part_filename)
# Delay to let the system finish releasing the file handle
time.sleep(0.5)
# Remove the file after playback
if os.path.exists(part_filename):
try:
os.remove(part_filename)
print(f"[DEBUG] Deleted file: {part_filename}")
except PermissionError:
print(f"[DEBUG] Could not delete {part_filename} (PermissionError). Retrying in 0.5s...")
time.sleep(0.5)
try:
os.remove(part_filename)
print(f"[DEBUG] Deleted file on retry: {part_filename}")
except Exception as remove_err:
print(f"[DEBUG] Still could not delete {part_filename}: {remove_err}")
else:
print(f"[DEBUG] File {part_filename} not found. Possibly removed externally.")
segment_count += 1
except Exception as seg_err:
print(f"[DEBUG] Error handling segment {i}: {seg_err}")
return jsonify({"error": str(seg_err)}), 500
if segment_count == 0:
print("[DEBUG] No audio was generated.")
return jsonify({"warning": "No audio generated."}), 200
print("[DEBUG] Finished generating and playing audio.")
return jsonify({"status": "OK", "message": "Audio played successfully."}), 200
if __name__ == '__main__':
# Run the Flask server
app.run(host='0.0.0.0', port=5000, debug=True)
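
A minimal sketch of exercising the endpoint from another process, assuming the server is running with the defaults above (http://localhost:5000) and using the requests package already pinned in requirements.txt:

import requests

# Send text to the local TTS server; the audio plays on the server machine's default output device.
resp = requests.post(
    "http://localhost:5000/tts",
    json={"text": "Hello from the test client."},
    timeout=300,  # generation plus playback can take a while for longer text
)
print(resp.status_code, resp.json())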