# AI-TTS/combine_audio.py

import sys
import os
import wave

# Usage:
#   python combine_wavs.py "abc123" "output.wav"

def collect_wav_paths(text, base_folder):
    """Return the ordered WAV paths that spell out *text*.

    Each non-whitespace character of the lower-cased text is mapped to
    ``<base_folder>/<char>.wav``.

    Args:
        text: Characters to assemble; whitespace is ignored.
        base_folder: Folder holding a.wav, b.wav, 0.wav, 1.wav, etc.

    Returns:
        A list of WAV file paths, one per non-whitespace character.

    Raises:
        FileNotFoundError: If a character has no matching WAV file.
        ValueError: If the text contains no usable characters.
    """
    wav_paths = []
    for ch in text.lower():
        if ch.isspace():
            continue  # ignore spaces

        wav_path = os.path.join(base_folder, f"{ch}.wav")
        if not os.path.exists(wav_path):
            raise FileNotFoundError(
                f"Missing WAV for character: {ch} ({wav_path})"
            )
        wav_paths.append(wav_path)

    if not wav_paths:
        raise ValueError("No valid characters found to assemble.")
    return wav_paths


def _format_signature(params):
    """Return the parts of WAV params that must match to concatenate files."""
    # nframes is deliberately excluded: clips of different lengths are still
    # compatible as long as the sample format is identical.
    return (
        params.nchannels,
        params.sampwidth,
        params.framerate,
        params.comptype,
        params.compname,
    )


def concatenate_wavs(wav_paths, output_path):
    """Concatenate the given WAV files, in order, into *output_path*.

    All inputs are read and validated before the output file is opened, so a
    format mismatch never leaves a partially written output behind.

    Args:
        wav_paths: Non-empty sequence of input WAV file paths.
        output_path: Path of the WAV file to write.

    Raises:
        ValueError: If *wav_paths* is empty, or if a file's channel count,
            sample width, frame rate or compression differs from the first
            file's.
    """
    if not wav_paths:
        raise ValueError("No valid characters found to assemble.")

    reference = None
    chunks = []
    for wav_file in wav_paths:
        with wave.open(wav_file, "rb") as reader:
            params = reader.getparams()
            if reference is None:
                reference = params
            elif _format_signature(params) != _format_signature(reference):
                raise ValueError(f"WAV format mismatch: {wav_file}")
            chunks.append(reader.readframes(reader.getnframes()))

    with wave.open(output_path, "wb") as out:
        # setparams() also copies the first clip's frame count; writeframes()
        # corrects the header to the number of frames actually written.
        out.setparams(reference)
        out.writeframes(b"".join(chunks))


def main(argv=None):
    """Command-line entry point.

    Usage:
        python combine_wavs.py "abc123" "output.wav"

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print('Usage: python combine_wavs.py "text" "output.wav"')
        sys.exit(1)

    text = argv[1]
    output_filename = argv[2]

    current_folder = os.path.dirname(os.path.abspath(__file__))

    # Folder where a.wav, b.wav, c.wav, 0.wav, 1.wav, etc. are stored
    base_folder = os.path.join(current_folder, "captcha_audio")
    print(f"Using base folder: {base_folder}")

    # The output is written next to this script; create any sub-folder named
    # in the output filename so the write cannot fail on a missing directory.
    output_path = os.path.join(current_folder, output_filename)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    wav_paths = collect_wav_paths(text, base_folder)
    concatenate_wavs(wav_paths, output_path)

    print(f"Saved combined WAV: {output_path}")


if __name__ == "__main__":
    main()