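'''
Prerequisites:
On Ubuntu 22.04 or older:
pip3 install pydub
pip3 install coqui-tts
pip3 install psutil chardet
On newer operating systems, run the commands in a Python virtual environment
and use 'pip' instead of 'pip3'.
Note: pydub needs ffmpeg (or libav) installed to write the .ogg output.
'''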
import sys
import os
import psutil
import chardet # You may need to install this library: pip install chardet
import tempfile
import shutil
from pydub import AudioSegment
import subprocess
# Conditional import of TTS
#TTS = None
def is_script_already_running(script_name):
    """Check if another instance of the script is running.

    Scans the processes reported by psutil and checks whether any process
    other than the current one has ``script_name`` as one of its command-line
    arguments (exact element match).

    Args:
        script_name: The argument to look for, typically ``sys.argv[0]``.

    Returns:
        True if another process lists ``script_name`` in its command line,
        otherwise False.
    """
    current_pid = os.getpid()
    for proc in psutil.process_iter(attrs=['pid', 'name', 'cmdline']):
        try:
            if proc.info['pid'] == current_pid:
                continue
            # psutil stores None for attributes it could not read (for
            # example zombie or access-restricted processes). Treat that as
            # "no arguments" instead of crashing on `script_name in None`.
            cmdline = proc.info['cmdline'] or []
            if script_name in cmdline:
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return False
def detect_encoding(file_path):
    """Guess the character encoding of the file at ``file_path``.

    Up to the first 100000 bytes are handed to chardet, and the value stored
    under the 'encoding' key of its result is returned.
    """
    with open(file_path, 'rb') as raw_file:
        # Only a leading portion of the file is analysed.
        sample = raw_file.read(100000)
    detection = chardet.detect(sample)
    return detection['encoding']
def split_text_into_chunks(text, max_words=50, overflow_words=25):
    """Split text into chunks of approximately max_words, avoiding sentence splits.

    Words are accumulated until at least ``max_words`` have been collected;
    the chunk is then closed at the next word that ends a sentence. If no
    sentence end shows up, the chunk is closed anyway once it reaches
    ``max_words + overflow_words`` words, so text without punctuation cannot
    produce one unbounded chunk.

    Args:
        text: The text to split. Whitespace runs are collapsed to single
            spaces in the returned chunks.
        max_words: Minimum number of words in a chunk before a sentence end
            may close it.
        overflow_words: Extra words allowed beyond ``max_words`` while
            waiting for a sentence end before the chunk is force-closed.

    Returns:
        A list of chunk strings; empty if ``text`` contains no words.
    """
    sentence_endings = ('.', '!', '?')
    # Closing quotes/brackets may follow the terminating punctuation.
    trailing_closers = '"\')]}\u201d\u2019'
    hard_limit = max_words + overflow_words

    chunks = []
    current = []
    for word in text.split():
        current.append(word)
        if len(current) < max_words:
            continue
        ends_sentence = word.rstrip(trailing_closers).endswith(sentence_endings)
        if ends_sentence or len(current) >= hard_limit:
            chunks.append(' '.join(current))
            current = []
    if current:
        chunks.append(' '.join(current))
    return chunks
def create_audio_chunk(chunk_text, chunk_path, model, speaker):
    """Generate TTS audio for a single chunk using the ``tts`` command-line tool.

    Args:
        chunk_text: Text to synthesize.
        chunk_path: Path of the audio file to write (passed to ``--out_path``).
        model: Model name passed to ``--model_name``.
        speaker: Speaker id passed to ``--speaker_idx``. Pass None or an empty
            string to omit the option, as single-speaker models require.

    Raises:
        RuntimeError: If the ``tts`` command exits with a non-zero status.
    """
    # Troubleshooting note: if '--speaker_idx "ED"' produces an error, you may
    # need to correct a bug in the model's speaker list:
    #   open file: nano /root/.local/share/tts/tts_models--en--vctk--vits/speaker_ids.json
    #   change '"ED\n": 0,' to '"ED": 0,'
    #   save and exit
    command = ['tts', '--text', chunk_text, '--model_name', model]
    if speaker:
        # Single-speaker models must not receive --speaker_idx.
        command += ['--speaker_idx', speaker]
    command += ['--out_path', chunk_path]

    result = subprocess.run(command, capture_output=True, text=True)
    output = result.stdout + result.stderr  # Capture both stdout and stderr
    print(output)
    if result.returncode != 0:
        # Fail now instead of letting a missing chunk file surface later,
        # after every remaining chunk has been processed.
        raise RuntimeError(
            f"tts exited with status {result.returncode} while creating {chunk_path}"
        )
def join_audio_files(audio_files, output_file):
    """Concatenate the given audio files, in order, into ``output_file``.

    The combined audio is exported in ogg format. Always returns True once
    the export call has completed.
    """
    merged = AudioSegment.empty()
    for path in audio_files:
        segment = AudioSegment.from_file(path)
        merged += segment
    merged.export(output_file, format="ogg")
    return True
def convert_text_to_audio(input_file, output_file, model, speaker):
    """Convert a text file to a single audio file.

    The text is read using its detected encoding and split into chunks. Each
    chunk is synthesized into a temporary audio file, and the pieces are then
    joined into ``output_file``.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the audio file to create.
        model: TTS model name forwarded to ``create_audio_chunk``.
        speaker: Speaker id forwarded to ``create_audio_chunk``.

    Returns:
        True if the final audio file was created, otherwise False (there was
        no text to convert, or an error occurred). On error the temporary
        chunk directory is kept so it can be inspected.
    """
    temp_dir = None
    try:
        # Detect file encoding
        encoding = detect_encoding(input_file)
        if not encoding:
            raise ValueError("Could not detect file encoding")

        # Read the input text; undecodable bytes are replaced, not fatal.
        with open(input_file, 'r', encoding=encoding, errors='replace') as file:
            text = file.read()

        chunks = split_text_into_chunks(text)
        if not chunks:
            print("No text to convert.")
            return False

        # Temporary directory for storing audio chunks
        temp_dir = tempfile.mkdtemp()
        temp_audio_files = []
        for i, chunk in enumerate(chunks):
            temp_audio_path = os.path.join(temp_dir, f"chunk_{i}.ogg")
            create_audio_chunk(chunk, temp_audio_path, model, speaker)
            temp_audio_files.append(temp_audio_path)

        # Join all temporary audio files into the final output file
        final_audio_created = join_audio_files(temp_audio_files, output_file)
    except Exception as e:
        # Broad on purpose: one bad file must not abort the whole batch.
        # KeyboardInterrupt/SystemExit are not caught here and propagate
        # (the old `return` inside `finally` used to swallow them).
        print(f"Error: {e}")
        if temp_dir:
            print(f"Temporary files are saved in {temp_dir} for debugging.")
        return False

    if not final_audio_created:
        return False

    print(f"Audio file successfully saved to {output_file}")
    # The output exists, so a failed cleanup should not count as a failure.
    shutil.rmtree(temp_dir, ignore_errors=True)
    return True
def process_files(input_dir, output_dir, model="tts_models/en/vctk/vits", speaker="ED"):
    """Process all text and markdown files in the input directory.

    Every ``.txt`` / ``.md`` file found under ``input_dir`` (recursively) is
    converted to an ``.ogg`` file in the matching subdirectory of
    ``output_dir``. After a successful conversion the source text file is
    moved next to its audio file.

    Notes on models:
        * "tts_models/en/jenny/jenny" (irish accent) is good, but is quite
          slow to process. It is a single speaker model and requires a
          slightly different command.
        * model="tts_models/en/vctk/vits", speaker="ED" is quick to process.
          Voice quality is fairly good, but the speaker speaks sort of fast,
          which makes it harder to understand, and it is a british accent.
        * You may want to go through the voice models and see which one works
          best.

    Args:
        input_dir: Directory that is searched for text files.
        output_dir: Directory that receives the audio and moved text files.
        model: TTS model name forwarded to ``convert_text_to_audio``.
        speaker: Speaker id forwarded to ``convert_text_to_audio``.
    """
    # Collect the full list first so files moved during processing are not
    # picked up again by the walk.
    files_to_process = []
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.endswith(('.txt', '.md')):
                files_to_process.append(os.path.join(root, file))

    if not files_to_process:
        print("No files to process.")
        return

    for input_file in files_to_process:
        relative_path = os.path.relpath(os.path.dirname(input_file), input_dir)
        output_subdir = os.path.join(output_dir, relative_path)

        # Ensure the output subdirectory exists
        os.makedirs(output_subdir, exist_ok=True)

        # Ensure unique output file names
        base_name = os.path.splitext(os.path.basename(input_file))[0]
        output_file = os.path.join(output_subdir, f"{base_name}.ogg")
        counter = 1
        while os.path.exists(output_file):
            output_file = os.path.join(output_subdir, f"{base_name}_{counter}.ogg")
            counter += 1

        # Convert text to audio
        success = convert_text_to_audio(input_file, output_file, model, speaker)

        # Move the original text file to the output directory. shutil.move
        # falls back to copy + delete when the directories are on different
        # filesystems, where os.rename would raise OSError.
        if success:
            shutil.move(input_file, os.path.join(output_subdir, os.path.basename(input_file)))
if __name__ == "__main__":
    # Refuse to start when another process was launched with the same script
    # argument.
    if is_script_already_running(sys.argv[0]):
        print("Error: Another instance of the script is already running.")
        sys.exit(1)

    if len(sys.argv) != 3:
        print("Usage: python script.py <input_directory> <output_directory>")
        sys.exit(1)

    input_dir = sys.argv[1]
    output_dir = sys.argv[2]

    # Ensure the input directory exists. isdir (not exists) so that a path
    # pointing at a regular file is rejected as well.
    if not os.path.isdir(input_dir):
        print(f"Error: The directory {input_dir} does not exist.")
        sys.exit(1)

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Process the files
    process_files(input_dir, output_dir)