This commit is contained in:
unknown
2025-06-20 19:39:02 -07:00
parent ba1640e82b
commit 107a24b376
5 changed files with 97 additions and 7 deletions

View File

@@ -20,6 +20,8 @@ os.environ['CALIBRE_CACHE_DIRECTORY'] = tmp_dir
# Point every cache/home variable below at the same directory (tts_dir).
# NOTE(review): presumably these are read by the Hugging Face, Bark,
# Coqui TTS and torch libraries to locate downloaded models — confirm
# against the versions pinned by this project.
os.environ['HUGGINGFACE_HUB_CACHE'] = tts_dir
os.environ['HF_HOME'] = tts_dir
os.environ['HF_DATASETS_CACHE'] = tts_dir
os.environ['TRANSFORMERS_CACHE'] = tts_dir
os.environ['BARK_CACHE_DIR'] = tts_dir
os.environ['TTS_CACHE'] = tts_dir
os.environ['TORCH_HOME'] = tts_dir
# TTS_HOME is the only variable in this group set to models_dir rather
# than tts_dir.
os.environ['TTS_HOME'] = models_dir

View File

@@ -24,7 +24,8 @@ dependencies = [
"demucs",
"docker",
"ebooklib",
"fastapi",
"fastapi",
"fugashi",
"gradio",
"hangul-romanize",
"indic-nlp-library",

View File

@@ -6,6 +6,7 @@ demucs
docker
ebooklib
fastapi
fugashi
gradio
hangul-romanize
indic-nlp-library

View File

@@ -1,10 +1,52 @@
import os
import subprocess
import argparse
import torch
def demucs_voice(wav_file, output_dir, models_dir):
    """Isolate the vocal stem of ``wav_file`` by running the ``demucs`` CLI.

    Args:
        wav_file: Path of the audio file to separate.
        output_dir: Directory passed to ``demucs --out``.
        models_dir: Directory set as the torch hub dir and as ``TORCH_HOME``.

    Returns:
        Path of the ``vocals.wav`` file found under ``output_dir``.

    Raises:
        RuntimeError: If the ``demucs`` executable cannot be started, exits
            with a non-zero status, leaves no ``vocals.wav`` behind, or any
            other error occurs. The original exception, when there is one,
            is chained as ``__cause__``.
    """
    try:
        # Set TORCH_HOME for demucs: the subprocess is started without an
        # explicit env, so it sees the value written to os.environ here.
        torch.hub.set_dir(models_dir)
        os.environ['TORCH_HOME'] = models_dir

        # Run demucs subprocess
        cmd = [
            "demucs",
            "--verbose",
            "--two-stems=vocals",
            "--out", output_dir,
            wav_file,
        ]
        print(f"🔄 Running: {' '.join(cmd)}")
        try:
            subprocess.run(cmd, check=True)
        except FileNotFoundError as e:
            # Only a failure to launch the executable means "not installed";
            # a missing output file is reported separately below.
            raise RuntimeError(
                "'demucs' command not found. Ensure it is installed and in PATH."
            ) from e

        # Output folder name is based on input filename
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        demucs_output_path = os.path.join(output_dir, "demucs", base_name, "vocals.wav")
        if not os.path.exists(demucs_output_path):
            # NOTE(review): demucs appears to name the first directory level
            # after the model it ran (e.g. "htdemucs"), not "demucs" — confirm
            # against the installed version. Fall back to any model directory
            # that holds this track's vocals.
            model_dirs = sorted(os.listdir(output_dir)) if os.path.isdir(output_dir) else []
            for model_dir in model_dirs:
                candidate = os.path.join(output_dir, model_dir, base_name, "vocals.wav")
                if os.path.exists(candidate):
                    demucs_output_path = candidate
                    break
            else:
                raise RuntimeError(f"Expected output not found: {demucs_output_path}")

        print(f"✅ Voice track saved to: {demucs_output_path}")
        return demucs_output_path
    except subprocess.CalledProcessError as e:
        # Output is not captured (it streams to the console), so both
        # fields below are None unless capture is enabled on the run call.
        raise RuntimeError(
            f"❌ demucs failed with exit code {e.returncode}.\n"
            f"stdout: {e.output}\n"
            f"stderr: {e.stderr}"
        ) from e
    except RuntimeError:
        # Already carries a specific message; do not re-wrap it.
        raise
    except Exception as e:
        raise RuntimeError(f"❌ Unexpected error: {e}") from e
def normalize_audio_file(input_file, output_file):
# FFmpeg command
models_dir = os.path.join('..', 'models', 'tts')
demucs_file = demucs_voice(input_file, root_dir, models_dir)
ffmpeg_cmd = [
'ffmpeg', '-i', input_file,
'ffmpeg', '-i', demucs_file,
'-af', 'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
'afftdn=nf=-70,'
'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
@@ -27,6 +69,7 @@ def normalize_audio_file(input_file, output_file):
print(f"Unexpected error: {e}")
# Example Usage
input_file = os.path.join('voices', 'eng', 'adult', 'male', 'Jamie.wav')
output_file = os.path.join('voices', 'eng', 'adult', 'male', 'Jamie2.wav')
root_dir = os.path.join('..', 'voices')
input_file = os.path.join(root_dir, 'eng', 'adult', 'male', 'Jamie.wav')
output_file = os.path.join(root_dir, 'eng', 'adult', 'male', 'Jamie2.wav')
normalize_audio_file(input_file, output_file)

View File

@@ -1,5 +1,46 @@
import os
import subprocess
import argparse
import torch
def demucs_voice(wav_file, output_dir, models_dir):
    """Isolate the vocal stem of ``wav_file`` by running the ``demucs`` CLI.

    Args:
        wav_file: Path of the audio file to separate.
        output_dir: Directory passed to ``demucs --out``.
        models_dir: Directory set as the torch hub dir and as ``TORCH_HOME``.

    Returns:
        Path of the ``vocals.wav`` file found under ``output_dir``.

    Raises:
        RuntimeError: If the ``demucs`` executable cannot be started, exits
            with a non-zero status, leaves no ``vocals.wav`` behind, or any
            other error occurs. The original exception, when there is one,
            is chained as ``__cause__``.
    """
    try:
        # Set TORCH_HOME for demucs: the subprocess is started without an
        # explicit env, so it sees the value written to os.environ here.
        torch.hub.set_dir(models_dir)
        os.environ['TORCH_HOME'] = models_dir

        # Run demucs subprocess
        cmd = [
            "demucs",
            "--verbose",
            "--two-stems=vocals",
            "--out", output_dir,
            wav_file,
        ]
        print(f"🔄 Running: {' '.join(cmd)}")
        try:
            subprocess.run(cmd, check=True)
        except FileNotFoundError as e:
            # Only a failure to launch the executable means "not installed";
            # a missing output file is reported separately below.
            raise RuntimeError(
                "'demucs' command not found. Ensure it is installed and in PATH."
            ) from e

        # Output folder name is based on input filename
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        demucs_output_path = os.path.join(output_dir, "demucs", base_name, "vocals.wav")
        if not os.path.exists(demucs_output_path):
            # NOTE(review): demucs appears to name the first directory level
            # after the model it ran (e.g. "htdemucs"), not "demucs" — confirm
            # against the installed version. Fall back to any model directory
            # that holds this track's vocals.
            model_dirs = sorted(os.listdir(output_dir)) if os.path.isdir(output_dir) else []
            for model_dir in model_dirs:
                candidate = os.path.join(output_dir, model_dir, base_name, "vocals.wav")
                if os.path.exists(candidate):
                    demucs_output_path = candidate
                    break
            else:
                raise RuntimeError(f"Expected output not found: {demucs_output_path}")

        print(f"✅ Voice track saved to: {demucs_output_path}")
        return demucs_output_path
    except subprocess.CalledProcessError as e:
        # Output is not captured (it streams to the console), so both
        # fields below are None unless capture is enabled on the run call.
        raise RuntimeError(
            f"❌ demucs failed with exit code {e.returncode}.\n"
            f"stdout: {e.output}\n"
            f"stderr: {e.stderr}"
        ) from e
    except RuntimeError:
        # Already carries a specific message; do not re-wrap it.
        raise
    except Exception as e:
        raise RuntimeError(f"❌ Unexpected error: {e}") from e
def normalize_audio_folder(folder_path):
for root, dirs, files in os.walk(folder_path):
@@ -7,8 +48,10 @@ def normalize_audio_folder(folder_path):
if file.lower().endswith('.wav'):
input_file = os.path.join(root, file)
temp_file = os.path.join(root, 'temp_output.wav') # Temporary file to avoid overwriting during processing
models_dir = os.path.join('..', 'models', 'tts')
demucs_file = demucs_voice(input_file, folder_path, models_dir)
ffmpeg_cmd = [
'ffmpeg', '-i', input_file,
'ffmpeg', '-i', demucs_file,
'-af', 'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
'afftdn=nf=-70,'
'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
@@ -35,5 +78,5 @@ def normalize_audio_folder(folder_path):
if os.path.exists(temp_file):
os.remove(temp_file)
folder_path = '../voices'
folder_path = '../assets/bark'
normalize_audio_folder(folder_path)