...

2026-01-10 14:28:15 -05:00 · 2025-06-20 19:39:02 -07:00
parent ba1640e82b
commit 107a24b376
5 changed files with 97 additions and 7 deletions
--- a/lib/conf.py
+++ b/lib/conf.py
@@ -20,6 +20,8 @@ os.environ['CALIBRE_CACHE_DIRECTORY'] = tmp_dir
 os.environ['HUGGINGFACE_HUB_CACHE'] = tts_dir
 os.environ['HF_HOME'] = tts_dir
 os.environ['HF_DATASETS_CACHE'] = tts_dir
+os.environ['TRANSFORMERS_CACHE'] = tts_dir
+os.environ['BARK_CACHE_DIR'] = tts_dir
 os.environ['TTS_CACHE'] = tts_dir
 os.environ['TORCH_HOME'] = tts_dir
 os.environ['TTS_HOME'] = models_dir
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,8 @@ dependencies = [
 	"demucs",
 	"docker",
 	"ebooklib",
-	"fastapi",
+	"fastapi",
+	"fugashi",
 	"gradio",
 	"hangul-romanize",
 	"indic-nlp-library",
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ demucs
 docker
 ebooklib
 fastapi
+fugashi
 gradio
 hangul-romanize
 indic-nlp-library
--- a/tools/normalize_wav_file.py
+++ b/tools/normalize_wav_file.py
@@ -1,10 +1,52 @@
 import os
 import subprocess
+import argparse
+import torch
+
+def demucs_voice(wav_file, output_dir, models_dir):
+	"""Extract the vocal stem of ``wav_file`` by running the demucs CLI.
+
+	Args:
+		wav_file: Path of the audio file to separate.
+		output_dir: Directory passed to demucs via ``--out``.
+		models_dir: Directory used as the torch hub dir and ``TORCH_HOME``.
+
+	Returns:
+		Path of the ``vocals.wav`` file written by demucs.
+
+	Raises:
+		RuntimeError: demucs is missing, exits non-zero, or writes no output.
+	"""
+	# demucs writes to <out>/<model name>/<track>/<stem>.wav, so the model is
+	# pinned to keep the expected output path predictable.
+	model_name = "htdemucs"
+	cmd = ["demucs", "--verbose", "-n", model_name, "--two-stems=vocals", "--out", output_dir, wav_file]
+	try:
+		# set_dir only affects this process; the demucs child inherits TORCH_HOME.
+		torch.hub.set_dir(models_dir)
+		os.environ['TORCH_HOME'] = models_dir
+		base_name = os.path.splitext(os.path.basename(wav_file))[0]
+		demucs_output_path = os.path.join(output_dir, model_name, base_name, "vocals.wav")
+		print(f"🔄 Running: {' '.join(cmd)}")
+		subprocess.run(cmd, check=True)
+	except subprocess.CalledProcessError as e:
+		# Output is not captured, so e.output and e.stderr are always None here.
+		raise RuntimeError(f"❌ demucs failed with exit code {e.returncode}.") from e
+	except FileNotFoundError as e:
+		raise RuntimeError("❌ 'demucs' command not found. Ensure it is installed and in PATH.") from e
+	except Exception as e:
+		raise RuntimeError(f"❌ Unexpected error: {e}") from e
+
+	if not os.path.exists(demucs_output_path):
+		raise RuntimeError(f"❌ Expected output not found: {demucs_output_path}")
+	print(f"✅ Voice track saved to: {demucs_output_path}")
+	return demucs_output_path

 def normalize_audio_file(input_file, output_file):
-    # FFmpeg command
+    models_dir = os.path.join('..', 'models', 'tts')
+    demucs_file = demucs_voice(input_file, root_dir, models_dir)
    ffmpeg_cmd = [
-        'ffmpeg', '-i', input_file,
+        'ffmpeg', '-i', demucs_file,
        '-af', 'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
               'afftdn=nf=-70,'
               'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
@@ -27,6 +69,7 @@ def normalize_audio_file(input_file, output_file):
        print(f"Unexpected error: {e}")

 # Example Usage
-input_file = os.path.join('voices', 'eng', 'adult', 'male', 'Jamie.wav')
-output_file = os.path.join('voices', 'eng', 'adult', 'male', 'Jamie2.wav')
+root_dir = os.path.join('..', 'voices')
+input_file = os.path.join(root_dir, 'eng', 'adult', 'male', 'Jamie.wav')
+output_file = os.path.join(root_dir, 'eng', 'adult', 'male', 'Jamie2.wav')
 normalize_audio_file(input_file, output_file)
--- a/tools/normalize_wav_folder.py
+++ b/tools/normalize_wav_folder.py
@@ -1,5 +1,46 @@
 import os
 import subprocess
+import argparse
+import torch
+
+def demucs_voice(wav_file, output_dir, models_dir):
+	"""Extract the vocal stem of ``wav_file`` by running the demucs CLI.
+
+	Args:
+		wav_file: Path of the audio file to separate.
+		output_dir: Directory passed to demucs via ``--out``.
+		models_dir: Directory used as the torch hub dir and ``TORCH_HOME``.
+
+	Returns:
+		Path of the ``vocals.wav`` file written by demucs.
+
+	Raises:
+		RuntimeError: demucs is missing, exits non-zero, or writes no output.
+	"""
+	# demucs writes to <out>/<model name>/<track>/<stem>.wav, so the model is
+	# pinned to keep the expected output path predictable.
+	model_name = "htdemucs"
+	cmd = ["demucs", "--verbose", "-n", model_name, "--two-stems=vocals", "--out", output_dir, wav_file]
+	try:
+		# set_dir only affects this process; the demucs child inherits TORCH_HOME.
+		torch.hub.set_dir(models_dir)
+		os.environ['TORCH_HOME'] = models_dir
+		base_name = os.path.splitext(os.path.basename(wav_file))[0]
+		demucs_output_path = os.path.join(output_dir, model_name, base_name, "vocals.wav")
+		print(f"🔄 Running: {' '.join(cmd)}")
+		subprocess.run(cmd, check=True)
+	except subprocess.CalledProcessError as e:
+		# Output is not captured, so e.output and e.stderr are always None here.
+		raise RuntimeError(f"❌ demucs failed with exit code {e.returncode}.") from e
+	except FileNotFoundError as e:
+		raise RuntimeError("❌ 'demucs' command not found. Ensure it is installed and in PATH.") from e
+	except Exception as e:
+		raise RuntimeError(f"❌ Unexpected error: {e}") from e
+
+	if not os.path.exists(demucs_output_path):
+		raise RuntimeError(f"❌ Expected output not found: {demucs_output_path}")
+	print(f"✅ Voice track saved to: {demucs_output_path}")
+	return demucs_output_path

 def normalize_audio_folder(folder_path):
    for root, dirs, files in os.walk(folder_path):
@@ -7,8 +48,10 @@ def normalize_audio_folder(folder_path):
            if file.lower().endswith('.wav'):
                input_file = os.path.join(root, file)
                temp_file = os.path.join(root, 'temp_output.wav')  # Temporary file to avoid overwriting during processing
+                models_dir = os.path.join('..', 'models', 'tts')
+                demucs_file = demucs_voice(input_file, folder_path, models_dir)
                ffmpeg_cmd = [
-                    'ffmpeg', '-i', input_file,
+                    'ffmpeg', '-i', demucs_file,
                    '-af', 'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
                           'afftdn=nf=-70,'
                           'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
@@ -35,5 +78,5 @@ def normalize_audio_folder(folder_path):
                    if os.path.exists(temp_file):
                        os.remove(temp_file)

-folder_path = '../voices'
+folder_path = '../assets/bark'
 normalize_audio_folder(folder_path)