mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-09 13:58:14 -05:00
...
This commit is contained in:
@@ -1 +1 @@
|
||||
25.11.24
|
||||
25.11.25
|
||||
52
app.py
52
app.py
@@ -268,12 +268,12 @@ def detect_gpu()->str:
|
||||
# ============================================================
|
||||
return 'cpu'
|
||||
|
||||
def torch_version_is_leq(current:str, target:str)->str:
|
||||
def parse_torch_version(current:str)->str:
|
||||
try:
|
||||
parsed = Version(current)
|
||||
except InvalidVersion:
|
||||
parsed = Version(current.split('+')[0])
|
||||
return parsed <= Version(target)
|
||||
return parsed
|
||||
|
||||
def check_and_install_requirements(file_path:str)->bool:
|
||||
if not os.path.exists(file_path):
|
||||
@@ -281,7 +281,7 @@ def check_and_install_requirements(file_path:str)->bool:
|
||||
print(error)
|
||||
return False
|
||||
try:
|
||||
backend_specs = {"os": detect_platform_tag(), "arch": detect_arch_tag(), "pyvenv": sys.version_info[:2], "ref": detect_gpu()}
|
||||
backend_specs = {"os": detect_platform_tag(), "arch": detect_arch_tag(), "pyvenv": sys.version_info[:2], "gpu": detect_gpu()}
|
||||
print(f'--------------- {backend_specs} -------------')
|
||||
try:
|
||||
from packaging.specifiers import SpecifierSet
|
||||
@@ -425,29 +425,43 @@ def check_and_install_requirements(file_path:str)->bool:
|
||||
import numpy as np
|
||||
torch_version = torch.__version__
|
||||
numpy_version = Version(np.__version__)
|
||||
|
||||
|
||||
if backend_specs['ref'] not in ['cpu', 'unknown', 'unsupported']:
|
||||
pattern = re.search(r'\+(.+)$', torch_version)
|
||||
tag = pattern.group(1)
|
||||
match_tag = re.fullmatch(r'[0-9a-f]{7,40}', tag)
|
||||
if match_tag is None or match_tag is not None and match_tag != torch_mapping['tag']:
|
||||
if tag != backend_specs['ref']:
|
||||
torch_pkg = torch_mapping[backend_specs['ref']]
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', torch_pkg])
|
||||
|
||||
|
||||
|
||||
if torch_version_is_leq(torch_version, '2.2.2') and numpy_version >= Version('2.0.0'):
|
||||
torch_version_parsed = parse_torch_version(torch_version)
|
||||
if torch_version_parsed <= Version('2.2.2') and numpy_version >= Version('2.0.0'):
|
||||
try:
|
||||
msg = 'torch version needs nump < 2. downgrading numpy to 1.26.4...'
|
||||
msg = 'torch version needs numpy < 2. downgrading numpy to 1.26.4...'
|
||||
print(msg)
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', '--use-pep517', 'numpy<2'])
|
||||
except subprocess.CalledProcessError as e:
|
||||
error = f'Failed to downgrade to numpy < 2: {e}'
|
||||
print(error)
|
||||
return False
|
||||
|
||||
if backend_specs['gpu'] not in ['cpu', 'unknown', 'unsupported']:
|
||||
current_tag_pattern = re.search(r'\+(.+)$', torch_version)
|
||||
current_tag = current_tag_pattern.group(1)
|
||||
non_standard_tag = re.fullmatch(r'[0-9a-f]{7,40}', current_tag)
|
||||
if non_standard_tag is None and current_tag != backend_specs['gpu'] or
|
||||
non_standard_tag is not None and backend_specs['gpu'] in ['jetson-jetpack5', 'jetson-60', 'jetson-61'] and non_standard_tag != torch_mapping[backend_specs['gpu']]['tag']:
|
||||
try:
|
||||
backend_tag = torch_mapping[backend_specs['gpu']]['tag']
|
||||
backend_os = backend_specs['os']
|
||||
backend_arch = backend_specs['arch']
|
||||
backend_url = torch_mapping[backend_specs['gpu']]['url']
|
||||
if backend-specs['gpu'] == 'jetson-jetpack5'
|
||||
torch_pkg = f''
|
||||
elif backend_specs['gpu'] in ['jetson-60', 'jetson-61']:
|
||||
jetson_torch_version = default_jetson60_torch if backend_specs['gpu'] == 'jetson-60' else default_jetson61_torch
|
||||
torch_pkg = f'{backend_url}/v{backend_tag}/pytorch/torch-{jetson_torch_version}-{default_py_tag}-linux_{backend_arch}.whl'
|
||||
else:
|
||||
torch_pkg = f'{gpu_url}/{backend_tag}/torch/torch-{torch_version_parsed}+{gpu_tag}-{default_py_tag}-{backend_os}_{backend_arch}.whl'
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', torch_pkg])
|
||||
import torch
|
||||
torch_version = torch.__version__
|
||||
except subprocess.CalledProcessError as e:
|
||||
error = f'Failed to install {packages}: {e}'
|
||||
print(error)
|
||||
return False
|
||||
|
||||
|
||||
devices['CUDA']['found'] = getattr(torch, "cuda", None) is not None and torch.cuda.is_available() and not (hasattr(torch.version, "hip") and torch.version.hip is not None)
|
||||
devices['ROCM']['found'] = hasattr(torch.version, "hip") and torch.version.hip is not None and torch.cuda.is_available()
|
||||
devices['MPS']['found'] = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
|
||||
|
||||
@@ -33,5 +33,4 @@ pyannote-audio<=3.4.0
|
||||
argostranslate<=1.10.0
|
||||
gradio==5.49.1
|
||||
torch<=2.7.1
|
||||
torchaudio<=2.7.1
|
||||
coqui-tts[languages]==0.27.2
|
||||
10
lib/conf.py
10
lib/conf.py
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import platform
|
||||
import tempfile
|
||||
import sys
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Global configuration
|
||||
@@ -84,10 +85,19 @@ default_device = devices['CPU']['proc']
|
||||
default_chapters_preview = False
|
||||
|
||||
default_gpu_wiki = '<a href="https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES">GPU howto wiki</a>'
|
||||
|
||||
default_py_major = sys.version_info.major
|
||||
default_py_minor = sys.version_info.minor
|
||||
default_py_tag = f'cp{default_py_major}{default_py_minor}-cp{default_py_major}{default_py_minor}'
|
||||
|
||||
default_pytorch_url = 'https://download.pytorch.org/whl/'
|
||||
default_jetson_url = 'https://developer.download.nvidia.com/compute/redist/jp/'
|
||||
default_compiled_url = 'https://xxxxxxxxxx/compiled/xxxxxx.whl'
|
||||
|
||||
default_jetson5_torch = ''
|
||||
default_jetson60_torch = '2.4.0a0+3bcc3cddb5.nv24.07.16234504'
|
||||
default_jetson61_torch = '2.5.0a0+872d972e41.nv24.08.17622132'
|
||||
|
||||
torch_mapping = {
|
||||
|
||||
# CUDA
|
||||
|
||||
@@ -52,7 +52,6 @@ dependencies = [
|
||||
"argostranslate<=1.10.0",
|
||||
"gradio==5.49.1",
|
||||
"torch<=2.7.1",
|
||||
"torchaudio<=2.7.1",
|
||||
"coqui-tts[languages]==0.27.2"
|
||||
]
|
||||
readme = "README.md"
|
||||
|
||||
@@ -33,5 +33,4 @@ pyannote-audio==3.4.0
|
||||
argostranslate==1.10.0
|
||||
gradio==5.49.1
|
||||
torch<=2.7.1
|
||||
torchaudio<=2.7.1
|
||||
coqui-tts[languages]==0.27.2
|
||||
@@ -1,61 +0,0 @@
|
||||
import os
|
||||
import platform
|
||||
import argparse
|
||||
|
||||
# Working directories, resolved against the current working directory
# (one level above it).
tmp_dir = os.path.abspath(os.path.join('..', 'tmp'))
models_dir = os.path.abspath(os.path.join('..', 'models'))
tts_dir = os.path.join(models_dir, 'tts')

# Process environment, applied before torch / bark are imported further down.
# 'PYTHONIOENCODING' used to be assigned twice with the same value; it is set once here.
os.environ.update({
    'PYTHONUTF8': '1',
    'PYTHONIOENCODING': 'utf-8',
    'COQUI_TOS_AGREED': '1',
    'CALIBRE_NO_NATIVE_FILEDIALOGS': '1',
    'DO_NOT_TRACK': 'true',
    'CALIBRE_TEMP_DIR': tmp_dir,
    'CALIBRE_CACHE_DIRECTORY': tmp_dir,
    'HUGGINGFACE_HUB_CACHE': tts_dir,
    'HF_HOME': tts_dir,
    'HF_DATASETS_CACHE': tts_dir,
    'BARK_CACHE_DIR': tts_dir,
    'TTS_CACHE': tts_dir,
    'TORCH_HOME': tts_dir,
    'TTS_HOME': models_dir,
    'XDG_CACHE_HOME': models_dir,
    'ARGOS_TRANSLATE_PACKAGE_PATH': os.path.join(models_dir, 'argostranslate'),
    'HF_TOKEN_PATH': os.path.join(os.path.expanduser('~'), '.huggingface_token'),
    'TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD': '1',
    'PYTORCH_ENABLE_MPS_FALLBACK': '1',
    'SUNO_OFFLOAD_CPU': 'False',  # BARK option: False needs A GPU
    'SUNO_USE_SMALL_MODELS': 'False',  # BARK option: False needs a GPU with VRAM > 4GB
})
if platform.system() == 'Windows':
    os.environ['ESPEAK_DATA_PATH'] = os.path.expandvars(r"%USERPROFILE%\scoop\apps\espeak-ng\current\eSpeak NG\espeak-ng-data")
|
||||
|
||||
import torch
|
||||
import torchaudio
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from bark import SAMPLE_RATE, preload_models
|
||||
from bark.generation import codec_decode
|
||||
|
||||
def npz_to_wav(npz_path, output_path):
    """Decode the ``fine_prompt`` array of an ``.npz`` file and save it as audio.

    Args:
        npz_path: Path of the ``.npz`` archive to read.
        output_path: Destination path handed to ``torchaudio.save``.

    Raises:
        KeyError: If the archive has no ``fine_prompt`` entry.
    """
    preload_models()
    # np.load returns an NpzFile that keeps the archive open; the context manager
    # releases the file handle as soon as the array has been read into memory.
    with np.load(npz_path) as data:
        fine_prompt = data["fine_prompt"]
    audio_array = codec_decode(fine_prompt)
    # torchaudio.save receives the decoded audio with one extra leading dimension.
    audio_tensor = torch.tensor(audio_array).unsqueeze(0)
    torchaudio.save(output_path, audio_tensor, SAMPLE_RATE)
    print(f"✅ Saved: {output_path}")
|
||||
|
||||
def process_all_npz_in_folder(folder_path):
    """Convert every ``.npz`` file found recursively under *folder_path*.

    Each archive is passed to ``npz_to_wav`` together with a sibling output
    path that has the same name and a ``.wav`` suffix.
    """
    preload_models()
    search_root = Path(folder_path)
    for source in search_root.rglob("*.npz"):
        target = source.with_suffix(".wav")
        npz_to_wav(str(source), str(target))
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: one required folder, resolved to an absolute path.
    cli = argparse.ArgumentParser(description="Process all NPZ files in a folder.")
    cli.add_argument("--folder_path", type=str, required=True, help="Path to the folder containing NPZ files")
    options = cli.parse_args()
    process_all_npz_in_folder(os.path.abspath(options.folder_path))
|
||||
@@ -1,141 +0,0 @@
|
||||
# NOTE: to run this script you must move it to the root of ebook2audiobook
|
||||
|
||||
import os
|
||||
|
||||
# models_dir / tts_dir were read below before anything defined them
# (`from lib import *` only runs after this section), which raised NameError
# as soon as the script started. They are derived here from the repository
# root this script is documented to be run from.
# NOTE(review): presumably `lib` exposes the same two paths and rebinds these
# names on import — confirm the values match.
models_dir = os.path.abspath('models')
tts_dir = os.path.join(models_dir, 'tts')

# Process environment, applied before torch / TTS are imported further down.
# 'PYTHONIOENCODING' used to be assigned twice with the same value; it is set once here.
os.environ.update({
    'PYTHONUTF8': '1',
    'PYTHONIOENCODING': 'utf-8',
    'COQUI_TOS_AGREED': '1',
    'DO_NOT_TRACK': 'true',
    'HUGGINGFACE_HUB_CACHE': tts_dir,
    'HF_HOME': tts_dir,
    'TRANSFORMERS_CACHE': tts_dir,
    'HF_DATASETS_CACHE': tts_dir,
    'BARK_CACHE_DIR': tts_dir,
    'TTS_CACHE': tts_dir,
    'TORCH_HOME': tts_dir,
    'TTS_HOME': models_dir,
    'XDG_CACHE_HOME': models_dir,
    'HF_TOKEN_PATH': os.path.join(os.path.expanduser('~'), '.huggingface_token'),
    'TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD': '1',
    'PYTORCH_ENABLE_MPS_FALLBACK': '1',
    'SUNO_OFFLOAD_CPU': 'False',
    'SUNO_USE_SMALL_MODELS': 'False',
})
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import regex as re
|
||||
import shutil
|
||||
import soundfile as sf
|
||||
import subprocess
|
||||
import tempfile
|
||||
import torch
|
||||
import torchaudio
|
||||
import threading
|
||||
import uuid
|
||||
|
||||
from iso639 import languages
|
||||
from huggingface_hub import hf_hub_download
|
||||
from pathlib import Path
|
||||
from scipy.io import wavfile as wav
|
||||
from scipy.signal import find_peaks
|
||||
from TTS.tts.configs.bark_config import BarkConfig
|
||||
from TTS.tts.models.bark import Bark
|
||||
|
||||
from lib import *
|
||||
|
||||
import logging
|
||||
# Registry filled by load_checkpoint(): key -> {"engine": ..., "config": ...}.
loaded_tts = {}

# Root logger at DEBUG level for this script.
logging.basicConfig(level=logging.DEBUG)

# torch.hub is pointed at the models directory.
torch.hub.set_dir(models_dir)
|
||||
|
||||
def load_checkpoint(**kwargs):
    """Create a Bark engine from a checkpoint directory and register it in ``loaded_tts``.

    Keyword Args:
        key: Key under which ``{"engine": ..., "config": ...}`` is stored in ``loaded_tts``.
        tts_engine: Engine name; only used in the status message.
        device: Target device. ``'cuda'`` calls ``tts.cuda()``, any other value ``tts.to(device)``.
        checkpoint_dir: Directory handed to the engine's ``load_checkpoint``.

    Returns:
        The loaded engine on success, ``False`` on every failure path.
    """
    try:
        key = kwargs.get('key')
        tts_engine = kwargs.get('tts_engine')
        device = kwargs.get('device')
        checkpoint_dir = kwargs.get('checkpoint_dir')

        config = BarkConfig()
        config.CACHE_DIR = tts_dir
        # The small-model switch follows the SUNO_USE_SMALL_MODELS environment variable.
        config.USE_SMALLER_MODELS = os.environ.get('SUNO_USE_SMALL_MODELS', '').lower() == 'true'

        tts = Bark.init_from_config(config)
        tts.load_checkpoint(
            config,
            checkpoint_dir=checkpoint_dir,
            eval=True
        )
        if not tts:
            error = 'TTS engine could not be created!'
            print(error)
            return False

        if device == 'cuda':
            tts.cuda()
        else:
            tts.to(device)
        loaded_tts[key] = {"engine": tts, "config": config}
        msg = f'{tts_engine} Loaded!'
        print(msg)
        return tts
    except Exception as e:
        # The message used to be built and then dropped, so failures were invisible.
        error = f'_load_checkpoint() error: {e}'
        print(error)
        return False
|
||||
|
||||
def wav_to_npz(bark_dir, wav_dir):
    """Run Bark synthesis once for every ``xx_*.wav`` speaker file under *wav_dir*.

    For each ``.wav`` file whose name starts with a two-letter language code and
    an underscore, the language's ``default.txt`` (under ``voices_dir``) is
    synthesized with the file's base name as ``speaker_id`` and *bark_dir* as
    ``voice_dirs``. The audio itself is discarded.
    NOTE(review): presumably the ``.npz`` announced in the log line is written by
    the engine as a side effect of ``synthesize`` — confirm.

    Args:
        bark_dir: Directory passed to the engine as ``voice_dirs``.
        wav_dir: Directory tree scanned for ``.wav`` files.

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        engine_name = TTS_ENGINES['BARK']
        # The original f-string had no placeholder and produced the literal text
        # "TTS_ENGINES['BARK']-internal"; the engine name is now interpolated.
        tts_internal_key = f"{engine_name}-internal"
        internal = models[engine_name]['internal']
        hf_repo = internal['repo']
        hf_sub = internal['sub']
        # The text, coarse and fine models are all downloaded into the cache; only
        # the text model's location is needed to derive the checkpoint directory.
        text_model_path, _coarse_model_path, _fine_model_path = (
            hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{model_file}", cache_dir=tts_dir)
            for model_file in internal['files'][:3]
        )
        checkpoint_dir = os.path.dirname(text_model_path)
        tts = load_checkpoint(tts_engine=engine_name, key=tts_internal_key, checkpoint_dir=checkpoint_dir, device='cpu')
        if not tts:
            print('tts bark not loaded')
            return

        engine_settings = default_engine_settings[engine_name]
        fine_tuned_params = {
            "text_temp": engine_settings['text_temp'],
            "waveform_temp": engine_settings['waveform_temp']
        }
        speaker_pattern = re.compile(r"^([a-z]{2})_")
        for _root, _dirs, files in os.walk(wav_dir):
            for filename in files:
                if not filename.lower().endswith('.wav'):
                    continue
                match = speaker_pattern.match(filename)
                if not match:
                    continue
                speaker = os.path.splitext(filename)[0]
                npz_file = f'{speaker}.npz'
                iso1_lang = match.group(1)
                lang_array = languages.get(part1=iso1_lang)
                if not lang_array:
                    continue
                iso3_lang = lang_array.part3
                default_text_file = os.path.join(voices_dir, iso3_lang, 'default.txt')
                default_text = Path(default_text_file).read_text(encoding="utf-8")
                with torch.no_grad():
                    # Fixed seed so repeated runs use the same random state.
                    torch.manual_seed(67878789)
                    audio_data = tts.synthesize(
                        default_text,
                        loaded_tts[tts_internal_key]['config'],
                        speaker_id=speaker,
                        voice_dirs=bark_dir,
                        silent=True,
                        **fine_tuned_params
                    )
                del audio_data
                msg = f"Saved NPZ file: {npz_file}"
                print(msg)
    except Exception as e:
        print(f'wav_to_npz() error: {e}')
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: both directories are required and made absolute.
    cli = argparse.ArgumentParser(description="Convert WAV files to Bark NPZ format.")
    cli.add_argument("--bark_dir", type=str, required=True, help="Path to the Bark asset directory")
    cli.add_argument("--wav_dir", type=str, required=True, help="Path to the output WAV directory")
    options = cli.parse_args()
    wav_to_npz(os.path.abspath(options.bark_dir), os.path.abspath(options.wav_dir))
|
||||
|
||||
Reference in New Issue
Block a user