This commit is contained in:
unknown
2025-10-31 17:56:00 -07:00
parent bdb31144ef
commit f133e95a42
7 changed files with 19 additions and 15 deletions

View File

@@ -6,7 +6,6 @@ deep_translator
docker
ebooklib
fastapi
num2words
beautifulsoup4
fugashi
sudachipy
@@ -29,6 +28,7 @@ nvidia-ml-py
phonemizer-fork
pydub
torchvggish
onnxruntime-directml; sys_platform == "win32"
pyannote-audio==3.4.0
stanza==1.10.1
argostranslate==1.10.0
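
Note: the new onnxruntime-directml entry carries a PEP 508 environment marker, so pip resolves it only on Windows; the same marker is added to the pyproject dependencies and the second requirements file below. A minimal sketch of how such a marker evaluates, using the packaging library (an illustration assumption; the project itself doesn't import it):

from packaging.markers import Marker  # pip evaluates markers via this same library

marker = Marker('sys_platform == "win32"')
print(marker.evaluate())  # True on Windows, False on Linux/macOS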

View File

@@ -8,8 +8,6 @@ def patched_torch_load(*args, **kwargs):
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import hashlib, math, os, shutil, subprocess, tempfile, threading, uuid
import numpy as np, regex as re, soundfile as sf, torchaudio
@@ -243,6 +241,7 @@ class Coqui:
self.tts.load_checkpoint(
config,
checkpoint_dir = checkpoint_dir,
use_deepspeed = default_engine_settings[TTS_ENGINES['BARK']]['use_deepspeed'],
eval = True
)
if self.tts:
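
Note: use_deepspeed is now forwarded from default_engine_settings instead of being hard-coded, and the DeepSpeed install instructions were dropped from the settings file (see below). A hedged sketch of a guard that only requests DeepSpeed when it is actually importable (this guard is an assumption, not part of the commit):

import importlib.util

use_deepspeed = (
    default_engine_settings[TTS_ENGINES['BARK']]['use_deepspeed']
    and importlib.util.find_spec('deepspeed') is not None  # skip if DeepSpeed isn't installed
)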
@@ -461,7 +460,7 @@ class Coqui:
if device == devices['CUDA'] and torch.cuda.is_available():
dtype = (
torch.bfloat16
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported()
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported() and self.sessin['free_vram_gb'] > 4.0
else torch.float16
)
return torch.amp.autocast(devices['CUDA'], dtype=dtype)
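
The bf16 path is now gated on free VRAM as well as hardware support. A minimal standalone sketch of the selection logic (infer and the example values are hypothetical):

import torch

def pick_autocast_dtype(is_bfloat: bool, free_vram_gb: float) -> torch.dtype:
    # bf16 only when the flag is set, the GPU supports it (Ampere or newer),
    # and the commit's heuristic of > 4 GB free VRAM holds; otherwise fp16.
    if is_bfloat and torch.cuda.is_bf16_supported() and free_vram_gb > 4.0:
        return torch.bfloat16
    return torch.float16

# with torch.amp.autocast('cuda', dtype=pick_autocast_dtype(True, 6.2)):
#     audio = model.infer(text)  # hypothetical call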

View File

@@ -31,6 +31,7 @@ os.environ['ARGOS_TRANSLATE_PACKAGE_PATH'] = os.path.join(models_dir, 'argostran
os.environ['TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD'] = '1'
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'backend:native,max_split_size_mb:32,garbage_collection_threshold:0.5,expandable_segments:True'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["CUDA_CACHE_MAXSIZE"] = "2147483648"

View File

@@ -16,8 +16,6 @@ def patched_torch_load(*args, **kwargs)->Any:
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import argparse, asyncio, csv, fnmatch, hashlib, io, json, math, os, platform, random, shutil, socket, subprocess, sys, tempfile, threading, time, traceback
import warnings, unicodedata, urllib.request, uuid, zipfile, ebooklib, gradio as gr, psutil, pymupdf4llm, regex as re, requests, stanza, uvicorn, gc
@@ -136,6 +134,7 @@ class SessionContext:
"id": id,
"tab_id": None,
"is_gui_process": False,
"free_vram_gb": 0,
"process_id": None,
"status": None,
"event": None,
@@ -225,9 +224,14 @@ ctx_tracker = SessionTracker()
def cleanup_garbage():
gc.collect()
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
torch.cuda.synchronize()
torch.cuda.set_per_process_memory_fraction(0.95)
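
cleanup_garbage() now also re-pins the cuDNN/TF32 backend flags on every call and caps this process at 95% of device memory. A short sketch of what that cap amounts to (device 0 and the printout are illustrative):

import torch

if torch.cuda.is_available():
    total = torch.cuda.get_device_properties(0).total_memory
    # set_per_process_memory_fraction(0.95) makes the caching allocator raise
    # OOM once this process exceeds 0.95 * total_memory, leaving headroom
    # for the display server and other processes.
    print(f"allocator cap = {0.95 * total / 1024**3:.2f} GB of {total / 1024**3:.2f} GB")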
def prepare_dirs(src:str, session:DictProxy[str,Any])->bool:
try:
@@ -563,6 +567,7 @@ YOU CAN IMPROVE IT OR ASK TO A TRAINING MODEL EXPERT.
try:
stanza.download(session['language_iso1'], model_dir=os.getenv('STANZA_RESOURCES_DIR'))
stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=True if session['device'] == devices['CUDA'] else False, download_method="reuse_resources")
#stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=False, download_method="reuse_resources")
except (ConnectionError, TimeoutError) as e:
error = f'Stanza model download connection error: {e}. Retry later'
return error, None
@@ -2073,15 +2078,16 @@ def convert_ebook(args:dict, ctx:object|None=None)->tuple:
msg_extra = ''
vram_dict = VRAMDetector().detect_vram(session['device'])
free_vram_bytes = vram_dict.get('free_bytes', 0)
total_vram_gb = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
if total_vram_gb == 0:
msg_extra += '<br/>VRAM not detected! restrict to 1GB max' if total_vram_gb == 0 else f'<br/>VRAM detected with {total_vram_gb}GB'
session['free_vram_gb'] = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
if session['free_vram_gb'] == 0:
session['free_vram_gb'] = 1.0
msg_extra += '<br/>VRAM not detected! restrict to 1GB max' if session['free_vram_gb'] == 0 else f"<br/>VRAM detected with {session['free_vram_gb']}GB"
if session['tts_engine'] == TTS_ENGINES['BARK']:
os.environ['SUNO_USE_SMALL_MODELS'] = 'True'
msg_extra += f"<br/>Switching BARK to SMALL models"
else:
msg_extra += f'<br/>Free VRAM available: {total_vram_gb}GB'
if total_vram_gb > 4.0:
msg_extra += f"<br/>Free VRAM available: {session['free_vram_gb']}GB"
if session['free_vram_gb'] > 4.0:
if session['tts_engine'] == TTS_ENGINES['BARK']:
os.environ['SUNO_USE_SMALL_MODELS'] = 'False'
if session['device'] == devices['CUDA']:
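
VRAMDetector's implementation isn't part of this diff; a hedged sketch of what its CUDA path might look like with nvidia-ml-py (already in requirements.txt), assuming detect_vram() returns a dict with a 'free_bytes' key as the call site above expects:

import pynvml

def detect_vram_cuda(index: int = 0) -> dict:
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return {'free_bytes': mem.free, 'total_bytes': mem.total}
    finally:
        pynvml.nvmlShutdown()

# session['free_vram_gb'] then truncates to two decimals:
# int(free_bytes / (1024 ** 3) * 100) / 100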

View File

@@ -46,10 +46,6 @@ default_engine_settings = {
"top_p": 0.85,
"speed": 1.0,
"enable_text_splitting": False,
# to enable deepspeed, you must install it first:
# conda activate ./python_env (linux/mac) or .\python_env (windows)
# pip install deepspeed
# conda deactivate
"use_deepspeed": False,
"files": ['config.json', 'model.pth', 'vocab.json', 'ref.wav', 'speakers_xtts.pth'],
"voices": {

View File

@@ -47,6 +47,7 @@ dependencies = [
"phonemizer-fork",
"pydub",
"torchvggish",
"onnxruntime-directml; sys_platform == 'win32'",
"pyannote-audio==3.4.0",
"stanza==1.10.1",
"argostranslate==1.10.0",

View File

@@ -28,6 +28,7 @@ nvidia-ml-py
phonemizer-fork
pydub
torchvggish
onnxruntime-directml; sys_platform == "win32"
pyannote-audio==3.4.0
stanza==1.10.1
argostranslate==1.10.0