This commit is contained in:
unknown
2025-10-31 17:56:00 -07:00
parent bdb31144ef
commit f133e95a42
7 changed files with 19 additions and 15 deletions

View File

@@ -6,7 +6,6 @@ deep_translator
docker
ebooklib
fastapi
num2words
beautifulsoup4
fugashi
sudachipy
@@ -29,6 +28,7 @@ nvidia-ml-py
phonemizer-fork
pydub
torchvggish
onnxruntime-directml; sys_platform == "win32"
pyannote-audio==3.4.0
stanza==1.10.1
argostranslate==1.10.0
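
Note: the new onnxruntime-directml entry carries a PEP 508 environment marker, so pip resolves it only on Windows; the same marker is added to the pyproject dependencies and the second requirements file below. A minimal sketch of how such a marker evaluates, using the packaging library (an illustration assumption; the project itself doesn't import it):

from packaging.markers import Marker  # pip evaluates markers via this same library

marker = Marker('sys_platform == "win32"')
print(marker.evaluate())  # True on Windows, False on Linux/macOS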

View File

@@ -8,8 +8,6 @@ def patched_torch_load(*args, **kwargs):
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import hashlib, math, os, shutil, subprocess, tempfile, threading, uuid
import numpy as np, regex as re, soundfile as sf, torchaudio
@@ -243,6 +241,7 @@ class Coqui:
self.tts.load_checkpoint(
config,
checkpoint_dir = checkpoint_dir,
use_deepspeed = default_engine_settings[TTS_ENGINES['BARK']]['use_deepspeed'],
eval = True
)
if self.tts:
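
Note: use_deepspeed is now forwarded from default_engine_settings instead of being hard-coded, and the DeepSpeed install instructions were dropped from the settings file (see below). A hedged sketch of a guard that only requests DeepSpeed when it is actually importable (this guard is an assumption, not part of the commit):

import importlib.util

use_deepspeed = (
    default_engine_settings[TTS_ENGINES['BARK']]['use_deepspeed']
    and importlib.util.find_spec('deepspeed') is not None  # skip if DeepSpeed isn't installed
)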
@@ -461,7 +460,7 @@ class Coqui:
if device == devices['CUDA'] and torch.cuda.is_available():
dtype = (
torch.bfloat16
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported()
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported() and self.sessin['free_vram_gb'] > 4.0
else torch.float16
)
return torch.amp.autocast(devices['CUDA'], dtype=dtype)
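
The bf16 path is now gated on free VRAM as well as hardware support. A minimal standalone sketch of the selection logic (infer and the example values are hypothetical):

import torch

def pick_autocast_dtype(is_bfloat: bool, free_vram_gb: float) -> torch.dtype:
    # bf16 only when the flag is set, the GPU supports it (Ampere or newer),
    # and the commit's heuristic of > 4 GB free VRAM holds; otherwise fp16.
    if is_bfloat and torch.cuda.is_bf16_supported() and free_vram_gb > 4.0:
        return torch.bfloat16
    return torch.float16

# with torch.amp.autocast('cuda', dtype=pick_autocast_dtype(True, 6.2)):
#     audio = model.infer(text)  # hypothetical call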

View File

@@ -31,6 +31,7 @@ os.environ['ARGOS_TRANSLATE_PACKAGE_PATH'] = os.path.join(models_dir, 'argostran
os.environ['TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD'] = '1'
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'backend:native,max_split_size_mb:32,garbage_collection_threshold:0.5,expandable_segments:True'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["CUDA_CACHE_MAXSIZE"] = "2147483648"

View File

@@ -16,8 +16,6 @@ def patched_torch_load(*args, **kwargs)->Any:
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
import argparse, asyncio, csv, fnmatch, hashlib, io, json, math, os, platform, random, shutil, socket, subprocess, sys, tempfile, threading, time, traceback
import warnings, unicodedata, urllib.request, uuid, zipfile, ebooklib, gradio as gr, psutil, pymupdf4llm, regex as re, requests, stanza, uvicorn, gc
@@ -136,6 +134,7 @@ class SessionContext:
"id": id,
"tab_id": None,
"is_gui_process": False,
"free_vram_gb": 0,
"process_id": None,
"status": None,
"event": None,
@@ -225,9 +224,14 @@ ctx_tracker = SessionTracker()
def cleanup_garbage():
gc.collect()
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
torch.cuda.synchronize()
torch.cuda.set_per_process_memory_fraction(0.95)
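
cleanup_garbage() now also re-pins the cuDNN/TF32 backend flags on every call and caps this process at 95% of device memory. A short sketch of what that cap amounts to (device 0 and the printout are illustrative):

import torch

if torch.cuda.is_available():
    total = torch.cuda.get_device_properties(0).total_memory
    # set_per_process_memory_fraction(0.95) makes the caching allocator raise
    # OOM once this process exceeds 0.95 * total_memory, leaving headroom
    # for the display server and other processes.
    print(f"allocator cap = {0.95 * total / 1024**3:.2f} GB of {total / 1024**3:.2f} GB")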
def prepare_dirs(src:str, session:DictProxy[str,Any])->bool:
try:
@@ -563,6 +567,7 @@ YOU CAN IMPROVE IT OR ASK TO A TRAINING MODEL EXPERT.
try:
stanza.download(session['language_iso1'], model_dir=os.getenv('STANZA_RESOURCES_DIR'))
stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=True if session['device'] == devices['CUDA'] else False, download_method="reuse_resources")
#stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=False, download_method="reuse_resources")
except (ConnectionError, TimeoutError) as e:
error = f'Stanza model download connection error: {e}. Retry later'
return error, None
@@ -2073,15 +2078,16 @@ def convert_ebook(args:dict, ctx:object|None=None)->tuple:
msg_extra = ''
vram_dict = VRAMDetector().detect_vram(session['device'])
free_vram_bytes = vram_dict.get('free_bytes', 0)
total_vram_gb = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
if total_vram_gb == 0:
msg_extra += '<br/>VRAM not detected! restrict to 1GB max' if total_vram_gb == 0 else f'<br/>VRAM detected with {total_vram_gb}GB'
session['free_vram_gb'] = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
if session['free_vram_gb'] == 0:
session['free_vram_gb'] = 1.0
msg_extra += '<br/>VRAM not detected! restrict to 1GB max' if session['free_vram_gb'] == 0 else f"<br/>VRAM detected with {session['free_vram_gb']}GB"
if session['tts_engine'] == TTS_ENGINES['BARK']:
os.environ['SUNO_USE_SMALL_MODELS'] = 'True'
msg_extra += f"<br/>Switching BARK to SMALL models"
else:
msg_extra += f'<br/>Free VRAM available: {total_vram_gb}GB'
if total_vram_gb > 4.0:
msg_extra += f"<br/>Free VRAM available: {session['free_vram_gb']}GB"
if session['free_vram_gb'] > 4.0:
if session['tts_engine'] == TTS_ENGINES['BARK']:
os.environ['SUNO_USE_SMALL_MODELS'] = 'False'
if session['device'] == devices['CUDA']:
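
VRAMDetector's implementation isn't part of this diff; a hedged sketch of what its CUDA path might look like with nvidia-ml-py (already in requirements.txt), assuming detect_vram() returns a dict with a 'free_bytes' key as the call site above expects:

import pynvml

def detect_vram_cuda(index: int = 0) -> dict:
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return {'free_bytes': mem.free, 'total_bytes': mem.total}
    finally:
        pynvml.nvmlShutdown()

# session['free_vram_gb'] then truncates to two decimals:
# int(free_bytes / (1024 ** 3) * 100) / 100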

View File

@@ -46,10 +46,6 @@ default_engine_settings = {
"top_p": 0.85,
"speed": 1.0,
"enable_text_splitting": False,
# to enable deepspeed, you must install it first:
# conda activate ./python_env (linux/mac) or .\python_env (windows)
# pip install deepspeed
# conda deactivate
"use_deepspeed": False,
"files": ['config.json', 'model.pth', 'vocab.json', 'ref.wav', 'speakers_xtts.pth'],
"voices": {

View File

@@ -47,6 +47,7 @@ dependencies = [
"phonemizer-fork",
"pydub",
"torchvggish",
"onnxruntime-directml; sys_platform == 'win32'",
"pyannote-audio==3.4.0",
"stanza==1.10.1",
"argostranslate==1.10.0",

View File

@@ -28,6 +28,7 @@ nvidia-ml-py
phonemizer-fork
pydub
torchvggish
onnxruntime-directml; sys_platform == "win32"
pyannote-audio==3.4.0
stanza==1.10.1
argostranslate==1.10.0