mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-10 06:18:02 -05:00
...
This commit is contained in:
@@ -6,7 +6,6 @@ deep_translator
|
||||
docker
|
||||
ebooklib
|
||||
fastapi
|
||||
num2words
|
||||
beautifulsoup4
|
||||
fugashi
|
||||
sudachipy
|
||||
@@ -29,6 +28,7 @@ nvidia-ml-py
|
||||
phonemizer-fork
|
||||
pydub
|
||||
torchvggish
|
||||
onnxruntime-directml; sys_platform == "win32"
|
||||
pyannote-audio==3.4.0
|
||||
stanza==1.10.1
|
||||
argostranslate==1.10.0
|
||||
|
||||
@@ -8,8 +8,6 @@ def patched_torch_load(*args, **kwargs):
|
||||
return _original_load(*args, **kwargs)
|
||||
|
||||
torch.load = patched_torch_load
|
||||
torch.backends.cudnn.benchmark = False
|
||||
torch.backends.cudnn.deterministic = True
|
||||
|
||||
import hashlib, math, os, shutil, subprocess, tempfile, threading, uuid
|
||||
import numpy as np, regex as re, soundfile as sf, torchaudio
|
||||
@@ -243,6 +241,7 @@ class Coqui:
|
||||
self.tts.load_checkpoint(
|
||||
config,
|
||||
checkpoint_dir = checkpoint_dir,
|
||||
use_deepspeed = default_engine_settings[TTS_ENGINES['BARK']]['use_deepspeed'],
|
||||
eval = True
|
||||
)
|
||||
if self.tts:
|
||||
@@ -461,7 +460,7 @@ class Coqui:
|
||||
if device == devices['CUDA'] and torch.cuda.is_available():
|
||||
dtype = (
|
||||
torch.bfloat16
|
||||
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported()
|
||||
if getattr(self, "is_bfloat", False) and torch.cuda.is_bf16_supported() and self.sessin['free_vram_gb'] > 4.0
|
||||
else torch.float16
|
||||
)
|
||||
return torch.amp.autocast(devices['CUDA'], dtype=dtype)
|
||||
|
||||
@@ -31,6 +31,7 @@ os.environ['ARGOS_TRANSLATE_PACKAGE_PATH'] = os.path.join(models_dir, 'argostran
|
||||
os.environ['TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD'] = '1'
|
||||
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
|
||||
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
|
||||
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'backend:native,max_split_size_mb:32,garbage_collection_threshold:0.5,expandable_segments:True'
|
||||
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
||||
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
||||
os.environ["CUDA_CACHE_MAXSIZE"] = "2147483648"
|
||||
|
||||
@@ -16,8 +16,6 @@ def patched_torch_load(*args, **kwargs)->Any:
|
||||
return _original_load(*args, **kwargs)
|
||||
|
||||
torch.load = patched_torch_load
|
||||
torch.backends.cudnn.benchmark = False
|
||||
torch.backends.cudnn.deterministic = True
|
||||
|
||||
import argparse, asyncio, csv, fnmatch, hashlib, io, json, math, os, platform, random, shutil, socket, subprocess, sys, tempfile, threading, time, traceback
|
||||
import warnings, unicodedata, urllib.request, uuid, zipfile, ebooklib, gradio as gr, psutil, pymupdf4llm, regex as re, requests, stanza, uvicorn, gc
|
||||
@@ -136,6 +134,7 @@ class SessionContext:
|
||||
"id": id,
|
||||
"tab_id": None,
|
||||
"is_gui_process": False,
|
||||
"free_vram_gb": 0,
|
||||
"process_id": None,
|
||||
"status": None,
|
||||
"event": None,
|
||||
@@ -225,9 +224,14 @@ ctx_tracker = SessionTracker()
|
||||
def cleanup_garbage()->None:
    """Run a Python garbage-collection pass and, if CUDA is available, release GPU caches.

    When `torch.cuda.is_available()` is true this also re-applies the cuDNN /
    matmul backend flags, empties the CUDA caching allocator, collects CUDA IPC
    memory, synchronizes the device and sets the per-process memory fraction
    to 0.95. Without CUDA only `gc.collect()` runs.
    """
    gc.collect()
    if torch.cuda.is_available():
        # Backend flags are re-asserted on every cleanup call.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
        # Hand cached-but-unused blocks back to the driver, then reclaim IPC handles.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
        torch.cuda.synchronize()
        # Cap this process at 95% of the device memory.
        torch.cuda.set_per_process_memory_fraction(0.95)
|
||||
|
||||
def prepare_dirs(src:str, session:DictProxy[str,Any])->bool:
|
||||
try:
|
||||
@@ -563,6 +567,7 @@ YOU CAN IMPROVE IT OR ASK TO A TRAINING MODEL EXPERT.
|
||||
try:
|
||||
stanza.download(session['language_iso1'], model_dir=os.getenv('STANZA_RESOURCES_DIR'))
|
||||
stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=True if session['device'] == devices['CUDA'] else False, download_method="reuse_resources")
|
||||
#stanza_nlp = stanza.Pipeline(session['language_iso1'], processors='tokenize,ner,mwt', use_gpu=False, download_method="reuse_resources")
|
||||
except (ConnectionError, TimeoutError) as e:
|
||||
error = f'Stanza model download connection error: {e}. Retry later'
|
||||
return error, None
|
||||
@@ -2073,15 +2078,16 @@ def convert_ebook(args:dict, ctx:object|None=None)->tuple:
|
||||
msg_extra = ''
|
||||
vram_dict = VRAMDetector().detect_vram(session['device'])
|
||||
free_vram_bytes = vram_dict.get('free_bytes', 0)
|
||||
total_vram_gb = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
|
||||
if total_vram_gb == 0:
|
||||
msg_extra += '<br/>VRAM not detected! restrict to 1GB max' if total_vram_gb == 0 else f'<br/>VRAM detected with {total_vram_gb}GB'
|
||||
session['free_vram_gb'] = float(int(free_vram_bytes / (1024 ** 3) * 100) / 100) if free_vram_bytes > 0 else 0
|
||||
if session['free_vram_gb'] == 0:
    # No free-VRAM figure was detected: fall back to a nominal 1 GB budget.
    session['free_vram_gb'] = 1.0
    # This branch is only reached in the not-detected case, so the message is
    # unconditional (re-testing the value after the assignment above would never match 0).
    msg_extra += '<br/>VRAM not detected! restrict to 1GB max'
|
||||
if session['tts_engine'] == TTS_ENGINES['BARK']:
|
||||
os.environ['SUNO_USE_SMALL_MODELS'] = 'True'
|
||||
msg_extra += f"<br/>Switching BARK to SMALL models"
|
||||
else:
|
||||
msg_extra += f'<br/>Free VRAM available: {total_vram_gb}GB'
|
||||
if total_vram_gb > 4.0:
|
||||
# Outer double quotes: reusing the same quote type inside an f-string expression only parses on Python 3.12+.
msg_extra += f"<br/>Free VRAM available: {session['free_vram_gb']}GB"
|
||||
if session['free_vram_gb'] > 4.0:
|
||||
if session['tts_engine'] == TTS_ENGINES['BARK']:
|
||||
os.environ['SUNO_USE_SMALL_MODELS'] = 'False'
|
||||
if session['device'] == devices['CUDA']:
|
||||
|
||||
@@ -46,10 +46,6 @@ default_engine_settings = {
|
||||
"top_p": 0.85,
|
||||
"speed": 1.0,
|
||||
"enable_text_splitting": False,
|
||||
# to enable deepspeed, you must install it first:
|
||||
# conda activate ./python_env (linux/mac) or .\python_env (windows)
|
||||
# pip install deepspeed
|
||||
# conda deactivate
|
||||
"use_deepspeed": False,
|
||||
"files": ['config.json', 'model.pth', 'vocab.json', 'ref.wav', 'speakers_xtts.pth'],
|
||||
"voices": {
|
||||
|
||||
@@ -47,6 +47,7 @@ dependencies = [
|
||||
"phonemizer-fork",
|
||||
"pydub",
|
||||
"torchvggish",
|
||||
"onnxruntime-directml; sys_platform == 'win32'",
|
||||
"pyannote-audio==3.4.0",
|
||||
"stanza==1.10.1",
|
||||
"argostranslate==1.10.0",
|
||||
|
||||
@@ -28,6 +28,7 @@ nvidia-ml-py
|
||||
phonemizer-fork
|
||||
pydub
|
||||
torchvggish
|
||||
onnxruntime-directml; sys_platform == "win32"
|
||||
pyannote-audio==3.4.0
|
||||
stanza==1.10.1
|
||||
argostranslate==1.10.0
|
||||
|
||||
Reference in New Issue
Block a user