Merge pull request #43 from ROBERT-MCDOWELL/main

pull attempt into v2.0
Drew Thomasson
2024-11-15 23:50:54 -05:00
committed by GitHub
65 changed files with 189 additions and 174 deletions

app.py

@@ -6,9 +6,10 @@ import subprocess
import sys
from lib.conf import *
from lib.lang import language_options, default_language_code
from lib.lang import language_mapping, default_language_code
script_mode = NATIVE
share = False
def check_python_version():
current_version = sys.version_info[:2] # (major, minor)
@@ -99,10 +100,10 @@ def is_port_in_use(port):
return s.connect_ex(('0.0.0.0', port)) == 0
def main():
global script_mode, ebooks_dir
global script_mode, share, ebooks_dir
# Convert the list of languages to a string to display in the help text
language_options_str = ", ".join(language_options)
lang_list_str = ", ".join(list(language_mapping.keys()))
# Argument parser to handle optional parameters with descriptions
parser = argparse.ArgumentParser(
@@ -111,12 +112,12 @@ def main():
Example usage:
Windows:
headless:
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
Graphic Interface:
ebook2audiobook.cmd
Linux/Mac:
headless:
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
Graphic Interface:
./ebook2audiobook.sh
""",
@@ -124,8 +125,8 @@ Linux/Mac:
)
options = [
"--script_mode", "--share", "--headless", "--ebook", "--ebooks_dir",
"--voice", "--language", "--device", "--use_custom_model", "--custom_model",
"--custom_config", "--custom_vocab", "--custom_model_url", "--temperature",
"--voice", "--language", "--device", "--custom_model",
"--custom_model_url", "--temperature",
"--length_penalty", "--repetition_penalty", "--top_k", "--top_p", "--speed",
"--enable_text_splitting", "--version"
]
@@ -142,37 +143,31 @@ Linux/Mac:
parser.add_argument(options[5], type=str,
help="Path to the target voice file for TTS. Optional, uses a default voice if not provided.")
parser.add_argument(options[6], type=str, default="en",
help=f"Language for the audiobook conversion. Options: {language_options_str}. Defaults to English (en).")
help=f"Language for the audiobook conversion. Options: {lang_list_str}. Defaults to English (en).")
parser.add_argument(options[7], type=str, default="cpu", choices=["cpu", "gpu"],
help=f"Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.")
parser.add_argument(options[8], action="store_true",
help="Use a custom TTS model. Defaults to False. Must be True to use custom models.")
parser.add_argument(options[9], type=str,
parser.add_argument(options[8], type=str,
help="Path to the custom model file (.pth). Required if using a custom model.")
parser.add_argument(options[10], type=str,
help="Path to the custom config file (config.json). Required if using a custom model.")
parser.add_argument(options[11], type=str,
help="Path to the custom vocab file (vocab.json). Required if using a custom model.")
parser.add_argument(options[12], type=str,
parser.add_argument(options[9], type=str,
help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
"Examples include David Attenborough's model: "
"'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
"More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
parser.add_argument(options[13], type=float, default=0.65,
parser.add_argument(options[10], type=float, default=0.65,
help="Temperature for the model. Defaults to 0.65. Higher temperatures lead to more creative outputs.")
parser.add_argument(options[14], type=float, default=1.0,
parser.add_argument(options[11], type=float, default=1.0,
help="A length penalty applied to the autoregressive decoder. Defaults to 1.0. Not applied to custom models.")
parser.add_argument(options[15], type=float, default=2.0,
parser.add_argument(options[12], type=float, default=2.0,
help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
parser.add_argument(options[16], type=int, default=50,
parser.add_argument(options[13], type=int, default=50,
help="Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 50.")
parser.add_argument(options[17], type=float, default=0.8,
parser.add_argument(options[14], type=float, default=0.8,
help="Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 0.8.")
parser.add_argument(options[18], type=float, default=1.0,
parser.add_argument(options[15], type=float, default=1.0,
help="Speed factor for the speech generation. Defaults to 1.0.")
parser.add_argument(options[19], action="store_true",
parser.add_argument(options[16], action="store_true",
help="Enable splitting text into sentences. Defaults to False.")
parser.add_argument(options[20], action="version",version=f"ebook2audiobook version {version}",
parser.add_argument(options[17], action="version",version=f"ebook2audiobook version {version}",
help="Show the version of the script and exit")
for arg in sys.argv:
@@ -188,6 +183,7 @@ Linux/Mac:
sys.exit(1)
script_mode = args.script_mode if args.script_mode else script_mode
share = args.share if args.share else share
if script_mode == NATIVE:
check_pkg = check_and_install_requirements(requirements_file)
@@ -236,11 +232,13 @@ Linux/Mac:
else:
print(f"Error: The directory {ebooks_dir} does not exist.")
sys.exit(1)
elif args.ebook:
progress_status, audiobook_file = convert_ebook(args)
if audiobook_file is None:
print(f"Conversion failed: {progress_status}")
sys.exit(1)
else:
print("Error: In headless mode, you must specify either an ebook file using --ebook or an ebook directory using --ebooks_dir.")
sys.exit(1)
@@ -249,7 +247,7 @@ Linux/Mac:
allowed_arguments = {'--share', '--script_mode'}
passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
if passed_args_set.issubset(allowed_arguments):
web_interface(args.script_mode, args.share)
web_interface(script_mode, share)
else:
print("Error: In non-headless mode, no option or only '--share' can be passed")
sys.exit(1)
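
For orientation, a minimal sketch of how the trimmed option list works after this change: the --language help text now comes from the keys of language_mapping rather than the removed language_options list, and the custom-model flags collapse to --custom_model and --custom_model_url. The mapping subset below is illustrative; the real table is in the lib/lang.py diff further down.

    import argparse

    # Illustrative subset of lib.lang.language_mapping (see the lib/lang.py diff below)
    language_mapping = {
        "en": {"name": "English", "native_name": "English"},
        "fr": {"name": "French", "native_name": "Français"},
    }
    default_language_code = "en"

    # Replaces the old language_options list in the --language help text
    lang_list_str = ", ".join(list(language_mapping.keys()))  # -> "en, fr"

    parser = argparse.ArgumentParser(description="ebook2audiobook")
    parser.add_argument("--language", type=str, default=default_language_code,
                        help=f"Language for the audiobook conversion. Options: {lang_list_str}.")
    parser.add_argument("--custom_model", type=str,
                        help="Path to the custom model file (.pth).")
    parser.add_argument("--custom_model_url", type=str,
                        help="URL to download the custom model as a zip file.")

    args = parser.parse_args(["--language", "fr", "--custom_model", "model.zip"])
    print(args.language, args.custom_model)  # fr model.zip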


@@ -37,7 +37,7 @@ if not exist "%CALIBRE_TEMP_DIR%" (
icacls "%CALIBRE_TEMP_DIR%" /grant Users:(OI)(CI)F /T
for %%A in (%*) do (
for %%A in (%ARGS%) do (
if "%%A"=="%DOCKER_UTILS%" (
set "SCRIPT_MODE=%DOCKER_UTILS%"
break
@@ -228,7 +228,7 @@ if not "%DOCKER_BUILD_STATUS%"=="0" (
net session >nul 2>&1
if %errorlevel% equ 0 (
echo Restarting in user mode...
start "" /b cmd /c "%~f0" %*
start "" /b cmd /c "%~f0" %ARGS%
exit /b
)
goto dispatch
@@ -269,7 +269,7 @@ if "%SCRIPT_MODE%"=="%FULL_DOCKER%" (
call conda create --prefix %SCRIPT_DIR%\%PYTHON_ENV% python=%PYTHON_VERSION% -y
call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
call python -m pip install --upgrade pip
call python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
call python -m pip install --upgrade -r requirements.txt
call python -m unidic download
call python -m spacy download en_core_web_sm
call python -m nltk.downloader punkt_tab


@@ -212,7 +212,7 @@ function conda_check {
source $CONDA_ENV
conda activate $SCRIPT_DIR/$PYTHON_ENV
python -m pip install --upgrade pip
python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
python -m pip install --upgrade -r requirements.txt
python -m unidic download
python -m spacy download en_core_web_sm
python -m nltk.downloader punkt_tab
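
Both installer scripts now defer to requirements.txt instead of hard-coding the package list. A rough Python equivalent of that installer step, assuming requirements.txt is in the working directory:

    import subprocess
    import sys

    # Upgrade pip, then install or upgrade everything listed in requirements.txt
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "-r", "requirements.txt"], check=True)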

Binary file not shown.

Binary file not shown.

Binary file not shown.


@@ -1 +0,0 @@
this is the test four from the result of text file to audiobook conversion.

BIN
ebooks/test_ar.azw3 Normal file

Binary file not shown.

ebooks/test_ar.txt Normal file

@@ -0,0 +1 @@
هذا هو الاختبار من نتيجة تحويل ملف نصي إلى كتاب صوتي.

BIN
ebooks/test_cs.azw3 Normal file

Binary file not shown.

ebooks/test_cs.txt Normal file

@@ -0,0 +1 @@
Toto je test od výsledku převodu textového souboru na audioknihu.

BIN
ebooks/test_da.azw3 Normal file

Binary file not shown.

ebooks/test_da.txt Normal file

@@ -0,0 +1 @@
Dette er testen fra resultatet af konvertering af tekstfil til lydbog.

BIN
ebooks/test_de.azw3 Normal file

Binary file not shown.

ebooks/test_de.txt Normal file

@@ -0,0 +1 @@
Dies ist der Test des Ergebnisses der Konvertierung einer Textdatei in ein Hörbuch.

BIN
ebooks/test_el.azw3 Normal file

Binary file not shown.

ebooks/test_el.txt Normal file

@@ -0,0 +1 @@
Αυτή είναι η δοκιμή από το αποτέλεσμα της μετατροπής αρχείου κειμένου σε ηχητικό βιβλίο.

BIN
ebooks/test_en.azw3 Normal file

Binary file not shown.

ebooks/test_en.txt Normal file

@@ -0,0 +1 @@
This is the test from the result of text file to audiobook conversion.

BIN
ebooks/test_es.azw3 Normal file

Binary file not shown.

ebooks/test_es.txt Normal file

@@ -0,0 +1 @@
Esta es la prueba del resultado de la conversión de archivo de texto a audiolibro.

BIN
ebooks/test_fi.azw3 Normal file

Binary file not shown.

ebooks/test_fi.txt Normal file

@@ -0,0 +1 @@
Tämä on testi tekstitiedoston muuntamisen tuloksesta äänikirjaksi.

BIN
ebooks/test_fr.azw3 Normal file

Binary file not shown.

ebooks/test_fr.txt Normal file

@@ -0,0 +1 @@
Ceci est le test provenant d'un fichier text en livre audio.

BIN
ebooks/test_hr.azw3 Normal file

Binary file not shown.

ebooks/test_hr.txt Normal file

@@ -0,0 +1 @@
Ovo je test rezultata pretvorbe tekstualne datoteke u audioknjigu.

BIN
ebooks/test_it.azw3 Normal file

Binary file not shown.

ebooks/test_it.txt Normal file

@@ -0,0 +1 @@
Questo è il test del risultato della conversione del file di testo in audiolibro.

BIN
ebooks/test_ja.azw3 Normal file

Binary file not shown.

ebooks/test_ja.txt Normal file

@@ -0,0 +1 @@
これは、テキスト ファイルからオーディオブックへの変換結果のテストです。

BIN
ebooks/test_ko.azw3 Normal file

Binary file not shown.

ebooks/test_ko.txt Normal file

@@ -0,0 +1 @@
이는 텍스트 파일을 오디오북으로 변환한 결과에 대한 테스트입니다.

BIN
ebooks/test_nb.azw3 Normal file

Binary file not shown.

ebooks/test_nb.txt Normal file

@@ -0,0 +1 @@
Dette er testen fra resultatet av konvertering av tekstfil til lydbok.

BIN
ebooks/test_nl.azw3 Normal file

Binary file not shown.

ebooks/test_nl.txt Normal file

@@ -0,0 +1 @@
Dit is de test op basis van het resultaat van de conversie van een tekstbestand naar een audioboek.

BIN
ebooks/test_pl.azw3 Normal file

Binary file not shown.

ebooks/test_pl.txt Normal file

@@ -0,0 +1 @@
To jest test wyniku konwersji pliku tekstowego na audiobook.

BIN
ebooks/test_pt.azw3 Normal file

Binary file not shown.

ebooks/test_pt.txt Normal file

@@ -0,0 +1 @@
Este é o teste do resultado da conversão de ficheiro de texto em audiolivro.

BIN
ebooks/test_ro.azw3 Normal file

Binary file not shown.

ebooks/test_ro.txt Normal file

@@ -0,0 +1 @@
Acesta este testul de la rezultatul conversiei fișierului text la cartea audio.

BIN
ebooks/test_ru.azw3 Normal file

Binary file not shown.

ebooks/test_ru.txt Normal file

@@ -0,0 +1 @@
Это тест результата конвертации текстового файла в аудиокнигу.

BIN
ebooks/test_sl.azw3 Normal file

Binary file not shown.

ebooks/test_sl.txt Normal file

@@ -0,0 +1 @@
To je preizkus rezultata pretvorbe besedilne datoteke v zvočno knjigo.

BIN
ebooks/test_sv.azw3 Normal file

Binary file not shown.

ebooks/test_sv.txt Normal file

@@ -0,0 +1 @@
Detta är testet från resultatet av konvertering av textfil till ljudbok.

BIN
ebooks/test_zh.azw3 Normal file

Binary file not shown.

ebooks/test_zh.txt Normal file

@@ -0,0 +1 @@
这是从文本文件到有声读物的转换结果进行的测试。


@@ -206,21 +206,18 @@ def download_and_extract(path_or_url, extract_to=models_dir):
except Exception as e:
raise DependencyError(e)
def load_spacy_model(language):
model_name = f"{language}_core_web_sm"
try:
nltk.data.find('tokenizers/punkt_tab')
except LookupError:
print("Downloading NLTK punkt tokenizer...")
nltk.download('punkt_tab')
if not is_package(model_name):
try:
print(f"Downloading model: {model_name}")
download_package(model_name) # Download the model if not installed
except Exception as e:
print(f"Error downloading model {model_name}: {e}")
return None
return spacy.load(model_name)
#def load_spacy_model(language):
# lang_pack = language_mapping[language]["model"]
# try:
# nlp = spacy.load(lang_pack)
# except OSError:
#        print("Spacy model not found. Trying to download it...")
# try:
# subprocess.run(["python", "-m", "spacy", "download", lang_pack])
# nlp = spacy.load(lang_pack)
# except OSError:
# raise ValueError(f"Spacy model does not exist for {language_mapping[language]['name']}...")
# return nlp
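
The old punkt/spaCy bootstrap above is dropped and an alternative loader is left commented out. A working sketch of that commented approach, assuming the per-language spaCy package name is stored under the "model" key of language_mapping as shown in the lib/lang.py diff below:

    import subprocess
    import sys

    import spacy
    from lib.lang import language_mapping

    def load_spacy_model(language):
        lang_pack = language_mapping[language]["model"]   # e.g. "en_core_web_sm"
        try:
            return spacy.load(lang_pack)
        except OSError:
            print("spaCy model not found. Trying to download it...")
            subprocess.run([sys.executable, "-m", "spacy", "download", lang_pack], check=True)
            return spacy.load(lang_pack)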
def translate_pronouns(language):
global ebook_pronouns
@@ -385,7 +382,7 @@ def concat_audio_chapters(metadatas, cover_file):
languages = metadatas.get('Languages', None)
if languages:
ffmpeg_metadata += f"language={languages}\n\n" # Language
ffmpeg_metadata += f"language={languages}\n\n"
else:
print("Warning: metadatas is None. Skipping metadata generation.")
@@ -676,10 +673,11 @@ def split_long_sentence(sentence, language='en', max_pauses=10):
:return: A list of sentence parts that meet the criteria.
"""
# Get the max character length for the selected language, minus 2
max_length = (char_limits.get(language, 250)-2)
char_limits = language_mapping[language]["char_limit"]
max_length = (char_limits - 2)
# Adjust the pause punctuation symbols based on language
if language == 'zh-cn':
if language == 'zh':
punctuation = ['', '', '', '', ''] # Chinese-specific pause punctuation including sentence-ending marks
elif language == 'ja':
punctuation = ['', '', '', '', ''] # Japanese-specific pause punctuation
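
With the per-language limit now read from language_mapping, the fragment length cap is derived like this (values taken from the lib/lang.py diff below):

    from lib.lang import language_mapping

    # e.g. Japanese: char_limit is 71, so fragments are capped at 69 characters
    char_limit = language_mapping["ja"]["char_limit"]
    max_length = char_limit - 2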
@@ -724,7 +722,7 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
target_voice_file = default_target_voice_file
# Handle custom model or use standard TTS model
print("Loading model...")
print("Loading TTS ...")
if custom_model:
config_path = custom_model['config']
model_path = custom_model['model']
@@ -732,22 +730,22 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
config = XttsConfig()
config.models_dir = models_dir
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
else:
#selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
#model = TTS(selected_tts_model, progress_bar=False).to(device)
base_dir = os.path.join(models_dir,"tts_models--multilingual--multi-dataset--xtts_v2")
#tts = TTS(selected_tts_model, progress_bar=False).to(device)
base_dir = os.path.join(models_dir,"XTTS-v2")
config_path = os.path.join(base_dir,"config.json")
config = XttsConfig()
config.models_dir = models_dir
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=base_dir)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_dir=base_dir)
model.to(device)
tts.to(device)
print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[target_voice_file])
gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[target_voice_file])
chapters_dir_audio_fragments = os.path.join(ebook_chapters_audio_dir, "fragments")
os.makedirs(chapters_dir_audio_fragments, exist_ok=True)
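
For reference, a condensed sketch of the default XTTS loading path as renamed here (model → tts), pointing at the new models/XTTS-v2 submodule; models_dir, device and the reference voice path are placeholders standing in for values functions.py already holds:

    import os

    import torch
    from TTS.tts.configs.xtts_config import XttsConfig
    from TTS.tts.models.xtts import Xtts

    models_dir = "models"                                        # placeholder; real value comes from lib.conf
    device = "cuda" if torch.cuda.is_available() else "cpu"
    target_voice_file = "voice_sample.wav"                       # placeholder reference clip

    base_dir = os.path.join(models_dir, "XTTS-v2")               # the new submodule checkout
    config = XttsConfig()
    config.models_dir = models_dir
    config.load_json(os.path.join(base_dir, "config.json"))

    tts = Xtts.init_from_config(config)
    tts.load_checkpoint(config, checkpoint_dir=base_dir)
    tts.to(device)

    print("Computing speaker latents...")
    gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[target_voice_file])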
@@ -755,35 +753,37 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
# Calculate the total number of chapters and segments (fragments) to set progress bar correctly
total_segments = 0
total_chapters = len([f for f in os.listdir(ebook_chapters_dir) if f.endswith('.txt')])
# Check if the language is nltk compatible
nltk_language = language_mapping[language]["name"].lower() if language_mapping[language] else None
# Pre-calculate total segments (sentences + fragments per chapter)
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if chapter_file.endswith('.txt'):
with open(os.path.join(ebook_chapters_dir, chapter_file), 'r', encoding='utf-8') as file:
chapter_text = file.read()
nltk_language = language_mapping.get(language)
if nltk_language:
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
else:
sentences = [chapter_text]
#if nltk_language is not None:
#sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
#else:
sentences = [chapter_text]
# Calculate total fragments for this chapter
for sentence in sentences:
fragments = split_long_sentence(sentence, language=language)
total_segments += len(fragments)
# Initialize progress tracking
current_progress = 0
total_progress = total_segments + total_chapters # Total is chapters + segments/fragments
with tqdm(total=total_progress, desc="Processing 0.00%", bar_format='{desc}: {n_fmt}/{total_fmt} ', unit="step") as t:
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if chapter_file.endswith('.txt'):
@@ -800,30 +800,29 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
with open(chapter_file_path, 'r', encoding='utf-8') as file:
chapter_text = file.read()
nltk_language = language_mapping.get(language)
if nltk_language:
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
else:
sentences = [chapter_text]
#if nltk_language is not None:
# sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
#else:
sentences = [chapter_text]
for sentence in sentences:
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
fragments = split_long_sentence(sentence, language=language)
for fragment in fragments:
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if fragment != "":
print(f"Generating fragment: {fragment}...")
fragment_file_path = os.path.join(chapters_dir_audio_fragments, f"{count_fragments}.wav")
#if custom_model:
out = model.inference(
#if custom_tts:
out = tts.inference(
fragment, language, gpt_cond_latent, speaker_embedding,
temperature=temperature, repetition_penalty=repetition_penalty,
top_k=top_k, top_p=top_p, speed=speed, enable_text_splitting=enable_text_splitting
@@ -876,11 +875,11 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
except Exception as e:
raise DependencyError(e)
def stop_and_detach_tts(model):
# Move the model to CPU if on GPU
if next(model.parameters()).is_cuda:
model.to('cpu')
del model
def stop_and_detach_tts(tts):
# Move the tts to CPU if on GPU
if next(tts.parameters()).is_cuda:
tts.to('cpu')
del tts
if torch.cuda.is_available():
torch.cuda.empty_cache()
@@ -913,10 +912,6 @@ def convert_ebook(args):
device = args.device.lower()
target_voice_file = args.voice
language = args.language
use_custom_model = args.use_custom_model
custom_model_file = args.custom_model
custom_config_file = args.custom_config
custom_vocab_file = args.custom_vocab
temperature = args.temperature
length_penalty = args.length_penalty
repetition_penalty = args.repetition_penalty
@@ -924,6 +919,7 @@ def convert_ebook(args):
top_p = args.top_p
speed = args.speed
enable_text_splitting = args.enable_text_splitting
custom_model_file = args.custom_model
custom_model_url = args.custom_model_url
if not os.path.splitext(ebook_src)[1]:
@@ -951,8 +947,7 @@ def convert_ebook(args):
ebook_pronouns = translate_pronouns(language)
# Load spaCy model for language analysis (you can switch models based on language)
nlp = load_spacy_model(language)
#if load_spacy_model(language):
# Prepare tmp dir and properties
if prepare_dirs(args.ebook) :
@@ -961,7 +956,7 @@ def convert_ebook(args):
# Handle custom model if the user chose to use one
custom_model = None
if use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
if custom_model_file and custom_config_file and custom_vocab_file:
custom_model = {
'model': custom_model_file,
'config': custom_config_file,
@@ -969,7 +964,7 @@ def convert_ebook(args):
}
# If a custom model URL is provided, download and use it
if use_custom_model and custom_model_url:
if custom_model_url:
print(f"Received custom model URL: {custom_model_url}")
model_dir = get_model_dir_from_url(custom_model_url)
if download_and_extract(custom_model_url, model_dir):
@@ -1007,6 +1002,8 @@ def convert_ebook(args):
raise DependencyError("convert_chapters_to_audio() failed!")
else:
return None, None
#else:
# return None, None
else:
print(f"Temporary directory {tmp_dir} not removed due to failure.")
return None, None
@@ -1017,12 +1014,14 @@ def convert_ebook(args):
def web_interface(mode, share):
global ebook_src, is_converting, interface, cancellation_requested, is_gui_process, script_mode, is_gui_shared, audiobooks_ddn
script_mode = mode
is_gui_process = True
is_gui_shared = share
audiobook_file = None
language_options = [details["native_name"] for details in language_mapping.values()]
default_language_native = language_mapping[default_language_code]["native_name"]
theme = gr.themes.Origin(
primary_hue="amber",
secondary_hue="green",
@@ -1042,8 +1041,11 @@ def web_interface(mode, share):
gr.HTML(
"""
<style>
input[type="checkbox"] {
border-color: #fafafa !Important;
}
.svelte-1xyfx7i.center.boundedheight.flex{
height: 110px !important;
height: 120px !important;
}
.block.svelte-5y6bt2 {
padding: 10px !important;
@@ -1056,6 +1058,10 @@ def web_interface(mode, share):
margin: 0 !important;
font-size: 12px !important;
}
.block.svelte-5y6bt2.padded {
height: auto !important;
padding: 10px !important;
}
.block.svelte-5y6bt2.padded.hide-container {
height: auto !important;
padding: 0 !important;
@@ -1074,25 +1080,25 @@ def web_interface(mode, share):
padding: 0;
margin: 0;
}
#component-8, #component-9, #component-34 {
height: 119px !important;
}
</style>
"""
)
with gr.Tabs(): # Create tabs for better UI organization
with gr.Tabs():
with gr.TabItem("Input Options"):
with gr.Row():
with gr.Column(scale=3):
ebook_file = gr.File(label="eBook File")
target_voice_file = gr.File(label="Target Voice File (Optional)")
language = gr.Dropdown(label="Language", choices=language_options, value="en")
device = gr.Radio(label="Processor Unit", choices=["CPU", "GPU"], value="CPU")
language = gr.Dropdown(label="Language", choices=language_options, value=default_language_native)
with gr.Column(scale=3):
use_custom_model = gr.Checkbox(label="Use Custom Model")
custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)
with gr.Group():
target_voice_file = gr.File(label="Cloning Voice* (a .wav or .mp3, no longer than 12 seconds)")
custom_model_file = gr.File(label="Model* (a .zip containing config.json, vocab.json, model.pth)")
custom_model_url = gr.Textbox(placeholder="https://www.example.com/model.zip", label="Model from URL*")
gr.Markdown('<p>* Optional</p>')
with gr.TabItem("Audio Generation Preferences"):
gr.Markdown(
"""
@@ -1154,9 +1160,8 @@ def web_interface(mode, share):
info="Splits long texts into sentences to generate audio in chunks. Useful for very long inputs."
)
session_status = gr.Textbox(label="Session")
session = gr.Textbox(label="Session", visible=False)
session_status = gr.Textbox(label="Session")
session = gr.Textbox(label="Session", visible=False)
conversion_progress = gr.Textbox(label="Progress")
convert_btn = gr.Button("Convert", variant="primary", interactive=False)
audio_player = gr.Audio(label="Listen", type="filepath", visible=False)
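
The web UI now lists languages by native name and maps the selection back to its code before conversion; a minimal sketch of that round trip (the Dropdown usage matches the diff, the rest is illustrative):

    import gradio as gr
    from lib.lang import language_mapping, default_language_code

    language_options = [details["native_name"] for details in language_mapping.values()]
    default_language_native = language_mapping[default_language_code]["native_name"]

    language = gr.Dropdown(label="Language", choices=language_options, value=default_language_native)

    # Later, process_conversion() translates the native name back to its code
    selected = "Français"
    language_code = next(
        (code for code, details in language_mapping.items() if details["native_name"] == selected),
        None,
    )  # -> "fr"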
@@ -1231,11 +1236,20 @@ def web_interface(mode, share):
return gr.Button("Convert", variant="primary", interactive=False), None, audiobook_file, update_audiobooks_ddn()
def refresh_audiobook_list():
if not os.path.isdir(audiobooks_dir):
os.makedirs(audiobooks_dir, exist_ok=True)
files = [f for f in os.listdir(audiobooks_dir)]
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
files = []
if audiobooks_dir is not None:
if not os.path.isdir(audiobooks_dir):
os.makedirs(audiobooks_dir, exist_ok=True)
files = [f for f in os.listdir(audiobooks_dir)]
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
return files
def update_audiobook_link(audiobook):
if audiobooks_dir is not None:
if audiobook:
link = os.path.join(audiobooks_dir, audiobook)
return link, link, gr.update(visible=True)
return None, None, gr.update(visible=False)
def disable_convert_btn():
return gr.Button("Convert", variant="primary", interactive=False)
@@ -1244,13 +1258,6 @@ def web_interface(mode, share):
files = refresh_audiobook_list()
return gr.Dropdown(choices=files, label="Audiobooks", value=files[0] if files else None)
def update_audiobook_link(audiobook):
if audiobook:
link = os.path.join(audiobooks_dir, audiobook)
return link, link, gr.update(visible=True)
else:
return None, None, gr.update(visible=False)
def change_ebook_file(btn, f):
global ebook_src, is_converting, cancellation_requested
if f is None:
@@ -1269,13 +1276,13 @@ def web_interface(mode, share):
data["event"] = 'change_data'
return data
def process_conversion(session, device, ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
def process_conversion(session, device, ebook_file, target_voice_file, language, custom_model_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
global ebook_src, is_converting, audiobook_file
ebook_src = ebook_file.name if ebook_file else None
target_voice_file = target_voice_file.name if target_voice_file else None
custom_model_file = custom_model_file.name if custom_model_file else None
custom_config_file = custom_config_file.name if custom_config_file else None
custom_vocab_file = custom_vocab_file.name if custom_vocab_file else None
language = next((code for code, details in language_mapping.items() if details["native_name"] == language), None)
if not ebook_src:
return "Error: eBook file is required."
@@ -1288,10 +1295,7 @@ def web_interface(mode, share):
ebook=ebook_src,
voice=target_voice_file,
language=language,
use_custom_model=use_custom_model,
custom_model=custom_model_file,
custom_config=custom_config_file,
custom_vocab=custom_vocab_file,
custom_model_url=custom_model_url,
temperature=float(temperature),
length_penalty=float(length_penalty),
@@ -1339,11 +1343,6 @@ def web_interface(mode, share):
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
return [data, f"{warning_text}{warning_text_extra}", data["session_id"], update_audiobooks_ddn()]
use_custom_model.change(
lambda x: [gr.update(visible=x)] * 4,
inputs=[use_custom_model],
outputs=[custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]
)
ebook_file.change(
fn=change_ebook_file,
inputs=[convert_btn, ebook_file],
@@ -1383,9 +1382,8 @@ def web_interface(mode, share):
fn=process_conversion,
inputs=[
session, device, ebook_file, target_voice_file, language,
use_custom_model, custom_model_file, custom_config_file,
custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty,
top_k, top_p, speed, enable_text_splitting
custom_model_file, custom_model_url, temperature, length_penalty,
repetition_penalty, top_k, top_p, speed, enable_text_splitting
],
outputs=[conversion_progress, modal_html]
).then(


@@ -1,48 +1,33 @@
import os
language_options = [
"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"
]
char_limits = {
"en": 250, # English
"es": 239, # Spanish
"fr": 273, # French
"de": 253, # German
"it": 213, # Italian
"pt": 203, # Portuguese
"pl": 224, # Polish
"tr": 226, # Turkish
"ru": 182, # Russian
"nl": 251, # Dutch
"cs": 186, # Czech
"ar": 166, # Arabic
"zh-cn": 82, # Chinese (Simplified)
"ja": 71, # Japanese
"hu": 224, # Hungarian
"ko": 95, # Korean
}
# Mapping of language codes to NLTK's supported language names
language_mapping = {
"en": "english",
"de": "german",
"fr": "french",
"es": "spanish",
"it": "italian",
"pt": "portuguese",
"nl": "dutch",
"pl": "polish",
"cs": "czech",
"ru": "russian",
"tr": "turkish",
"el": "greek",
"et": "estonian",
"no": "norwegian",
"ml": "malayalam",
"sl": "slovene",
"da": "danish",
"fi": "finnish",
"sv": "swedish"
"ar": {"name": "Arabic", "native_name": "العربية", "char_limit": 166, "model": "ar_core_news_sm"},
"cs": {"name": "Czech", "native_name": "Čeština", "char_limit": 186, "model": "cs_core_news_sm"},
"da": {"name": "Danish", "native_name": "Dansk", "char_limit": 220, "model": "da_core_news_sm"},
"de": {"name": "German", "native_name": "Deutsch", "char_limit": 253, "model": "de_core_news_sm"},
"el": {"name": "Greek", "native_name": "Ελληνικά", "char_limit": 200, "model": "el_core_news_sm"},
"en": {"name": "English", "native_name": "English", "char_limit": 250, "model": "en_core_web_sm"},
"es": {"name": "Spanish", "native_name": "Español", "char_limit": 239, "model": "es_core_news_md"},
"fa": {"name": "Persian", "native_name": "فارسی", "char_limit": 150, "model": "???"},
"fi": {"name": "Finnish", "native_name": "Suomi", "char_limit": 230, "model": "fi_core_news_sm"},
"fr": {"name": "French", "native_name": "Français", "char_limit": 273, "model": "fr_core_news_sm"},
"hi": {"name": "Hindi", "native_name": "हिंदी", "char_limit": 220, "model": "???"},
"hr": {"name": "Croatian", "native_name": "Hrvatski", "char_limit": 210, "model": "hr_core_news_sm"},
"it": {"name": "Italian", "native_name": "Italiano", "char_limit": 213, "model": "it_core_news_sm"},
"ja": {"name": "Japanese", "native_name": "日本語", "char_limit": 71, "model": "ja_core_news_sm"},
"ko": {"name": "Korean", "native_name": "한국어", "char_limit": 95, "model": "ko_core_news_sm"},
"nb": {"name": "Norwegian", "native_name": "Norsk Bokmål", "char_limit": 225, "model": "nb_core_news_sm"},
"nl": {"name": "Dutch", "native_name": "Nederlands", "char_limit": 251, "model": "nl_core_news_sm"},
"pl": {"name": "Polish", "native_name": "Polski", "char_limit": 224, "model": "pl_core_news_sm"},
"pt": {"name": "Portuguese", "native_name": "Português", "char_limit": 203, "model": "pt_core_news_sm"},
"ro": {"name": "Romanian", "native_name": "Română", "char_limit": 190, "model": "ro_core_news_sm"},
"ru": {"name": "Russian", "native_name": "Русский", "char_limit": 182, "model": "ru_core_news_sm"},
"sl": {"name": "Slovenian", "native_name": "Slovenščina", "char_limit": 210, "model": "sl_core_news_sm"},
"sv": {"name": "Swedish", "native_name": "Svenska", "char_limit": 215, "model": "sv_core_news_sm"},
"tr": {"name": "Turkish", "native_name": "Türkçe", "char_limit": 200, "model": "???"},
"vi": {"name": "Vietnamese", "native_name": "Tiếng Việt", "char_limit": 180, "model": "???"},
"yo": {"name": "Yoruba", "native_name": "Yorùbá", "char_limit": 180, "model": "???"},
"zh": {"name": "Chinese", "native_name": "中文", "char_limit": 82, "model": "zh_core_web_sm"}
}
default_language_code = "en"
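
Each entry now bundles the display names, the XTTS character limit and the spaCy package in one place; a quick lookup sketch against the table above:

    from lib.lang import language_mapping

    entry = language_mapping["de"]
    print(entry["name"])         # German
    print(entry["native_name"])  # Deutsch
    print(entry["char_limit"])   # 253
    print(entry["model"])        # de_core_news_sm

    # The CLI --language help text is built from the keys
    print(", ".join(language_mapping.keys()))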

models/XTTS-v2 Submodule

Submodule models/XTTS-v2 added at 6c2b0d75ea


@@ -13,14 +13,20 @@ authors = [
]
dependencies = [
"beautifulsoup4",
"camel-tools",
"coqui-tts",
"ebooklib",
"cutlet",
"docker",
"ebooklib",
"gensim",
"gradio>=4.44.0",
"jieba",
"mecab",
"mecab-python3",
"nltk>=3.8.2",
"pydub",
"pypinyin",
"sentencepiece",
"translate",
"tqdm",
"unidic",


@@ -1,12 +1,18 @@
beautifulsoup4
camel-tools
coqui-tts
ebooklib
cutlet
docker
ebooklib
gensim
gradio>=4.44.0
jieba
mecab
mecab-python3
nltk>=3.8.2
pydub
pypinyin
sentencepiece
translate
tqdm
unidic

models/.gitkeep → voices/adult/female/zh/.gitkeep Executable file → Normal file