mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-09 13:58:14 -05:00
Merge pull request #43 from ROBERT-MCDOWELL/main
pull attempt into v2.0
This commit is contained in:
48
app.py
48
app.py
@@ -6,9 +6,10 @@ import subprocess
|
||||
import sys
|
||||
|
||||
from lib.conf import *
|
||||
from lib.lang import language_options, default_language_code
|
||||
from lib.lang import language_mapping, default_language_code
|
||||
|
||||
script_mode = NATIVE
|
||||
share = False
|
||||
|
||||
def check_python_version():
|
||||
current_version = sys.version_info[:2] # (major, minor)
|
||||
@@ -99,10 +100,10 @@ def is_port_in_use(port):
|
||||
return s.connect_ex(('0.0.0.0', port)) == 0
|
||||
|
||||
def main():
|
||||
global script_mode, ebooks_dir
|
||||
global script_mode, share, ebooks_dir
|
||||
|
||||
# Convert the list of languages to a string to display in the help text
|
||||
language_options_str = ", ".join(language_options)
|
||||
lang_list_str = ", ".join(list(language_mapping.keys()))
|
||||
|
||||
# Argument parser to handle optional parameters with descriptions
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -111,12 +112,12 @@ def main():
|
||||
Example usage:
|
||||
Windows:
|
||||
headless:
|
||||
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
|
||||
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
|
||||
Graphic Interface:
|
||||
ebook2audiobook.cmd
|
||||
Linux/Mac:
|
||||
headless:
|
||||
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
|
||||
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
|
||||
Graphic Interface:
|
||||
./ebook2audiobook.sh
|
||||
""",
|
||||
@@ -124,8 +125,8 @@ Linux/Mac:
|
||||
)
|
||||
options = [
|
||||
"--script_mode", "--share", "--headless", "--ebook", "--ebooks_dir",
|
||||
"--voice", "--language", "--device", "--use_custom_model", "--custom_model",
|
||||
"--custom_config", "--custom_vocab", "--custom_model_url", "--temperature",
|
||||
"--voice", "--language", "--device", "--custom_model",
|
||||
"--custom_model_url", "--temperature",
|
||||
"--length_penalty", "--repetition_penalty", "--top_k", "--top_p", "--speed",
|
||||
"--enable_text_splitting", "--version"
|
||||
]
|
||||
@@ -142,37 +143,31 @@ Linux/Mac:
|
||||
parser.add_argument(options[5], type=str,
|
||||
help="Path to the target voice file for TTS. Optional, uses a default voice if not provided.")
|
||||
parser.add_argument(options[6], type=str, default="en",
|
||||
help=f"Language for the audiobook conversion. Options: {language_options_str}. Defaults to English (en).")
|
||||
help=f"Language for the audiobook conversion. Options: {lang_list_str}. Defaults to English (en).")
|
||||
parser.add_argument(options[7], type=str, default="cpu", choices=["cpu", "gpu"],
|
||||
help=f"Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.")
|
||||
parser.add_argument(options[8], action="store_true",
|
||||
help="Use a custom TTS model. Defaults to False. Must be True to use custom models.")
|
||||
parser.add_argument(options[9], type=str,
|
||||
parser.add_argument(options[8], type=str,
|
||||
help="Path to the custom model file (.pth). Required if using a custom model.")
|
||||
parser.add_argument(options[10], type=str,
|
||||
help="Path to the custom config file (config.json). Required if using a custom model.")
|
||||
parser.add_argument(options[11], type=str,
|
||||
help="Path to the custom vocab file (vocab.json). Required if using a custom model.")
|
||||
parser.add_argument(options[12], type=str,
|
||||
parser.add_argument(options[9], type=str,
|
||||
help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
|
||||
"Examples include David Attenborough's model: "
|
||||
"'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
|
||||
"More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
|
||||
parser.add_argument(options[13], type=float, default=0.65,
|
||||
parser.add_argument(options[10], type=float, default=0.65,
|
||||
help="Temperature for the model. Defaults to 0.65. Higher temperatures lead to more creative outputs.")
|
||||
parser.add_argument(options[14], type=float, default=1.0,
|
||||
parser.add_argument(options[11], type=float, default=1.0,
|
||||
help="A length penalty applied to the autoregressive decoder. Defaults to 1.0. Not applied to custom models.")
|
||||
parser.add_argument(options[15], type=float, default=2.0,
|
||||
parser.add_argument(options[12], type=float, default=2.0,
|
||||
help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
|
||||
parser.add_argument(options[16], type=int, default=50,
|
||||
parser.add_argument(options[13], type=int, default=50,
|
||||
help="Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 50.")
|
||||
parser.add_argument(options[17], type=float, default=0.8,
|
||||
parser.add_argument(options[14], type=float, default=0.8,
|
||||
help="Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 0.8.")
|
||||
parser.add_argument(options[18], type=float, default=1.0,
|
||||
parser.add_argument(options[15], type=float, default=1.0,
|
||||
help="Speed factor for the speech generation. Defaults to 1.0.")
|
||||
parser.add_argument(options[19], action="store_true",
|
||||
parser.add_argument(options[16], action="store_true",
|
||||
help="Enable splitting text into sentences. Defaults to False.")
|
||||
parser.add_argument(options[20], action="version",version=f"ebook2audiobook version {version}",
|
||||
parser.add_argument(options[17], action="version",version=f"ebook2audiobook version {version}",
|
||||
help="Show the version of the script and exit")
|
||||
|
||||
for arg in sys.argv:
|
||||
@@ -188,6 +183,7 @@ Linux/Mac:
|
||||
sys.exit(1)
|
||||
|
||||
script_mode = args.script_mode if args.script_mode else script_mode
|
||||
share = args.share if args.share else share
|
||||
|
||||
if script_mode == NATIVE:
|
||||
check_pkg = check_and_install_requirements(requirements_file)
|
||||
@@ -236,11 +232,13 @@ Linux/Mac:
|
||||
else:
|
||||
print(f"Error: The directory {ebooks_dir} does not exist.")
|
||||
sys.exit(1)
|
||||
|
||||
elif args.ebook:
|
||||
progress_status, audiobook_file = convert_ebook(args)
|
||||
if audiobook_file is None:
|
||||
print(f"Conversion failed: {progress_status}")
|
||||
sys.exit(1)
|
||||
|
||||
else:
|
||||
print("Error: In headless mode, you must specify either an ebook file using --ebook or an ebook directory using --ebooks_dir.")
|
||||
sys.exit(1)
|
||||
@@ -249,7 +247,7 @@ Linux/Mac:
|
||||
allowed_arguments = {'--share', '--script_mode'}
|
||||
passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
|
||||
if passed_args_set.issubset(allowed_arguments):
|
||||
web_interface(args.script_mode, args.share)
|
||||
web_interface(script_mode, share)
|
||||
else:
|
||||
print("Error: In non-headless mode, no option or only '--share' can be passed")
|
||||
sys.exit(1)
|
||||
|
||||
@@ -37,7 +37,7 @@ if not exist "%CALIBRE_TEMP_DIR%" (
|
||||
|
||||
icacls "%CALIBRE_TEMP_DIR%" /grant Users:(OI)(CI)F /T
|
||||
|
||||
for %%A in (%*) do (
|
||||
for %%A in (%ARGS%) do (
|
||||
if "%%A"=="%DOCKER_UTILS%" (
|
||||
set "SCRIPT_MODE=%DOCKER_UTILS%"
|
||||
break
|
||||
@@ -228,7 +228,7 @@ if not "%DOCKER_BUILD_STATUS%"=="0" (
|
||||
net session >nul 2>&1
|
||||
if %errorlevel% equ 0 (
|
||||
echo Restarting in user mode...
|
||||
start "" /b cmd /c "%~f0" %*
|
||||
start "" /b cmd /c "%~f0" %ARGS%
|
||||
exit /b
|
||||
)
|
||||
goto dispatch
|
||||
@@ -269,7 +269,7 @@ if "%SCRIPT_MODE%"=="%FULL_DOCKER%" (
|
||||
call conda create --prefix %SCRIPT_DIR%\%PYTHON_ENV% python=%PYTHON_VERSION% -y
|
||||
call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
|
||||
call python -m pip install --upgrade pip
|
||||
call python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
|
||||
call python -m pip install --upgrade -r requirements.txt
|
||||
call python -m unidic download
|
||||
call python -m spacy download en_core_web_sm
|
||||
call python -m nltk.downloader punkt_tab
|
||||
|
||||
@@ -212,7 +212,7 @@ function conda_check {
|
||||
source $CONDA_ENV
|
||||
conda activate $SCRIPT_DIR/$PYTHON_ENV
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
|
||||
python -m pip install --upgrade -r requirements.txt
|
||||
python -m unidic download
|
||||
python -m spacy download en_core_web_sm
|
||||
python -m nltk.downloader punkt_tab
|
||||
|
||||
Binary file not shown.
Binary file not shown.
BIN
ebooks/test3.pdf
BIN
ebooks/test3.pdf
Binary file not shown.
@@ -1 +0,0 @@
|
||||
this is the test four from the result of text file to audiobook conversion.
|
||||
BIN
ebooks/test_ar.azw3
Normal file
BIN
ebooks/test_ar.azw3
Normal file
Binary file not shown.
1
ebooks/test_ar.txt
Normal file
1
ebooks/test_ar.txt
Normal file
@@ -0,0 +1 @@
|
||||
هذا هو الاختبار من نتيجة تحويل ملف نصي إلى كتاب صوتي.
|
||||
BIN
ebooks/test_cs.azw3
Normal file
BIN
ebooks/test_cs.azw3
Normal file
Binary file not shown.
1
ebooks/test_cs.txt
Normal file
1
ebooks/test_cs.txt
Normal file
@@ -0,0 +1 @@
|
||||
Toto je test od výsledku převodu textového souboru na audioknihu.
|
||||
BIN
ebooks/test_da.azw3
Normal file
BIN
ebooks/test_da.azw3
Normal file
Binary file not shown.
1
ebooks/test_da.txt
Normal file
1
ebooks/test_da.txt
Normal file
@@ -0,0 +1 @@
|
||||
Dette er testen fra resultatet af konvertering af tekstfil til lydbog.
|
||||
BIN
ebooks/test_de.azw3
Normal file
BIN
ebooks/test_de.azw3
Normal file
Binary file not shown.
1
ebooks/test_de.txt
Normal file
1
ebooks/test_de.txt
Normal file
@@ -0,0 +1 @@
|
||||
Dies ist der Test des Ergebnisses der Konvertierung einer Textdatei in ein Hörbuch.
|
||||
BIN
ebooks/test_el.azw3
Normal file
BIN
ebooks/test_el.azw3
Normal file
Binary file not shown.
1
ebooks/test_el.txt
Normal file
1
ebooks/test_el.txt
Normal file
@@ -0,0 +1 @@
|
||||
Αυτή είναι η δοκιμή από το αποτέλεσμα της μετατροπής αρχείου κειμένου σε ηχητικό βιβλίο.
|
||||
BIN
ebooks/test_en.azw3
Normal file
BIN
ebooks/test_en.azw3
Normal file
Binary file not shown.
1
ebooks/test_en.txt
Normal file
1
ebooks/test_en.txt
Normal file
@@ -0,0 +1 @@
|
||||
This is the test from the result of text file to audiobook conversion.
|
||||
BIN
ebooks/test_es.azw3
Normal file
BIN
ebooks/test_es.azw3
Normal file
Binary file not shown.
1
ebooks/test_es.txt
Normal file
1
ebooks/test_es.txt
Normal file
@@ -0,0 +1 @@
|
||||
Esta es la prueba del resultado de la conversión de archivo de texto a audiolibro.
|
||||
BIN
ebooks/test_fi.azw3
Normal file
BIN
ebooks/test_fi.azw3
Normal file
Binary file not shown.
1
ebooks/test_fi.txt
Normal file
1
ebooks/test_fi.txt
Normal file
@@ -0,0 +1 @@
|
||||
Tämä on testi tekstitiedoston muuntamisen tuloksesta äänikirjaksi.
|
||||
BIN
ebooks/test_fr.azw3
Normal file
BIN
ebooks/test_fr.azw3
Normal file
Binary file not shown.
1
ebooks/test_fr.txt
Normal file
1
ebooks/test_fr.txt
Normal file
@@ -0,0 +1 @@
|
||||
Ceci est le test provenant d'un fichier text en livre audio.
|
||||
BIN
ebooks/test_hr.azw3
Normal file
BIN
ebooks/test_hr.azw3
Normal file
Binary file not shown.
1
ebooks/test_hr.txt
Normal file
1
ebooks/test_hr.txt
Normal file
@@ -0,0 +1 @@
|
||||
Ovo je test rezultata pretvorbe tekstualne datoteke u audioknjigu.
|
||||
BIN
ebooks/test_it.azw3
Normal file
BIN
ebooks/test_it.azw3
Normal file
Binary file not shown.
1
ebooks/test_it.txt
Normal file
1
ebooks/test_it.txt
Normal file
@@ -0,0 +1 @@
|
||||
Questo è il test del risultato della conversione del file di testo in audiolibro.
|
||||
BIN
ebooks/test_ja.azw3
Normal file
BIN
ebooks/test_ja.azw3
Normal file
Binary file not shown.
1
ebooks/test_ja.txt
Normal file
1
ebooks/test_ja.txt
Normal file
@@ -0,0 +1 @@
|
||||
これは、テキスト ファイルからオーディオブックへの変換結果のテストです。
|
||||
BIN
ebooks/test_ko.azw3
Normal file
BIN
ebooks/test_ko.azw3
Normal file
Binary file not shown.
1
ebooks/test_ko.txt
Normal file
1
ebooks/test_ko.txt
Normal file
@@ -0,0 +1 @@
|
||||
이는 텍스트 파일을 오디오북으로 변환한 결과에 대한 테스트입니다.
|
||||
BIN
ebooks/test_nb.azw3
Normal file
BIN
ebooks/test_nb.azw3
Normal file
Binary file not shown.
1
ebooks/test_nb.txt
Normal file
1
ebooks/test_nb.txt
Normal file
@@ -0,0 +1 @@
|
||||
Dette er testen fra resultatet av konvertering av tekstfil til lydbok.
|
||||
BIN
ebooks/test_nl.azw3
Normal file
BIN
ebooks/test_nl.azw3
Normal file
Binary file not shown.
1
ebooks/test_nl.txt
Normal file
1
ebooks/test_nl.txt
Normal file
@@ -0,0 +1 @@
|
||||
Dit is de test op basis van het resultaat van de conversie van een tekstbestand naar een audioboek.
|
||||
BIN
ebooks/test_pl.azw3
Normal file
BIN
ebooks/test_pl.azw3
Normal file
Binary file not shown.
1
ebooks/test_pl.txt
Normal file
1
ebooks/test_pl.txt
Normal file
@@ -0,0 +1 @@
|
||||
To jest test wyniku konwersji pliku tekstowego na audiobook.
|
||||
BIN
ebooks/test_pt.azw3
Normal file
BIN
ebooks/test_pt.azw3
Normal file
Binary file not shown.
1
ebooks/test_pt.txt
Normal file
1
ebooks/test_pt.txt
Normal file
@@ -0,0 +1 @@
|
||||
Este é o teste do resultado da conversão de ficheiro de texto em audiolivro.
|
||||
BIN
ebooks/test_ro.azw3
Normal file
BIN
ebooks/test_ro.azw3
Normal file
Binary file not shown.
1
ebooks/test_ro.txt
Normal file
1
ebooks/test_ro.txt
Normal file
@@ -0,0 +1 @@
|
||||
Acesta este testul de la rezultatul conversiei fișierului text la cartea audio.
|
||||
BIN
ebooks/test_ru.azw3
Normal file
BIN
ebooks/test_ru.azw3
Normal file
Binary file not shown.
1
ebooks/test_ru.txt
Normal file
1
ebooks/test_ru.txt
Normal file
@@ -0,0 +1 @@
|
||||
Это тест результата конвертации текстового файла в аудиокнигу.
|
||||
BIN
ebooks/test_sl.azw3
Normal file
BIN
ebooks/test_sl.azw3
Normal file
Binary file not shown.
1
ebooks/test_sl.txt
Normal file
1
ebooks/test_sl.txt
Normal file
@@ -0,0 +1 @@
|
||||
To je preizkus rezultata pretvorbe besedilne datoteke v zvočno knjigo.
|
||||
BIN
ebooks/test_sv.azw3
Normal file
BIN
ebooks/test_sv.azw3
Normal file
Binary file not shown.
1
ebooks/test_sv.txt
Normal file
1
ebooks/test_sv.txt
Normal file
@@ -0,0 +1 @@
|
||||
Detta är testet från resultatet av konvertering av textfil till ljudbok.
|
||||
BIN
ebooks/test_zh.azw3
Normal file
BIN
ebooks/test_zh.azw3
Normal file
Binary file not shown.
1
ebooks/test_zh.txt
Normal file
1
ebooks/test_zh.txt
Normal file
@@ -0,0 +1 @@
|
||||
这是从文本文件到有声读物的转换结果进行的测试。
|
||||
198
lib/functions.py
198
lib/functions.py
@@ -206,21 +206,18 @@ def download_and_extract(path_or_url, extract_to=models_dir):
|
||||
except Exception as e:
|
||||
raise DependencyError(e)
|
||||
|
||||
def load_spacy_model(language):
|
||||
model_name = f"{language}_core_web_sm"
|
||||
try:
|
||||
nltk.data.find('tokenizers/punkt_tab')
|
||||
except LookupError:
|
||||
print("Downloading NLTK punkt tokenizer...")
|
||||
nltk.download('punkt_tab')
|
||||
if not is_package(model_name):
|
||||
try:
|
||||
print(f"Downloading model: {model_name}")
|
||||
download_package(model_name) # Download the model if not installed
|
||||
except Exception as e:
|
||||
print(f"Error downloading model {model_name}: {e}")
|
||||
return None
|
||||
return spacy.load(model_name)
|
||||
#def load_spacy_model(language):
|
||||
# lang_pack = language_mapping[language]["model"]
|
||||
# try:
|
||||
# nlp = spacy.load(lang_pack)
|
||||
# except OSError:
|
||||
# print("Spacy model not found. Trying to download it...")
|
||||
# try:
|
||||
# subprocess.run(["python", "-m", "spacy", "download", lang_pack])
|
||||
# nlp = spacy.load(lang_pack)
|
||||
# except OSError:
|
||||
# raise ValueError(f"Spacy model does not exist for {language_mapping[language]['name']}...")
|
||||
# return nlp
|
||||
|
||||
def translate_pronouns(language):
|
||||
global ebook_pronouns
|
||||
@@ -385,7 +382,7 @@ def concat_audio_chapters(metadatas, cover_file):
|
||||
|
||||
languages = metadatas.get('Languages', None)
|
||||
if languages:
|
||||
ffmpeg_metadata += f"language={languages}\n\n" # Language
|
||||
ffmpeg_metadata += f"language={languages}\n\n"
|
||||
|
||||
else:
|
||||
print("Warning: metadatas is None. Skipping metadata generation.")
|
||||
@@ -676,10 +673,11 @@ def split_long_sentence(sentence, language='en', max_pauses=10):
|
||||
:return: A list of sentence parts that meet the criteria.
|
||||
"""
|
||||
# Get the max character length for the selected language, minus 2 (the previous lookup fell back to 250 - 2 = 248 for an unknown language; the language_mapping lookup has no fallback)
|
||||
max_length = (char_limits.get(language, 250)-2)
|
||||
char_limits = language_mapping[language]["char_limit"]
|
||||
max_length = (char_limits - 2)
|
||||
|
||||
# Adjust the pause punctuation symbols based on language
|
||||
if language == 'zh-cn':
|
||||
if language == 'zh':
|
||||
punctuation = [',', '。', ';', '?', '!'] # Chinese-specific pause punctuation including sentence-ending marks
|
||||
elif language == 'ja':
|
||||
punctuation = ['、', '。', ';', '?', '!'] # Japanese-specific pause punctuation
|
||||
@@ -724,7 +722,7 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
|
||||
target_voice_file = default_target_voice_file
|
||||
|
||||
# Handle custom model or use standard TTS model
|
||||
print("Loading model...")
|
||||
print("Loading TTS ...")
|
||||
if custom_model:
|
||||
config_path = custom_model['config']
|
||||
model_path = custom_model['model']
|
||||
@@ -732,22 +730,22 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
|
||||
config = XttsConfig()
|
||||
config.models_dir = models_dir
|
||||
config.load_json(config_path)
|
||||
model = Xtts.init_from_config(config)
|
||||
model.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
|
||||
tts = Xtts.init_from_config(config)
|
||||
tts.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
|
||||
else:
|
||||
#selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
|
||||
#model = TTS(selected_tts_model, progress_bar=False).to(device)
|
||||
base_dir = os.path.join(models_dir,"tts_models--multilingual--multi-dataset--xtts_v2")
|
||||
#tts = TTS(selected_tts_model, progress_bar=False).to(device)
|
||||
base_dir = os.path.join(models_dir,"XTTS-v2")
|
||||
config_path = os.path.join(base_dir,"config.json")
|
||||
config = XttsConfig()
|
||||
config.models_dir = models_dir
|
||||
config.load_json(config_path)
|
||||
model = Xtts.init_from_config(config)
|
||||
model.load_checkpoint(config, checkpoint_dir=base_dir)
|
||||
tts = Xtts.init_from_config(config)
|
||||
tts.load_checkpoint(config, checkpoint_dir=base_dir)
|
||||
|
||||
model.to(device)
|
||||
tts.to(device)
|
||||
print("Computing speaker latents...")
|
||||
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[target_voice_file])
|
||||
gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[target_voice_file])
|
||||
|
||||
chapters_dir_audio_fragments = os.path.join(ebook_chapters_audio_dir, "fragments")
|
||||
os.makedirs(chapters_dir_audio_fragments, exist_ok=True)
|
||||
@@ -755,35 +753,37 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
|
||||
# Calculate the total number of chapters and segments (fragments) to set progress bar correctly
|
||||
total_segments = 0
|
||||
total_chapters = len([f for f in os.listdir(ebook_chapters_dir) if f.endswith('.txt')])
|
||||
|
||||
# Check if the language is nltk compatible
|
||||
nltk_language = language_mapping[language]["name"].lower() if language_mapping[language] else None
|
||||
|
||||
# Pre-calculate total segments (sentences + fragments per chapter)
|
||||
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
|
||||
if cancellation_requested.is_set():
|
||||
stop_and_detach_tts(model)
|
||||
stop_and_detach_tts(tts)
|
||||
msg = "Cancel requested"
|
||||
raise ValueError(msg)
|
||||
if chapter_file.endswith('.txt'):
|
||||
with open(os.path.join(ebook_chapters_dir, chapter_file), 'r', encoding='utf-8') as file:
|
||||
chapter_text = file.read()
|
||||
nltk_language = language_mapping.get(language)
|
||||
if nltk_language:
|
||||
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
|
||||
else:
|
||||
sentences = [chapter_text]
|
||||
|
||||
#if nltk_language is not None:
|
||||
#sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
|
||||
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
|
||||
#else:
|
||||
sentences = [chapter_text]
|
||||
|
||||
# Calculate total fragments for this chapter
|
||||
for sentence in sentences:
|
||||
fragments = split_long_sentence(sentence, language=language)
|
||||
total_segments += len(fragments)
|
||||
|
||||
# Initialize progress tracking
|
||||
current_progress = 0
|
||||
total_progress = total_segments + total_chapters # Total is chapters + segments/fragments
|
||||
|
||||
with tqdm(total=total_progress, desc="Processing 0.00%", bar_format='{desc}: {n_fmt}/{total_fmt} ', unit="step") as t:
|
||||
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
|
||||
if cancellation_requested.is_set():
|
||||
stop_and_detach_tts(model)
|
||||
stop_and_detach_tts(tts)
|
||||
msg = "Cancel requested"
|
||||
raise ValueError(msg)
|
||||
if chapter_file.endswith('.txt'):
|
||||
@@ -800,30 +800,29 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
|
||||
|
||||
with open(chapter_file_path, 'r', encoding='utf-8') as file:
|
||||
chapter_text = file.read()
|
||||
nltk_language = language_mapping.get(language)
|
||||
|
||||
if nltk_language:
|
||||
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
|
||||
else:
|
||||
sentences = [chapter_text]
|
||||
#if nltk_language is not None:
|
||||
# sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
|
||||
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
|
||||
#else:
|
||||
sentences = [chapter_text]
|
||||
|
||||
for sentence in sentences:
|
||||
if cancellation_requested.is_set():
|
||||
stop_and_detach_tts(model)
|
||||
stop_and_detach_tts(tts)
|
||||
msg = "Cancel requested"
|
||||
raise ValueError(msg)
|
||||
fragments = split_long_sentence(sentence, language=language)
|
||||
for fragment in fragments:
|
||||
if cancellation_requested.is_set():
|
||||
stop_and_detach_tts(model)
|
||||
stop_and_detach_tts(tts)
|
||||
msg = "Cancel requested"
|
||||
raise ValueError(msg)
|
||||
if fragment != "":
|
||||
print(f"Generating fragment: {fragment}...")
|
||||
fragment_file_path = os.path.join(chapters_dir_audio_fragments, f"{count_fragments}.wav")
|
||||
|
||||
#if custom_model:
|
||||
out = model.inference(
|
||||
#if custom_tts:
|
||||
out = tts.inference(
|
||||
fragment, language, gpt_cond_latent, speaker_embedding,
|
||||
temperature=temperature, repetition_penalty=repetition_penalty,
|
||||
top_k=top_k, top_p=top_p, speed=speed, enable_text_splitting=enable_text_splitting
|
||||
@@ -876,11 +875,11 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
|
||||
except Exception as e:
|
||||
raise DependencyError(e)
|
||||
|
||||
def stop_and_detach_tts(model):
|
||||
# Move the model to CPU if on GPU
|
||||
if next(model.parameters()).is_cuda:
|
||||
model.to('cpu')
|
||||
del model
|
||||
def stop_and_detach_tts(tts):
|
||||
# Move the tts to CPU if on GPU
|
||||
if next(tts.parameters()).is_cuda:
|
||||
tts.to('cpu')
|
||||
del tts
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@@ -913,10 +912,6 @@ def convert_ebook(args):
|
||||
device = args.device.lower()
|
||||
target_voice_file = args.voice
|
||||
language = args.language
|
||||
use_custom_model = args.use_custom_model
|
||||
custom_model_file = args.custom_model
|
||||
custom_config_file = args.custom_config
|
||||
custom_vocab_file = args.custom_vocab
|
||||
temperature = args.temperature
|
||||
length_penalty = args.length_penalty
|
||||
repetition_penalty = args.repetition_penalty
|
||||
@@ -924,6 +919,7 @@ def convert_ebook(args):
|
||||
top_p = args.top_p
|
||||
speed = args.speed
|
||||
enable_text_splitting = args.enable_text_splitting
|
||||
custom_model_file = args.custom_model
|
||||
custom_model_url = args.custom_model_url
|
||||
|
||||
if not os.path.splitext(ebook_src)[1]:
|
||||
@@ -951,8 +947,7 @@ def convert_ebook(args):
|
||||
ebook_pronouns = translate_pronouns(language)
|
||||
|
||||
# Load spaCy model for language analysis (you can switch models based on language)
|
||||
nlp = load_spacy_model(language)
|
||||
|
||||
#if load_spacy_model(language):
|
||||
# Prepare tmp dir and properties
|
||||
if prepare_dirs(args.ebook) :
|
||||
|
||||
@@ -961,7 +956,7 @@ def convert_ebook(args):
|
||||
|
||||
# Handle custom model if the user chose to use one
|
||||
custom_model = None
|
||||
if use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
|
||||
if custom_model_file and custom_config_file and custom_vocab_file:
|
||||
custom_model = {
|
||||
'model': custom_model_file,
|
||||
'config': custom_config_file,
|
||||
@@ -969,7 +964,7 @@ def convert_ebook(args):
|
||||
}
|
||||
|
||||
# If a custom model URL is provided, download and use it
|
||||
if use_custom_model and custom_model_url:
|
||||
if custom_model_url:
|
||||
print(f"Received custom model URL: {custom_model_url}")
|
||||
model_dir = get_model_dir_from_url(custom_model_url)
|
||||
if download_and_extract(custom_model_url, model_dir):
|
||||
@@ -1007,6 +1002,8 @@ def convert_ebook(args):
|
||||
raise DependencyError("convert_chapters_to_audio() failed!")
|
||||
else:
|
||||
return None, None
|
||||
#else:
|
||||
# return None, None
|
||||
else:
|
||||
print(f"Temporary directory {tmp_dir} not removed due to failure.")
|
||||
return None, None
|
||||
@@ -1017,12 +1014,14 @@ def convert_ebook(args):
|
||||
|
||||
def web_interface(mode, share):
|
||||
global ebook_src, is_converting, interface, cancellation_requested, is_gui_process, script_mode, is_gui_shared, audiobooks_ddn
|
||||
|
||||
|
||||
script_mode = mode
|
||||
is_gui_process = True
|
||||
is_gui_shared = share
|
||||
audiobook_file = None
|
||||
|
||||
language_options = [details["native_name"] for details in language_mapping.values()]
|
||||
default_language_native = language_mapping[default_language_code]["native_name"]
|
||||
|
||||
theme = gr.themes.Origin(
|
||||
primary_hue="amber",
|
||||
secondary_hue="green",
|
||||
@@ -1042,8 +1041,11 @@ def web_interface(mode, share):
|
||||
gr.HTML(
|
||||
"""
|
||||
<style>
|
||||
input[type="checkbox"] {
|
||||
border-color: #fafafa !Important;
|
||||
}
|
||||
.svelte-1xyfx7i.center.boundedheight.flex{
|
||||
height: 110px !important;
|
||||
height: 120px !important;
|
||||
}
|
||||
.block.svelte-5y6bt2 {
|
||||
padding: 10px !important;
|
||||
@@ -1056,6 +1058,10 @@ def web_interface(mode, share):
|
||||
margin: 0 !important;
|
||||
font-size: 12px !important;
|
||||
}
|
||||
.block.svelte-5y6bt2.padded {
|
||||
height: auto !important;
|
||||
padding: 10px !important;
|
||||
}
|
||||
.block.svelte-5y6bt2.padded.hide-container {
|
||||
height: auto !important;
|
||||
padding: 0 !important;
|
||||
@@ -1074,25 +1080,25 @@ def web_interface(mode, share):
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
#component-8, #component-9, #component-34 {
|
||||
height: 119px !important;
|
||||
}
|
||||
</style>
|
||||
"""
|
||||
)
|
||||
with gr.Tabs(): # Create tabs for better UI organization
|
||||
with gr.Tabs():
|
||||
with gr.TabItem("Input Options"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=3):
|
||||
ebook_file = gr.File(label="eBook File")
|
||||
target_voice_file = gr.File(label="Target Voice File (Optional)")
|
||||
language = gr.Dropdown(label="Language", choices=language_options, value="en")
|
||||
device = gr.Radio(label="Processor Unit", choices=["CPU", "GPU"], value="CPU")
|
||||
|
||||
language = gr.Dropdown(label="Language", choices=language_options, value=default_language_native)
|
||||
with gr.Column(scale=3):
|
||||
use_custom_model = gr.Checkbox(label="Use Custom Model")
|
||||
custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
|
||||
custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
|
||||
custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
|
||||
custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)
|
||||
|
||||
with gr.Group():
|
||||
target_voice_file = gr.File(label="Cloning Voice* (a .wav or .mp3 no more than 12sec)")
|
||||
custom_model_file = gr.File(label="Model* (a .zip containing config.json, vocab.json, model.pth)")
|
||||
custom_model_url = gr.Textbox(placeholder="https://www.example.com/model.zip", label="Model from URL*")
|
||||
gr.Markdown('<p>* Optional</p>')
|
||||
with gr.TabItem("Audio Generation Preferences"):
|
||||
gr.Markdown(
|
||||
"""
|
||||
@@ -1154,9 +1160,8 @@ def web_interface(mode, share):
|
||||
info="Splits long texts into sentences to generate audio in chunks. Useful for very long inputs."
|
||||
)
|
||||
|
||||
session_status = gr.Textbox(label="Session")
|
||||
session = gr.Textbox(label="Session", visible=False)
|
||||
|
||||
session_status = gr.Textbox(label="Session")
|
||||
session = gr.Textbox(label="Session", visible=False)
|
||||
conversion_progress = gr.Textbox(label="Progress")
|
||||
convert_btn = gr.Button("Convert", variant="primary", interactive=False)
|
||||
audio_player = gr.Audio(label="Listen", type="filepath", visible=False)
|
||||
@@ -1231,11 +1236,20 @@ def web_interface(mode, share):
|
||||
return gr.Button("Convert", variant="primary", interactive=False), None, audiobook_file, update_audiobooks_ddn()
|
||||
|
||||
def refresh_audiobook_list():
|
||||
if not os.path.isdir(audiobooks_dir):
|
||||
os.makedirs(audiobooks_dir, exist_ok=True)
|
||||
files = [f for f in os.listdir(audiobooks_dir)]
|
||||
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
|
||||
files = []
|
||||
if audiobooks_dir is not None:
|
||||
if not os.path.isdir(audiobooks_dir):
|
||||
os.makedirs(audiobooks_dir, exist_ok=True)
|
||||
files = [f for f in os.listdir(audiobooks_dir)]
|
||||
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
|
||||
return files
|
||||
|
||||
def update_audiobook_link(audiobook):
|
||||
if audiobooks_dir is not None:
|
||||
if audiobook:
|
||||
link = os.path.join(audiobooks_dir, audiobook)
|
||||
return link, link, gr.update(visible=True)
|
||||
return None, None, gr.update(visible=False)
|
||||
|
||||
def disable_convert_btn():
|
||||
return gr.Button("Convert", variant="primary", interactive=False)
|
||||
@@ -1244,13 +1258,6 @@ def web_interface(mode, share):
|
||||
files = refresh_audiobook_list()
|
||||
return gr.Dropdown(choices=files, label="Audiobooks", value=files[0] if files else None)
|
||||
|
||||
def update_audiobook_link(audiobook):
|
||||
if audiobook:
|
||||
link = os.path.join(audiobooks_dir, audiobook)
|
||||
return link, link, gr.update(visible=True)
|
||||
else:
|
||||
return None, None, gr.update(visible=False)
|
||||
|
||||
def change_ebook_file(btn, f):
|
||||
global ebook_src, is_converting, cancellation_requested
|
||||
if f is None:
|
||||
@@ -1269,13 +1276,13 @@ def web_interface(mode, share):
|
||||
data["event"] = 'change_data'
|
||||
return data
|
||||
|
||||
def process_conversion(session, device, ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
|
||||
def process_conversion(session, device, ebook_file, target_voice_file, language, custom_model_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
|
||||
global ebook_src, is_converting, audiobook_file
|
||||
|
||||
ebook_src = ebook_file.name if ebook_file else None
|
||||
target_voice_file = target_voice_file.name if target_voice_file else None
|
||||
custom_model_file = custom_model_file.name if custom_model_file else None
|
||||
custom_config_file = custom_config_file.name if custom_config_file else None
|
||||
custom_vocab_file = custom_vocab_file.name if custom_vocab_file else None
|
||||
language = next((code for code, details in language_mapping.items() if details["native_name"] == language), None)
|
||||
|
||||
if not ebook_src:
|
||||
return "Error: eBook file is required."
|
||||
@@ -1288,10 +1295,7 @@ def web_interface(mode, share):
|
||||
ebook=ebook_src,
|
||||
voice=target_voice_file,
|
||||
language=language,
|
||||
use_custom_model=use_custom_model,
|
||||
custom_model=custom_model_file,
|
||||
custom_config=custom_config_file,
|
||||
custom_vocab=custom_vocab_file,
|
||||
custom_model_url=custom_model_url,
|
||||
temperature=float(temperature),
|
||||
length_penalty=float(length_penalty),
|
||||
@@ -1339,11 +1343,6 @@ def web_interface(mode, share):
|
||||
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
|
||||
return [data, f"{warning_text}{warning_text_extra}", data["session_id"], update_audiobooks_ddn()]
|
||||
|
||||
use_custom_model.change(
|
||||
lambda x: [gr.update(visible=x)] * 4,
|
||||
inputs=[use_custom_model],
|
||||
outputs=[custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]
|
||||
)
|
||||
ebook_file.change(
|
||||
fn=change_ebook_file,
|
||||
inputs=[convert_btn, ebook_file],
|
||||
@@ -1383,9 +1382,8 @@ def web_interface(mode, share):
|
||||
fn=process_conversion,
|
||||
inputs=[
|
||||
session, device, ebook_file, target_voice_file, language,
|
||||
use_custom_model, custom_model_file, custom_config_file,
|
||||
custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty,
|
||||
top_k, top_p, speed, enable_text_splitting
|
||||
custom_model_file, custom_model_url, temperature, length_penalty,
|
||||
repetition_penalty, top_k, top_p, speed, enable_text_splitting
|
||||
],
|
||||
outputs=[conversion_progress, modal_html]
|
||||
).then(
|
||||
|
||||
69
lib/lang.py
69
lib/lang.py
@@ -1,48 +1,33 @@
|
||||
import os
|
||||
|
||||
language_options = [
|
||||
"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"
|
||||
]
|
||||
char_limits = {
|
||||
"en": 250, # English
|
||||
"es": 239, # Spanish
|
||||
"fr": 273, # French
|
||||
"de": 253, # German
|
||||
"it": 213, # Italian
|
||||
"pt": 203, # Portuguese
|
||||
"pl": 224, # Polish
|
||||
"tr": 226, # Turkish
|
||||
"ru": 182, # Russian
|
||||
"nl": 251, # Dutch
|
||||
"cs": 186, # Czech
|
||||
"ar": 166, # Arabic
|
||||
"zh-cn": 82, # Chinese (Simplified)
|
||||
"ja": 71, # Japanese
|
||||
"hu": 224, # Hungarian
|
||||
"ko": 95, # Korean
|
||||
}
|
||||
|
||||
# Mapping of language codes to NLTK's supported language names
|
||||
language_mapping = {
|
||||
"en": "english",
|
||||
"de": "german",
|
||||
"fr": "french",
|
||||
"es": "spanish",
|
||||
"it": "italian",
|
||||
"pt": "portuguese",
|
||||
"nl": "dutch",
|
||||
"pl": "polish",
|
||||
"cs": "czech",
|
||||
"ru": "russian",
|
||||
"tr": "turkish",
|
||||
"el": "greek",
|
||||
"et": "estonian",
|
||||
"no": "norwegian",
|
||||
"ml": "malayalam",
|
||||
"sl": "slovene",
|
||||
"da": "danish",
|
||||
"fi": "finnish",
|
||||
"sv": "swedish"
|
||||
"ar": {"name": "Arabic", "native_name": "العربية", "char_limit": 166, "model": "ar_core_news_sm"},
|
||||
"cs": {"name": "Czech", "native_name": "Čeština", "char_limit": 186, "model": "cs_core_news_sm"},
|
||||
"da": {"name": "Danish", "native_name": "Dansk", "char_limit": 220, "model": "da_core_news_sm"},
|
||||
"de": {"name": "German", "native_name": "Deutsch", "char_limit": 253, "model": "de_core_news_sm"},
|
||||
"el": {"name": "Greek", "native_name": "Ελληνικά", "char_limit": 200, "model": "el_core_news_sm"},
|
||||
"en": {"name": "English", "native_name": "English", "char_limit": 250, "model": "en_core_web_sm"},
|
||||
"es": {"name": "Spanish", "native_name": "Español", "char_limit": 239, "model": "es_core_news_md"},
|
||||
"fa": {"name": "Persian", "native_name": "فارسی", "char_limit": 150, "model": "???"},
|
||||
"fi": {"name": "Finnish", "native_name": "Suomi", "char_limit": 230, "model": "fi_core_news_sm"},
|
||||
"fr": {"name": "French", "native_name": "Français", "char_limit": 273, "model": "fr_core_news_sm"},
|
||||
"hi": {"name": "Hindi", "native_name": "हिंदी", "char_limit": 220, "model": "???"},
|
||||
"hr": {"name": "Croatian", "native_name": "Hrvatski", "char_limit": 210, "model": "hr_core_news_sm"},
|
||||
"it": {"name": "Italian", "native_name": "Italiano", "char_limit": 213, "model": "it_core_news_sm"},
|
||||
"ja": {"name": "Japanese", "native_name": "日本語", "char_limit": 71, "model": "ja_core_news_sm"},
|
||||
"ko": {"name": "Korean", "native_name": "한국어", "char_limit": 95, "model": "ko_core_news_sm"},
|
||||
"nb": {"name": "Norwegian", "native_name": "Norsk Bokmål", "char_limit": 225, "model": "nb_core_news_sm"},
|
||||
"nl": {"name": "Dutch", "native_name": "Nederlands", "char_limit": 251, "model": "nl_core_news_sm"},
|
||||
"pl": {"name": "Polish", "native_name": "Polski", "char_limit": 224, "model": "pl_core_news_sm"},
|
||||
"pt": {"name": "Portuguese", "native_name": "Português", "char_limit": 203, "model": "pt_core_news_sm"},
|
||||
"ro": {"name": "Romanian", "native_name": "Română", "char_limit": 190, "model": "ro_core_news_sm"},
|
||||
"ru": {"name": "Russian", "native_name": "Русский", "char_limit": 182, "model": "ru_core_news_sm"},
|
||||
"sl": {"name": "Slovenian", "native_name": "Slovenščina", "char_limit": 210, "model": "sl_core_news_sm"},
|
||||
"sv": {"name": "Swedish", "native_name": "Svenska", "char_limit": 215, "model": "sv_core_news_sm"},
|
||||
"tr": {"name": "Turkish", "native_name": "Türkçe", "char_limit": 200, "model": "???"},
|
||||
"vi": {"name": "Vietnamese", "native_name": "Tiếng Việt", "char_limit": 180, "model": "???"},
|
||||
"yo": {"name": "Yoruba", "native_name": "Yorùbá", "char_limit": 180, "model": "???"},
|
||||
"zh": {"name": "Chinese", "native_name": "中文", "char_limit": 82, "model": "zh_core_web_sm"}
|
||||
}
|
||||
|
||||
default_language_code = "en"
|
||||
|
||||
1
models/XTTS-v2
Submodule
1
models/XTTS-v2
Submodule
Submodule models/XTTS-v2 added at 6c2b0d75ea
@@ -13,14 +13,20 @@ authors = [
|
||||
]
|
||||
dependencies = [
|
||||
"beautifulsoup4",
|
||||
"camel-tools",
|
||||
"coqui-tts",
|
||||
"ebooklib",
|
||||
"cutlet",
|
||||
"docker",
|
||||
"ebooklib",
|
||||
"gensim",
|
||||
"gradio>=4.44.0",
|
||||
"jieba",
|
||||
"mecab",
|
||||
"mecab-python3",
|
||||
"nltk>=3.8.2",
|
||||
"pydub",
|
||||
"pypinyin",
|
||||
"sentencepiece",
|
||||
"translate",
|
||||
"tqdm",
|
||||
"unidic",
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
beautifulsoup4
|
||||
camel-tools
|
||||
coqui-tts
|
||||
ebooklib
|
||||
cutlet
|
||||
docker
|
||||
ebooklib
|
||||
gensim
|
||||
gradio>=4.44.0
|
||||
jieba
|
||||
mecab
|
||||
mecab-python3
|
||||
nltk>=3.8.2
|
||||
pydub
|
||||
pypinyin
|
||||
sentencepiece
|
||||
translate
|
||||
tqdm
|
||||
unidic
|
||||
0
models/.gitkeep → voices/adult/female/zh/.gitkeep
Executable file → Normal file
0
models/.gitkeep → voices/adult/female/zh/.gitkeep
Executable file → Normal file
Reference in New Issue
Block a user