Merge pull request #43 from ROBERT-MCDOWELL/main

pull attempt into v2.0
Drew Thomasson
2024-11-15 23:50:54 -05:00
committed by GitHub
65 changed files with 189 additions and 174 deletions

app.py

@@ -6,9 +6,10 @@ import subprocess
import sys
from lib.conf import *
from lib.lang import language_options, default_language_code
from lib.lang import language_mapping, default_language_code
script_mode = NATIVE
share = False
def check_python_version():
current_version = sys.version_info[:2] # (major, minor)
@@ -99,10 +100,10 @@ def is_port_in_use(port):
return s.connect_ex(('0.0.0.0', port)) == 0
def main():
global script_mode, ebooks_dir
global script_mode, share, ebooks_dir
# Convert the list of languages to a string to display in the help text
language_options_str = ", ".join(language_options)
lang_list_str = ", ".join(list(language_mapping.keys()))
# Argument parser to handle optional parameters with descriptions
parser = argparse.ArgumentParser(
@@ -111,12 +112,12 @@ def main():
Example usage:
Windows:
headless:
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
ebook2audiobook.cmd --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
Graphic Interface:
ebook2audiobook.cmd
Linux/Mac:
headless:
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --use_custom_model --custom_model 'model.zip' --custom_config config.json --custom_vocab vocab.json
./ebook2audiobook.sh --headless --ebook 'path_to_ebook' --voice 'path_to_voice' --language en --custom_model 'model.zip'
Graphic Interface:
./ebook2audiobook.sh
""",
@@ -124,8 +125,8 @@ Linux/Mac:
)
options = [
"--script_mode", "--share", "--headless", "--ebook", "--ebooks_dir",
"--voice", "--language", "--device", "--use_custom_model", "--custom_model",
"--custom_config", "--custom_vocab", "--custom_model_url", "--temperature",
"--voice", "--language", "--device", "--custom_model",
"--custom_model_url", "--temperature",
"--length_penalty", "--repetition_penalty", "--top_k", "--top_p", "--speed",
"--enable_text_splitting", "--version"
]
@@ -142,37 +143,31 @@ Linux/Mac:
parser.add_argument(options[5], type=str,
help="Path to the target voice file for TTS. Optional, uses a default voice if not provided.")
parser.add_argument(options[6], type=str, default="en",
help=f"Language for the audiobook conversion. Options: {language_options_str}. Defaults to English (en).")
help=f"Language for the audiobook conversion. Options: {lang_list_str}. Defaults to English (en).")
parser.add_argument(options[7], type=str, default="cpu", choices=["cpu", "gpu"],
help=f"Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.")
parser.add_argument(options[8], action="store_true",
help="Use a custom TTS model. Defaults to False. Must be True to use custom models.")
parser.add_argument(options[9], type=str,
parser.add_argument(options[8], type=str,
help="Path to the custom model file (.pth). Required if using a custom model.")
parser.add_argument(options[10], type=str,
help="Path to the custom config file (config.json). Required if using a custom model.")
parser.add_argument(options[11], type=str,
help="Path to the custom vocab file (vocab.json). Required if using a custom model.")
parser.add_argument(options[12], type=str,
parser.add_argument(options[9], type=str,
help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
"Examples include David Attenborough's model: "
"'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
"More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
parser.add_argument(options[13], type=float, default=0.65,
parser.add_argument(options[10], type=float, default=0.65,
help="Temperature for the model. Defaults to 0.65. Higher temperatures lead to more creative outputs.")
parser.add_argument(options[14], type=float, default=1.0,
parser.add_argument(options[11], type=float, default=1.0,
help="A length penalty applied to the autoregressive decoder. Defaults to 1.0. Not applied to custom models.")
parser.add_argument(options[15], type=float, default=2.0,
parser.add_argument(options[12], type=float, default=2.0,
help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
parser.add_argument(options[16], type=int, default=50,
parser.add_argument(options[13], type=int, default=50,
help="Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 50.")
parser.add_argument(options[17], type=float, default=0.8,
parser.add_argument(options[14], type=float, default=0.8,
help="Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 0.8.")
parser.add_argument(options[18], type=float, default=1.0,
parser.add_argument(options[15], type=float, default=1.0,
help="Speed factor for the speech generation. Defaults to 1.0.")
parser.add_argument(options[19], action="store_true",
parser.add_argument(options[16], action="store_true",
help="Enable splitting text into sentences. Defaults to False.")
parser.add_argument(options[20], action="version",version=f"ebook2audiobook version {version}",
parser.add_argument(options[17], action="version",version=f"ebook2audiobook version {version}",
help="Show the version of the script and exit")
for arg in sys.argv:
@@ -188,6 +183,7 @@ Linux/Mac:
sys.exit(1)
script_mode = args.script_mode if args.script_mode else script_mode
share = args.share if args.share else share
if script_mode == NATIVE:
check_pkg = check_and_install_requirements(requirements_file)
@@ -236,11 +232,13 @@ Linux/Mac:
else:
print(f"Error: The directory {ebooks_dir} does not exist.")
sys.exit(1)
elif args.ebook:
progress_status, audiobook_file = convert_ebook(args)
if audiobook_file is None:
print(f"Conversion failed: {progress_status}")
sys.exit(1)
else:
print("Error: In headless mode, you must specify either an ebook file using --ebook or an ebook directory using --ebooks_dir.")
sys.exit(1)
@@ -249,7 +247,7 @@ Linux/Mac:
allowed_arguments = {'--share', '--script_mode'}
passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
if passed_args_set.issubset(allowed_arguments):
web_interface(args.script_mode, args.share)
web_interface(script_mode, share)
else:
print("Error: In non-headless mode, no option or only '--share' can be passed")
sys.exit(1)
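
For orientation, a minimal sketch of how the trimmed option list works after this change: the --language help text now comes from the keys of language_mapping rather than the removed language_options list, and the custom-model flags collapse to --custom_model and --custom_model_url. The mapping subset below is illustrative; the real table is in the lib/lang.py diff further down.

    import argparse

    # Illustrative subset of lib.lang.language_mapping (see the lib/lang.py diff below)
    language_mapping = {
        "en": {"name": "English", "native_name": "English"},
        "fr": {"name": "French", "native_name": "Français"},
    }
    default_language_code = "en"

    # Replaces the old language_options list in the --language help text
    lang_list_str = ", ".join(list(language_mapping.keys()))  # -> "en, fr"

    parser = argparse.ArgumentParser(description="ebook2audiobook")
    parser.add_argument("--language", type=str, default=default_language_code,
                        help=f"Language for the audiobook conversion. Options: {lang_list_str}.")
    parser.add_argument("--custom_model", type=str,
                        help="Path to the custom model file (.pth).")
    parser.add_argument("--custom_model_url", type=str,
                        help="URL to download the custom model as a zip file.")

    args = parser.parse_args(["--language", "fr", "--custom_model", "model.zip"])
    print(args.language, args.custom_model)  # fr model.zip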


@@ -37,7 +37,7 @@ if not exist "%CALIBRE_TEMP_DIR%" (
icacls "%CALIBRE_TEMP_DIR%" /grant Users:(OI)(CI)F /T
for %%A in (%*) do (
for %%A in (%ARGS%) do (
if "%%A"=="%DOCKER_UTILS%" (
set "SCRIPT_MODE=%DOCKER_UTILS%"
break
@@ -228,7 +228,7 @@ if not "%DOCKER_BUILD_STATUS%"=="0" (
net session >nul 2>&1
if %errorlevel% equ 0 (
echo Restarting in user mode...
start "" /b cmd /c "%~f0" %*
start "" /b cmd /c "%~f0" %ARGS%
exit /b
)
goto dispatch
@@ -269,7 +269,7 @@ if "%SCRIPT_MODE%"=="%FULL_DOCKER%" (
call conda create --prefix %SCRIPT_DIR%\%PYTHON_ENV% python=%PYTHON_VERSION% -y
call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
call python -m pip install --upgrade pip
call python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
call python -m pip install --upgrade -r requirements.txt
call python -m unidic download
call python -m spacy download en_core_web_sm
call python -m nltk.downloader punkt_tab


@@ -212,7 +212,7 @@ function conda_check {
source $CONDA_ENV
conda activate $SCRIPT_DIR/$PYTHON_ENV
python -m pip install --upgrade pip
python -m pip install beautifulsoup4 coqui-tts ebooklib docker "gradio>=4.44.0" mecab mecab-python3 "nltk>=3.8.2" pydub translate tqdm unidic
python -m pip install --upgrade -r requirements.txt
python -m unidic download
python -m spacy download en_core_web_sm
python -m nltk.downloader punkt_tab
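
Both installer scripts now defer to requirements.txt instead of hard-coding the package list. A rough Python equivalent of that installer step, assuming requirements.txt is in the working directory:

    import subprocess
    import sys

    # Upgrade pip, then install or upgrade everything listed in requirements.txt
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "-r", "requirements.txt"], check=True)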

Binary file not shown.

Binary file not shown.

Binary file not shown.


@@ -1 +0,0 @@
this is the test four from the result of text file to audiobook conversion.

BIN
ebooks/test_ar.azw3 Normal file

Binary file not shown.

ebooks/test_ar.txt Normal file

@@ -0,0 +1 @@
هذا هو الاختبار من نتيجة تحويل ملف نصي إلى كتاب صوتي.

BIN
ebooks/test_cs.azw3 Normal file

Binary file not shown.

ebooks/test_cs.txt Normal file

@@ -0,0 +1 @@
Toto je test od výsledku převodu textového souboru na audioknihu.

BIN
ebooks/test_da.azw3 Normal file

Binary file not shown.

ebooks/test_da.txt Normal file

@@ -0,0 +1 @@
Dette er testen fra resultatet af konvertering af tekstfil til lydbog.

BIN
ebooks/test_de.azw3 Normal file

Binary file not shown.

ebooks/test_de.txt Normal file

@@ -0,0 +1 @@
Dies ist der Test des Ergebnisses der Konvertierung einer Textdatei in ein Hörbuch.

BIN
ebooks/test_el.azw3 Normal file

Binary file not shown.

ebooks/test_el.txt Normal file

@@ -0,0 +1 @@
Αυτή είναι η δοκιμή από το αποτέλεσμα της μετατροπής αρχείου κειμένου σε ηχητικό βιβλίο.

BIN
ebooks/test_en.azw3 Normal file

Binary file not shown.

ebooks/test_en.txt Normal file

@@ -0,0 +1 @@
This is the test from the result of text file to audiobook conversion.

BIN
ebooks/test_es.azw3 Normal file

Binary file not shown.

ebooks/test_es.txt Normal file

@@ -0,0 +1 @@
Esta es la prueba del resultado de la conversión de archivo de texto a audiolibro.

BIN
ebooks/test_fi.azw3 Normal file

Binary file not shown.

ebooks/test_fi.txt Normal file

@@ -0,0 +1 @@
Tämä on testi tekstitiedoston muuntamisen tuloksesta äänikirjaksi.

BIN
ebooks/test_fr.azw3 Normal file

Binary file not shown.

ebooks/test_fr.txt Normal file

@@ -0,0 +1 @@
Ceci est le test provenant d'un fichier text en livre audio.

BIN
ebooks/test_hr.azw3 Normal file

Binary file not shown.

ebooks/test_hr.txt Normal file

@@ -0,0 +1 @@
Ovo je test rezultata pretvorbe tekstualne datoteke u audioknjigu.

BIN
ebooks/test_it.azw3 Normal file

Binary file not shown.

ebooks/test_it.txt Normal file

@@ -0,0 +1 @@
Questo è il test del risultato della conversione del file di testo in audiolibro.

BIN
ebooks/test_ja.azw3 Normal file

Binary file not shown.

ebooks/test_ja.txt Normal file

@@ -0,0 +1 @@
これは、テキスト ファイルからオーディオブックへの変換結果のテストです。

BIN
ebooks/test_ko.azw3 Normal file

Binary file not shown.

ebooks/test_ko.txt Normal file

@@ -0,0 +1 @@
이는 텍스트 파일을 오디오북으로 변환한 결과에 대한 테스트입니다.

BIN
ebooks/test_nb.azw3 Normal file

Binary file not shown.

ebooks/test_nb.txt Normal file

@@ -0,0 +1 @@
Dette er testen fra resultatet av konvertering av tekstfil til lydbok.

BIN
ebooks/test_nl.azw3 Normal file

Binary file not shown.

ebooks/test_nl.txt Normal file

@@ -0,0 +1 @@
Dit is de test op basis van het resultaat van de conversie van een tekstbestand naar een audioboek.

BIN
ebooks/test_pl.azw3 Normal file

Binary file not shown.

ebooks/test_pl.txt Normal file

@@ -0,0 +1 @@
To jest test wyniku konwersji pliku tekstowego na audiobook.

BIN
ebooks/test_pt.azw3 Normal file

Binary file not shown.

ebooks/test_pt.txt Normal file

@@ -0,0 +1 @@
Este é o teste do resultado da conversão de ficheiro de texto em audiolivro.

BIN
ebooks/test_ro.azw3 Normal file

Binary file not shown.

ebooks/test_ro.txt Normal file

@@ -0,0 +1 @@
Acesta este testul de la rezultatul conversiei fișierului text la cartea audio.

BIN
ebooks/test_ru.azw3 Normal file

Binary file not shown.

ebooks/test_ru.txt Normal file

@@ -0,0 +1 @@
Это тест результата конвертации текстового файла в аудиокнигу.

BIN
ebooks/test_sl.azw3 Normal file

Binary file not shown.

ebooks/test_sl.txt Normal file

@@ -0,0 +1 @@
To je preizkus rezultata pretvorbe besedilne datoteke v zvočno knjigo.

BIN
ebooks/test_sv.azw3 Normal file

Binary file not shown.

ebooks/test_sv.txt Normal file

@@ -0,0 +1 @@
Detta är testet från resultatet av konvertering av textfil till ljudbok.

BIN
ebooks/test_zh.azw3 Normal file

Binary file not shown.

ebooks/test_zh.txt Normal file

@@ -0,0 +1 @@
这是从文本文件到有声读物的转换结果进行的测试。


@@ -206,21 +206,18 @@ def download_and_extract(path_or_url, extract_to=models_dir):
except Exception as e:
raise DependencyError(e)
def load_spacy_model(language):
model_name = f"{language}_core_web_sm"
try:
nltk.data.find('tokenizers/punkt_tab')
except LookupError:
print("Downloading NLTK punkt tokenizer...")
nltk.download('punkt_tab')
if not is_package(model_name):
try:
print(f"Downloading model: {model_name}")
download_package(model_name) # Download the model if not installed
except Exception as e:
print(f"Error downloading model {model_name}: {e}")
return None
return spacy.load(model_name)
#def load_spacy_model(language):
# lang_pack = language_mapping[language]["model"]
# try:
# nlp = spacy.load(lang_pack)
# except OSError:
#        print("Spacy model not found. Trying to download it...")
# try:
# subprocess.run(["python", "-m", "spacy", "download", lang_pack])
# nlp = spacy.load(lang_pack)
# except OSError:
# raise ValueError(f"Spacy model does not exist for {language_mapping[language]['name']}...")
# return nlp
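
The old punkt/spaCy bootstrap above is dropped and an alternative loader is left commented out. A working sketch of that commented approach, assuming the per-language spaCy package name is stored under the "model" key of language_mapping as shown in the lib/lang.py diff below:

    import subprocess
    import sys

    import spacy
    from lib.lang import language_mapping

    def load_spacy_model(language):
        lang_pack = language_mapping[language]["model"]   # e.g. "en_core_web_sm"
        try:
            return spacy.load(lang_pack)
        except OSError:
            print("spaCy model not found. Trying to download it...")
            subprocess.run([sys.executable, "-m", "spacy", "download", lang_pack], check=True)
            return spacy.load(lang_pack)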
def translate_pronouns(language):
global ebook_pronouns
@@ -385,7 +382,7 @@ def concat_audio_chapters(metadatas, cover_file):
languages = metadatas.get('Languages', None)
if languages:
ffmpeg_metadata += f"language={languages}\n\n" # Language
ffmpeg_metadata += f"language={languages}\n\n"
else:
print("Warning: metadatas is None. Skipping metadata generation.")
@@ -676,10 +673,11 @@ def split_long_sentence(sentence, language='en', max_pauses=10):
:return: A list of sentence parts that meet the criteria.
"""
# Get the max character length for the selected language, minus 2
max_length = (char_limits.get(language, 250)-2)
char_limits = language_mapping[language]["char_limit"]
max_length = (char_limits - 2)
# Adjust the pause punctuation symbols based on language
if language == 'zh-cn':
if language == 'zh':
punctuation = ['', '', '', '', ''] # Chinese-specific pause punctuation including sentence-ending marks
elif language == 'ja':
punctuation = ['', '', '', '', ''] # Japanese-specific pause punctuation
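
With the per-language limit now read from language_mapping, the fragment length cap is derived like this (values taken from the lib/lang.py diff below):

    from lib.lang import language_mapping

    # e.g. Japanese: char_limit is 71, so fragments are capped at 69 characters
    char_limit = language_mapping["ja"]["char_limit"]
    max_length = char_limit - 2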
@@ -724,7 +722,7 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
target_voice_file = default_target_voice_file
# Handle custom model or use standard TTS model
print("Loading model...")
print("Loading TTS ...")
if custom_model:
config_path = custom_model['config']
model_path = custom_model['model']
@@ -732,22 +730,22 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
config = XttsConfig()
config.models_dir = models_dir
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_dir=model_path, vocab_path=vocab_path)
else:
#selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
#model = TTS(selected_tts_model, progress_bar=False).to(device)
base_dir = os.path.join(models_dir,"tts_models--multilingual--multi-dataset--xtts_v2")
#tts = TTS(selected_tts_model, progress_bar=False).to(device)
base_dir = os.path.join(models_dir,"XTTS-v2")
config_path = os.path.join(base_dir,"config.json")
config = XttsConfig()
config.models_dir = models_dir
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=base_dir)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_dir=base_dir)
model.to(device)
tts.to(device)
print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[target_voice_file])
gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[target_voice_file])
chapters_dir_audio_fragments = os.path.join(ebook_chapters_audio_dir, "fragments")
os.makedirs(chapters_dir_audio_fragments, exist_ok=True)
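
For reference, a condensed sketch of the default XTTS loading path as renamed here (model → tts), pointing at the new models/XTTS-v2 submodule; models_dir, device and the reference voice path are placeholders standing in for values functions.py already holds:

    import os

    import torch
    from TTS.tts.configs.xtts_config import XttsConfig
    from TTS.tts.models.xtts import Xtts

    models_dir = "models"                                        # placeholder; real value comes from lib.conf
    device = "cuda" if torch.cuda.is_available() else "cpu"
    target_voice_file = "voice_sample.wav"                       # placeholder reference clip

    base_dir = os.path.join(models_dir, "XTTS-v2")               # the new submodule checkout
    config = XttsConfig()
    config.models_dir = models_dir
    config.load_json(os.path.join(base_dir, "config.json"))

    tts = Xtts.init_from_config(config)
    tts.load_checkpoint(config, checkpoint_dir=base_dir)
    tts.to(device)

    print("Computing speaker latents...")
    gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[target_voice_file])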
@@ -755,35 +753,37 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
# Calculate the total number of chapters and segments (fragments) to set progress bar correctly
total_segments = 0
total_chapters = len([f for f in os.listdir(ebook_chapters_dir) if f.endswith('.txt')])
# Check if the language is nltk compatible
nltk_language = language_mapping[language]["name"].lower() if language_mapping[language] else None
# Pre-calculate total segments (sentences + fragments per chapter)
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if chapter_file.endswith('.txt'):
with open(os.path.join(ebook_chapters_dir, chapter_file), 'r', encoding='utf-8') as file:
chapter_text = file.read()
nltk_language = language_mapping.get(language)
if nltk_language:
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
else:
sentences = [chapter_text]
#if nltk_language is not None:
#sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
#else:
sentences = [chapter_text]
# Calculate total fragments for this chapter
for sentence in sentences:
fragments = split_long_sentence(sentence, language=language)
total_segments += len(fragments)
# Initialize progress tracking
current_progress = 0
total_progress = total_segments + total_chapters # Total is chapters + segments/fragments
with tqdm(total=total_progress, desc="Processing 0.00%", bar_format='{desc}: {n_fmt}/{total_fmt} ', unit="step") as t:
for chapter_file in sorted(os.listdir(ebook_chapters_dir)):
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if chapter_file.endswith('.txt'):
@@ -800,30 +800,29 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
with open(chapter_file_path, 'r', encoding='utf-8') as file:
chapter_text = file.read()
nltk_language = language_mapping.get(language)
if nltk_language:
sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
else:
sentences = [chapter_text]
#if nltk_language is not None:
# sentences = nltk.tokenize.sent_tokenize(chapter_text, language=nltk_language)
#sentences = nltk.tokenize.word_tokenize(chapter_text, language=nltk_language, preserve_line=False)
#else:
sentences = [chapter_text]
for sentence in sentences:
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
fragments = split_long_sentence(sentence, language=language)
for fragment in fragments:
if cancellation_requested.is_set():
stop_and_detach_tts(model)
stop_and_detach_tts(tts)
msg = "Cancel requested"
raise ValueError(msg)
if fragment != "":
print(f"Generating fragment: {fragment}...")
fragment_file_path = os.path.join(chapters_dir_audio_fragments, f"{count_fragments}.wav")
#if custom_model:
out = model.inference(
#if custom_tts:
out = tts.inference(
fragment, language, gpt_cond_latent, speaker_embedding,
temperature=temperature, repetition_penalty=repetition_penalty,
top_k=top_k, top_p=top_p, speed=speed, enable_text_splitting=enable_text_splitting
@@ -876,11 +875,11 @@ def convert_chapters_to_audio(device, temperature, length_penalty, repetition_pe
except Exception as e:
raise DependencyError(e)
def stop_and_detach_tts(model):
# Move the model to CPU if on GPU
if next(model.parameters()).is_cuda:
model.to('cpu')
del model
def stop_and_detach_tts(tts):
# Move the tts to CPU if on GPU
if next(tts.parameters()).is_cuda:
tts.to('cpu')
del tts
if torch.cuda.is_available():
torch.cuda.empty_cache()
@@ -913,10 +912,6 @@ def convert_ebook(args):
device = args.device.lower()
target_voice_file = args.voice
language = args.language
use_custom_model = args.use_custom_model
custom_model_file = args.custom_model
custom_config_file = args.custom_config
custom_vocab_file = args.custom_vocab
temperature = args.temperature
length_penalty = args.length_penalty
repetition_penalty = args.repetition_penalty
@@ -924,6 +919,7 @@ def convert_ebook(args):
top_p = args.top_p
speed = args.speed
enable_text_splitting = args.enable_text_splitting
custom_model_file = args.custom_model
custom_model_url = args.custom_model_url
if not os.path.splitext(ebook_src)[1]:
@@ -951,8 +947,7 @@ def convert_ebook(args):
ebook_pronouns = translate_pronouns(language)
# Load spaCy model for language analysis (you can switch models based on language)
nlp = load_spacy_model(language)
#if load_spacy_model(language):
# Prepare tmp dir and properties
if prepare_dirs(args.ebook) :
@@ -961,7 +956,7 @@ def convert_ebook(args):
# Handle custom model if the user chose to use one
custom_model = None
if use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
if custom_model_file and custom_config_file and custom_vocab_file:
custom_model = {
'model': custom_model_file,
'config': custom_config_file,
@@ -969,7 +964,7 @@ def convert_ebook(args):
}
# If a custom model URL is provided, download and use it
if use_custom_model and custom_model_url:
if custom_model_url:
print(f"Received custom model URL: {custom_model_url}")
model_dir = get_model_dir_from_url(custom_model_url)
if download_and_extract(custom_model_url, model_dir):
@@ -1007,6 +1002,8 @@ def convert_ebook(args):
raise DependencyError("convert_chapters_to_audio() failed!")
else:
return None, None
#else:
# return None, None
else:
print(f"Temporary directory {tmp_dir} not removed due to failure.")
return None, None
@@ -1017,12 +1014,14 @@ def convert_ebook(args):
def web_interface(mode, share):
global ebook_src, is_converting, interface, cancellation_requested, is_gui_process, script_mode, is_gui_shared, audiobooks_ddn
script_mode = mode
is_gui_process = True
is_gui_shared = share
audiobook_file = None
language_options = [details["native_name"] for details in language_mapping.values()]
default_language_native = language_mapping[default_language_code]["native_name"]
theme = gr.themes.Origin(
primary_hue="amber",
secondary_hue="green",
@@ -1042,8 +1041,11 @@ def web_interface(mode, share):
gr.HTML(
"""
<style>
input[type="checkbox"] {
border-color: #fafafa !Important;
}
.svelte-1xyfx7i.center.boundedheight.flex{
height: 110px !important;
height: 120px !important;
}
.block.svelte-5y6bt2 {
padding: 10px !important;
@@ -1056,6 +1058,10 @@ def web_interface(mode, share):
margin: 0 !important;
font-size: 12px !important;
}
.block.svelte-5y6bt2.padded {
height: auto !important;
padding: 10px !important;
}
.block.svelte-5y6bt2.padded.hide-container {
height: auto !important;
padding: 0 !important;
@@ -1074,25 +1080,25 @@ def web_interface(mode, share):
padding: 0;
margin: 0;
}
#component-8, #component-9, #component-34 {
height: 119px !important;
}
</style>
"""
)
with gr.Tabs(): # Create tabs for better UI organization
with gr.Tabs():
with gr.TabItem("Input Options"):
with gr.Row():
with gr.Column(scale=3):
ebook_file = gr.File(label="eBook File")
target_voice_file = gr.File(label="Target Voice File (Optional)")
language = gr.Dropdown(label="Language", choices=language_options, value="en")
device = gr.Radio(label="Processor Unit", choices=["CPU", "GPU"], value="CPU")
language = gr.Dropdown(label="Language", choices=language_options, value=default_language_native)
with gr.Column(scale=3):
use_custom_model = gr.Checkbox(label="Use Custom Model")
custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)
with gr.Group():
target_voice_file = gr.File(label="Cloning Voice* (a .wav or .mp3, no longer than 12 seconds)")
custom_model_file = gr.File(label="Model* (a .zip containing config.json, vocab.json, model.pth)")
custom_model_url = gr.Textbox(placeholder="https://www.example.com/model.zip", label="Model from URL*")
gr.Markdown('<p>* Optional</p>')
with gr.TabItem("Audio Generation Preferences"):
gr.Markdown(
"""
@@ -1154,9 +1160,8 @@ def web_interface(mode, share):
info="Splits long texts into sentences to generate audio in chunks. Useful for very long inputs."
)
session_status = gr.Textbox(label="Session")
session = gr.Textbox(label="Session", visible=False)
session_status = gr.Textbox(label="Session")
session = gr.Textbox(label="Session", visible=False)
conversion_progress = gr.Textbox(label="Progress")
convert_btn = gr.Button("Convert", variant="primary", interactive=False)
audio_player = gr.Audio(label="Listen", type="filepath", visible=False)
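
The web UI now lists languages by native name and maps the selection back to its code before conversion; a minimal sketch of that round trip (the Dropdown usage matches the diff, the rest is illustrative):

    import gradio as gr
    from lib.lang import language_mapping, default_language_code

    language_options = [details["native_name"] for details in language_mapping.values()]
    default_language_native = language_mapping[default_language_code]["native_name"]

    language = gr.Dropdown(label="Language", choices=language_options, value=default_language_native)

    # Later, process_conversion() translates the native name back to its code
    selected = "Français"
    language_code = next(
        (code for code, details in language_mapping.items() if details["native_name"] == selected),
        None,
    )  # -> "fr"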
@@ -1231,11 +1236,20 @@ def web_interface(mode, share):
return gr.Button("Convert", variant="primary", interactive=False), None, audiobook_file, update_audiobooks_ddn()
def refresh_audiobook_list():
if not os.path.isdir(audiobooks_dir):
os.makedirs(audiobooks_dir, exist_ok=True)
files = [f for f in os.listdir(audiobooks_dir)]
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
files = []
if audiobooks_dir is not None:
if not os.path.isdir(audiobooks_dir):
os.makedirs(audiobooks_dir, exist_ok=True)
files = [f for f in os.listdir(audiobooks_dir)]
files.sort(key=lambda x: os.path.getmtime(os.path.join(audiobooks_dir, x)), reverse=True)
return files
def update_audiobook_link(audiobook):
if audiobooks_dir is not None:
if audiobook:
link = os.path.join(audiobooks_dir, audiobook)
return link, link, gr.update(visible=True)
return None, None, gr.update(visible=False)
def disable_convert_btn():
return gr.Button("Convert", variant="primary", interactive=False)
@@ -1244,13 +1258,6 @@ def web_interface(mode, share):
files = refresh_audiobook_list()
return gr.Dropdown(choices=files, label="Audiobooks", value=files[0] if files else None)
def update_audiobook_link(audiobook):
if audiobook:
link = os.path.join(audiobooks_dir, audiobook)
return link, link, gr.update(visible=True)
else:
return None, None, gr.update(visible=False)
def change_ebook_file(btn, f):
global ebook_src, is_converting, cancellation_requested
if f is None:
@@ -1269,13 +1276,13 @@ def web_interface(mode, share):
data["event"] = 'change_data'
return data
def process_conversion(session, device, ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
def process_conversion(session, device, ebook_file, target_voice_file, language, custom_model_file, custom_model_url, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting):
global ebook_src, is_converting, audiobook_file
ebook_src = ebook_file.name if ebook_file else None
target_voice_file = target_voice_file.name if target_voice_file else None
custom_model_file = custom_model_file.name if custom_model_file else None
custom_config_file = custom_config_file.name if custom_config_file else None
custom_vocab_file = custom_vocab_file.name if custom_vocab_file else None
language = next((code for code, details in language_mapping.items() if details["native_name"] == language), None)
if not ebook_src:
return "Error: eBook file is required."
@@ -1288,10 +1295,7 @@ def web_interface(mode, share):
ebook=ebook_src,
voice=target_voice_file,
language=language,
use_custom_model=use_custom_model,
custom_model=custom_model_file,
custom_config=custom_config_file,
custom_vocab=custom_vocab_file,
custom_model_url=custom_model_url,
temperature=float(temperature),
length_penalty=float(length_penalty),
@@ -1339,11 +1343,6 @@ def web_interface(mode, share):
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
return [data, f"{warning_text}{warning_text_extra}", data["session_id"], update_audiobooks_ddn()]
use_custom_model.change(
lambda x: [gr.update(visible=x)] * 4,
inputs=[use_custom_model],
outputs=[custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]
)
ebook_file.change(
fn=change_ebook_file,
inputs=[convert_btn, ebook_file],
@@ -1383,9 +1382,8 @@ def web_interface(mode, share):
fn=process_conversion,
inputs=[
session, device, ebook_file, target_voice_file, language,
use_custom_model, custom_model_file, custom_config_file,
custom_vocab_file, custom_model_url, temperature, length_penalty, repetition_penalty,
top_k, top_p, speed, enable_text_splitting
custom_model_file, custom_model_url, temperature, length_penalty,
repetition_penalty, top_k, top_p, speed, enable_text_splitting
],
outputs=[conversion_progress, modal_html]
).then(


@@ -1,48 +1,33 @@
import os
language_options = [
"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"
]
char_limits = {
"en": 250, # English
"es": 239, # Spanish
"fr": 273, # French
"de": 253, # German
"it": 213, # Italian
"pt": 203, # Portuguese
"pl": 224, # Polish
"tr": 226, # Turkish
"ru": 182, # Russian
"nl": 251, # Dutch
"cs": 186, # Czech
"ar": 166, # Arabic
"zh-cn": 82, # Chinese (Simplified)
"ja": 71, # Japanese
"hu": 224, # Hungarian
"ko": 95, # Korean
}
# Mapping of language codes to NLTK's supported language names
language_mapping = {
"en": "english",
"de": "german",
"fr": "french",
"es": "spanish",
"it": "italian",
"pt": "portuguese",
"nl": "dutch",
"pl": "polish",
"cs": "czech",
"ru": "russian",
"tr": "turkish",
"el": "greek",
"et": "estonian",
"no": "norwegian",
"ml": "malayalam",
"sl": "slovene",
"da": "danish",
"fi": "finnish",
"sv": "swedish"
"ar": {"name": "Arabic", "native_name": "العربية", "char_limit": 166, "model": "ar_core_news_sm"},
"cs": {"name": "Czech", "native_name": "Čeština", "char_limit": 186, "model": "cs_core_news_sm"},
"da": {"name": "Danish", "native_name": "Dansk", "char_limit": 220, "model": "da_core_news_sm"},
"de": {"name": "German", "native_name": "Deutsch", "char_limit": 253, "model": "de_core_news_sm"},
"el": {"name": "Greek", "native_name": "Ελληνικά", "char_limit": 200, "model": "el_core_news_sm"},
"en": {"name": "English", "native_name": "English", "char_limit": 250, "model": "en_core_web_sm"},
"es": {"name": "Spanish", "native_name": "Español", "char_limit": 239, "model": "es_core_news_md"},
"fa": {"name": "Persian", "native_name": "فارسی", "char_limit": 150, "model": "???"},
"fi": {"name": "Finnish", "native_name": "Suomi", "char_limit": 230, "model": "fi_core_news_sm"},
"fr": {"name": "French", "native_name": "Français", "char_limit": 273, "model": "fr_core_news_sm"},
"hi": {"name": "Hindi", "native_name": "हिंदी", "char_limit": 220, "model": "???"},
"hr": {"name": "Croatian", "native_name": "Hrvatski", "char_limit": 210, "model": "hr_core_news_sm"},
"it": {"name": "Italian", "native_name": "Italiano", "char_limit": 213, "model": "it_core_news_sm"},
"ja": {"name": "Japanese", "native_name": "日本語", "char_limit": 71, "model": "ja_core_news_sm"},
"ko": {"name": "Korean", "native_name": "한국어", "char_limit": 95, "model": "ko_core_news_sm"},
"nb": {"name": "Norwegian", "native_name": "Norsk Bokmål", "char_limit": 225, "model": "nb_core_news_sm"},
"nl": {"name": "Dutch", "native_name": "Nederlands", "char_limit": 251, "model": "nl_core_news_sm"},
"pl": {"name": "Polish", "native_name": "Polski", "char_limit": 224, "model": "pl_core_news_sm"},
"pt": {"name": "Portuguese", "native_name": "Português", "char_limit": 203, "model": "pt_core_news_sm"},
"ro": {"name": "Romanian", "native_name": "Română", "char_limit": 190, "model": "ro_core_news_sm"},
"ru": {"name": "Russian", "native_name": "Русский", "char_limit": 182, "model": "ru_core_news_sm"},
"sl": {"name": "Slovenian", "native_name": "Slovenščina", "char_limit": 210, "model": "sl_core_news_sm"},
"sv": {"name": "Swedish", "native_name": "Svenska", "char_limit": 215, "model": "sv_core_news_sm"},
"tr": {"name": "Turkish", "native_name": "Türkçe", "char_limit": 200, "model": "???"},
"vi": {"name": "Vietnamese", "native_name": "Tiếng Việt", "char_limit": 180, "model": "???"},
"yo": {"name": "Yoruba", "native_name": "Yorùbá", "char_limit": 180, "model": "???"},
"zh": {"name": "Chinese", "native_name": "中文", "char_limit": 82, "model": "zh_core_web_sm"}
}
default_language_code = "en"
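
Each entry now bundles the display names, the XTTS character limit and the spaCy package in one place; a quick lookup sketch against the table above:

    from lib.lang import language_mapping

    entry = language_mapping["de"]
    print(entry["name"])         # German
    print(entry["native_name"])  # Deutsch
    print(entry["char_limit"])   # 253
    print(entry["model"])        # de_core_news_sm

    # The CLI --language help text is built from the keys
    print(", ".join(language_mapping.keys()))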

models/XTTS-v2 Submodule

Submodule models/XTTS-v2 added at 6c2b0d75ea


@@ -13,14 +13,20 @@ authors = [
]
dependencies = [
"beautifulsoup4",
"camel-tools",
"coqui-tts",
"ebooklib",
"cutlet",
"docker",
"ebooklib",
"gensim",
"gradio>=4.44.0",
"jieba",
"mecab",
"mecab-python3",
"nltk>=3.8.2",
"pydub",
"pypinyin",
"sentencepiece",
"translate",
"tqdm",
"unidic",


@@ -1,12 +1,18 @@
beautifulsoup4
camel-tools
coqui-tts
ebooklib
cutlet
docker
ebooklib
gensim
gradio>=4.44.0
jieba
mecab
mecab-python3
nltk>=3.8.2
pydub
pypinyin
sentencepiece
translate
tqdm
unidic

models/.gitkeep → voices/adult/female/zh/.gitkeep Executable file → Normal file