v25.12.32

ROBERT MCDOWELL
2025-12-30 19:12:19 -08:00
committed by GitHub
15 changed files with 102 additions and 67 deletions

View File

@@ -3,8 +3,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ARG APP_VERSION=25.12.31
ARG DEVICE_TAG=cpu
ARG APP_VERSION=25.12.32
ARG DEVICE_TAG=cu128
ARG DOCKER_DEVICE_STR='{"name": "cu128", "os": "manylinux_2_28", "arch": "x86_64", "pyvenv": [3, 12], "tag": "cu128", "note": "default device"}'
ARG DOCKER_PROGRAMS_STR="curl ffmpeg nodejs npm espeak-ng sox tesseract-ocr"
ARG CALIBRE_INSTALLER_URL="https://download.calibre-ebook.com/linux-installer.sh"

View File

@@ -1 +1 @@
25.12.31
25.12.32

View File

@@ -5,7 +5,7 @@ services:
context: .
dockerfile: Dockerfile
args:
APP_VERSION: ${APP_VERSION:-25.25.25}
APP_VERSION: ${APP_VERSION:-25.12.32}
DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu128, cu118, rocm, xpu, cpu
container_name: ebook2audiobook
working_dir: /app

View File

@@ -8,8 +8,7 @@ ebooklib
fastapi
hf_xet
beautifulsoup4
sudachipy
sudachidict-core
nagisa
pymupdf
pymupdf-layout
pytesseract

View File

@@ -428,7 +428,8 @@ class DeviceInstaller():
):
if os.path.exists(p):
with open(p, 'r', encoding='utf-8', errors='ignore') as f:
version = f.read()
v = f.read()
version = lib_version_parse(v)
break
elif os.name == 'nt':
for env in ('ROCM_PATH', 'HIP_PATH'):
@@ -447,10 +448,14 @@ class DeviceInstaller():
break
if version:
cmp = toolkit_version_compare(version, rocm_version_range)
min_version = rocm_version_range["min"]
max_version = rocm_version_range["max"]
min_version_str = ".".join(map(str, min_version)) if isinstance(min_version, (tuple, list)) else str(min_version)
max_version_str = ".".join(map(str, max_version)) if isinstance(max_version, (tuple, list)) else str(max_version)
if cmp == -1:
msg = f'ROCm {version} < min {rocm_version_range["min"]}. Please upgrade.'
msg = f'ROCm {version} < min {min_version_str}. Please upgrade.'
elif cmp == 1:
msg = f'ROCm {version} > max {rocm_version_range["max"]}. Falling back to CPU.'
msg = f'ROCm {version} > max {max_version_str}. Falling back to CPU.'
elif cmp == 0:
devices['ROCM']['found'] = True
parts = version.split(".")
@@ -531,10 +536,12 @@ class DeviceInstaller():
break
if version:
cmp = toolkit_version_compare(version, cuda_version_range)
min_ver = ".".join(str(part) for part in cuda_version_range["min"])
max_ver = ".".join(str(part) for part in cuda_version_range["max"])
if cmp == -1:
msg = f'CUDA {version} < min {cuda_version_range["min"]}. Please upgrade.'
msg = f'CUDA {version} < min {min_ver}. Please upgrade.'
elif cmp == 1:
msg = f'CUDA {version} > max {cuda_version_range["max"]}. Falling back to CPU.'
msg = f'CUDA {version} > max {max_ver}. Falling back to CPU.'
elif cmp == 0:
devices['CUDA']['found'] = True
parts = version.split(".")
@@ -580,7 +587,14 @@ class DeviceInstaller():
if version:
cmp = toolkit_version_compare(version, xpu_version_range)
if cmp == -1 or cmp == 1:
msg = f'XPU {version} out of supported range {xpu_version_range}. Falling back to CPU.'
range_display = (
f"{xpu_version_range.get('min')} to {xpu_version_range.get('max')}"
if isinstance(xpu_version_range, dict)
and 'min' in xpu_version_range
and 'max' in xpu_version_range
else str(xpu_version_range)
)
msg = f'XPU {version} out of supported range {range_display}. Falling back to CPU.'
elif cmp == 0:
devices['XPU']['found'] = True
name = 'xpu'
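Aside from parsing the version file through lib_version_parse(), the main change in these hunks is cosmetic: the min/max bounds of the ROCm, CUDA and XPU version ranges are now joined into dotted strings before being interpolated into the warning messages, instead of printing raw tuples or the whole range dict. Below is a minimal sketch of that formatting and of the -1/0/1 comparison convention the messages rely on; the range shape and both helper names are assumptions for illustration, not the repo's lib_version_parse / toolkit_version_compare.

# Illustrative sketch only; helper names and the range shape are assumed.
cuda_version_range = {"min": (11, 8), "max": (12, 8)}

def format_bound(bound):
    # (12, 8) -> "12.8"; anything that is not a tuple/list passes through unchanged
    return ".".join(map(str, bound)) if isinstance(bound, (tuple, list)) else str(bound)

def compare_version(version, version_range):
    # -1: below min, 1: above max, 0: inside the range (same cmp convention as above)
    parts = tuple(int(p) for p in version.split(".")[:2])
    if parts < tuple(version_range["min"]):
        return -1
    if parts > tuple(version_range["max"]):
        return 1
    return 0

version = "12.9"
cmp = compare_version(version, cuda_version_range)
if cmp == -1:
    print(f"CUDA {version} < min {format_bound(cuda_version_range['min'])}. Please upgrade.")
elif cmp == 1:
    print(f"CUDA {version} > max {format_bound(cuda_version_range['max'])}. Falling back to CPU.")
else:
    print(f"CUDA {version} is inside the supported range.")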

View File

@@ -122,6 +122,9 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
return False
if self.engine:
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
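The three added lines are a guard that every engine in this commit receives (bark, fairseq, tacotron, vits, xtts, yourtts): fragments shorter than three characters or containing no alphanumeric character are skipped and reported as handled (return True) instead of being sent to the synthesizer. A standalone sketch of the check, with the function name invented here for illustration:

# Illustrative only; in the commit the check is inlined in each engine's convert path.
def should_skip(sentence: str) -> bool:
    s = sentence.strip()
    return len(s) < 3 or not any(c.isalnum() for c in s)

print(should_skip('   .  '))   # True  -> punctuation-only fragment, nothing to synthesize
print(should_skip('Hi'))       # True  -> shorter than three characters
print(should_skip('Hello.'))   # False -> goes through TTS normally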

View File

@@ -78,6 +78,9 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
if self.engine:
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds

View File

@@ -103,6 +103,9 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
if self.engine:
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds

View File

@@ -86,6 +86,9 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
if self.engine:
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds

View File

@@ -85,6 +85,9 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
return False
if self.engine:
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds

View File

@@ -74,6 +74,9 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
return False
if self.engine:
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
s = sentence.strip()
if len(s) < 3 or not any(c.isalnum() for c in s):
return True
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds

View File

@@ -1039,10 +1039,15 @@ def get_sentences(text:str, id:str)->list|None:
jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache')
result.extend([t for t in jieba.cut(segment) if t.strip()])
elif lang == 'jpn':
"""
from sudachipy import dictionary, tokenizer
sudachi = dictionary.Dictionary().create()
mode = tokenizer.Tokenizer.SplitMode.C
result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()])
"""
import nagisa
tokens = nagisa.tagging(segment).words
result.extend(tokens)
elif lang == 'kor':
from soynlp.tokenizer import LTokenizer
ltokenizer = LTokenizer()
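Japanese segmentation switches from SudachiPy (kept above only as a commented-out block) to nagisa, matching the sudachipy / sudachidict-core to nagisa swap in the dependency lists elsewhere in this commit. A minimal usage sketch of the call used in get_sentences(); the sample text and output are illustrative:

import nagisa  # pip install nagisa

segment = 'これはテストです'
tokens = nagisa.tagging(segment).words   # word-level tokens, as in the hunk above
print(tokens)                            # e.g. ['これ', 'は', 'テスト', 'です']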
@@ -1679,55 +1684,56 @@ def convert_chapters2audio(id:str)->bool:
print(msg)
if session['is_gui_process']:
progress_bar = gr.Progress(track_tqdm=False)
ebook_name = Path(session['ebook']).name
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
for x in range(0, total_chapters):
chapter_num = x + 1
chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}'
sentences = session['chapters'][x]
sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values())
start = sentence_number
msg = f'Block {chapter_num} containing {sentences_count} sentences...'
print(msg)
for i, sentence in enumerate(sentences):
if session['cancellation_requested']:
msg = 'Cancel requested'
print(msg)
return False
if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0):
if sentence_number <= resume_sentence and sentence_number > 0:
msg = f'**Recovering missing file sentence {sentence_number}'
if session['ebook']:
ebook_name = Path(session['ebook']).name
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
for x in range(0, total_chapters):
chapter_num = x + 1
chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}'
sentences = session['chapters'][x]
sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values())
start = sentence_number
msg = f'Block {chapter_num} containing {sentences_count} sentences...'
print(msg)
for i, sentence in enumerate(sentences):
if session['cancellation_requested']:
msg = 'Cancel requested'
print(msg)
sentence = sentence.strip()
success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True
if success:
total_progress = (t.n + 1) / total_iterations
if session['is_gui_process']:
progress_bar(progress=total_progress, desc=ebook_name)
is_sentence = sentence.strip() not in TTS_SML.values()
percentage = total_progress * 100
t.set_description(f"{percentage:.2f}%")
msg = f' : {sentence}' if is_sentence else f' : {sentence}'
return False
if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0):
if sentence_number <= resume_sentence and sentence_number > 0:
msg = f'**Recovering missing file sentence {sentence_number}'
print(msg)
sentence = sentence.strip()
success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True
if success:
total_progress = (t.n + 1) / total_iterations
if session['is_gui_process']:
progress_bar(progress=total_progress, desc=ebook_name)
is_sentence = sentence.strip() not in TTS_SML.values()
percentage = total_progress * 100
t.set_description(f"{percentage:.2f}%")
msg = f' : {sentence}' if is_sentence else f' : {sentence}'
print(msg)
else:
return False
if sentence.strip() not in TTS_SML.values():
sentence_number += 1
t.update(1)
end = sentence_number - 1 if sentence_number > 1 else sentence_number
msg = f'End of Block {chapter_num}'
print(msg)
if chapter_num in missing_chapters or sentence_number > resume_sentence:
if chapter_num <= resume_chapter:
msg = f'**Recovering missing file block {chapter_num}'
print(msg)
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}'
print(msg)
else:
msg = 'combine_audio_sentences() failed!'
print(msg)
return False
if sentence.strip() not in TTS_SML.values():
sentence_number += 1
t.update(1)
end = sentence_number - 1 if sentence_number > 1 else sentence_number
msg = f'End of Block {chapter_num}'
print(msg)
if chapter_num in missing_chapters or sentence_number > resume_sentence:
if chapter_num <= resume_chapter:
msg = f'**Recovering missing file block {chapter_num}'
print(msg)
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}'
print(msg)
else:
msg = 'combine_audio_sentences() failed!'
print(msg)
return False
return True
except Exception as e:
DependencyError(e)
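Apart from the new if session['ebook']: guard that the conversion loop now sits under, the per-sentence progress accounting is unchanged: each iteration advances the tqdm bar and rewrites its description as a percentage of total_iterations. A minimal sketch of that pattern with a made-up total:

from tqdm import tqdm

total_iterations = 250   # assumed: total number of sentences across all chapters
with tqdm(total=total_iterations, desc='0.00%',
          bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step') as t:
    for _ in range(total_iterations):
        # ... one sentence would be converted here ...
        total_progress = (t.n + 1) / total_iterations
        t.set_description(f'{total_progress * 100:.2f}%')
        t.update(1)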
@@ -1941,11 +1947,10 @@ def combine_audio_chapters(id:str)->list[str]|None:
target_rate = '48000'
cmd += ['-c:a', 'libopus', '-compression_level', '0', '-b:a', '192k', '-ar', target_rate]
cmd += ['-map_metadata', '1']
if 'output_channel' in session:
if session['output_channel'] == 'mono':
cmd += ['-ac', '1']
elif session['output_channel'] == 'stereo':
cmd += ['-ac', '2']
if session['output_channel'] == 'stereo':
cmd += ['-ac', '2']
else:
cmd += ['-ac', '1']
if input_codec == target_codec and input_rate == target_rate:
cmd = [
shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', ffmpeg_combined_audio,
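The channel selection is simplified: rather than first checking that 'output_channel' is present and mapping 'mono' and 'stereo' separately, the new code emits '-ac 2' only for 'stereo' and falls back to '-ac 1' for any other value, relying on convert_ebook() now always setting session['output_channel'] (next hunk). A small sketch of how the flag lands in the ffmpeg argument list; the session dict and cmd prefix here are placeholders:

# Placeholder values; in combine_audio_chapters() cmd already holds the full ffmpeg command.
session = {'output_channel': 'stereo'}
cmd = ['-c:a', 'libopus', '-b:a', '192k']
cmd += ['-ac', '2'] if session['output_channel'] == 'stereo' else ['-ac', '1']
print(cmd)   # ends with '-ac', '2' for stereo, otherwise '-ac', '1'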
@@ -2317,6 +2322,7 @@ def convert_ebook(args:dict)->tuple:
session['bark_waveform_temp'] = float(args['bark_waveform_temp'])
session['audiobooks_dir'] = str(args['audiobooks_dir']) if args['audiobooks_dir'] else None
session['output_format'] = str(args['output_format'])
session['output_channel'] = str(args['output_channel'])
session['output_split'] = bool(args['output_split'])
session['output_split_hours'] = args['output_split_hours'] if args['output_split_hours'] is not None else default_output_split_hours
session['model_cache'] = f"{session['tts_engine']}-{session['fine_tuned']}"

View File

@@ -5,7 +5,7 @@ services:
context: .
dockerfile: Dockerfile
args:
APP_VERSION: ${APP_VERSION:-25.25.25}
APP_VERSION: ${APP_VERSION:-25.12.32}
DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu124, cu128, rocm, xpu, cpu etc.
container_name: ebook2audiobook
working_dir: /app

View File

@@ -27,8 +27,7 @@ dependencies = [
"fastapi",
"hf_xet",
"beautifulsoup4",
"sudachipy",
"sudachidict-core",
"nagisa",
"pymupdf",
"pymupdf-layout",
"pytesseract",

View File

@@ -8,8 +8,7 @@ ebooklib
fastapi
hf_xet
beautifulsoup4
sudachipy
sudachidict-core
nagisa
pymupdf
pymupdf-layout
pytesseract