diff --git a/Dockerfile b/Dockerfile index 0cf225cb..d8c01e06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm SHELL ["/bin/bash", "-o", "pipefail", "-c"] -ARG APP_VERSION=25.12.31 -ARG DEVICE_TAG=cpu +ARG APP_VERSION=25.12.32 +ARG DEVICE_TAG=cu128 ARG DOCKER_DEVICE_STR='{"name": "cu128", "os": "manylinux_2_28", "arch": "x86_64", "pyvenv": [3, 12], "tag": "cu128", "note": "default device"}' ARG DOCKER_PROGRAMS_STR="curl ffmpeg nodejs npm espeak-ng sox tesseract-ocr" ARG CALIBRE_INSTALLER_URL="https://download.calibre-ebook.com/linux-installer.sh" diff --git a/VERSION.txt b/VERSION.txt index fbbaaf9f..9d72f5a8 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -25.12.31 \ No newline at end of file +25.12.32 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 80715e01..6fc06c80 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: context: . dockerfile: Dockerfile args: - APP_VERSION: ${APP_VERSION:-25.25.25} + APP_VERSION: ${APP_VERSION:-25.12.32} DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu128, cu118, rocm, xpu, cpu container_name: ebook2audiobook working_dir: /app diff --git a/ebook2audiobook.egg-info/requires.txt b/ebook2audiobook.egg-info/requires.txt index 68e5da5f..a7678966 100644 --- a/ebook2audiobook.egg-info/requires.txt +++ b/ebook2audiobook.egg-info/requires.txt @@ -8,8 +8,7 @@ ebooklib fastapi hf_xet beautifulsoup4 -sudachipy -sudachidict-core +nagisa pymupdf pymupdf-layout pytesseract diff --git a/lib/classes/device_installer.py b/lib/classes/device_installer.py index f7d51497..07240871 100644 --- a/lib/classes/device_installer.py +++ b/lib/classes/device_installer.py @@ -428,7 +428,8 @@ class DeviceInstaller(): ): if os.path.exists(p): with open(p, 'r', encoding='utf-8', errors='ignore') as f: - version = f.read() + v = f.read() + version = lib_version_parse(v) break elif os.name == 'nt': for env in ('ROCM_PATH', 'HIP_PATH'): @@ -447,10 +448,14 @@ class DeviceInstaller(): break if version: cmp = toolkit_version_compare(version, rocm_version_range) + min_version = rocm_version_range["min"] + max_version = rocm_version_range["max"] + min_version_str = ".".join(map(str, min_version)) if isinstance(min_version, (tuple, list)) else str(min_version) + max_version_str = ".".join(map(str, max_version)) if isinstance(max_version, (tuple, list)) else str(max_version) if cmp == -1: - msg = f'ROCm {version} < min {rocm_version_range["min"]}. Please upgrade.' + msg = f'ROCm {version} < min {min_version_str}. Please upgrade.' elif cmp == 1: - msg = f'ROCm {version} > max {rocm_version_range["max"]}. Falling back to CPU.' + msg = f'ROCm {version} > max {max_version_str}. Falling back to CPU.' elif cmp == 0: devices['ROCM']['found'] = True parts = version.split(".") @@ -531,10 +536,12 @@ class DeviceInstaller(): break if version: cmp = toolkit_version_compare(version, cuda_version_range) + min_ver = ".".join(str(part) for part in cuda_version_range["min"]) + max_ver = ".".join(str(part) for part in cuda_version_range["max"]) if cmp == -1: - msg = f'CUDA {version} < min {cuda_version_range["min"]}. Please upgrade.' + msg = f'CUDA {version} < min {min_ver}. Please upgrade.' elif cmp == 1: - msg = f'CUDA {version} > max {cuda_version_range["max"]}. Falling back to CPU.' + msg = f'CUDA {version} > max {max_ver}. Falling back to CPU.' elif cmp == 0: devices['CUDA']['found'] = True parts = version.split(".") @@ -580,7 +587,14 @@ class DeviceInstaller(): if version: cmp = toolkit_version_compare(version, xpu_version_range) if cmp == -1 or cmp == 1: - msg = f'XPU {version} out of supported range {xpu_version_range}. Falling back to CPU.' + range_display = ( + f"{xpu_version_range.get('min')} to {xpu_version_range.get('max')}" + if isinstance(xpu_version_range, dict) + and 'min' in xpu_version_range + and 'max' in xpu_version_range + else str(xpu_version_range) + ) + msg = f'XPU {version} out of supported range {range_display}. Falling back to CPU.' elif cmp == 0: devices['XPU']['found'] = True name = 'xpu' diff --git a/lib/classes/tts_engines/bark.py b/lib/classes/tts_engines/bark.py index c8147fab..feb45653 100644 --- a/lib/classes/tts_engines/bark.py +++ b/lib/classes/tts_engines/bark.py @@ -122,6 +122,9 @@ class Bark(TTSUtils, TTSRegistry, name='bark'): return False if self.engine: final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/classes/tts_engines/fairseq.py b/lib/classes/tts_engines/fairseq.py index 279d3b38..2df92fdc 100644 --- a/lib/classes/tts_engines/fairseq.py +++ b/lib/classes/tts_engines/fairseq.py @@ -78,6 +78,9 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'): if self.engine: device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device'] final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/classes/tts_engines/tacotron.py b/lib/classes/tts_engines/tacotron.py index 6095a59c..c9ab98ee 100644 --- a/lib/classes/tts_engines/tacotron.py +++ b/lib/classes/tts_engines/tacotron.py @@ -103,6 +103,9 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'): if self.engine: device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device'] final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/classes/tts_engines/vits.py b/lib/classes/tts_engines/vits.py index 40c3c534..3ff058d2 100644 --- a/lib/classes/tts_engines/vits.py +++ b/lib/classes/tts_engines/vits.py @@ -86,6 +86,9 @@ class Vits(TTSUtils, TTSRegistry, name='vits'): if self.engine: device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device'] final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/classes/tts_engines/xtts.py b/lib/classes/tts_engines/xtts.py index 57b50ce3..81cfe65a 100644 --- a/lib/classes/tts_engines/xtts.py +++ b/lib/classes/tts_engines/xtts.py @@ -85,6 +85,9 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'): return False if self.engine: final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/classes/tts_engines/yourtts.py b/lib/classes/tts_engines/yourtts.py index 18db70e3..a9b2e206 100644 --- a/lib/classes/tts_engines/yourtts.py +++ b/lib/classes/tts_engines/yourtts.py @@ -74,6 +74,9 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'): return False if self.engine: final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}') + s = sentence.strip() + if len(s) < 3 or not any(c.isalnum() for c in s): + return True if sentence == TTS_SML['break']: silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds diff --git a/lib/core.py b/lib/core.py index 87e81428..58932330 100644 --- a/lib/core.py +++ b/lib/core.py @@ -1039,10 +1039,15 @@ def get_sentences(text:str, id:str)->list|None: jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache') result.extend([t for t in jieba.cut(segment) if t.strip()]) elif lang == 'jpn': + """ from sudachipy import dictionary, tokenizer sudachi = dictionary.Dictionary().create() mode = tokenizer.Tokenizer.SplitMode.C result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()]) + """ + import nagisa + tokens = nagisa.tagging(segment).words + result.extend(tokens) elif lang == 'kor': from soynlp.tokenizer import LTokenizer ltokenizer = LTokenizer() @@ -1679,55 +1684,56 @@ def convert_chapters2audio(id:str)->bool: print(msg) if session['is_gui_process']: progress_bar = gr.Progress(track_tqdm=False) - ebook_name = Path(session['ebook']).name - with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t: - for x in range(0, total_chapters): - chapter_num = x + 1 - chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}' - sentences = session['chapters'][x] - sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values()) - start = sentence_number - msg = f'Block {chapter_num} containing {sentences_count} sentences...' - print(msg) - for i, sentence in enumerate(sentences): - if session['cancellation_requested']: - msg = 'Cancel requested' - print(msg) - return False - if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0): - if sentence_number <= resume_sentence and sentence_number > 0: - msg = f'**Recovering missing file sentence {sentence_number}' + if session['ebook']: + ebook_name = Path(session['ebook']).name + with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t: + for x in range(0, total_chapters): + chapter_num = x + 1 + chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}' + sentences = session['chapters'][x] + sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values()) + start = sentence_number + msg = f'Block {chapter_num} containing {sentences_count} sentences...' + print(msg) + for i, sentence in enumerate(sentences): + if session['cancellation_requested']: + msg = 'Cancel requested' print(msg) - sentence = sentence.strip() - success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True - if success: - total_progress = (t.n + 1) / total_iterations - if session['is_gui_process']: - progress_bar(progress=total_progress, desc=ebook_name) - is_sentence = sentence.strip() not in TTS_SML.values() - percentage = total_progress * 100 - t.set_description(f"{percentage:.2f}%") - msg = f' : {sentence}' if is_sentence else f' : {sentence}' + return False + if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0): + if sentence_number <= resume_sentence and sentence_number > 0: + msg = f'**Recovering missing file sentence {sentence_number}' + print(msg) + sentence = sentence.strip() + success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True + if success: + total_progress = (t.n + 1) / total_iterations + if session['is_gui_process']: + progress_bar(progress=total_progress, desc=ebook_name) + is_sentence = sentence.strip() not in TTS_SML.values() + percentage = total_progress * 100 + t.set_description(f"{percentage:.2f}%") + msg = f' : {sentence}' if is_sentence else f' : {sentence}' + print(msg) + else: + return False + if sentence.strip() not in TTS_SML.values(): + sentence_number += 1 + t.update(1) + end = sentence_number - 1 if sentence_number > 1 else sentence_number + msg = f'End of Block {chapter_num}' + print(msg) + if chapter_num in missing_chapters or sentence_number > resume_sentence: + if chapter_num <= resume_chapter: + msg = f'**Recovering missing file block {chapter_num}' + print(msg) + if combine_audio_sentences(chapter_audio_file, int(start), int(end), id): + msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}' print(msg) else: + msg = 'combine_audio_sentences() failed!' + print(msg) return False - if sentence.strip() not in TTS_SML.values(): - sentence_number += 1 - t.update(1) - end = sentence_number - 1 if sentence_number > 1 else sentence_number - msg = f'End of Block {chapter_num}' - print(msg) - if chapter_num in missing_chapters or sentence_number > resume_sentence: - if chapter_num <= resume_chapter: - msg = f'**Recovering missing file block {chapter_num}' - print(msg) - if combine_audio_sentences(chapter_audio_file, int(start), int(end), id): - msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}' - print(msg) - else: - msg = 'combine_audio_sentences() failed!' - print(msg) - return False return True except Exception as e: DependencyError(e) @@ -1941,11 +1947,10 @@ def combine_audio_chapters(id:str)->list[str]|None: target_rate = '48000' cmd += ['-c:a', 'libopus', '-compression_level', '0', '-b:a', '192k', '-ar', target_rate] cmd += ['-map_metadata', '1'] - if 'output_channel' in session: - if session['output_channel'] == 'mono': - cmd += ['-ac', '1'] - elif session['output_channel'] == 'stereo': - cmd += ['-ac', '2'] + if session['output_channel'] == 'stereo': + cmd += ['-ac', '2'] + else: + cmd += ['-ac', '1'] if input_codec == target_codec and input_rate == target_rate: cmd = [ shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', ffmpeg_combined_audio, @@ -2317,6 +2322,7 @@ def convert_ebook(args:dict)->tuple: session['bark_waveform_temp'] = float(args['bark_waveform_temp']) session['audiobooks_dir'] = str(args['audiobooks_dir']) if args['audiobooks_dir'] else None session['output_format'] = str(args['output_format']) + session['output_channel'] = str(args['output_channel']) session['output_split'] = bool(args['output_split']) session['output_split_hours'] = args['output_split_hours']if args['output_split_hours'] is not None else default_output_split_hours session['model_cache'] = f"{session['tts_engine']}-{session['fine_tuned']}" diff --git a/podman-compose.yml b/podman-compose.yml index d6ba4379..786c3a46 100644 --- a/podman-compose.yml +++ b/podman-compose.yml @@ -5,7 +5,7 @@ services: context: . dockerfile: Dockerfile args: - APP_VERSION: ${APP_VERSION:-25.25.25} + APP_VERSION: ${APP_VERSION:-25.12.32} DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu124, cu128, rocm, xpu, cpu etc. container_name: ebook2audiobook working_dir: /app diff --git a/pyproject.toml b/pyproject.toml index eb8dc745..7fea2c75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,7 @@ dependencies = [ "fastapi", "hf_xet", "beautifulsoup4", - "sudachipy", - "sudachidict-core", + "nagisa", "pymupdf", "pymupdf-layout", "pytesseract", diff --git a/requirements.txt b/requirements.txt index 4dd22fe5..02a7e0cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,8 +8,7 @@ ebooklib fastapi hf_xet beautifulsoup4 -sudachipy -sudachidict-core +nagisa pymupdf pymupdf-layout pytesseract