mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-07 21:14:06 -05:00
v25.12.32
This commit is contained in:
@@ -3,8 +3,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
|
||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||
|
||||
ARG APP_VERSION=25.12.31
|
||||
ARG DEVICE_TAG=cpu
|
||||
ARG APP_VERSION=25.12.32
|
||||
ARG DEVICE_TAG=cu128
|
||||
ARG DOCKER_DEVICE_STR='{"name": "cu128", "os": "manylinux_2_28", "arch": "x86_64", "pyvenv": [3, 12], "tag": "cu128", "note": "default device"}'
|
||||
ARG DOCKER_PROGRAMS_STR="curl ffmpeg nodejs npm espeak-ng sox tesseract-ocr"
|
||||
ARG CALIBRE_INSTALLER_URL="https://download.calibre-ebook.com/linux-installer.sh"
|
||||
|
||||
@@ -1 +1 @@
|
||||
25.12.31
|
||||
25.12.32
|
||||
@@ -5,7 +5,7 @@ services:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APP_VERSION: ${APP_VERSION:-25.25.25}
|
||||
APP_VERSION: ${APP_VERSION:-25.12.32}
|
||||
DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu128, cu118, rocm, xpu, cpu
|
||||
container_name: ebook2audiobook
|
||||
working_dir: /app
|
||||
|
||||
@@ -8,8 +8,7 @@ ebooklib
|
||||
fastapi
|
||||
hf_xet
|
||||
beautifulsoup4
|
||||
sudachipy
|
||||
sudachidict-core
|
||||
nagisa
|
||||
pymupdf
|
||||
pymupdf-layout
|
||||
pytesseract
|
||||
|
||||
@@ -428,7 +428,8 @@ class DeviceInstaller():
|
||||
):
|
||||
if os.path.exists(p):
|
||||
with open(p, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
version = f.read()
|
||||
v = f.read()
|
||||
version = lib_version_parse(v)
|
||||
break
|
||||
elif os.name == 'nt':
|
||||
for env in ('ROCM_PATH', 'HIP_PATH'):
|
||||
@@ -447,10 +448,14 @@ class DeviceInstaller():
|
||||
break
|
||||
if version:
|
||||
cmp = toolkit_version_compare(version, rocm_version_range)
|
||||
min_version = rocm_version_range["min"]
|
||||
max_version = rocm_version_range["max"]
|
||||
min_version_str = ".".join(map(str, min_version)) if isinstance(min_version, (tuple, list)) else str(min_version)
|
||||
max_version_str = ".".join(map(str, max_version)) if isinstance(max_version, (tuple, list)) else str(max_version)
|
||||
if cmp == -1:
|
||||
msg = f'ROCm {version} < min {rocm_version_range["min"]}. Please upgrade.'
|
||||
msg = f'ROCm {version} < min {min_version_str}. Please upgrade.'
|
||||
elif cmp == 1:
|
||||
msg = f'ROCm {version} > max {rocm_version_range["max"]}. Falling back to CPU.'
|
||||
msg = f'ROCm {version} > max {max_version_str}. Falling back to CPU.'
|
||||
elif cmp == 0:
|
||||
devices['ROCM']['found'] = True
|
||||
parts = version.split(".")
|
||||
@@ -531,10 +536,12 @@ class DeviceInstaller():
|
||||
break
|
||||
if version:
|
||||
cmp = toolkit_version_compare(version, cuda_version_range)
|
||||
min_ver = ".".join(str(part) for part in cuda_version_range["min"])
|
||||
max_ver = ".".join(str(part) for part in cuda_version_range["max"])
|
||||
if cmp == -1:
|
||||
msg = f'CUDA {version} < min {cuda_version_range["min"]}. Please upgrade.'
|
||||
msg = f'CUDA {version} < min {min_ver}. Please upgrade.'
|
||||
elif cmp == 1:
|
||||
msg = f'CUDA {version} > max {cuda_version_range["max"]}. Falling back to CPU.'
|
||||
msg = f'CUDA {version} > max {max_ver}. Falling back to CPU.'
|
||||
elif cmp == 0:
|
||||
devices['CUDA']['found'] = True
|
||||
parts = version.split(".")
|
||||
@@ -580,7 +587,14 @@ class DeviceInstaller():
|
||||
if version:
|
||||
cmp = toolkit_version_compare(version, xpu_version_range)
|
||||
if cmp == -1 or cmp == 1:
|
||||
msg = f'XPU {version} out of supported range {xpu_version_range}. Falling back to CPU.'
|
||||
range_display = (
|
||||
f"{xpu_version_range.get('min')} to {xpu_version_range.get('max')}"
|
||||
if isinstance(xpu_version_range, dict)
|
||||
and 'min' in xpu_version_range
|
||||
and 'max' in xpu_version_range
|
||||
else str(xpu_version_range)
|
||||
)
|
||||
msg = f'XPU {version} out of supported range {range_display}. Falling back to CPU.'
|
||||
elif cmp == 0:
|
||||
devices['XPU']['found'] = True
|
||||
name = 'xpu'
|
||||
|
||||
@@ -122,6 +122,9 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
|
||||
return False
|
||||
if self.engine:
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
@@ -78,6 +78,9 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
|
||||
if self.engine:
|
||||
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
@@ -103,6 +103,9 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
|
||||
if self.engine:
|
||||
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
@@ -86,6 +86,9 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
|
||||
if self.engine:
|
||||
device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
@@ -85,6 +85,9 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
|
||||
return False
|
||||
if self.engine:
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
@@ -74,6 +74,9 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
|
||||
return False
|
||||
if self.engine:
|
||||
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
|
||||
s = sentence.strip()
|
||||
if len(s) < 3 or not any(c.isalnum() for c in s):
|
||||
return True
|
||||
if sentence == TTS_SML['break']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
|
||||
106
lib/core.py
106
lib/core.py
@@ -1039,10 +1039,15 @@ def get_sentences(text:str, id:str)->list|None:
|
||||
jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache')
|
||||
result.extend([t for t in jieba.cut(segment) if t.strip()])
|
||||
elif lang == 'jpn':
|
||||
"""
|
||||
from sudachipy import dictionary, tokenizer
|
||||
sudachi = dictionary.Dictionary().create()
|
||||
mode = tokenizer.Tokenizer.SplitMode.C
|
||||
result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()])
|
||||
"""
|
||||
import nagisa
|
||||
tokens = nagisa.tagging(segment).words
|
||||
result.extend(tokens)
|
||||
elif lang == 'kor':
|
||||
from soynlp.tokenizer import LTokenizer
|
||||
ltokenizer = LTokenizer()
|
||||
@@ -1679,55 +1684,56 @@ def convert_chapters2audio(id:str)->bool:
|
||||
print(msg)
|
||||
if session['is_gui_process']:
|
||||
progress_bar = gr.Progress(track_tqdm=False)
|
||||
ebook_name = Path(session['ebook']).name
|
||||
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
|
||||
for x in range(0, total_chapters):
|
||||
chapter_num = x + 1
|
||||
chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}'
|
||||
sentences = session['chapters'][x]
|
||||
sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values())
|
||||
start = sentence_number
|
||||
msg = f'Block {chapter_num} containing {sentences_count} sentences...'
|
||||
print(msg)
|
||||
for i, sentence in enumerate(sentences):
|
||||
if session['cancellation_requested']:
|
||||
msg = 'Cancel requested'
|
||||
print(msg)
|
||||
return False
|
||||
if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0):
|
||||
if sentence_number <= resume_sentence and sentence_number > 0:
|
||||
msg = f'**Recovering missing file sentence {sentence_number}'
|
||||
if session['ebook']:
|
||||
ebook_name = Path(session['ebook']).name
|
||||
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
|
||||
for x in range(0, total_chapters):
|
||||
chapter_num = x + 1
|
||||
chapter_audio_file = f'chapter_{chapter_num}.{default_audio_proc_format}'
|
||||
sentences = session['chapters'][x]
|
||||
sentences_count = sum(1 for row in sentences if row.strip() not in TTS_SML.values())
|
||||
start = sentence_number
|
||||
msg = f'Block {chapter_num} containing {sentences_count} sentences...'
|
||||
print(msg)
|
||||
for i, sentence in enumerate(sentences):
|
||||
if session['cancellation_requested']:
|
||||
msg = 'Cancel requested'
|
||||
print(msg)
|
||||
sentence = sentence.strip()
|
||||
success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True
|
||||
if success:
|
||||
total_progress = (t.n + 1) / total_iterations
|
||||
if session['is_gui_process']:
|
||||
progress_bar(progress=total_progress, desc=ebook_name)
|
||||
is_sentence = sentence.strip() not in TTS_SML.values()
|
||||
percentage = total_progress * 100
|
||||
t.set_description(f"{percentage:.2f}%")
|
||||
msg = f' : {sentence}' if is_sentence else f' : {sentence}'
|
||||
return False
|
||||
if sentence_number in missing_sentences or sentence_number > resume_sentence or (sentence_number == 0 and resume_sentence == 0):
|
||||
if sentence_number <= resume_sentence and sentence_number > 0:
|
||||
msg = f'**Recovering missing file sentence {sentence_number}'
|
||||
print(msg)
|
||||
sentence = sentence.strip()
|
||||
success = tts_manager.convert_sentence2audio(sentence_number, sentence) if sentence else True
|
||||
if success:
|
||||
total_progress = (t.n + 1) / total_iterations
|
||||
if session['is_gui_process']:
|
||||
progress_bar(progress=total_progress, desc=ebook_name)
|
||||
is_sentence = sentence.strip() not in TTS_SML.values()
|
||||
percentage = total_progress * 100
|
||||
t.set_description(f"{percentage:.2f}%")
|
||||
msg = f' : {sentence}' if is_sentence else f' : {sentence}'
|
||||
print(msg)
|
||||
else:
|
||||
return False
|
||||
if sentence.strip() not in TTS_SML.values():
|
||||
sentence_number += 1
|
||||
t.update(1)
|
||||
end = sentence_number - 1 if sentence_number > 1 else sentence_number
|
||||
msg = f'End of Block {chapter_num}'
|
||||
print(msg)
|
||||
if chapter_num in missing_chapters or sentence_number > resume_sentence:
|
||||
if chapter_num <= resume_chapter:
|
||||
msg = f'**Recovering missing file block {chapter_num}'
|
||||
print(msg)
|
||||
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
|
||||
msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}'
|
||||
print(msg)
|
||||
else:
|
||||
msg = 'combine_audio_sentences() failed!'
|
||||
print(msg)
|
||||
return False
|
||||
if sentence.strip() not in TTS_SML.values():
|
||||
sentence_number += 1
|
||||
t.update(1)
|
||||
end = sentence_number - 1 if sentence_number > 1 else sentence_number
|
||||
msg = f'End of Block {chapter_num}'
|
||||
print(msg)
|
||||
if chapter_num in missing_chapters or sentence_number > resume_sentence:
|
||||
if chapter_num <= resume_chapter:
|
||||
msg = f'**Recovering missing file block {chapter_num}'
|
||||
print(msg)
|
||||
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
|
||||
msg = f'Combining block {chapter_num} to audio, sentence {start} to {end}'
|
||||
print(msg)
|
||||
else:
|
||||
msg = 'combine_audio_sentences() failed!'
|
||||
print(msg)
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
DependencyError(e)
|
||||
@@ -1941,11 +1947,10 @@ def combine_audio_chapters(id:str)->list[str]|None:
|
||||
target_rate = '48000'
|
||||
cmd += ['-c:a', 'libopus', '-compression_level', '0', '-b:a', '192k', '-ar', target_rate]
|
||||
cmd += ['-map_metadata', '1']
|
||||
if 'output_channel' in session:
|
||||
if session['output_channel'] == 'mono':
|
||||
cmd += ['-ac', '1']
|
||||
elif session['output_channel'] == 'stereo':
|
||||
cmd += ['-ac', '2']
|
||||
if session['output_channel'] == 'stereo':
|
||||
cmd += ['-ac', '2']
|
||||
else:
|
||||
cmd += ['-ac', '1']
|
||||
if input_codec == target_codec and input_rate == target_rate:
|
||||
cmd = [
|
||||
shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', ffmpeg_combined_audio,
|
||||
@@ -2317,6 +2322,7 @@ def convert_ebook(args:dict)->tuple:
|
||||
session['bark_waveform_temp'] = float(args['bark_waveform_temp'])
|
||||
session['audiobooks_dir'] = str(args['audiobooks_dir']) if args['audiobooks_dir'] else None
|
||||
session['output_format'] = str(args['output_format'])
|
||||
session['output_channel'] = str(args['output_channel'])
|
||||
session['output_split'] = bool(args['output_split'])
|
||||
session['output_split_hours'] = args['output_split_hours']if args['output_split_hours'] is not None else default_output_split_hours
|
||||
session['model_cache'] = f"{session['tts_engine']}-{session['fine_tuned']}"
|
||||
|
||||
@@ -5,7 +5,7 @@ services:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APP_VERSION: ${APP_VERSION:-25.25.25}
|
||||
APP_VERSION: ${APP_VERSION:-25.12.32}
|
||||
DEVICE_TAG: ${DEVICE_TAG:-cpu} # e.g. cu124, cu128, rocm, xpu, cpu etc.
|
||||
container_name: ebook2audiobook
|
||||
working_dir: /app
|
||||
|
||||
@@ -27,8 +27,7 @@ dependencies = [
|
||||
"fastapi",
|
||||
"hf_xet",
|
||||
"beautifulsoup4",
|
||||
"sudachipy",
|
||||
"sudachidict-core",
|
||||
"nagisa",
|
||||
"pymupdf",
|
||||
"pymupdf-layout",
|
||||
"pytesseract",
|
||||
|
||||
@@ -8,8 +8,7 @@ ebooklib
|
||||
fastapi
|
||||
hf_xet
|
||||
beautifulsoup4
|
||||
sudachipy
|
||||
sudachidict-core
|
||||
nagisa
|
||||
pymupdf
|
||||
pymupdf-layout
|
||||
pytesseract
|
||||
|
||||
Reference in New Issue
Block a user