Mirror of https://github.com/DrewThomasson/ebook2audiobook.git
@@ -122,6 +122,9 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
                 return False
         if self.engine:
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
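The guard added here (and repeated in each engine below) bails out before synthesis when a sentence has no speakable content. A minimal standalone sketch of the check, with a hypothetical helper name (in the diff it is inlined):

# Sketch of the guard added above, pulled out as a standalone helper;
# the name is_skippable is hypothetical, the check itself is verbatim.
def is_skippable(sentence: str) -> bool:
    s = sentence.strip()
    # Skip fragments shorter than 3 chars or with no letters/digits;
    # returning True tells the caller the sentence was "handled"
    # without synthesizing any audio.
    return len(s) < 3 or not any(c.isalnum() for c in s)

assert is_skippable('...')       # punctuation only
assert is_skippable(' - ')       # strips to a single dash
assert is_skippable('A.')        # only 2 chars after strip
assert not is_skippable('Hi there.')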
@@ -78,6 +78,9 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
         if self.engine:
             device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
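The device line in this and the following hunks folds 'jetson' into the CUDA path. A sketch of the lookup; the shape of the devices table here is an assumption, only the conditional pattern is taken from the diff:

# Assumed shape of the devices table; the real one lives elsewhere in the repo.
devices = {'CUDA': {'proc': 'cuda:0'}}

def resolve_device(session_device: str) -> str:
    # 'cuda' and 'jetson' both resolve to the CUDA processing device;
    # anything else ('cpu', 'mps', ...) passes through unchanged.
    return devices['CUDA']['proc'] if session_device in ['cuda', 'jetson'] else session_device

assert resolve_device('jetson') == 'cuda:0'
assert resolve_device('cpu') == 'cpu'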
@@ -103,6 +103,9 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
         if self.engine:
             device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
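The TTS_SML['break'] branch visible in every hunk's context renders a random-length stretch of silence as a zero tensor. A sketch under an assumed 24 kHz sample rate (the real value comes from self.params['samplerate']); note the sampled range is 0.3 to 0.6 s, while the inline comment still says 0.4 to 0.7:

import numpy as np
import torch

samplerate = 24000  # assumed; the engines read self.params['samplerate']

# Draw a silence duration and truncate to 2 decimals, as in the diff.
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(samplerate * silence_time))

# uniform(0.3, 0.6) truncated yields 0.30-0.59 s, so the "0.4 to 0.7
# seconds" comment in the context lines looks stale.
assert 0.30 <= silence_time < 0.60
assert break_tensor.shape == (1, int(samplerate * silence_time))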
@@ -86,6 +86,9 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
         if self.engine:
             device = devices['CUDA']['proc'] if self.session['device'] in ['cuda', 'jetson'] else self.session['device']
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
@@ -85,6 +85,9 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
                 return False
         if self.engine:
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
@@ -74,6 +74,9 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
                 return False
         if self.engine:
             final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
+            s = sentence.strip()
+            if len(s) < 3 or not any(c.isalnum() for c in s):
+                return True
             if sentence == TTS_SML['break']:
                 silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
                 break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
lib/core.py (16 lines changed)
@@ -1047,11 +1047,7 @@ def get_sentences(text:str, id:str)->list|None:
         """
         import nagisa
         tokens = nagisa.tagging(segment).words
-        result.extend([
-            f' {token}'
-            for token in tokens
-            if token.strip()
-        ])
+        result.extend(tokens)
     elif lang == 'kor':
         from soynlp.tokenizer import LTokenizer
         ltokenizer = LTokenizer()
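The Japanese branch now extends result with nagisa's raw word list instead of space-prefixing and filtering each token. A sketch of the before/after behavior, assuming nagisa is installed (the sample sentence and its segmentation are illustrative):

import nagisa  # Japanese tokenizer used in the branch above

segment = 'これはテストです'
tokens = nagisa.tagging(segment).words  # e.g. ['これ', 'は', 'テスト', 'です']

# Old behavior: prefix each non-empty token with a space.
old = [f' {t}' for t in tokens if t.strip()]
# New behavior: take the raw token list as-is, no prefixing or filtering.
new = list(tokens)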
@@ -1951,11 +1947,10 @@ def combine_audio_chapters(id:str)->list[str]|None:
             target_rate = '48000'
             cmd += ['-c:a', 'libopus', '-compression_level', '0', '-b:a', '192k', '-ar', target_rate]
         cmd += ['-map_metadata', '1']
-        if 'output_channel' in session:
-            if session['output_channel'] == 'mono':
-                cmd += ['-ac', '1']
-            elif session['output_channel'] == 'stereo':
-                cmd += ['-ac', '2']
+        if session['output_channel'] == 'stereo':
+            cmd += ['-ac', '2']
+        else:
+            cmd += ['-ac', '1']
         if input_codec == target_codec and input_rate == target_rate:
             cmd = [
                 shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', ffmpeg_combined_audio,
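With the rewrite, every ffmpeg invocation carries an explicit -ac flag: mono is the fallback for anything that is not 'stereo', whereas the old code emitted no flag at all when 'output_channel' was missing from the session. A sketch of the command assembly, with placeholder file names and a minimal session dict:

import shutil

# Placeholders; the real session and input path come from the pipeline.
session = {'output_channel': 'stereo'}
ffmpeg_combined_audio = 'combined.flac'

cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', ffmpeg_combined_audio]
cmd += ['-c:a', 'libopus', '-compression_level', '0', '-b:a', '192k', '-ar', '48000']
cmd += ['-map_metadata', '1']
# Simplified channel handling: stereo when asked, mono otherwise.
if session['output_channel'] == 'stereo':
    cmd += ['-ac', '2']
else:
    cmd += ['-ac', '1']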
@@ -2327,6 +2322,7 @@ def convert_ebook(args:dict)->tuple:
         session['bark_waveform_temp'] = float(args['bark_waveform_temp'])
         session['audiobooks_dir'] = str(args['audiobooks_dir']) if args['audiobooks_dir'] else None
         session['output_format'] = str(args['output_format'])
+        session['output_channel'] = str(args['output_channel'])
         session['output_split'] = bool(args['output_split'])
         session['output_split_hours'] = args['output_split_hours'] if args['output_split_hours'] is not None else default_output_split_hours
         session['model_cache'] = f"{session['tts_engine']}-{session['fine_tuned']}"
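The single added line threads output_channel from the CLI args into the session, which is what lets the simplified -ac logic in combine_audio_chapters drop its 'output_channel' in session guard. A minimal sketch of the handoff; the wrapper function and argument values are placeholders:

# Hypothetical wrapper around the assignments above, for illustration only.
def apply_output_args(session: dict, args: dict) -> None:
    session['output_format'] = str(args['output_format'])
    # Newly threaded through so combine_audio_chapters can always emit -ac:
    session['output_channel'] = str(args['output_channel'])
    session['output_split'] = bool(args['output_split'])

session, args = {}, {'output_format': 'm4b', 'output_channel': 'mono', 'output_split': False}
apply_output_args(session, args)
assert session['output_channel'] == 'mono'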