mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-10 06:18:02 -05:00
V25 -> V26
This commit is contained in:
@@ -120,7 +120,7 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -135,14 +135,14 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
|
||||
@@ -74,7 +74,7 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -89,14 +89,14 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
|
||||
@@ -99,7 +99,7 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -114,14 +114,14 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
|
||||
@@ -82,7 +82,7 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -97,14 +97,14 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
|
||||
@@ -81,7 +81,7 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -96,14 +96,14 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
|
||||
@@ -71,7 +71,7 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def convert_sml(self, sml:str)->None:
|
||||
def convert_sml(self, sml:str)->bool:
|
||||
if sml == TTS_SML['break']['token']:
|
||||
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
|
||||
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
|
||||
@@ -86,14 +86,14 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
|
||||
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
|
||||
self.audio_segments.append(pause_tensor.clone())
|
||||
elif TTS_SML['voice']['match'].fullmatch(sml):
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
|
||||
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
|
||||
if os.path.exists(self.session['voice']):
|
||||
if not self.set_voice():
|
||||
return False
|
||||
if self.set_voice():
|
||||
return True
|
||||
else:
|
||||
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
|
||||
print(error)
|
||||
return True
|
||||
return False
|
||||
|
||||
def convert(self, sentence_index:int, sentence:str)->bool:
|
||||
try:
|
||||
@@ -165,7 +165,7 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
|
||||
sentence_obj = {
|
||||
"start": start_time,
|
||||
"end": end_time,
|
||||
"text": part,
|
||||
"text": sentence,
|
||||
"idx": self.sentence_idx
|
||||
}
|
||||
self.sentence_idx = self._append_sentence2vtt(sentence_obj, self.vtt_path)
|
||||
|
||||
23
lib/core.py
23
lib/core.py
@@ -1000,6 +1000,7 @@ def filter_chapter(doc:EpubHtml, id:str, stanza_nlp:Pipeline, is_num2words_compa
|
||||
return None
|
||||
|
||||
def get_sentences(text:str, id:str)->list|None:
|
||||
|
||||
def split_inclusive(text:str, pattern:re.Pattern[str])->list[str]:
|
||||
result = []
|
||||
last_end = 0
|
||||
@@ -1675,17 +1676,17 @@ def convert_chapters2audio(id:str)->bool:
|
||||
if session['ebook']:
|
||||
ebook_name = Path(session['ebook']).name
|
||||
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
|
||||
sentence_num = 0
|
||||
sentence_idx = 0
|
||||
sml_values = {
|
||||
v['token']
|
||||
for v in TTS_SML.values()
|
||||
if isinstance(v, dict) and 'token' in v
|
||||
}
|
||||
for c in range(0, total_chapters):
|
||||
chapter_idx = c + 1
|
||||
chapter_idx = c
|
||||
chapter_audio_file = f'chapter_{chapter_idx}.{default_audio_proc_format}'
|
||||
sentences = session['chapters'][c]
|
||||
start = sentence_num
|
||||
start = sentence_idx
|
||||
if c in missing_chapters:
|
||||
msg = f'********* Recovering missing block {c} *********'
|
||||
print(msg)
|
||||
@@ -1694,20 +1695,20 @@ def convert_chapters2audio(id:str)->bool:
|
||||
print(msg)
|
||||
msg = f'Block {chapter_idx} containing {len(sentences)} sentences…'
|
||||
print(msg)
|
||||
for sentence_num, sentence in enumerate(sentences):
|
||||
for sentence_idx, sentence in enumerate(sentences):
|
||||
if session['cancellation_requested']:
|
||||
msg = 'Cancel requested'
|
||||
print(msg)
|
||||
return False
|
||||
if sentence_num in missing_sentences or sentence_num >= resume_sentence:
|
||||
if sentence_idx in missing_sentences or sentence_idx >= resume_sentence:
|
||||
sentence = sentence.strip()
|
||||
if len(sentence) > 2 and any(c.isalnum() for c in sentence):
|
||||
if sentence_num in missing_sentences:
|
||||
msg = f'********* Recovering missing sentence {sentence_num} *********'
|
||||
elif resume_sentence == sentence_num and resume_sentence > 0:
|
||||
if sentence_idx in missing_sentences:
|
||||
msg = f'********* Recovering missing sentence {sentence_idx} *********'
|
||||
elif resume_sentence == sentence_idx and resume_sentence > 0:
|
||||
msg = f'********* Resuming from sentence {resume_sentence} ********'
|
||||
print(msg)
|
||||
success = tts_manager.convert_sentence2audio(sentence_num, sentence) if sentence else True
|
||||
success = tts_manager.convert_sentence2audio(sentence_idx, sentence) if sentence else True
|
||||
if not success:
|
||||
return False
|
||||
total_progress = (t.n + 1) / total_iterations
|
||||
@@ -1718,10 +1719,10 @@ def convert_chapters2audio(id:str)->bool:
|
||||
msg = f' : {sentence}'
|
||||
print(msg)
|
||||
t.update(1)
|
||||
end = sentence_num
|
||||
end = sentence_idx
|
||||
msg = f'End of Block {chapter_idx}'
|
||||
print(msg)
|
||||
if chapter_idx in missing_chapters or sentence_num > resume_sentence:
|
||||
if chapter_idx in missing_chapters or sentence_idx > resume_sentence:
|
||||
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
|
||||
msg = f'Combining block {chapter_idx} to audio, sentence {start} to {end}'
|
||||
print(msg)
|
||||
|
||||
Reference in New Issue
Block a user