V25 -> V26

This commit is contained in:
ROBERT MCDOWELL
2026-01-04 12:19:02 -08:00
committed by GitHub
7 changed files with 43 additions and 42 deletions

View File

@@ -120,7 +120,7 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -135,14 +135,14 @@ class Bark(TTSUtils, TTSRegistry, name='bark'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:

View File

@@ -74,7 +74,7 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -89,14 +89,14 @@ class Fairseq(TTSUtils, TTSRegistry, name='fairseq'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:

View File

@@ -99,7 +99,7 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -114,14 +114,14 @@ class Tacotron2(TTSUtils, TTSRegistry, name='tacotron'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:

View File

@@ -82,7 +82,7 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -97,14 +97,14 @@ class Vits(TTSUtils, TTSRegistry, name='vits'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:

View File

@@ -81,7 +81,7 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -96,14 +96,14 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:

View File

@@ -71,7 +71,7 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
return False
return True
def convert_sml(self, sml:str)->None:
def convert_sml(self, sml:str)->bool:
if sml == TTS_SML['break']['token']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 0.3 to 0.6 seconds
@@ -86,14 +86,14 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
pause_tensor = torch.zeros(1, int(self.params['samplerate'] * silence_time)) # 1.0 to 1.6 seconds
self.audio_segments.append(pause_tensor.clone())
elif TTS_SML['voice']['match'].fullmatch(sml):
self.session['voice'] = os.path.abspath(TTS_SML['voice'].fullmatch(sml).group(1))
self.session['voice'] = os.path.abspath(TTS_SML['voice']['match'].fullmatch(sml).group(1))
if os.path.exists(self.session['voice']):
if not self.set_voice():
return False
if self.set_voice():
return True
else:
error = f"convert_sml() error: voice {self.session['voice']} does not exist!"
print(error)
return True
return False
def convert(self, sentence_index:int, sentence:str)->bool:
try:
@@ -165,7 +165,7 @@ class YourTTS(TTSUtils, TTSRegistry, name='yourtts'):
sentence_obj = {
"start": start_time,
"end": end_time,
"text": part,
"text": sentence,
"idx": self.sentence_idx
}
self.sentence_idx = self._append_sentence2vtt(sentence_obj, self.vtt_path)

View File

@@ -1000,6 +1000,7 @@ def filter_chapter(doc:EpubHtml, id:str, stanza_nlp:Pipeline, is_num2words_compa
return None
def get_sentences(text:str, id:str)->list|None:
def split_inclusive(text:str, pattern:re.Pattern[str])->list[str]:
result = []
last_end = 0
@@ -1675,17 +1676,17 @@ def convert_chapters2audio(id:str)->bool:
if session['ebook']:
ebook_name = Path(session['ebook']).name
with tqdm(total=total_iterations, desc='0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=0) as t:
sentence_num = 0
sentence_idx = 0
sml_values = {
v['token']
for v in TTS_SML.values()
if isinstance(v, dict) and 'token' in v
}
for c in range(0, total_chapters):
chapter_idx = c + 1
chapter_idx = c
chapter_audio_file = f'chapter_{chapter_idx}.{default_audio_proc_format}'
sentences = session['chapters'][c]
start = sentence_num
start = sentence_idx
if c in missing_chapters:
msg = f'********* Recovering missing block {c} *********'
print(msg)
@@ -1694,20 +1695,20 @@ def convert_chapters2audio(id:str)->bool:
print(msg)
msg = f'Block {chapter_idx} containing {len(sentences)} sentences…'
print(msg)
for sentence_num, sentence in enumerate(sentences):
for sentence_idx, sentence in enumerate(sentences):
if session['cancellation_requested']:
msg = 'Cancel requested'
print(msg)
return False
if sentence_num in missing_sentences or sentence_num >= resume_sentence:
if sentence_idx in missing_sentences or sentence_idx >= resume_sentence:
sentence = sentence.strip()
if len(sentence) > 2 and any(c.isalnum() for c in sentence):
if sentence_num in missing_sentences:
msg = f'********* Recovering missing sentence {sentence_num} *********'
elif resume_sentence == sentence_num and resume_sentence > 0:
if sentence_idx in missing_sentences:
msg = f'********* Recovering missing sentence {sentence_idx} *********'
elif resume_sentence == sentence_idx and resume_sentence > 0:
msg = f'********* Resuming from sentence {resume_sentence} ********'
print(msg)
success = tts_manager.convert_sentence2audio(sentence_num, sentence) if sentence else True
success = tts_manager.convert_sentence2audio(sentence_idx, sentence) if sentence else True
if not success:
return False
total_progress = (t.n + 1) / total_iterations
@@ -1718,10 +1719,10 @@ def convert_chapters2audio(id:str)->bool:
msg = f' : {sentence}'
print(msg)
t.update(1)
end = sentence_num
end = sentence_idx
msg = f'End of Block {chapter_idx}'
print(msg)
if chapter_idx in missing_chapters or sentence_num > resume_sentence:
if chapter_idx in missing_chapters or sentence_idx > resume_sentence:
if combine_audio_sentences(chapter_audio_file, int(start), int(end), id):
msg = f'Combining block {chapter_idx} to audio, sentence {start} to {end}'
print(msg)