Mirror of https://github.com/DrewThomasson/ebook2audiobook.git (last synced 2026-01-10 06:18:02 -05:00).
Fix PIPER TTS integration by updating TTSManager routing and correcting HuggingFace model paths
Co-authored-by: DrewThomasson <126999465+DrewThomasson@users.noreply.github.com>
This commit is contained in:
@@ -88,9 +88,34 @@ class Piper:
|
||||
# Download voice model from HuggingFace
|
||||
repo_id = models[TTS_ENGINES['PIPER']]['internal']['repo']
|
||||
|
||||
# Get model files based on voice name
|
||||
model_file = f"{voice_name}.onnx"
|
||||
config_file = f"{voice_name}.onnx.json"
|
||||
# Get model files based on voice name
|
||||
# Map voice name to correct path in repository
|
||||
voice_path_map = {
|
||||
'en_US-lessac-medium': 'en/en_US/lessac/medium/en_US-lessac-medium',
|
||||
'en_US-lessac-high': 'en/en_US/lessac/high/en_US-lessac-high',
|
||||
'en_US-lessac-low': 'en/en_US/lessac/low/en_US-lessac-low',
|
||||
'fr_FR-upmc-medium': 'fr/fr_FR/upmc/medium/fr_FR-upmc-medium',
|
||||
'de_DE-thorsten-medium': 'de/de_DE/thorsten/medium/de_DE-thorsten-medium',
|
||||
'es_ES-davefx-medium': 'es/es_ES/davefx/medium/es_ES-davefx-medium',
|
||||
'it_IT-riccardo-x_low': 'it/it_IT/riccardo/x_low/it_IT-riccardo-x_low',
|
||||
'pt_BR-edresson-low': 'pt/pt_BR/edresson/low/pt_BR-edresson-low'
|
||||
}
|
||||
|
||||
voice_path = voice_path_map.get(voice_name)
|
||||
if not voice_path:
|
||||
# If voice not in map, try to construct path from voice name
|
||||
parts = voice_name.split('-')
|
||||
if len(parts) >= 3:
|
||||
lang_code = parts[0].split('_')[0].lower()
|
||||
country_voice = parts[0]
|
||||
speaker = parts[1]
|
||||
quality = '-'.join(parts[2:])
|
||||
voice_path = f"{lang_code}/{country_voice}/{speaker}/{quality}/{voice_name}"
|
||||
else:
|
||||
raise ValueError(f"Unknown voice format: {voice_name}")
|
||||
|
||||
model_file = f"{voice_path}.onnx"
|
||||
config_file = f"{voice_path}.onnx.json"
|
||||
|
||||
# Download the model files
|
||||
model_path = hf_hub_download(
|
||||
@@ -182,6 +207,7 @@ class Piper:
|
||||
if is_audio_data_valid(audio_sentence):
|
||||
sourceTensor = self._tensor_type(audio_sentence)
|
||||
audio_tensor = sourceTensor.clone().detach().unsqueeze(0).cpu()
|
||||
trim_audio_buffer = 0.004
|
||||
if sentence[-1].isalnum() or sentence[-1] == '—':
|
||||
audio_tensor = trim_audio(audio_tensor.squeeze(), settings['samplerate'], 0.003, trim_audio_buffer).unsqueeze(0)
|
||||
self.audio_segments.append(audio_tensor)
|
||||
|
||||
@@ -13,9 +13,9 @@ class TTSManager:
|
||||
if self.session['tts_engine'] in [TTS_ENGINES['XTTSv2'], TTS_ENGINES['BARK'], TTS_ENGINES['VITS'], TTS_ENGINES['FAIRSEQ'], TTS_ENGINES['TACOTRON2'], TTS_ENGINES['YOURTTS']]:
|
||||
from lib.classes.tts_engines.coqui import Coqui
|
||||
self.tts = Coqui(self.session)
|
||||
#elif self.session['tts_engine'] in [TTS_ENGINES['NEW_TTS']]:
|
||||
# from lib.classes.tts_engines.new_tts import NewTts
|
||||
# self.tts = NewTts(self.session)
|
||||
elif self.session['tts_engine'] in [TTS_ENGINES['PIPER']]:
|
||||
from lib.classes.tts_engines.piper import Piper
|
||||
self.tts = Piper(self.session)
|
||||
if self.tts:
|
||||
return True
|
||||
else:
|
||||
|
||||
test_input.txt (new normal file, 1 line added)
@@ -0,0 +1 @@
|
||||
This is a test sentence.
|
||||
Reference in New Issue
Block a user