Add voice_model command line argument support for PIPER TTS

Co-authored-by: DrewThomasson <126999465+DrewThomasson@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-08-05 01:42:04 +00:00
parent fab7bdeec9
commit f3d40d7670
6 changed files with 22 additions and 19 deletions

40
app.py
View File

@@ -164,7 +164,7 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
)
options = [
'--script_mode', '--session', '--share', '--headless',
'--ebook', '--ebooks_dir', '--language', '--voice', '--device', '--tts_engine',
'--ebook', '--ebooks_dir', '--language', '--voice', '--voice_model', '--device', '--tts_engine',
'--custom_model', '--fine_tuned', '--output_format',
'--temperature', '--length_penalty', '--num_beams', '--repetition_penalty', '--top_k', '--top_p', '--speed', '--enable_text_splitting',
'--text_temp', '--waveform_temp',
@@ -188,38 +188,40 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
headless_optional_group = parser.add_argument_group('optional parameters')
headless_optional_group.add_argument(options[7], type=str, default=None, help='''(Optional) Path to the voice cloning file for TTS engine.
Uses the default voice if not present.''')
headless_optional_group.add_argument(options[8], type=str, default=default_device, choices=device_list, help=f'''(Optional) Pprocessor unit type for the conversion.
headless_optional_group.add_argument(options[8], type=str, default=None, help='''(Optional) Voice model for PIPER TTS engine (e.g., en_US-lessac-medium, de_DE-thorsten-medium).
Uses the default voice model if not present.''')
headless_optional_group.add_argument(options[9], type=str, default=default_device, choices=device_list, help=f'''(Optional) Pprocessor unit type for the conversion.
Default is set in ./lib/conf.py if not present. Fall back to CPU if GPU not available.''')
headless_optional_group.add_argument(options[9], type=str, default=None, choices=tts_engine_list_keys+tts_engine_list_values, help=f'''(Optional) Preferred TTS engine (available are: {tts_engine_list_keys+tts_engine_list_values}.
headless_optional_group.add_argument(options[10], type=str, default=None, choices=tts_engine_list_keys+tts_engine_list_values, help=f'''(Optional) Preferred TTS engine (available are: {tts_engine_list_keys+tts_engine_list_values}.
Default depends on the selected language. The tts engine should be compatible with the chosen language''')
headless_optional_group.add_argument(options[10], type=str, default=None, help=f'''(Optional) Path to the custom model zip file cntaining mandatory model files.
headless_optional_group.add_argument(options[11], type=str, default=None, help=f'''(Optional) Path to the custom model zip file cntaining mandatory model files.
Please refer to ./lib/models.py''')
headless_optional_group.add_argument(options[11], type=str, default=default_fine_tuned, help='''(Optional) Fine tuned model path. Default is builtin model.''')
headless_optional_group.add_argument(options[12], type=str, default=default_output_format, help=f'''(Optional) Output audio format. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[13], type=float, default=None, help=f"""(xtts only, optional) Temperature for the model.
headless_optional_group.add_argument(options[12], type=str, default=default_fine_tuned, help='''(Optional) Fine tuned model path. Default is builtin model.''')
headless_optional_group.add_argument(options[13], type=str, default=default_output_format, help=f'''(Optional) Output audio format. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[14], type=float, default=None, help=f"""(xtts only, optional) Temperature for the model.
Default to config.json model. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[14], type=float, default=None, help=f"""(xtts only, optional) A length penalty applied to the autoregressive decoder.
headless_optional_group.add_argument(options[15], type=float, default=None, help=f"""(xtts only, optional) A length penalty applied to the autoregressive decoder.
Default to config.json model. Not applied to custom models.""")
headless_optional_group.add_argument(options[15], type=int, default=None, help=f"""(xtts only, optional) Controls how many alternative sequences the model explores. Must be equal or greater than length penalty.
headless_optional_group.add_argument(options[16], type=int, default=None, help=f"""(xtts only, optional) Controls how many alternative sequences the model explores. Must be equal or greater than length penalty.
Default to config.json model.""")
headless_optional_group.add_argument(options[16], type=float, default=None, help=f"""(xtts only, optional) A penalty that prevents the autoregressive decoder from repeating itself.
headless_optional_group.add_argument(options[17], type=float, default=None, help=f"""(xtts only, optional) A penalty that prevents the autoregressive decoder from repeating itself.
Default to config.json model.""")
headless_optional_group.add_argument(options[17], type=int, default=None, help=f"""(xtts only, optional) Top-k sampling.
headless_optional_group.add_argument(options[18], type=int, default=None, help=f"""(xtts only, optional) Top-k sampling.
Lower values mean more likely outputs and increased audio generation speed.
Default to config.json model.""")
headless_optional_group.add_argument(options[18], type=float, default=None, help=f"""(xtts only, optional) Top-p sampling.
headless_optional_group.add_argument(options[19], type=float, default=None, help=f"""(xtts only, optional) Top-p sampling.
Lower values mean more likely outputs and increased audio generation speed. Default to config.json model.""")
headless_optional_group.add_argument(options[19], type=float, default=None, help=f"""(xtts only, optional) Speed factor for the speech generation.
headless_optional_group.add_argument(options[20], type=float, default=None, help=f"""(xtts only, optional) Speed factor for the speech generation.
Default to config.json model.""")
headless_optional_group.add_argument(options[20], action='store_true', help=f"""(xtts only, optional) Enable TTS text splitting. This option is known to not be very efficient.
headless_optional_group.add_argument(options[21], action='store_true', help=f"""(xtts only, optional) Enable TTS text splitting. This option is known to not be very efficient.
Default to config.json model.""")
headless_optional_group.add_argument(options[21], type=float, default=None, help=f"""(bark only, optional) Text Temperature for the model.
headless_optional_group.add_argument(options[22], type=float, default=None, help=f"""(bark only, optional) Text Temperature for the model.
Default to {default_engine_settings[TTS_ENGINES['BARK']]['text_temp']}. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[22], type=float, default=None, help=f"""(bark only, optional) Waveform Temperature for the model.
headless_optional_group.add_argument(options[23], type=float, default=None, help=f"""(bark only, optional) Waveform Temperature for the model.
Default to {default_engine_settings[TTS_ENGINES['BARK']]['waveform_temp']}. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[23], type=str, help=f'''(Optional) Path to the output directory. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[24], action='version', version=f'ebook2audiobook version {prog_version}', help='''Show the version of the script and exit''')
headless_optional_group.add_argument(options[25], action='store_true', help=argparse.SUPPRESS)
headless_optional_group.add_argument(options[24], type=str, help=f'''(Optional) Path to the output directory. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[25], action='version', version=f'ebook2audiobook version {prog_version}', help='''Show the version of the script and exit''')
headless_optional_group.add_argument(options[26], action='store_true', help=argparse.SUPPRESS)
for arg in sys.argv:
if arg.startswith('--') and arg not in options:

Binary file not shown.

Binary file not shown.

View File

@@ -1798,6 +1798,7 @@ def convert_ebook(args, ctx=None):
session['waveform_temp'] = args['waveform_temp']
session['audiobooks_dir'] = args['audiobooks_dir']
session['voice'] = args['voice']
session['voice_model'] = args['voice_model']
info_session = f"\n*********** Session: {id} **************\nStore it in case of interruption, crash, reuse of custom model or custom voice,\nyou can resume the conversion with --session option"