mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-10 06:18:02 -05:00
...
This commit is contained in:
@@ -7,8 +7,9 @@ from .models import (
|
||||
|
||||
from .conf import (
|
||||
FULL_DOCKER, NATIVE, audiobooks_cli_dir, audiobooks_gradio_dir,
|
||||
audiobooks_host_dir, debug_mode, default_audio_proc_format, default_device,
|
||||
default_gpu_wiki, default_output_format, device_list, ebook_formats,
|
||||
audiobooks_host_dir, debug_mode, default_audio_proc_samplerate,
|
||||
default_audio_proc_format, default_device, default_gpu_wiki,
|
||||
default_output_format, device_list, ebook_formats,
|
||||
ebooks_dir, interface_component_options, interface_concurrency_limit,
|
||||
interface_host, interface_port, interface_shared_tmp_expire,
|
||||
max_python_version, min_python_version, models_dir, os,
|
||||
@@ -36,9 +37,9 @@ __all__ = [
|
||||
|
||||
# from conf
|
||||
"FULL_DOCKER", "NATIVE", "audiobooks_cli_dir", "audiobooks_gradio_dir",
|
||||
"audiobooks_host_dir", "debug_mode", "default_audio_proc_format",
|
||||
"default_device", "default_gpu_wiki", "default_output_format",
|
||||
"device_list", "ebook_formats", "ebooks_dir",
|
||||
"audiobooks_host_dir", "debug_mode", "default_audio_proc_samplerate",
|
||||
"default_audio_proc_format", "default_device", "default_gpu_wiki",
|
||||
"default_output_format", "device_list", "ebook_formats", "ebooks_dir",
|
||||
"interface_component_options", "interface_concurrency_limit",
|
||||
"interface_host", "interface_port", "interface_shared_tmp_expire",
|
||||
"max_python_version", "min_python_version", "models_dir", "os",
|
||||
|
||||
@@ -306,7 +306,7 @@ class Coqui:
|
||||
new_voice_path = re.sub(r'([\\/])eng([\\/])', rf'\1{lang_dir}\2', voice_path)
|
||||
proc_voice_path = new_voice_path.replace('.wav', '_temp.wav')
|
||||
torchaudio.save(proc_voice_path, audio_tensor, default_engine_settings[TTS_ENGINES['XTTSv2']]['samplerate'], format='wav')
|
||||
if normalize_audio(proc_voice_path, new_voice_path, 24000):
|
||||
if normalize_audio(proc_voice_path, new_voice_path, default_audio_proc_samplerate):
|
||||
del audio_data, sourceTensor, audio_tensor
|
||||
if self.session['tts_engine'] != TTS_ENGINES['XTTSv2']:
|
||||
del tts
|
||||
|
||||
@@ -10,7 +10,7 @@ from io import BytesIO
|
||||
from pydub import AudioSegment, silence
|
||||
from pydub.silence import detect_silence
|
||||
|
||||
from lib.conf import voice_formats
|
||||
from lib.conf import voice_formats, default_audio_proc_samplerate
|
||||
from lib.models import TTS_ENGINES, models
|
||||
from lib.classes.background_detector import BackgroundDetector
|
||||
|
||||
@@ -204,8 +204,9 @@ class VoiceExtractor:
|
||||
raise ValueError(error)
|
||||
|
||||
def _normalize_audio(self):
|
||||
try:
|
||||
process_file = os.path.join(self.session['voice_dir'], f'{self.voice_name}.wav')
|
||||
try:
|
||||
rate = default_audio_proc_samplerate
|
||||
process_file = os.path.join(self.session['voice_dir'], f'{self.voice_name}_proc.wav')
|
||||
ffmpeg_cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', self.voice_track]
|
||||
filter_complex = (
|
||||
'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
|
||||
@@ -226,34 +227,33 @@ class VoiceExtractor:
|
||||
'-y', process_file
|
||||
]
|
||||
error = None
|
||||
for rate in ['16000', '24000']:
|
||||
ffmpeg_cmd[-3] = rate
|
||||
output_file = re.sub(r'\.wav$', f'_{rate}.wav', process_file)
|
||||
ffmpeg_cmd[-1] = output_file
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
ffmpeg_cmd,
|
||||
env={},
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
universal_newlines=True,
|
||||
encoding='utf-8'
|
||||
)
|
||||
for line in process.stdout:
|
||||
print(line, end='') # Print each line of stdout
|
||||
process.wait()
|
||||
if process.returncode != 0:
|
||||
error = f'_normalize_audio(): process.returncode: {process.returncode}'
|
||||
break
|
||||
elif not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
|
||||
error = f'_normalize_audio() error: {output_file} was not created or is empty.'
|
||||
break
|
||||
else:
|
||||
self.final_files.append(output_file)
|
||||
except subprocess.CalledProcessError as e:
|
||||
error = f'_normalize_audio() ffmpeg.Error: {e.stderr.decode()}'
|
||||
ffmpeg_cmd[-3] = rate
|
||||
output_file = re.sub(r'_proc\.wav$', f'.wav', process_file)
|
||||
ffmpeg_cmd[-1] = output_file
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
ffmpeg_cmd,
|
||||
env={},
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
universal_newlines=True,
|
||||
encoding='utf-8'
|
||||
)
|
||||
for line in process.stdout:
|
||||
print(line, end='') # Print each line of stdout
|
||||
process.wait()
|
||||
if process.returncode != 0:
|
||||
error = f'_normalize_audio(): process.returncode: {process.returncode}'
|
||||
break
|
||||
elif not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
|
||||
error = f'_normalize_audio() error: {output_file} was not created or is empty.'
|
||||
break
|
||||
else:
|
||||
self.final_files.append(output_file)
|
||||
except subprocess.CalledProcessError as e:
|
||||
error = f'_normalize_audio() ffmpeg.Error: {e.stderr.decode()}'
|
||||
break
|
||||
shutil.rmtree(self.demucs_dir, ignore_errors=True)
|
||||
if os.path.exists(process_file):
|
||||
os.remove(process_file)
|
||||
|
||||
@@ -71,6 +71,7 @@ audiobooks_cli_dir = os.path.abspath(os.path.join('audiobooks','cli'))
|
||||
ebook_formats = ['.epub', '.mobi', '.azw3', '.fb2', '.lrf', '.rb', '.snb', '.tcr', '.pdf', '.txt', '.rtf', '.doc', '.docx', '.html', '.odt', '.azw'] # Add or remove the format you accept as input
|
||||
voice_formats = ['.mp4', '.m4b', '.m4a', '.mp3', '.wav', '.aac', '.flac', '.alac', '.ogg', '.aiff', '.aif', '.wma', '.dsd', '.opus', '.pcmu', '.pcma', '.gsm'] # Add or remove the format you accept as input
|
||||
output_formats = ['aac', 'flac', 'mp3', 'm4b', 'm4a', 'mp4', 'mov', 'ogg', 'wav', 'webm']
|
||||
default_audio_proc_samplerate = '24000'
|
||||
default_audio_proc_format = 'flac' # or 'mp3', 'aac', 'm4a', 'm4b', 'amr', '3gp', 'alac'. 'wav' format is ok but limited to process files < 4GB
|
||||
default_output_format = 'm4b'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user