Mirror of https://github.com/DrewThomasson/ebook2audiobook.git
Add configurable parallel processing with dynamic instance calculation
- Added config options in lib/conf.py: enable_parallel_processing (default: True) and parallel_min_free_ram_gb (default: 2)
- Replaced simple 2x RAM check with intelligent calculation of max parallel instances
- System now considers available RAM, VRAM, and maintains minimum free RAM
- Supports creating multiple parallel instances (not just 2) based on available resources
- One GPU instance + additional CPU instances as RAM allows

Co-authored-by: DrewThomasson <126999465+DrewThomasson@users.noreply.github.com>
lib/conf.py
@@ -49,6 +49,10 @@ device_list = ['cpu', 'gpu', 'mps']
 default_device = 'cpu'
 default_gpu_wiki = '<a href="https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES">howto wiki</a>'

+# Parallel processing configuration
+enable_parallel_processing = True  # Enable/disable parallel processing feature
+parallel_min_free_ram_gb = 2  # Minimum free RAM (GB) to maintain when calculating parallel instances
+
 python_env_dir = os.path.abspath(os.path.join('.','python_env'))
 requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))
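Usage note (not part of the diff): to restore the old sequential behaviour, or to keep a larger safety margin, only these two values in lib/conf.py need to change. The numbers below are illustrative examples, not new defaults.

# Illustrative overrides in lib/conf.py (example values, not part of this commit)
enable_parallel_processing = False  # force a single, sequential TTS instance
parallel_min_free_ram_gb = 4        # reserve more free RAM if parallel processing is re-enabled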
lib/functions.py (137 changed lines)
@@ -983,29 +983,91 @@ def get_ram():
     vm = psutil.virtual_memory()
     return vm.total // (1024 ** 3)

+def get_available_ram():
+    """Get available RAM in GB"""
+    vm = psutil.virtual_memory()
+    return vm.available // (1024 ** 3)
+
+def calculate_max_parallel_instances(device, tts_engine):
+    """
+    Calculate the maximum number of parallel TTS instances based on available resources.
+
+    Strategy:
+    - For GPU: Consider both system RAM and GPU VRAM
+    - For CPU: Only consider system RAM
+    - Always maintain at least parallel_min_free_ram_gb GB free RAM
+    - Return number of instances (1 means sequential, >1 means parallel)
+
+    Returns tuple: (num_instances, reason)
+    """
+    from lib.conf import enable_parallel_processing, parallel_min_free_ram_gb
+
+    # Check if parallel processing is enabled in config
+    if not enable_parallel_processing:
+        return 1, "Parallel processing disabled in config"
+
+    # Only enable parallel processing for GPU devices
+    if device not in ['cuda', 'mps']:
+        return 1, "CPU mode - parallel processing disabled"
+
+    # Get model requirements
+    model_rating = default_engine_settings.get(tts_engine, {}).get('rating', {})
+    model_ram_required = model_rating.get('RAM', 8)
+    model_vram_required = model_rating.get('GPU VRAM', 4)
+
+    # Get available resources
+    system_ram = get_ram()
+    available_ram = get_available_ram()
+    vram = get_vram()
+
+    # Calculate how many instances can fit in RAM
+    # Reserve parallel_min_free_ram_gb GB for system
+    usable_ram = max(0, available_ram - parallel_min_free_ram_gb)
+    max_ram_instances = max(1, int(usable_ram / model_ram_required))
+
+    # Calculate how many instances can fit in VRAM (if GPU available)
+    max_vram_instances = 1  # Default to 1 (GPU instance)
+    if vram > 0 and device == 'cuda':
+        # One GPU instance + additional CPU instances if RAM allows
+        max_vram_instances = 1  # GPU instance always counts as 1
+        # We can add CPU instances if RAM allows
+        remaining_ram_for_cpu = usable_ram - model_ram_required  # After GPU instance
+        additional_cpu_instances = max(0, int(remaining_ram_for_cpu / model_ram_required))
+        max_vram_instances += additional_cpu_instances
+    elif device == 'mps':
+        # MPS shares memory with system RAM
+        # Calculate based on available RAM only
+        max_vram_instances = max_ram_instances
+
+    # The limiting factor is the minimum of RAM and VRAM calculations
+    max_instances = min(max_ram_instances, max_vram_instances) if device == 'cuda' else max_ram_instances
+
+    # Ensure at least 1 instance
+    max_instances = max(1, max_instances)
+
+    # Build informative message
+    if max_instances == 1:
+        msg = f"Sequential processing: Available RAM ({available_ram}GB) insufficient for parallel instances"
+    else:
+        msg = f"Parallel processing: {max_instances} instances (System RAM: {system_ram}GB, Available: {available_ram}GB"
+        if vram > 0 and device == 'cuda':
+            msg += f", VRAM: {vram}GB"
+        msg += f", Model requires: {model_ram_required}GB RAM"
+        if device == 'cuda':
+            msg += f" + {model_vram_required}GB VRAM"
+        msg += f", Keeping {parallel_min_free_ram_gb}GB free)"
+
+    return max_instances, msg
+
 def should_enable_parallel_processing(device, tts_engine):
     """
     Determine if parallel GPU+CPU processing should be enabled.
-    Enabled when:
-    - Device is GPU (cuda or mps)
-    - System RAM > 2x the model's RAM requirement
+    This is a wrapper around calculate_max_parallel_instances for backward compatibility.

     Returns tuple: (enable_parallel, reason)
     """
-    if device not in ['cuda', 'mps']:
-        return False, "Not using GPU"
-
-    # Get RAM requirements from model rating
-    model_ram_required = default_engine_settings.get(tts_engine, {}).get('rating', {}).get('RAM', 8)
-    system_ram = get_ram()
-
-    # Check if we have more than double the RAM needed
-    if system_ram > (2 * model_ram_required):
-        msg = f"Parallel processing enabled: System RAM ({system_ram}GB) > 2x Model RAM ({model_ram_required}GB)"
-        return True, msg
-    else:
-        msg = f"Parallel processing disabled: System RAM ({system_ram}GB) <= 2x Model RAM ({model_ram_required}GB)"
-        return False, msg
+    max_instances, msg = calculate_max_parallel_instances(device, tts_engine)
+    return max_instances > 1, msg

 def get_vram():
     os_name = platform.system()
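To make the sizing logic concrete, here is the arithmetic traced by hand for a hypothetical CUDA machine; every number below is invented for illustration (20 GB of available RAM, 12 GB of VRAM, a model rated at 8 GB RAM, and the default 2 GB reserve).

# Hypothetical inputs, not measured values
available_ram = 20            # as if returned by get_available_ram()
vram = 12                     # as if returned by get_vram()
model_ram_required = 8        # engine 'rating' -> 'RAM'
parallel_min_free_ram_gb = 2  # default from lib/conf.py

usable_ram = max(0, available_ram - parallel_min_free_ram_gb)                     # 18
max_ram_instances = max(1, int(usable_ram / model_ram_required))                  # 2
remaining_ram_for_cpu = usable_ram - model_ram_required                           # 10
max_vram_instances = 1 + max(0, int(remaining_ram_for_cpu / model_ram_required))  # 1 + 1 = 2
max_instances = max(1, min(max_ram_instances, max_vram_instances))                # 2: one GPU + one CPU instance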
@@ -1425,29 +1487,36 @@ def convert_chapters2audio(id):
            print('Cancel requested')
            return False

-        # Check if parallel processing should be enabled
-        enable_parallel, parallel_msg = should_enable_parallel_processing(session['device'], session['tts_engine'])
+        # Calculate maximum parallel instances based on available resources
+        max_instances, parallel_msg = calculate_max_parallel_instances(session['device'], session['tts_engine'])
         print(parallel_msg)

-        if enable_parallel:
-            # Create both GPU and CPU TTS managers for parallel processing
-            tts_manager_gpu = TTSManager(session)  # Uses the configured device (GPU)
+        # Create TTS managers based on max_instances
+        tts_managers = []
+        if max_instances > 1:
+            # First instance on GPU
+            tts_manager_gpu = TTSManager(session)
             if not tts_manager_gpu:
                 error = f"GPU TTS engine {session['tts_engine']} could not be loaded!"
                 print(error)
                 return False
+            tts_managers.append(tts_manager_gpu)

-            tts_manager_cpu = TTSManager(session, force_device='cpu')
-            if not tts_manager_cpu:
-                error = f"CPU TTS engine {session['tts_engine']} could not be loaded!"
-                print(error)
-                # Fall back to GPU-only if CPU fails
-                enable_parallel = False
-                tts_managers = [tts_manager_gpu]
-            else:
-                msg = "Parallel GPU+CPU processing enabled - sentences will be processed concurrently on both devices"
+            # Additional instances on CPU
+            for i in range(1, max_instances):
+                tts_manager_cpu = TTSManager(session, force_device='cpu')
+                if not tts_manager_cpu:
+                    msg = f"Warning: Could not create CPU TTS instance {i}. Continuing with {len(tts_managers)} instance(s)."
+                    print(msg)
+                    break
+                tts_managers.append(tts_manager_cpu)
+
+            if len(tts_managers) > 1:
+                msg = f"Parallel processing with {len(tts_managers)} instances (1 GPU + {len(tts_managers)-1} CPU)"
+                print(msg)
+            else:
+                msg = "Falling back to sequential processing (GPU only)"
                 print(msg)
-                tts_managers = [tts_manager_gpu, tts_manager_cpu]
         else:
             # Single TTS manager with configured device
             tts_manager = TTSManager(session)
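To restate the control flow above for, say, max_instances == 3 (purely illustrative; TTSManager and force_device='cpu' are taken from the diff, the rest is paraphrase):

# Intended layout when max_instances == 3 (illustration only):
#   tts_managers[0] = TTSManager(session)                       # GPU instance
#   tts_managers[1] = TTSManager(session, force_device='cpu')   # extra CPU instance
#   tts_managers[2] = TTSManager(session, force_device='cpu')   # extra CPU instance
# If a CPU instance fails to load, the loop breaks and conversion continues with
# whatever was created; with only one manager it falls back to sequential GPU-only mode.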
@@ -1518,8 +1587,8 @@ def convert_chapters2audio(id):
            msg = f'Block {chapter_num} containing {sentences_count} sentences...'
            print(msg)

-            # Process sentences - use parallel execution if enabled
-            if enable_parallel and len(tts_managers) > 1:
+            # Process sentences - use parallel execution if multiple managers available
+            if len(tts_managers) > 1:
                # Parallel processing: submit sentences to thread pool
                # Collect sentences to process in batches
                batch_size = 2 * len(tts_managers)  # Process in small batches
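The hunk is cut off right after batch_size, so the dispatch loop itself is not part of this diff. As a sketch only, one way a batch could be fanned out across the managers with a thread pool is shown below; synth_fn stands in for whatever per-sentence synthesis call convert_chapters2audio actually uses and is an assumption, not code from the repository.

from concurrent.futures import ThreadPoolExecutor

def process_batch(tts_managers, sentences, synth_fn):
    # Sketch: round-robin sentences across the managers so the GPU instance and
    # the extra CPU instances run concurrently; results keep the original order.
    results = [None] * len(sentences)
    with ThreadPoolExecutor(max_workers=len(tts_managers)) as pool:
        future_to_index = {
            pool.submit(synth_fn, tts_managers[i % len(tts_managers)], sentence): i
            for i, sentence in enumerate(sentences)
        }
        for future, i in future_to_index.items():
            results[i] = future.result()
    return results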