From 66f9fafedae65905716552daefddc4235bb43304 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 14 Oct 2025 20:33:39 +0000
Subject: [PATCH] Add configurable parallel processing with dynamic instance
 calculation

- Added config options in lib/conf.py: enable_parallel_processing (default: True) and parallel_min_free_ram_gb (default: 2)
- Replaced simple 2x RAM check with intelligent calculation of max parallel instances
- System now considers available RAM, VRAM, and maintains minimum free RAM
- Supports creating multiple parallel instances (not just 2) based on available resources
- One GPU instance + additional CPU instances as RAM allows

Co-authored-by: DrewThomasson <126999465+DrewThomasson@users.noreply.github.com>
---
 lib/conf.py      |   4 ++
 lib/functions.py | 137 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 107 insertions(+), 34 deletions(-)

diff --git a/lib/conf.py b/lib/conf.py
index b2148d20..07ccc9f5 100644
--- a/lib/conf.py
+++ b/lib/conf.py
@@ -49,6 +49,10 @@ device_list = ['cpu', 'gpu', 'mps']
 default_device = 'cpu'
 default_gpu_wiki = 'howto wiki'
 
+# Parallel processing configuration
+enable_parallel_processing = True # Enable/disable parallel processing feature
+parallel_min_free_ram_gb = 2 # Minimum free RAM (GB) to maintain when calculating parallel instances
+
 python_env_dir = os.path.abspath(os.path.join('.','python_env'))
 requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))
 
diff --git a/lib/functions.py b/lib/functions.py
index 7703059e..3a5ea832 100644
--- a/lib/functions.py
+++ b/lib/functions.py
@@ -983,29 +983,91 @@ def get_ram():
     vm = psutil.virtual_memory()
     return vm.total // (1024 ** 3)
 
+def get_available_ram():
+    """Get available RAM in GB"""
+    vm = psutil.virtual_memory()
+    return vm.available // (1024 ** 3)
+
+def calculate_max_parallel_instances(device, tts_engine):
+    """
+    Calculate the maximum number of parallel TTS instances based on available resources.
+
+    Strategy:
+    - For GPU: Consider both system RAM and GPU VRAM
+    - For CPU: Only consider system RAM
+    - Always maintain at least parallel_min_free_ram_gb GB free RAM
+    - Return number of instances (1 means sequential, >1 means parallel)
+
+    Returns tuple: (num_instances, reason)
+    """
+    from lib.conf import enable_parallel_processing, parallel_min_free_ram_gb
+
+    # Check if parallel processing is enabled in config
+    if not enable_parallel_processing:
+        return 1, "Parallel processing disabled in config"
+
+    # Only enable parallel processing for GPU devices
+    if device not in ['cuda', 'mps']:
+        return 1, "CPU mode - parallel processing disabled"
+
+    # Get model requirements
+    model_rating = default_engine_settings.get(tts_engine, {}).get('rating', {})
+    model_ram_required = model_rating.get('RAM', 8)
+    model_vram_required = model_rating.get('GPU VRAM', 4)
+
+    # Get available resources
+    system_ram = get_ram()
+    available_ram = get_available_ram()
+    vram = get_vram()
+
+    # Calculate how many instances can fit in RAM
+    # Reserve parallel_min_free_ram_gb GB for system
+    usable_ram = max(0, available_ram - parallel_min_free_ram_gb)
+    max_ram_instances = max(1, int(usable_ram / model_ram_required))
+
+    # Calculate how many instances can fit in VRAM (if GPU available)
+    max_vram_instances = 1 # Default to 1 (GPU instance)
+    if vram > 0 and device == 'cuda':
+        # One GPU instance + additional CPU instances if RAM allows
+        max_vram_instances = 1 # GPU instance always counts as 1
+        # We can add CPU instances if RAM allows
+        remaining_ram_for_cpu = usable_ram - model_ram_required # After GPU instance
+        additional_cpu_instances = max(0, int(remaining_ram_for_cpu / model_ram_required))
+        max_vram_instances += additional_cpu_instances
+    elif device == 'mps':
+        # MPS shares memory with system RAM
+        # Calculate based on available RAM only
+        max_vram_instances = max_ram_instances
+
+    # The limiting factor is the minimum of RAM and VRAM calculations
+    max_instances = min(max_ram_instances, max_vram_instances) if device == 'cuda' else max_ram_instances
+
+    # Ensure at least 1 instance
+    max_instances = max(1, max_instances)
+
+    # Build informative message
+    if max_instances == 1:
+        msg = f"Sequential processing: Available RAM ({available_ram}GB) insufficient for parallel instances"
+    else:
+        msg = f"Parallel processing: {max_instances} instances (System RAM: {system_ram}GB, Available: {available_ram}GB"
+        if vram > 0 and device == 'cuda':
+            msg += f", VRAM: {vram}GB"
+        msg += f", Model requires: {model_ram_required}GB RAM"
+        if device == 'cuda':
+            msg += f" + {model_vram_required}GB VRAM"
+        msg += f", Keeping {parallel_min_free_ram_gb}GB free)"
+
+    return max_instances, msg
+
 def should_enable_parallel_processing(device, tts_engine):
     """
     Determine if parallel GPU+CPU processing should be enabled.
 
-    Enabled when:
-    - Device is GPU (cuda or mps)
-    - System RAM > 2x the model's RAM requirement
+    This is a wrapper around calculate_max_parallel_instances for backward compatibility.
     Returns tuple: (enable_parallel, reason)
     """
-    if device not in ['cuda', 'mps']:
-        return False, "Not using GPU"
-
-    # Get RAM requirements from model rating
-    model_ram_required = default_engine_settings.get(tts_engine, {}).get('rating', {}).get('RAM', 8)
-    system_ram = get_ram()
-
-    # Check if we have more than double the RAM needed
-    if system_ram > (2 * model_ram_required):
-        msg = f"Parallel processing enabled: System RAM ({system_ram}GB) > 2x Model RAM ({model_ram_required}GB)"
-        return True, msg
-    else:
-        msg = f"Parallel processing disabled: System RAM ({system_ram}GB) <= 2x Model RAM ({model_ram_required}GB)"
-        return False, msg
+    max_instances, msg = calculate_max_parallel_instances(device, tts_engine)
+    return max_instances > 1, msg
 
 def get_vram():
     os_name = platform.system()
@@ -1425,29 +1487,36 @@ def convert_chapters2audio(id):
             print('Cancel requested')
             return False
 
-        # Check if parallel processing should be enabled
-        enable_parallel, parallel_msg = should_enable_parallel_processing(session['device'], session['tts_engine'])
+        # Calculate maximum parallel instances based on available resources
+        max_instances, parallel_msg = calculate_max_parallel_instances(session['device'], session['tts_engine'])
         print(parallel_msg)
 
-        if enable_parallel:
-            # Create both GPU and CPU TTS managers for parallel processing
-            tts_manager_gpu = TTSManager(session) # Uses the configured device (GPU)
+        # Create TTS managers based on max_instances
+        tts_managers = []
+        if max_instances > 1:
+            # First instance on GPU
+            tts_manager_gpu = TTSManager(session)
             if not tts_manager_gpu:
                 error = f"GPU TTS engine {session['tts_engine']} could not be loaded!"
                 print(error)
                 return False
+            tts_managers.append(tts_manager_gpu)
 
-            tts_manager_cpu = TTSManager(session, force_device='cpu')
-            if not tts_manager_cpu:
-                error = f"CPU TTS engine {session['tts_engine']} could not be loaded!"
-                print(error)
-                # Fall back to GPU-only if CPU fails
-                enable_parallel = False
-                tts_managers = [tts_manager_gpu]
-            else:
-                msg = "Parallel GPU+CPU processing enabled - sentences will be processed concurrently on both devices"
+            # Additional instances on CPU
+            for i in range(1, max_instances):
+                tts_manager_cpu = TTSManager(session, force_device='cpu')
+                if not tts_manager_cpu:
+                    msg = f"Warning: Could not create CPU TTS instance {i}. Continuing with {len(tts_managers)} instance(s)."
+                    print(msg)
+                    break
+                tts_managers.append(tts_manager_cpu)
+
+            if len(tts_managers) > 1:
+                msg = f"Parallel processing with {len(tts_managers)} instances (1 GPU + {len(tts_managers)-1} CPU)"
+                print(msg)
+            else:
+                msg = "Falling back to sequential processing (GPU only)"
                 print(msg)
-                tts_managers = [tts_manager_gpu, tts_manager_cpu]
         else:
             # Single TTS manager with configured device
            tts_manager = TTSManager(session)
@@ -1518,8 +1587,8 @@ def convert_chapters2audio(id):
 
             msg = f'Block {chapter_num} containing {sentences_count} sentences...'
             print(msg)
-            # Process sentences - use parallel execution if enabled
-            if enable_parallel and len(tts_managers) > 1:
+            # Process sentences - use parallel execution if multiple managers available
+            if len(tts_managers) > 1:
                 # Parallel processing: submit sentences to thread pool
                 # Collect sentences to process in batches
                 batch_size = 2 * len(tts_managers) # Process in small batches