Merge branch 'v25' into DrewThomasson-RafeBeckley-patch-1

This commit is contained in:
Drew Thomasson
2025-11-11 07:05:07 -05:00
committed by GitHub
57 changed files with 4422 additions and 3809 deletions

View File

@@ -16,7 +16,6 @@ RUN apt-get update && \
# Install Rust compiler
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
# Set the working directory
WORKDIR /app
# Install UniDic (non-torch dependent)
RUN pip install --no-cache-dir unidic-lite unidic && \
@@ -31,74 +30,61 @@ ARG TORCH_VERSION=""
# Add parameter to control whether to skip the XTTS test
ARG SKIP_XTTS_TEST="false"
# Copy the application
WORKDIR /app
COPY . /app
# Extract torch versions from requirements.txt or set to empty strings if not found
RUN TORCH_VERSION_REQ=$(grep -E "^torch==" requirements.txt | cut -d'=' -f3 || echo "") && \
TORCHAUDIO_VERSION_REQ=$(grep -E "^torchaudio==" requirements.txt | cut -d'=' -f3 || echo "") && \
TORCHVISION_VERSION_REQ=$(grep -E "^torchvision==" requirements.txt | cut -d'=' -f3 || echo "") && \
echo "Found in requirements: torch==$TORCH_VERSION_REQ torchaudio==$TORCHAUDIO_VERSION_REQ torchvision==$TORCHVISION_VERSION_REQ"
# Install PyTorch with CUDA support if specified
# Install requirements.txt or PyTorch variants based on TORCH_VERSION
RUN if [ ! -z "$TORCH_VERSION" ]; then \
# Check if we need to use specific versions or get the latest
if [ ! -z "$TORCH_VERSION_REQ" ] && [ ! -z "$TORCHVISION_VERSION_REQ" ] && [ ! -z "$TORCHAUDIO_VERSION_REQ" ]; then \
echo "Using specific versions from requirements.txt" && \
TORCH_SPEC="torch==${TORCH_VERSION_REQ}" && \
TORCHVISION_SPEC="torchvision==${TORCHVISION_VERSION_REQ}" && \
TORCHAUDIO_SPEC="torchaudio==${TORCHAUDIO_VERSION_REQ}"; \
else \
echo "Using latest versions for the selected variant" && \
TORCH_SPEC="torch" && \
TORCHVISION_SPEC="torchvision" && \
TORCHAUDIO_SPEC="torchaudio"; \
fi && \
\
# Check if TORCH_VERSION contains "cuda" and extract version number
if echo "$TORCH_VERSION" | grep -q "cuda"; then \
CUDA_VERSION=$(echo "$TORCH_VERSION" | sed 's/cuda//g') && \
echo "Detected CUDA version: $CUDA_VERSION" && \
echo "Attempting to install PyTorch nightly for CUDA $CUDA_VERSION..." && \
#if ! pip install --no-cache-dir --pre $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION}; then \
if ! pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION}; then \
echo "❌ Nightly build for CUDA $CUDA_VERSION not available or failed" && \
echo "🔄 Trying stable release for CUDA $CUDA_VERSION..." && \
#if pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC --extra-index-url https://download.pytorch.org/whl/cu${CUDA_VERSION}; then \
if pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION}; then \
echo "✅ Successfully installed stable PyTorch for CUDA $CUDA_VERSION"; \
else \
echo "❌ Both nightly and stable builds failed for CUDA $CUDA_VERSION"; \
echo "💡 This CUDA version may not be supported by PyTorch"; \
exit 1; \
fi; \
\
# Special handling for CUDA 11.8
if [ "$CUDA_VERSION" = "118" ]; then \
echo "Installing PyTorch for CUDA 11.8..." && \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install pyannote-audio==3.4.0 && pip install --no-cache-dir --upgrade torch==2.7.1 torchvision==2.7.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu118; \
elif [ "$CUDA_VERSION" = "128" ]; then \
echo "Installing PyTorch for CUDA 12.8..." && \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade torch==2.7.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu128; \
else \
echo "✅ Successfully installed nightly PyTorch for CUDA $CUDA_VERSION"; \
echo "Attempting to install stable PyTorch for CUDA $CUDA_VERSION..." && \
if ! pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION}; then \
echo "❌ Stable build for CUDA $CUDA_VERSION not available or failed" && \
echo "🔄 Trying nightly release for CUDA $CUDA_VERSION..." && \
if pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION}; then \
echo "✅ Successfully installed nightly PyTorch for CUDA $CUDA_VERSION"; \
else \
echo "❌ Both stable and nightly builds failed for CUDA $CUDA_VERSION"; \
echo "💡 This CUDA version may not be supported by PyTorch"; \
exit 1; \
fi; \
else \
echo "✅ Successfully installed stable PyTorch for CUDA $CUDA_VERSION"; \
fi; \
fi; \
else \
# Handle non-CUDA cases (existing functionality)
# Handle non-CUDA cases
case "$TORCH_VERSION" in \
"rocm") \
# Using the correct syntax for ROCm PyTorch installation
pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC --extra-index-url https://download.pytorch.org/whl/rocm6.2 \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2 \
;; \
"xpu") \
# Install PyTorch with Intel XPU support through IPEX
pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC && \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade torch torchvision torchaudio && \
pip install --no-cache-dir intel-extension-for-pytorch --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
;; \
"cpu") \
pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC --extra-index-url https://download.pytorch.org/whl/cpu \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu \
;; \
*) \
pip install --no-cache-dir $TORCH_VERSION \
echo "Installing custom PyTorch specification: $TORCH_VERSION" && \
pip install --no-cache-dir --upgrade -r requirements.txt && pip install --no-cache-dir --upgrade $TORCH_VERSION \
;; \
esac; \
fi && \
# Install remaining requirements, skipping torch packages that might be there
grep -v -E "^torch==|^torchvision==|^torchaudio==|^torchvision$" requirements.txt > requirements_no_torch.txt && \
pip install --no-cache-dir --upgrade -r requirements_no_torch.txt && \
rm requirements_no_torch.txt; \
fi; \
else \
# Install all requirements as specified
echo "No TORCH_VERSION specified, using packages from requirements.txt" && \
pip install --no-cache-dir --upgrade -r requirements.txt; \
fi
@@ -114,9 +100,6 @@ RUN if [ "$SKIP_XTTS_TEST" != "true" ]; then \
echo "Skipping XTTS test run as requested."; \
fi
# Copy the application
COPY . /app
# Expose the required port
EXPOSE 7860
# Start the Gradio app with the required flag
@@ -126,3 +109,12 @@ ENTRYPOINT ["python", "app.py", "--script_mode", "full_docker"]
#docker build --pull --build-arg BASE_IMAGE=athomasson2/ebook2audiobook:latest -t your-image-name .
#The --pull flag forces Docker to always try to pull the latest version of the image, even if it already exists locally.
#Without --pull, Docker will only use the local version if it exists, which might not be the latest.
# Example build commands:
# For CUDA 11.8: docker build --build-arg TORCH_VERSION=cuda118 -t your-image-name .
# For CUDA 12.8: docker build --build-arg TORCH_VERSION=cuda128 -t your-image-name .
# For CUDA 12.1: docker build --build-arg TORCH_VERSION=cuda121 -t your-image-name .
# For ROCm: docker build --build-arg TORCH_VERSION=rocm -t your-image-name .
# For CPU: docker build --build-arg TORCH_VERSION=cpu -t your-image-name .
# For XPU: docker build --build-arg TORCH_VERSION=xpu -t your-image-name .
# Default (no TORCH_VERSION): docker build -t your-image-name .

165
README.md
View File

@@ -1,6 +1,6 @@
# 📚 ebook2audiobook
CPU/GPU Converter from eBooks to audiobooks with chapters and metadata<br/>
using XTTSv2, Bark, Vits, Fairseq, YourTTS, Tacotron and more. Supports voice cloning and +1110 languages!
using XTTSv2, Bark, Vits, Fairseq, YourTTS, Tacotron2 and more. Supports voice cloning and +1110 languages!
> [!IMPORTANT]
**This tool is intended for use with non-DRM, legally acquired eBooks only.** <br>
The authors are not responsible for any misuse of this software or any resulting legal consequences. <br>
@@ -83,18 +83,18 @@ https://github.com/user-attachments/assets/81c4baad-117e-4db5-ac86-efc2b7fea921
- [Basic Headless Usage](#basic--usage)
- [Headless Custom XTTS Model Usage](#example-of-custom-model-zip-upload)
- [Help command output](#help-command-output)
- [Run Remotely](#run-remotely)
- [Run Remotely](#run-remotely)
- [Docker](#docker-compose)
- [Docker Compose (Recommended)](#docker-compose)
- [Docker Compose Headless](#compose-headless)
- [Compose Build Arguments](#compose-build-arguments)
- [Compose container file locations](#compose-container-file-locations)
- [Common Docker issues](#common-docker-issues)
- [Docker Build (Manual)](https://github.com/DrewThomasson/ebook2audiobook/wiki/Manual-Docker-Guide)
- [Fine Tuned TTS models](#fine-tuned-tts-models)
- [Collection of Fine-Tuned TTS Models](#fine-tuned-tts-collection)
- [Train XTTSv2](#fine-tune-your-own-xttsv2-model)
- [Docker](#docker-gpu-options)
- [GPU options](#docker-gpu-options)
- [Docker Run](#running-the-pre-built-docker-container)
- [Docker Build](#building-the-docker-container)
- [Docker Compose](#docker-compose)
- [Docker headless guide](#docker-headless-guide)
- [Docker container file locations](#docker-container-file-locations)
- [Common Docker issues](#common-docker-issues)
- [Supported eBook Formats](#supported-ebook-formats)
- [Output Formats](#output-formats)
- [Updating to Latest Version](#updating-to-latest-version)
@@ -125,7 +125,7 @@ https://github.com/user-attachments/assets/81c4baad-117e-4db5-ac86-efc2b7fea921
## Hardware Requirements
- 4gb RAM minimum, 8GB recommended
- 2gb RAM minimum, 8GB recommended
- Virtualization enabled if running on windows (Docker only)
- CPU (intel, AMD, ARM), GPU (Nvidia, AMD*, Intel*) (Recommended), MPS (Apple Silicon CPU)
*available very soon
@@ -147,16 +147,18 @@ cd ebook2audiobook
```
### Launching Gradio Web Interface
1. **Run ebook2audiobook**:
1. **Run ebook2audiobook**:
- **Linux/MacOS**
```bash
./ebook2audiobook.sh # Run launch script
```
<i>Note for MacOS users: Homebrew is installed in order to install any missing programs.</i>
- **Mac Launcher**
Double click `Mac Ebook2Audiobook Launcher.command`
- **Windows**
```bash
ebook2audiobook.cmd # Run launch script or double click on it
@@ -164,22 +166,12 @@ cd ebook2audiobook
- **Windows Launcher**
Double click `ebook2audiobook.cmd`
- **Manual Python Install**
```bash
# (for experts only!)
REQUIRED_PROGRAMS=("calibre" "ffmpeg" "nodejs" "mecab" "espeak-ng" "rust" "sox")
REQUIRED_PYTHON_VERSION="3.12"
pip install -r requirements.txt # Install Python Requirements
python app.py # Run Ebook2Audiobook
```
1. **Open the Web App**: Click the URL provided in the terminal to access the web app and convert eBooks. `http://localhost:7860/`
2. **For Public Link**:
`python app.py --share` (all OS)
`./ebook2audiobook.sh --share` (Linux/MacOS)
`ebook2audiobook.cmd --share` (Windows)
`python app.py --share` (all OS)
> [!IMPORTANT]
**If the script is stopped and run again, you need to refresh your gradio GUI interface<br>
@@ -341,84 +333,11 @@ NOTE: in gradio/gui mode, to cancel a running conversion, just click on the [X]
TIP: if it needs some more pauses, just add '###' or '[pause]' between the words you wish more pause. one [pause] equals to 1.4 seconds
#### Docker GPU Options
Available pre-build tags: `latest` (CUDA 11.8)
#### Edit: IF GPU isn't detected then you'll have to build the image -> [Building the Docker Container](#building-the-docker-container)
#### Running the pre-built Docker Container
-Run with CPU only
```powershell
docker run --pull always --rm -p 7860:7860 athomasson2/ebook2audiobook
```
-Run with GPU Speedup (NVIDIA compatible only)
```powershell
docker run --pull always --rm --gpus all -p 7860:7860 athomasson2/ebook2audiobook
```
This command will start the Gradio interface on port 7860.(localhost:7860)
- For more options add the parameter `--help`
#### Building the Docker Container
- You can build the docker image with the command:
```powershell
docker build -t athomasson2/ebook2audiobook .
```
#### Available Docker Build Arguments
`--build-arg TORCH_VERSION=cuda118` Available tags: [cuda121, cuda118, cuda128, rocm, xpu, cpu]
All CUDA version numbers should work, Ex: CUDA 11.6-> cuda116
`--build-arg SKIP_XTTS_TEST=true` (Saves space by not baking XTTSv2 model into docker image)
## Docker container file locations
All ebook2audiobooks will have the base dir of `/app/`
For example:
`tmp` = `/app/tmp`
`audiobooks` = `/app/audiobooks`
## Docker headless guide
> [!IMPORTANT]
**For simpler headless setup use the [Compose](#compose-headless).** <br>
- Before you run this you need to create a dir named "input-folder" in your current dir
which will be linked. This is where you can put your input files for the docker image to see
```bash
mkdir input-folder && mkdir Audiobooks
```
- In the command below swap out **YOUR_INPUT_FILE.TXT** with the name of your input file
```bash
docker run --pull always --rm \
-v $(pwd)/input-folder:/app/input_folder \
-v $(pwd)/audiobooks:/app/audiobooks \
athomasson2/ebook2audiobook \
--headless --ebook /input_folder/YOUR_EBOOK_FILE
```
- The output Audiobooks will be found in the Audiobook folder which will also be located
in your local dir you ran this docker command in
## To get the help command for the other parameters this program has you can run this
```bash
docker run --pull always --rm athomasson2/ebook2audiobook --help
```
That will output this
[Help command output](#help-command-output)
### Docker Compose
This project uses Docker Compose to run locally. You can enable or disable GPU support
by setting either `*gpu-enabled` or `*gpu-disabled` in `docker-compose.yml`
For pre-built image enable `#image: docker.io/athomasson2/ebook2audiobook:latest` in `docker-compose.yml`
#### Steps to Run
@@ -429,46 +348,48 @@ by setting either `*gpu-enabled` or `*gpu-disabled` in `docker-compose.yml`
```
2. **Set GPU Support (disabled by default)**
To enable GPU support, modify `docker-compose.yml` and change `*gpu-disabled` to `*gpu-enabled`
3. **Start the service:**
4. **Start the service:**
```bash
# Docker
docker-compose up -d # To rebuild add --build
docker-compose up -d # To rebuild add --build
# To stop -> docker-compose down
# Podman
podman compose -f podman-compose.yml up -d # To rebuild add --build
# To stop -> podman compose -f podman-compose.yml down
```
4. **Access the service:**
5. **Access the service:**
The service will be available at http://localhost:7860.
### Compose Build Arguments
```bash
SKIP_XTTS_TEST: "true" # (Saves space by not baking xtts model into docker image)
TORCH_VERSION: cuda118 # Available tags: [cuda121, cuda118, cuda128, rocm, xpu, cpu] # All CUDA version numbers should work, Ex: CUDA 11.6-> cuda116
```
### Compose Headless
[Headless Wiki for more info](https://github.com/DrewThomasson/ebook2audiobook/wiki/Docker-Compose-Headless-guide)
```bash
A headless example is already contained within the `docker-compose.yml` file.
The `docker-compose.yml` file will act as the base dir for any headless commands added.
```
### Compose container file locations
```bash
By Default: All compose containers share the contents your local `ebook2audiobook` folder
```
## Common Docker Issues
### Common Docker Issues
- My NVIDIA GPU isn't being detected?? -> [GPU ISSUES Wiki Page](https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES)
- `python: can't open file '/home/user/app/app.py': [Errno 2] No such file or directory` (Just remove all post arguments as I replaced the `CMD` with `ENTRYPOINT` in the [Dockerfile](Dockerfile))
- Example: `docker run --pull always athomasson2/ebook2audiobook app.py --script_mode full_docker` - > corrected - > `docker run --pull always athomasson2/ebook2audiobook`
- Arguments can be easily added like this now `docker run --pull always athomasson2/ebook2audiobook --share`
- Docker gets stuck downloading Fine-Tuned models.
(This does not happen for every computer but some appear to run into this issue)
Disabling the progress bar appears to fix the issue,
as discussed [here in #191](https://github.com/DrewThomasson/ebook2audiobook/issues/191)
Example of adding this fix in the `docker run` command
```bash
docker run --pull always --rm --gpus all -e HF_HUB_DISABLE_PROGRESS_BARS=1 -e HF_HUB_ENABLE_HF_TRANSFER=0 \
-p 7860:7860 athomasson2/ebook2audiobook
```
## Fine Tuned TTS models
#### Fine Tune your own XTTSv2 model

View File

@@ -1 +1 @@
25.10.25
25.11.11

221
app.py
View File

@@ -7,10 +7,10 @@ import socket
import subprocess
import sys
import tempfile
import time
import warnings
from importlib.metadata import version, PackageNotFoundError
from typing import Any, Optional, Union, Callable
from pathlib import Path
from lib import *
@@ -52,83 +52,125 @@ def check_and_install_requirements(file_path:str)->bool:
print(error)
return False
try:
from importlib.metadata import version, PackageNotFoundError
try:
from packaging.specifiers import SpecifierSet
from packaging.version import Version
from tqdm import tqdm
from packaging.markers import Marker
except ImportError:
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', 'packaging'])
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', 'packaging', 'tqdm'])
from packaging.specifiers import SpecifierSet
from packaging.version import Version
from tqdm import tqdm
from packaging.markers import Marker
import re as regex
from tqdm import tqdm
flexible_packages = {"torch", "torchaudio", "numpy"}
torch_version = False
try:
import torch
torch_version = getattr(torch, '__version__', '')
devices['CUDA']['found'] = getattr(torch, "cuda", None) is not None and torch.cuda.is_available() and not (hasattr(torch.version, "hip") and torch.version.hip is not None)
devices['ROCM']['found'] = hasattr(torch.version, "hip") and torch.version.hip is not None and torch.cuda.is_available()
devices['MPS']['found'] = getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available()
devices['XPU']['found'] = getattr(torch, "xpu", None) is not None and torch.xpu.is_available()
except ImportError:
pass
cuda_only_packages = ('deepspeed')
with open(file_path, 'r') as f:
contents = f.read().replace('\r', '\n')
packages = [
pkg.strip()
for pkg in contents.splitlines()
if pkg.strip() and regex.search(r'[a-zA-Z0-9]', pkg)
]
packages = [pkg.strip() for pkg in contents.splitlines() if pkg.strip() and regex.search(r'[a-zA-Z0-9]', pkg)]
if sys.version_info >= (3, 11):
packages.append("pymupdf-layout")
missing_packages = []
cuda_markers = ('+cu', '+xpu', '+nv', '+git')
for package in packages:
if ';' in package:
pkg_part, marker_part = package.split(';', 1)
marker_part = marker_part.strip()
try:
marker = Marker(marker_part)
if not marker.evaluate():
continue
except Exception as e:
error = f'Warning: Could not evaluate marker {marker_part} for {pkg_part}: {e}'
print(error)
package = pkg_part.strip()
if 'git+' in package or '://' in package:
pkg_name_match = regex.search(r'([\w\-]+)\s*@?\s*git\+', package)
pkg_name = pkg_name_match.group(1) if pkg_name_match else None
if pkg_name:
spec = importlib.util.find_spec(pkg_name)
if spec is None:
msg = f'{pkg_name} (git package) is missing.'
print(msg)
missing_packages.append(package)
else:
error = f'Unrecognized git package: {package}'
print(error)
missing_packages.append(package)
continue
clean_pkg = regex.sub(r'\[.*?\]', '', package)
pkg_name = regex.split(r'[<>=]', clean_pkg, 1)[0].strip()
pkg_name = regex.split(r'[<>=]', clean_pkg, maxsplit=1)[0].strip()
if pkg_name in cuda_only_packages:
has_cuda_build = False
if torch_version:
has_cuda_build = any(marker in torch_version for marker in cuda_markers)
if not has_cuda_build:
continue
try:
installed_version = version(pkg_name)
if pkg_name == 'num2words':
code = "ZH_CN"
spec = importlib.util.find_spec(f"num2words.lang_{code}")
if spec is None:
missing_packages.append(package)
except PackageNotFoundError:
error = f'{package} is missing.'
error = f'{pkg_name} is not installed.'
print(error)
missing_packages.append(package)
continue
if pkg_name in flexible_packages:
continue
if '+' in installed_version:
continue
else:
spec_str = clean_pkg[len(pkg_name):].strip()
if spec_str:
spec = SpecifierSet(spec_str)
# normalize installed version -> major.minor.patch (if available)
norm_match = regex.match(r'^(\d+\.\d+(?:\.\d+)?)', installed_version)
short_version = norm_match.group(1) if norm_match else installed_version
try:
installed_v = Version(short_version)
except Exception:
installed_v = Version("0")
# detect requirement version -> major.minor.patch (if available)
installed_v = Version('0')
req_match = regex.search(r'(\d+\.\d+(?:\.\d+)?)', spec_str)
if req_match:
req_v = Version(req_match.group(1))
imajor, iminor = installed_v.major, installed_v.minor
rmajor, rminor = req_v.major, req_v.minor
if "==" in spec_str:
if '==' in spec_str:
if imajor != rmajor or iminor != rminor:
error = f'{pkg_name} (installed {installed_version}) not in same major.minor as required {req_v}.'
print(error)
missing_packages.append(package)
elif ">=" in spec_str:
elif '>=' in spec_str:
if (imajor < rmajor) or (imajor == rmajor and iminor < rminor):
error = f'{pkg_name} (installed {installed_version}) < required {req_v}.'
print(error)
missing_packages.append(package)
elif "<=" in spec_str:
elif '<=' in spec_str:
if (imajor > rmajor) or (imajor == rmajor and iminor > rminor):
error = f'{pkg_name} (installed {installed_version}) > allowed {req_v}.'
print(error)
missing_packages.append(package)
elif ">" in spec_str:
elif '>' in spec_str:
if (imajor < rmajor) or (imajor == rmajor and iminor <= rminor):
error = f'{pkg_name} (installed {installed_version}) <= required {req_v}.'
print(error)
missing_packages.append(package)
elif "<" in spec_str:
elif '<' in spec_str:
if (imajor > rmajor) or (imajor == rmajor and iminor >= rminor):
error = f'{pkg_name} (installed {installed_version}) >= restricted {req_v}.'
print(error)
missing_packages.append(package)
else:
if installed_v not in spec:
error = (f'{pkg_name} (installed {installed_version}) does not satisfy "{spec_str}".')
error = f'{pkg_name} (installed {installed_version}) does not satisfy {spec_str}.'
print(error)
missing_packages.append(package)
if missing_packages:
@@ -136,25 +178,16 @@ def check_and_install_requirements(file_path:str)->bool:
print(msg)
tmp_dir = tempfile.mkdtemp()
os.environ['TMPDIR'] = tmp_dir
result = subprocess.call([sys.executable, '-m', 'pip', 'cache', 'purge'])
subprocess.call([sys.executable, '-m', 'pip', 'cache', 'purge'])
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', 'pip'])
with tqdm(total=len(packages),
desc='Installation 0.00%',
bar_format='{desc}: {n_fmt}/{total_fmt} ',
unit='step') as t:
for package in tqdm(missing_packages, desc="Installing", unit="pkg"):
with tqdm(total = len(packages), desc = 'Installation 0.00%', bar_format = '{desc}: {n_fmt}/{total_fmt} ', unit = 'step') as t:
for package in tqdm(missing_packages, desc = 'Installing', unit = 'pkg'):
try:
if package == 'num2words':
pkgs = ['git+https://github.com/savoirfairelinux/num2words.git', '--force']
else:
pkgs = [package]
subprocess.check_call([
sys.executable, '-m', 'pip', 'install',
'--no-cache-dir', '--use-pep517',
*pkgs
])
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', '--use-pep517', package])
t.update(1)
except subprocess.CalledProcessError as e:
if package in flexible_packages:
continue
error = f'Failed to install {package}: {e}'
print(error)
return False
@@ -175,7 +208,7 @@ def check_dictionary()->bool:
error = 'UniDic dictionary not found or incomplete. Downloading now...'
print(error)
subprocess.run(['python', '-m', 'unidic', 'download'], check=True)
except subprocess.CalledProcessError as e:
except (subprocess.CalledProcessError, ConnectionError, OSError) as e:
error = f'Failed to download UniDic dictionary. Error: {e}. Unable to continue without UniDic. Exiting...'
raise SystemExit(error)
return False
@@ -185,6 +218,26 @@ def is_port_in_use(port:int)->bool:
with socket.socket(socket.AF_INET,socket.SOCK_STREAM) as s:
return s.connect_ex(('0.0.0.0',port))==0
def kill_previous_instances(script_name: str):
current_pid = os.getpid()
this_script_path = os.path.realpath(script_name)
import psutil
for proc in psutil.process_iter(['pid', 'cmdline']):
try:
cmdline = proc.info['cmdline']
if not cmdline:
continue
# unify case and absolute paths for comparison
joined_cmd = ' '.join(cmdline).lower()
if this_script_path.lower().endswith(script_name.lower()) and \
(script_name.lower() in joined_cmd) and \
proc.info['pid'] != current_pid:
print(f"[WARN] Found running instance PID={proc.info['pid']} -> killing it.")
proc.kill()
proc.wait(timeout=3)
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
continue
def main()->None:
# Argument parser to handle optional parameters with descriptions
parser = argparse.ArgumentParser(
@@ -195,12 +248,12 @@ Windows:
Gradio/GUI:
ebook2audiobook.cmd
Headless mode:
ebook2audiobook.cmd --headless --ebook '/path/to/file'
ebook2audiobook.cmd --headless --ebook '/path/to/file' --language eng
Linux/Mac:
Gradio/GUI:
./ebook2audiobook.sh
Headless mode:
./ebook2audiobook.sh --headless --ebook '/path/to/file'
./ebook2audiobook.sh --headless --ebook '/path/to/file' --language eng
Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
''',
@@ -233,35 +286,35 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
headless_optional_group = parser.add_argument_group('optional parameters')
headless_optional_group.add_argument(options[7], type=str, default=None, help='''(Optional) Path to the voice cloning file for TTS engine.
Uses the default voice if not present.''')
headless_optional_group.add_argument(options[8], type=str, default=default_device, choices=device_list, help=f'''(Optional) Pprocessor unit type for the conversion.
Default is set in ./lib/conf.py if not present. Fall back to CPU if GPU not available.''')
headless_optional_group.add_argument(options[8], type=str, default=default_device, choices=list(devices.values()), help=f'''(Optional) Pprocessor unit type for the conversion.
Default is set in ./lib/conf.py if not present. Fall back to CPU if CUDA or MPS is not available.''')
headless_optional_group.add_argument(options[9], type=str, default=None, choices=tts_engine_list_keys+tts_engine_list_values, help=f'''(Optional) Preferred TTS engine (available are: {tts_engine_list_keys+tts_engine_list_values}.
Default depends on the selected language. The tts engine should be compatible with the chosen language''')
headless_optional_group.add_argument(options[10], type=str, default=None, help=f'''(Optional) Path to the custom model zip file cntaining mandatory model files.
Please refer to ./lib/models.py''')
headless_optional_group.add_argument(options[11], type=str, default=default_fine_tuned, help='''(Optional) Fine tuned model path. Default is builtin model.''')
headless_optional_group.add_argument(options[12], type=str, default=default_output_format, help=f'''(Optional) Output audio format. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[13], type=float, default=None, help=f"""(xtts only, optional) Temperature for the model.
headless_optional_group.add_argument(options[13], type=float, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['temperature'], help=f"""(xtts only, optional) Temperature for the model.
Default to config.json model. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[14], type=float, default=None, help=f"""(xtts only, optional) A length penalty applied to the autoregressive decoder.
headless_optional_group.add_argument(options[14], type=float, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['length_penalty'], help=f"""(xtts only, optional) A length penalty applied to the autoregressive decoder.
Default to config.json model. Not applied to custom models.""")
headless_optional_group.add_argument(options[15], type=int, default=None, help=f"""(xtts only, optional) Controls how many alternative sequences the model explores. Must be equal or greater than length penalty.
headless_optional_group.add_argument(options[15], type=int, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['num_beams'], help=f"""(xtts only, optional) Controls how many alternative sequences the model explores. Must be equal or greater than length penalty.
Default to config.json model.""")
headless_optional_group.add_argument(options[16], type=float, default=None, help=f"""(xtts only, optional) A penalty that prevents the autoregressive decoder from repeating itself.
headless_optional_group.add_argument(options[16], type=float, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['repetition_penalty'], help=f"""(xtts only, optional) A penalty that prevents the autoregressive decoder from repeating itself.
Default to config.json model.""")
headless_optional_group.add_argument(options[17], type=int, default=None, help=f"""(xtts only, optional) Top-k sampling.
headless_optional_group.add_argument(options[17], type=int, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['top_k'], help=f"""(xtts only, optional) Top-k sampling.
Lower values mean more likely outputs and increased audio generation speed.
Default to config.json model.""")
headless_optional_group.add_argument(options[18], type=float, default=None, help=f"""(xtts only, optional) Top-p sampling.
headless_optional_group.add_argument(options[18], type=float, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['top_p'], help=f"""(xtts only, optional) Top-p sampling.
Lower values mean more likely outputs and increased audio generation speed. Default to config.json model.""")
headless_optional_group.add_argument(options[19], type=float, default=None, help=f"""(xtts only, optional) Speed factor for the speech generation.
headless_optional_group.add_argument(options[19], type=float, default=default_engine_settings[TTS_ENGINES['XTTSv2']]['speed'], help=f"""(xtts only, optional) Speed factor for the speech generation.
Default to config.json model.""")
headless_optional_group.add_argument(options[20], action='store_true', help=f"""(xtts only, optional) Enable TTS text splitting. This option is known to not be very efficient.
Default to config.json model.""")
headless_optional_group.add_argument(options[21], type=float, default=None, help=f"""(bark only, optional) Text Temperature for the model.
Default to {default_engine_settings[TTS_ENGINES['BARK']]['text_temp']}. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[22], type=float, default=None, help=f"""(bark only, optional) Waveform Temperature for the model.
Default to {default_engine_settings[TTS_ENGINES['BARK']]['waveform_temp']}. Higher temperatures lead to more creative outputs.""")
headless_optional_group.add_argument(options[21], type=float, default=default_engine_settings[TTS_ENGINES['BARK']]['text_temp'], help=f"""(bark only, optional) Text Temperature for the model.
Default to config.json model.""")
headless_optional_group.add_argument(options[22], type=float, default=default_engine_settings[TTS_ENGINES['BARK']]['waveform_temp'], help=f"""(bark only, optional) Waveform Temperature for the model.
Default to config.json model.""")
headless_optional_group.add_argument(options[23], type=str, help=f'''(Optional) Path to the output directory. Default is set in ./lib/conf.py''')
headless_optional_group.add_argument(options[24], action='version', version=f'ebook2audiobook version {prog_version}', help='''Show the version of the script and exit''')
headless_optional_group.add_argument(options[25], action='store_true', help=argparse.SUPPRESS)
@@ -304,17 +357,30 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
print(error)
sys.exit(1)
from lib.functions import SessionContext, convert_ebook_batch, convert_ebook, web_interface
ctx = SessionContext()
import lib.functions as f
f.context = f.SessionContext() if f.context is None else f.context
f.context_tracker = f.SessionTracker() if f.context_tracker is None else f.context_tracker
f.active_sessions = set() if f.active_sessions is None else f.active_sessions
# Conditions based on the --headless flag
if args['headless']:
args['is_gui_process'] = False
args['chapters_control'] = False
args['chapters_preview'] = False
args['event'] = ''
args['audiobooks_dir'] = os.path.abspath(args['output_dir']) if args['output_dir'] else audiobooks_cli_dir
args['device'] = 'cuda' if args['device'] == 'gpu' else args['device']
args['device'] = devices['CUDA'] if args['device'] == devices['CUDA'] else args['device']
args['tts_engine'] = TTS_ENGINES[args['tts_engine']] if args['tts_engine'] in TTS_ENGINES.keys() else args['tts_engine'] if args['tts_engine'] in TTS_ENGINES.values() else None
args['output_split'] = default_output_split
args['output_split_hours'] = default_output_split_hours
args['xtts_temperature'] = args['temperature']
args['xtts_length_penalty'] = args['length_penalty']
args['xtts_num_beams'] = args['num_beams']
args['xtts_repetition_penalty'] = args['repetition_penalty']
args['xtts_top_k'] = args['top_k']
args['xtts_top_p'] = args['top_p']
args['xtts_speed'] = args['speed']
args['xtts_enable_text_splitting'] = False
args['bark_text_temp'] = args['text_temp']
args['bark_waveform_temp'] = args['waveform_temp']
engine_setting_keys = {engine: list(settings.keys()) for engine, settings in default_engine_settings.items()}
valid_model_keys = engine_setting_keys.get(args['tts_engine'], [])
renamed_args = {}
@@ -349,7 +415,7 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
if any(file.endswith(ext) for ext in ebook_formats):
full_path = os.path.abspath(os.path.join(args['ebooks_dir'], file))
args['ebook_list'].append(full_path)
progress_status, passed = convert_ebook_batch(args, ctx)
progress_status, passed = f.convert_ebook_batch(args)
if passed is False:
error = f'Conversion failed: {progress_status}'
print(error)
@@ -360,7 +426,7 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
error = f'Error: The provided --ebook "{args["ebook"]}" does not exist.'
print(error)
sys.exit(1)
progress_status, passed = convert_ebook(args, ctx)
progress_status, passed = f.convert_ebook(args)
if passed is False:
error = f'Conversion failed: {progress_status}'
print(error)
@@ -375,10 +441,37 @@ Tip: to add of silence (1.4 seconds) into your text just use "###" or "[pause]".
allowed_arguments = {'--share', '--script_mode'}
passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
if passed_args_set.issubset(allowed_arguments):
web_interface(args, ctx)
try:
#script_name = os.path.basename(sys.argv[0])
#kill_previous_instances(script_name)
app = f.build_interface(args)
if app is not None:
app.queue(
default_concurrency_limit=interface_concurrency_limit
).launch(
debug=bool(int(os.environ.get('GRADIO_DEBUG', '0'))),
show_error=debug_mode, favicon_path='./favicon.ico',
server_name=interface_host,
server_port=interface_port,
share= args['share'],
max_file_size=max_upload_size
)
except OSError as e:
error = f'Connection error: {e}'
f.alert_exception(error, None)
except socket.error as e:
error = f'Socket error: {e}'
f.alert_exception(error, None)
except KeyboardInterrupt:
error = 'Server interrupted by user. Shutting down...'
f.alert_exception(error, None)
except Exception as e:
error = f'An unexpected error occurred: {e}'
f.alert_exception(error, None)
else:
error = 'Error: In non-headless mode, no option or only --share can be passed'
error = 'Error: In GUI mode, no option or only --share can be passed'
print(error)
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -17,7 +17,7 @@ set "PYTHONUTF8=1"
set "PYTHONIOENCODING=utf-8"
set "CURRENT_ENV="
set "PROGRAMS_LIST=calibre-normal ffmpeg nodejs espeak-ng sox"
set "PROGRAMS_LIST=calibre-normal ffmpeg nodejs espeak-ng sox tesseract"
set "TMP=%SCRIPT_DIR%\tmp"
set "TEMP=%SCRIPT_DIR%\tmp"
@@ -78,11 +78,11 @@ exit /b
:conda_check
where /Q conda
if %errorlevel% neq 0 (
call rmdir /s /q "%CONDA_INSTALL_DIR%" 2>nul
echo Miniforge3 is not installed.
echo Miniforge3 is not installed.
set "CONDA_CHECK=1"
goto :install_components
)
:: Check if running in a Conda environment
if defined CONDA_DEFAULT_ENV (
set "CURRENT_ENV=%CONDA_PREFIX%"
@@ -158,7 +158,9 @@ if not "%CONDA_CHECK%"=="0" (
echo Conda installation failed.
goto :failed
)
call conda config --set auto_activate_base false
if not exist "%USERPROFILE%\.condarc" (
call conda config --set auto_activate false
)
call conda update conda -y
del "%CONDA_INSTALLER%"
set "CONDA_CHECK=0"
@@ -169,26 +171,66 @@ if not "%CONDA_CHECK%"=="0" (
:: Install missing packages one by one
if not "%PROGRAMS_CHECK%"=="0" (
echo Installing missing programs...
if "%SCOOP_CHECK%"=="0" (
call scoop bucket add muggle b https://github.com/hu3rror/scoop-muggle.git
call scoop bucket add extras
call scoop bucket add versions
)
for %%p in (%missing_prog_array%) do (
call scoop install %%p
set "prog=%%p"
if "%%p"=="nodejs" (
set "prog=node"
)
if "%%p"=="calibre-normal" set "prog=calibre"
where /Q !prog!
if !errorlevel! neq 0 (
echo %%p installation failed...
goto :failed
)
if "%SCOOP_CHECK%"=="0" (
call scoop bucket add muggle b https://github.com/hu3rror/scoop-muggle.git
call scoop bucket add extras
call scoop bucket add versions
)
call powershell -command "[System.Environment]::SetEnvironmentVariable('Path', [System.Environment]::GetEnvironmentVariable('Path', 'User') + '%SCOOP_SHIMS%;%SCOOP_APPS%;%CONDA_PATH%;%NODE_PATH%;', 'User')"
set "SCOOP_CHECK=0"
for %%p in (%missing_prog_array%) do (
set "prog=%%p"
call scoop install %%p
if "%%p"=="tesseract" (
where /Q !prog!
if !errorlevel! equ 0 (
set "syslang=%LANG%"
if not defined syslang set "syslang=en"
set "syslang=!syslang:~0,2!"
set "tesslang=eng"
if /I "!syslang!"=="fr" set "tesslang=fra"
if /I "!syslang!"=="de" set "tesslang=deu"
if /I "!syslang!"=="it" set "tesslang=ita"
if /I "!syslang!"=="es" set "tesslang=spa"
if /I "!syslang!"=="pt" set "tesslang=por"
if /I "!syslang!"=="ar" set "tesslang=ara"
if /I "!syslang!"=="tr" set "tesslang=tur"
if /I "!syslang!"=="ru" set "tesslang=rus"
if /I "!syslang!"=="bn" set "tesslang=ben"
if /I "!syslang!"=="zh" set "tesslang=chi_sim"
if /I "!syslang!"=="fa" set "tesslang=fas"
if /I "!syslang!"=="hi" set "tesslang=hin"
if /I "!syslang!"=="hu" set "tesslang=hun"
if /I "!syslang!"=="id" set "tesslang=ind"
if /I "!syslang!"=="jv" set "tesslang=jav"
if /I "!syslang!"=="ja" set "tesslang=jpn"
if /I "!syslang!"=="ko" set "tesslang=kor"
if /I "!syslang!"=="pl" set "tesslang=pol"
if /I "!syslang!"=="ta" set "tesslang=tam"
if /I "!syslang!"=="te" set "tesslang=tel"
if /I "!syslang!"=="yo" set "tesslang=yor"
echo Detected system language: !syslang! → downloading OCR language: !tesslang!
set "tessdata=%SCOOP_APPS%\tesseract\current\tessdata"
if not exist "!tessdata!\!tesslang!.traineddata" (
powershell -Command "Invoke-WebRequest -Uri https://github.com/tesseract-ocr/tessdata_best/raw/main/!tesslang!.traineddata -OutFile '!tessdata!\!tesslang!.traineddata'"
)
if exist "!tessdata!\!tesslang!.traineddata" (
echo Tesseract OCR language !tesslang! installed in !tessdata!
) else (
echo Failed to install OCR language !tesslang!
)
)
) else if "%%p"=="nodejs" (
set "prog=node"
) else if "%%p"=="calibre-normal" (
set "prog=calibre"
)
where /Q !prog!
if !errorlevel! neq 0 (
echo %%p installation failed...
goto :failed
)
)
call powershell -Command "[System.Environment]::SetEnvironmentVariable('Path', [System.Environment]::GetEnvironmentVariable('Path', 'User') + ';%SCOOP_SHIMS%;%SCOOP_APPS%;%CONDA_PATH%;%NODE_PATH%', 'User')"
set "SCOOP_CHECK=0"
set "PROGRAMS_CHECK=0"
set "missing_prog_array="
)

View File

@@ -1,20 +1,18 @@
torchvggish
numpy<2
num2words @ git+https://github.com/savoirfairelinux/num2words.git
regex
tqdm
cutlet
deep_translator
docker
ebooklib
fastapi
num2words
argostranslate
beautifulsoup4
fugashi
sudachipy
sudachidict_core
ray
PyMuPDF
pytesseract
unidic
pymupdf4llm
translate
hangul-romanize
indic-nlp-library
iso639-lang
@@ -25,14 +23,14 @@ pypinyin
pythainlp
mutagen
PyOpenGL
nvidia-ml-py
phonemizer-fork
pydub
pyannote-audio==3.4.0
demucs==4.0.1
gradio>=5.49
transformers==4.51.3
coqui-tts[languages]==0.26.0
torch>=2.8.0,<2.9
torchaudio>=2.8.0,<2.9
torchvggish
demucs
deepspeed
pyannote-audio<=3.4.0
stanza<=1.10.1
argostranslate<=1.10.0
gradio>=5.49.1
torch<=2.7.1
torchaudio<=2.7.1
coqui-tts[languages]==0.27.2

View File

@@ -1,14 +1,16 @@
#!/usr/bin/env bash
if [[ "$OSTYPE" = "darwin"* && -z "$SWITCHED_TO_ZSH" && "$(ps -p $$ -o comm=)" != "zsh" ]]; then
export SWITCHED_TO_ZSH=1
exec env zsh "$0" "$@"
export SWITCHED_TO_ZSH=1
exec env zsh "$0" "$@"
fi
unset SWITCHED_TO_ZSH
#unset SWITCHED_TO_ZSH
ARCH=$(uname -m)
PYTHON_VERSION="3.12"
PYTHON_VERSION=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || echo "3.12")
MIN_PYTHON_VERSION="3.10"
MAX_PYTHON_VERSION="3.13"
export PYTHONUTF8="1"
export PYTHONIOENCODING="utf-8"
@@ -48,7 +50,7 @@ SCRIPT_MODE="$NATIVE"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WGET=$(which wget 2>/dev/null)
REQUIRED_PROGRAMS=("curl" "calibre" "ffmpeg" "nodejs" "espeak-ng" "rust" "sox")
REQUIRED_PROGRAMS=("curl" "pkg-config" "calibre" "ffmpeg" "nodejs" "espeak-ng" "rust" "sox" "tesseract")
PYTHON_ENV="python_env"
CURRENT_ENV=""
@@ -60,9 +62,6 @@ fi
if [[ "$OSTYPE" = "darwin"* ]]; then
CONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-$(uname -m).sh"
CONFIG_FILE="$HOME/.zshrc"
if [[ "$ARCH" == "x86_64" ]]; then
PYTHON_VERSION="3.11"
fi
elif [[ "$OSTYPE" = "linux"* ]]; then
CONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
CONFIG_FILE="$HOME/.bashrc"
@@ -76,6 +75,20 @@ CONDA_ENV="$CONDA_INSTALL_DIR/etc/profile.d/conda.sh"
export TMPDIR="$SCRIPT_DIR/.cache"
export PATH="$CONDA_PATH:$PATH"
compare_versions() {
local ver1=$1
local ver2=$2
# Pad each version to 3 parts
IFS='.' read -r v1_major v1_minor <<<"$ver1"
IFS='.' read -r v2_major v2_minor <<<"$ver2"
((v1_major < v2_major)) && return 1
((v1_major > v2_major)) && return 2
((v1_minor < v2_minor)) && return 1
((v1_minor > v2_minor)) && return 2
return 0
}
# Check if the current script is run inside a docker container
if [[ -n "$container" || -f /.dockerenv ]]; then
SCRIPT_MODE="$FULL_DOCKER"
@@ -123,14 +136,37 @@ else
local programs=("$@")
programs_missing=()
for program in "${programs[@]}"; do
bin="$program"
if [ "$program" = "nodejs" ]; then
bin="node"
elif [ "$program" = "rust" ]; then
if command -v apt-get &> /dev/null; then
bin="rustc"
fi
if [ "$program" = "rust" ]; then
if command -v apt-get &>/dev/null; then
program="rustc"
fi
bin="rustc"
fi
if [ "$program" = "tesseract" ]; then
if command -v brew &> /dev/null; then
program="tesseract"
elif command -v emerge &> /dev/null; then
program="tesseract"
elif command -v dnf &> /dev/null; then
program="tesseract"
elif command -v yum &> /dev/null; then
program="tesseract"
elif command -v zypper &> /dev/null; then
program="tesseract-ocr"
elif command -v pacman &> /dev/null; then
program="tesseract"
elif command -v apt-get &> /dev/null; then
program="tesseract-ocr"
elif command -v apk &> /dev/null; then
program="tesseract-ocr"
else
echo "Cannot recognize your applications package manager. Please install the required applications manually."
return 1
fi
else
bin="$program"
fi
if ! command -v "$bin" >/dev/null 2>&1; then
echo -e "\e[33m$program is not installed.\e[0m"
@@ -156,8 +192,9 @@ else
if ! command -v brew &> /dev/null; then
echo -e "\e[33mHomebrew is not installed. Installing Homebrew...\e[0m"
/usr/bin/env bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> $HOME/.zprofile
eval "$(/opt/homebrew/bin/brew shellenv)"
echo >> $HOME/.zprofile
echo 'eval "$(/usr/local/bin/brew shellenv)"' >> $HOME/.zprofile
eval "$(/usr/local/bin/brew shellenv)"
fi
else
SUDO="sudo"
@@ -175,7 +212,7 @@ else
PACK_MGR="zypper install"
PACK_MGR_OPTIONS="-y"
elif command -v pacman &> /dev/null; then
PACK_MGR="pacman -Sy"
PACK_MGR="pacman -Sy --noconfirm"
elif command -v apt-get &> /dev/null; then
$SUDO apt-get update
PACK_MGR="apt-get install"
@@ -186,7 +223,6 @@ else
echo "Cannot recognize your applications package manager. Please install the required applications manually."
return 1
fi
fi
if [ -z "$WGET" ]; then
echo -e "\e[33m wget is missing! trying to install it... \e[0m"
@@ -200,9 +236,9 @@ else
fi
fi
for program in "${programs_missing[@]}"; do
if [ "$program" = "calibre" ];then
if [ "$program" = "calibre" ]; then
# avoid conflict with calibre builtin lxml
pip uninstall lxml -y 2>/dev/null
#pip uninstall lxml -y 2>/dev/null
echo -e "\e[33mInstalling Calibre...\e[0m"
if [[ "$OSTYPE" = "darwin"* ]]; then
eval "$PACK_MGR --cask calibre"
@@ -219,21 +255,75 @@ else
echo "$program installation failed."
fi
fi
elif [ "$program" = "rust" ]; then
if command -v apt-get &> /dev/null; then
app="rustc"
else
app="$program"
fi
elif [[ "$program" = "rust" || "$program" = "rustc" ]]; then
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
source $HOME/.cargo/env
if command -v $app &>/dev/null; then
if command -v $program &>/dev/null; then
echo -e "\e[32m===============>>> $program is installed! <<===============\e[0m"
else
echo "$program installation failed."
fi
elif [[ "$program" = "tesseract" || "$program" = "tesseract-ocr" ]]; then
eval "$SUDO $PACK_MGR $program $PACK_MGR_OPTIONS"
if command -v $program >/dev/null 2>&1; then
echo -e "\e[32m===============>>> $program is installed! <<===============\e[0m"
sys_lang=$(echo "${LANG:-en}" | cut -d_ -f1 | tr '[:upper:]' '[:lower:]')
case "$sys_lang" in
en) tess_lang="eng" ;;
fr) tess_lang="fra" ;;
de) tess_lang="deu" ;;
it) tess_lang="ita" ;;
es) tess_lang="spa" ;;
pt) tess_lang="por" ;;
ar) tess_lang="ara" ;;
tr) tess_lang="tur" ;;
ru) tess_lang="rus" ;;
bn) tess_lang="ben" ;;
zh) tess_lang="chi_sim" ;;
fa) tess_lang="fas" ;;
hi) tess_lang="hin" ;;
hu) tess_lang="hun" ;;
id) tess_lang="ind" ;;
jv) tess_lang="jav" ;;
ja) tess_lang="jpn" ;;
ko) tess_lang="kor" ;;
pl) tess_lang="pol" ;;
ta) tess_lang="tam" ;;
te) tess_lang="tel" ;;
yo) tess_lang="yor" ;;
*) tess_lang="eng" ;;
esac
echo "Detected system language: $sys_lang → installing Tesseract OCR language: $tess_lang"
langpack=""
if command -v brew &> /dev/null; then
langpack="tesseract-lang-$tess_lang"
elif command -v apt-get &>/dev/null; then
langpack="tesseract-ocr-$tess_lang"
elif command -v dnf &>/dev/null || command -v yum &>/dev/null; then
langpack="tesseract-langpack-$tess_lang"
elif command -v zypper &>/dev/null; then
langpack="tesseract-ocr-$tess_lang"
elif command -v pacman &>/dev/null; then
langpack="tesseract-data-$tess_lang"
elif command -v apk &>/dev/null; then
langpack="tesseract-ocr-$tess_lang"
else
echo "Cannot recognize your applications package manager. Please install the required applications manually."
return 1
fi
if [ -n "$langpack" ]; then
eval "$SUDO $PACK_MGR $langpack $PACK_MGR_OPTIONS"
if tesseract --list-langs | grep -q "$tess_lang"; then
echo "Tesseract OCR language '$tess_lang' successfully installed."
else
echo "Tesseract OCR language '$tess_lang' not installed properly."
fi
fi
else
echo "$program installation failed."
fi
else
eval "$SUDO $PACK_MGR $program $PACK_MGR_OPTIONS"
eval "$SUDO $PACK_MGR $program $PACK_MGR_OPTIONS"
if command -v $program >/dev/null 2>&1; then
echo -e "\e[32m===============>>> $program is installed! <<===============\e[0m"
else
@@ -251,18 +341,25 @@ else
function conda_check {
if ! command -v conda &> /dev/null || [ ! -f "$CONDA_ENV" ]; then
echo -e "\e[33mDownloading Miniforge3 installer...\e[0m"
if [[ "$OSTYPE" = "darwin"* ]]; then
if [[ "$OSTYPE" == darwin* ]]; then
curl -fsSLo "$CONDA_INSTALLER" "$CONDA_URL"
shell_name="zsh"
else
wget -O "$CONDA_INSTALLER" "$CONDA_URL"
shell_name="bash"
fi
if [[ -f "$CONDA_INSTALLER" ]]; then
echo -e "\e[33mInstalling Miniforge3...\e[0m"
bash "$CONDA_INSTALLER" -b -u -p "$CONDA_INSTALL_DIR"
rm -f "$CONDA_INSTALLER"
if [[ -f "$CONDA_INSTALL_DIR/bin/conda" ]]; then
$CONDA_INSTALL_DIR/bin/conda config --set auto_activate_base false
source $CONDA_ENV
if [ ! -f "$HOME/.condarc" ]; then
$CONDA_INSTALL_DIR/bin/conda config --set auto_activate false
fi
[ -f "$CONFIG_FILE" ] || touch "$CONFIG_FILE"
grep -qxF 'export PATH="$HOME/Miniforge3/bin:$PATH"' "$CONFIG_FILE" || echo 'export PATH="$HOME/Miniforge3/bin:$PATH"' >> "$CONFIG_FILE"
source "$CONFIG_FILE"
conda init "$shell_name"
echo -e "\e[32m===============>>> conda is installed! <<===============\e[0m"
else
echo -e "\e[31mconda installation failed.\e[0m"
@@ -275,8 +372,20 @@ else
fi
fi
if [[ ! -d "$SCRIPT_DIR/$PYTHON_ENV" ]]; then
if [[ "$OSTYPE" = "darwin"* && "$ARCH" = "x86_64" ]]; then
PYTHON_VERSION="3.11"
else
compare_versions "$PYTHON_VERSION" "$MIN_PYTHON_VERSION"
case $? in
1) PYTHON_VERSION="$MIN_PYTHON_VERSION" ;;
esac
compare_versions "$PYTHON_VERSION" "$MAX_PYTHON_VERSION"
case $? in
2) PYTHON_VERSION="$MAX_PYTHON_VERSION" ;;
esac
fi
# Use this condition to chmod writable folders once
chmod -R 777 ./audiobooks ./tmp ./models
chmod -R u+rwX,go+rX ./audiobooks ./tmp ./models
conda create --prefix "$SCRIPT_DIR/$PYTHON_ENV" python=$PYTHON_VERSION -y
conda init > /dev/null 2>&1
source $CONDA_ENV
@@ -286,7 +395,7 @@ else
python -m pip install --upgrade --no-cache-dir --use-pep517 --progress-bar=on -r requirements.txt
tts_version=$(python -c "import importlib.metadata; print(importlib.metadata.version('coqui-tts'))" 2>/dev/null)
if [[ -n "$tts_version" ]]; then
if [[ "$(printf '%s\n' "$tts_version" "0.26.1" | sort -V | tail -n1)" == "0.26.1" ]]; then
if [[ "$(printf '%s\n' "$tts_version" "0.26.1" | sort -V | tail -n1)" = "0.26.1" ]]; then
python -m pip install --no-cache-dir --use-pep517 --progress-bar=on 'transformers<=4.51.3'
fi
fi
@@ -295,24 +404,132 @@ else
return 0
}
function create_macos_app_bundle {
local APP_NAME="ebook2audiobook"
local APP_BUNDLE="$HOME/Applications/$APP_NAME.app"
local CONTENTS="$APP_BUNDLE/Contents"
local MACOS="$CONTENTS/MacOS"
local RESOURCES="$CONTENTS/Resources"
local ICON_PATH="$SCRIPT_DIR/icons/mac/appIcon.icns"
echo "🚀 Creating $APP_NAME.app bundle..."
mkdir -p "$MACOS" "$RESOURCES"
# Create the executable script inside the bundle
cat > "$MACOS/$APP_NAME" << EOF
#!/bin/bash
# Create a temporary script file to run in Terminal
TEMP_SCRIPT=\$(mktemp)
cat > "\$TEMP_SCRIPT" << 'SCRIPT'
#!/bin/bash
cd "$SCRIPT_DIR"
conda deactivate
bash ebook2audiobook.sh
# Wait 10 seconds for the server to start
sleep 10
# Open the browser
open http://localhost:7860/
SCRIPT
chmod +x "\$TEMP_SCRIPT"
# Open Terminal and run the script
open -a Terminal "\$TEMP_SCRIPT"
# Clean up the temp script after 60 seconds
sleep 60
rm "\$TEMP_SCRIPT"
EOF
chmod +x "$MACOS/$APP_NAME"
# Copy the icon to the bundle
if [ -f "$ICON_PATH" ]; then
cp "$ICON_PATH" "$RESOURCES/AppIcon.icns"
echo "✓ Icon copied to bundle"
else
echo "⚠️ Warning: Icon not found at $ICON_PATH"
fi
# Create the Info.plist file (required for macOS app bundles)
cat > "$CONTENTS/Info.plist" << 'PLIST'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>ebook2audiobook</string>
<key>CFBundleIdentifier</key>
<string>com.local.ebook2audiobook</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>ebook2audiobook</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSMinimumSystemVersion</key>
<string>10.9</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
<key>CFBundleIconFile</key>
<string>AppIcon</string>
</dict>
</plist>
PLIST
echo "✓ Info.plist created"
# Update macOS cache to recognize the new app
touch "$APP_BUNDLE"
echo ""
echo "✅ Application bundle created successfully!"
echo "📍 Location: $APP_BUNDLE"
echo ""
}
function create_linux_app_launcher {
# Linux desktop entry creation goes here
return 0
}
function create_app_bundle {
if [[ "$OSTYPE" = "darwin"* ]]; then
create_macos_app_bundle
elif [[ "$OSTYPE" = "linux"* ]]; then
create_linux_app_launcher
fi
}
if [ "$SCRIPT_MODE" = "$FULL_DOCKER" ]; then
python app.py --script_mode "$SCRIPT_MODE" "${ARGS[@]}"
conda deactivate
conda deactivate
elif [ "$SCRIPT_MODE" = "$NATIVE" ]; then
pass=true
if [ "$SCRIPT_MODE" = "$NATIVE" ]; then
if ! required_programs_check "${REQUIRED_PROGRAMS[@]}"; then
if ! install_programs; then
pass=false
fi
pass=true
if ! required_programs_check "${REQUIRED_PROGRAMS[@]}"; then
if ! install_programs; then
pass=false
fi
fi
if [ $pass = true ]; then
if [ "$pass" = true ]; then
if conda_check; then
conda init > /dev/null 2>&1
source $CONDA_ENV
conda activate "$SCRIPT_DIR/$PYTHON_ENV"
create_app_bundle
python app.py --script_mode "$SCRIPT_MODE" "${ARGS[@]}"
conda deactivate
conda deactivate
@@ -323,4 +540,4 @@ else
fi
fi
exit 0
exit 0

View File

@@ -1,15 +1,15 @@
from .models import (
TTS_ENGINES, TTS_VOICE_CONVERSION, TTS_SML, default_fine_tuned, default_tts_engine,
default_engine_settings, default_vc_model, default_voice_detection_model,
loaded_tts, max_custom_model, max_custom_voices,
max_tts_in_memory, max_upload_size, models, os, voices_dir
loaded_tts, xtts_builtin_speakers_list, max_custom_model, max_custom_voices,
max_upload_size, models, os, voices_dir
)
from .conf import (
FULL_DOCKER, NATIVE, audiobooks_cli_dir, audiobooks_gradio_dir,
audiobooks_host_dir, debug_mode, default_audio_proc_samplerate,
default_audio_proc_format, default_device, default_gpu_wiki,
default_chapters_control, default_output_format, device_list, ebook_formats,
default_chapters_preview, default_output_format, devices, ebook_formats,
ebooks_dir, interface_component_options, interface_concurrency_limit,
interface_host, interface_port, interface_shared_tmp_expire,
max_python_version, min_python_version, models_dir, os,
@@ -31,15 +31,15 @@ __all__ = [
# from models
"TTS_ENGINES", "TTS_VOICE_CONVERSION", "TTS_SML", "default_fine_tuned", "default_tts_engine",
"default_engine_settings", "default_vc_model", "default_voice_detection_model",
"loaded_tts", "max_custom_model",
"max_custom_voices", "max_tts_in_memory", "max_upload_size",
"loaded_tts", "xtts_builtin_speakers_list", "max_custom_model",
"max_custom_voices", "max_upload_size",
"models", "os", "voices_dir",
# from conf
"FULL_DOCKER", "NATIVE", "audiobooks_cli_dir", "audiobooks_gradio_dir",
"audiobooks_host_dir", "debug_mode", "default_audio_proc_samplerate",
"default_audio_proc_format", "default_device", "default_gpu_wiki",
"default_chapters_control", "default_output_format", "device_list", "ebook_formats",
"default_chapters_preview", "default_output_format", "devices", "ebook_formats",
"ebooks_dir", "interface_component_options", "interface_concurrency_limit",
"interface_host", "interface_port", "interface_shared_tmp_expire",
"max_python_version", "min_python_version", "models_dir", "os",

View File

@@ -3,7 +3,6 @@ import tempfile
import argostranslate.package
import argostranslate.translate
from typing import Any, Optional, Union, Callable
from iso639 import Lang
from lib.conf import models_dir
from lib.lang import language_mapping
@@ -50,7 +49,7 @@ class ArgosTranslator:
]
return language_translate_options
def get_all_target_packages(self,source_lang:str)->list[Any]:
def get_all_target_packages(self,source_lang:str)->list:
available_packages=argostranslate.package.get_available_packages()
return [pkg for pkg in available_packages if pkg.from_code==source_lang]
@@ -64,7 +63,7 @@ class ArgosTranslator:
error=f'is_package_installed() error: {e}'
return False
def download_and_install_argos_package(self,source_lang:str,target_lang:str)->tuple[Optional[str],bool]:
def download_and_install_argos_package(self,source_lang:str,target_lang:str)->tuple[str|None,bool]:
try:
if self.is_package_installed(source_lang,target_lang):
print(f"Package for translation from {source_lang} to {target_lang} is already installed.")
@@ -77,6 +76,9 @@ class ArgosTranslator:
target_package=pkg
break
if target_package:
#tmp_dir = os.path.join(session['process_dir'], "tmp")
#os.makedirs(tmp_dir, exist_ok=True)
#with tempfile.TemporaryDirectory(dir=tmp_dir) as tmpdirname:
with tempfile.TemporaryDirectory() as tmpdirname:
print(f"Downloading package for translation from {source_lang} to {target_lang}...")
package_path=target_package.download()
@@ -97,7 +99,7 @@ class ArgosTranslator:
error=f'AgrosTranslator.process() error: {e}'
return error,False
def start(self,source_lang:str,target_lang:str)->tuple[Optional[str],bool]:
def start(self,source_lang:str,target_lang:str)->tuple[str|None,bool]:
try:
if self.neural_machine!="argostranslate":
error=f"Neural machine '{self.neural_machine}' is not supported."

View File

@@ -2,7 +2,6 @@ import os
import numpy as np
import librosa
from typing import Any, Optional, Union, Callable
from pyannote.audio import Model
from pyannote.audio.pipelines import VoiceActivityDetection
from lib.conf import tts_dir

View File

@@ -2,7 +2,7 @@ import time
import logging
from queue import Queue, Empty
from typing import Any, Optional, Union, Callable
from typing import Any
class RedirectConsole:

View File

@@ -1,42 +1,38 @@
import subprocess, re, sys, gradio as gr
from typing import Any, Optional, Union, Callable
class SubprocessPipe:
def __init__(self,cmd:str,session:Any,total_duration:float):
self.cmd=cmd
self.session=session
self.total_duration=total_duration
self.process=None
self._stop_requested=False
self.progress_bar=None
self.start()
def _on_start(self)->None:
print('Export started')
if self.session.get('is_gui_process'):
def __init__(self,cmd:str, is_gui_process:bool, total_duration:float, msg:str='Processing'):
self.cmd = cmd
self.is_gui_process = is_gui_process
self.total_duration = total_duration
self.msg = msg
self.process = None
self._stop_requested = False
self.progress_bar = None
if self.is_gui_process:
self.progress_bar=gr.Progress(track_tqdm=False)
self.progress_bar(0.0,desc='Starting export...')
self._run_process()
def _on_progress(self,percent:float)->None:
sys.stdout.write(f'\rFinal Encoding: {percent:.1f}%')
sys.stdout.write(f'\r{self.msg}: {percent:.1f}%')
sys.stdout.flush()
if self.session.get('is_gui_process'):
self.progress_bar(percent/100,desc='Final Encoding')
if self.is_gui_process:
self.progress_bar(percent/100,desc=self.msg)
def _on_complete(self)->None:
print('\nExport completed successfully')
if self.session.get('is_gui_process'):
self.progress_bar(1.0,desc='Export completed')
msg = f"\n{self.msg} completed"
print(msg)
if self.is_gui_process:
self.progress_bar(1.0,desc=msg)
def _on_error(self,err:Exception)->None:
print(f'\nExport failed: {err}')
if self.session.get('is_gui_process'):
self.progress_bar(0.0,desc='Export failed')
def _on_error(self, err:Exception)->None:
error = f"\n{self.msg} failed: {err}"
print(error)
if self.is_gui_process:
self.progress_bar(0.0,desc=error)
def start(self)->bool:
def _run_process(self)->bool:
try:
self._on_start()
self.process=subprocess.Popen(
self.cmd,
stdout=subprocess.DEVNULL,
@@ -48,14 +44,11 @@ class SubprocessPipe:
last_percent=0.0
for raw_line in self.process.stderr:
line=raw_line.decode(errors='ignore')
if self._stop_requested or self.session.get('cancellation_requested'):
print('\nExport cancelled')
return self.stop()
match=time_pattern.search(raw_line)
if match and self.total_duration>0:
if match and self.total_duration > 0:
current_time=int(match.group(1))/1_000_000
percent=min((current_time/self.total_duration)*100,100)
if abs(percent-last_percent)>=0.5:
if abs(percent-last_percent) >= 0.5:
self._on_progress(percent)
last_percent=percent
elif b'progress=end' in raw_line:

View File

@@ -1,24 +1,26 @@
import hashlib
import math
import os
import shutil
import subprocess
import tempfile
import threading
import uuid
import numpy as np
import regex as re
import soundfile as sf
import torch
import torchaudio
_original_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs.setdefault("weights_only", False)
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
import hashlib, math, os, shutil, subprocess, tempfile, threading, uuid
import numpy as np, regex as re, soundfile as sf, torchaudio
import gc
from typing import Any
from multiprocessing.managers import DictProxy
from torch import Tensor
from huggingface_hub import hf_hub_download
from pathlib import Path
from pprint import pprint
from lib import *
from lib.classes.tts_engines.common.utils import unload_tts, append_sentence2vtt
from lib.classes.tts_engines.common.utils import cleanup_garbage, unload_tts, append_sentence2vtt
from lib.classes.tts_engines.common.audio_filters import detect_gender, trim_audio, normalize_audio, is_audio_data_valid
#import logging
@@ -27,149 +29,266 @@ from lib.classes.tts_engines.common.audio_filters import detect_gender, trim_aud
lock = threading.Lock()
class Coqui:
def __init__(self, session):
def __init__(self,session:DictProxy):
try:
self.session = session
self.cache_dir = tts_dir
self.speakers_path = None
self.tts_key = f"{self.session['tts_engine']}-{self.session['fine_tuned']}"
self.tts_vc_key = default_vc_model.rsplit('/', 1)[-1]
self.is_bf16 = True if self.session['device'] == 'cuda' and torch.cuda.is_bf16_supported() == True else False
self.npz_path = None
self.npz_data = None
self.engine = None
self.tts_zs_key = default_vc_model.rsplit('/',1)[-1]
self.engine_zs = None
self.pth_voice_file = None
self.sentences_total_time = 0.0
self.sentence_idx = 1
self.params = {TTS_ENGINES['NEW_TTS']: {}}
self.params={TTS_ENGINES['XXX']:{}
self.params[self.session['tts_engine']]['samplerate'] = models[self.session['tts_engine']][self.session['fine_tuned']]['samplerate']
self.vtt_path = os.path.join(self.session['process_dir'], os.path.splitext(self.session['final_name'])[0] + '.vtt')
self.vtt_path = os.path.join(self.session['process_dir'],Path(self.session['final_name']).stem+'.vtt')
self.resampler_cache = {}
self.audio_segments = []
self._build()
if not xtts_builtin_speakers_list:
self.speakers_path = hf_hub_download(repo_id=models[TTS_ENGINES['XXX']]['internal']['repo'], filename=default_engine_settings[TTS_ENGINES['XXX']]['files'][4], cache_dir=self.cache_dir)
xtts_builtin_speakers_list = torch.load(self.speakers_path)
using_gpu = self.session['device'] != devices['CPU']['proc']
enough_vram = self.session['free_vram_gb'] > 4.0
if using_gpu and enough_vram:
if devices['CUDA']['found'] or devices['ROCM']['found']:
torch.cuda.set_per_process_memory_fraction(0.95)
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
else:
if devices['CUDA']['found'] or devices['ROCM']['found']:
torch.cuda.set_per_process_memory_fraction(0.7)
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.allow_tf32 = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
self._load_engine()
self._load_engine_zs()
except Exception as e:
error = f'__init__() error: {e}'
print(error)
def _load_api(self, key:str, model_path:str, device:str)->Any:
global lock
try:
with lock:
unload_tts()
from XXX import TTS as TTSEngine
engine = loaded_tts.get(key, False)
if not engine:
###########
###### Load XXX api
# engine =
###########
if engine:
loaded_tts[key] = engine
return engine
except Exception as e:
error = f"_load_api() error: {e}"
print(error)
return None
def _build(self):
try:
tts = (loaded_tts.get(self.tts_key) or {}).get('engine', False)
if not tts:
if self.session['tts_engine'] == TTS_ENGINES['NEW_TTS']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
else:
model_path = models[self.session['tts_engine']][self.session['fine_tuned']]['repo']
tts = self._load_api(self.tts_key, model_path, self.session['device'])
return (loaded_tts.get(self.tts_key) or {}).get('engine', False)
except Exception as e:
error = f'build() error: {e}'
print(error)
return False
def _load_api(self, key, model_path, device):
def _load_checkpoint(self,**kwargs:Any)->Any:
global lock
try:
if key in loaded_tts.keys():
return loaded_tts[key]['engine']
unload_tts(device, [self.tts_key, self.tts_vc_key])
with lock:
tts = NEW_TTS(model_path)
if tts
if device == 'cuda':
NEW_TTS.WITH_CUDA
else:
NEW_TTS.WITHOUT_CUDA
loaded_tts[key] = {"engine": tts, "config": None}
msg = f'{model_path} Loaded!'
print(msg)
return tts
else:
error = 'TTS engine could not be created!'
print(error)
except Exception as e:
error = f'_load_api() error: {e}'
print(error)
return False
def _load_checkpoint(self, **kwargs):
global lock
try:
key = kwargs.get('key')
if key in loaded_tts.keys():
return loaded_tts[key]['engine']
tts_engine = kwargs.get('tts_engine')
device = kwargs.get('device')
unload_tts(device, [self.tts_key])
with lock:
checkpoint_dir = kwargs.get('checkpoint_dir')
NEW_TTS.LOAD_CHECKPOINT(
config,
checkpoint_dir=checkpoint_dir,
eval=True
)
if tts:
if device == 'cuda':
NEW_TTS.WITH_CUDA
else:
NEW_TTS.WITHOUT_CUDA
loaded_tts[key] = {"engine": tts, "config": config}
msg = f'{tts_engine} Loaded!'
print(msg)
return tts
else:
error = 'TTS engine could not be created!'
print(error)
key = kwargs.get('key')
device = kwargs.get('device')
unload_tts()
engine = loaded_tts.get(key, False)
if not engine:
engine_name = kwargs.get('tts_engine', None)
if engine_name == TTS_ENGINES['XXX']:
from XXX import XXXConfig
from XXX import XXXtts
checkpoint_path = kwargs.get('checkpoint_path')
config_path = kwargs.get('config_path',None)
vocab_path = kwargs.get('vocab_path',None)
if not checkpoint_path or not os.path.exists(checkpoint_path):
raise FileNotFoundError(f"Missing or invalid checkpoint_path: {checkpoint_path}")
return False
if not config_path or not os.path.exists(config_path):
raise FileNotFoundError(f"Missing or invalid config_path: {config_path}")
return False
###########
###### Load XXX checkpoint
# engine =
###########
)
if engine:
loaded_tts[key] = engine
return engine
except Exception as e:
error = f'_load_checkpoint() error: {e}'
return False
print(error)
return None
def _load_engine(self)->None:
try:
msg = f"Loading TTS {self.tts_key} model, it takes a while, please be patient..."
print(msg)
cleanup_garbage()
self.engine = loaded_tts.get(self.tts_key, False)
if not self.engine:
if self.session['tts_engine'] == TTS_ENGINES['XXX']:
if self.session['custom_model'] is not None:
config_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][0])
checkpoint_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][1])
vocab_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'],default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][2])
self.tts_key = f"{self.session['tts_engine']}-{self.session['custom_model']}"
self.engine = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=self.session['device'])
if self.engine:
self.session['model_cache'] = self.tts_key
msg = f'TTS {key} Loaded!'
except Exception as e:
error = f'_load_engine() error: {e}'
def _load_engine_zs(self)->Any:
try:
msg = f"Loading ZeroShot {self.tts_zs_key} model, it takes a while, please be patient..."
print(msg)
cleanup_garbage()
self.engine_zs = loaded_tts.get(self.tts_zs_key, False)
if not self.engine_zs:
self.engine_zs = self._load_api(self.tts_zs_key, default_vc_model, self.session['device'])
if self.engine_zs:
self.session['model_zs_cache'] = self.tts_zs_key
msg = f'ZeroShot {key} Loaded!'
except Exception as e:
error = f'_load_engine_zs() error: {e}'
def _check_xtts_builtin_speakers(self, voice_path:str, speaker:str, device:str)->str|bool:
try:
voice_parts = Path(voice_path).parts
if(self.session['language'] not in voice_parts and speaker not in default_engine_settings[TTS_ENGINES['BARK']]['voices'].keys() and self.session['language'] != 'eng'):
if self.session['language'] in language_tts[TTS_ENGINES['XTTSv2']].keys():
default_text_file = os.path.join(voices_dir, self.session['language'], 'default.txt')
if os.path.exists(default_text_file):
msg = f"Converting builtin eng voice to {self.session['language']}..."
print(msg)
key = f"{TTS_ENGINES['XTTSv2']}-internal"
default_text = Path(default_text_file).read_text(encoding="utf-8")
cleanup_garbage()
engine = loaded_tts.get(key, False)
if not engine:
hf_repo = models[TTS_ENGINES['XTTSv2']]['internal']['repo']
hf_sub = ''
config_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][0]}", cache_dir=self.cache_dir)
checkpoint_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][1]}", cache_dir=self.cache_dir)
vocab_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][2]}", cache_dir=self.cache_dir)
engine = self._load_checkpoint(tts_engine=TTS_ENGINES['XTTSv2'], key=key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=device)
if engine:
if speaker in default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'].keys():
gpt_cond_latent, speaker_embedding = xtts_builtin_speakers_list[default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'][speaker]].values()
else:
gpt_cond_latent, speaker_embedding = engine.get_conditioning_latents(audio_path=[voice_path])
fine_tuned_params = {
key.removeprefix("xtts_"): cast_type(self.session[key])
for key, cast_type in {
"xtts_temperature": float,
"xtts_length_penalty": float,
"xtts_num_beams": int,
"xtts_repetition_penalty": float,
"xtts_top_k": int,
"xtts_top_p": float,
"xtts_speed": float,
"xtts_enable_text_splitting": bool,
}.items()
if self.session.get(key) is not None
}
with torch.no_grad():
result = engine.inference(
text=default_text.strip(),
language=self.session['language_iso1'],
gpt_cond_latent=gpt_cond_latent,
speaker_embedding=speaker_embedding,
**fine_tuned_params,
)
audio_sentence = result.get('wav') if isinstance(result, dict) else None
if audio_sentence is not None:
audio_sentence = audio_sentence.tolist()
sourceTensor = self._tensor_type(audio_sentence)
audio_tensor = sourceTensor.clone().detach().unsqueeze(0).cpu()
# CON is a reserved name on windows
lang_dir = 'con-' if self.session['language'] == 'con' else self.session['language']
new_voice_path = re.sub(r'([\\/])eng([\\/])', rf'\1{lang_dir}\2', voice_path)
proc_voice_path = new_voice_path.replace('.wav', '_temp.wav')
torchaudio.save(proc_voice_path, audio_tensor, default_engine_settings[TTS_ENGINES['XTTSv2']]['samplerate'], format='wav')
if normalize_audio(proc_voice_path, new_voice_path, default_audio_proc_samplerate, self.session['is_gui_process']):
del audio_sentence, sourceTensor, audio_tensor
Path(proc_voice_path).unlink(missing_ok=True)
gc.collect()
return new_voice_path
else:
error = 'normalize_audio() error:'
else:
error = f'No audio waveform found in _check_xtts_builtin_speakers() result: {result}'
else:
error = f"_check_xtts_builtin_speakers() error: {TTS_ENGINES['XTTSv2']} is False"
else:
error = f'The translated {default_text_file} could not be found! Voice cloning file will stay in English.'
print(error)
return False
else:
return voice_path
else:
return voice_path
except Exception as e:
error = f'_check_xtts_builtin_speakers() error: {e}'
print(error)
return False
def _tensor_type(self, audio_data):
if isinstance(audio_data, torch.Tensor):
def _tensor_type(self,audio_data:Any)->torch.Tensor:
if isinstance(audio_data,torch.Tensor):
return audio_data
elif isinstance(audio_data, np.ndarray):
elif isinstance(audio_data,np.ndarray):
return torch.from_numpy(audio_data).float()
elif isinstance(audio_data, list):
return torch.tensor(audio_data, dtype=torch.float32)
elif isinstance(audio_data,list):
return torch.tensor(audio_data,dtype=torch.float32)
else:
raise TypeError(f"Unsupported type for audio_data: {type(audio_data)}")
def _get_resampler(self, orig_sr, target_sr):
key = (orig_sr, target_sr)
def _get_resampler(self,orig_sr:int,target_sr:int)->torchaudio.transforms.Resample:
key=(orig_sr,target_sr)
if key not in self.resampler_cache:
self.resampler_cache[key] = torchaudio.transforms.Resample(
orig_freq=orig_sr, new_freq=target_sr
self.resampler_cache[key]=torchaudio.transforms.Resample(
orig_freq = orig_sr,new_freq = target_sr
)
return self.resampler_cache[key]
def _resample_wav(self, wav_path, expected_sr):
waveform, orig_sr = torchaudio.load(wav_path)
if orig_sr == expected_sr and waveform.size(0) == 1:
def _resample_wav(self,wav_path:str,expected_sr:int)->str:
waveform,orig_sr = torchaudio.load(wav_path)
if orig_sr==expected_sr and waveform.size(0)==1:
return wav_path
if waveform.size(0) > 1:
waveform = waveform.mean(dim=0, keepdim=True)
if orig_sr != expected_sr:
resampler = self._get_resampler(orig_sr, expected_sr)
if waveform.size(0)>1:
waveform = waveform.mean(dim=0,keepdim=True)
if orig_sr!=expected_sr:
resampler = self._get_resampler(orig_sr,expected_sr)
waveform = resampler(waveform)
wav_tensor = waveform.squeeze(0)
wav_numpy = wav_tensor.cpu().numpy()
tmp_fh = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
os.path.join(self.session['process_dir'], 'tmp')
os.makedirs(tmp_dir, exist_ok=True)
tmp_fh = tempfile.NamedTemporaryFile(dir=tmp_dir, suffix=".wav", delete=False)
tmp_path = tmp_fh.name
tmp_fh.close()
sf.write(tmp_path, wav_numpy, expected_sr, subtype="PCM_16")
sf.write(tmp_path,wav_numpy,expected_sr,subtype="PCM_16")
return tmp_path
def convert(self, sentence_number, sentence):
def convert(self, sentence_index:int, sentence:str)->bool:
global xtts_builtin_speakers_list
try:
speaker = None
audio_data = False
trim_audio_buffer = 0.004
audio_sentence = False
settings = self.params[self.session['tts_engine']]
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_number}.{default_audio_proc_format}')
sentence = sentence.strip()
settings['voice_path'] = (
self.session['voice'] if self.session['voice'] is not None
else os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], 'ref.wav') if self.session['custom_model'] is not None
@@ -177,56 +296,112 @@ class Coqui:
)
if settings['voice_path'] is not None:
speaker = re.sub(r'\.wav$', '', os.path.basename(settings['voice_path']))
tts = (loaded_tts.get(self.tts_key) or {}).get('engine', False)
if tts:
if sentence[-1].isalnum():
sentence = f'{sentence}'
if settings['voice_path'] not in default_engine_settings[TTS_ENGINES['BARK']]['voices'].keys() and os.path.basename(settings['voice_path']) != 'ref.wav':
self.session['voice'] = settings['voice_path'] = self._check_xtts_builtin_speakers(settings['voice_path'], speaker, self.session['device'])
if not settings['voice_path']:
msg = f"Could not create the builtin speaker selected voice in {self.session['language']}"
print(msg)
return False
if self.engine:
self.engine.to(self.session['device'])
trim_audio_buffer = 0.004
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
if sentence == TTS_SML['break']:
break_tensor = torch.zeros(1, int(settings['samplerate'] * (int(np.random.uniform(0.3, 0.6) * 100) / 100))) # 0.4 to 0.7 seconds
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(settings['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
self.audio_segments.append(break_tensor.clone())
return True
elif sentence == TTS_SML['pause']:
pause_tensor = torch.zeros(1, int(settings['samplerate'] * (int(np.random.uniform(1.0, 1.8) * 100) / 100))) # 1.0 to 1.8 seconds
elif not sentence.replace('', '').strip() or sentence == TTS_SML['pause']:
silence_time = int(np.random.uniform(1.0, 1.8) * 100) / 100
pause_tensor = torch.zeros(1, int(settings['samplerate'] * silence_time)) # 1.0 to 1.8 seconds
self.audio_segments.append(pause_tensor.clone())
return True
else:
if self.session['tts_engine'] == TTS_ENGINES['NEW_TTS']:
audio_sentence = NEW_TTS.CONVERT() # audio_sentence must be torch.Tensor or (list, tuple) or np.ndarray
if sentence[-1].isalnum():
sentence = f'{sentence}'
elif sentence.endswith("'"):
sentence = sentence[:-1]
if self.session['tts_engine'] == TTS_ENGINES['XXX']:
trim_audio_buffer = 0.008
if settings['voice_path'] is not None and settings['voice_path'] in settings['latent_embedding'].keys():
settings['gpt_cond_latent'], settings['speaker_embedding'] = settings['latent_embedding'][settings['voice_path']]
else:
msg = 'Computing speaker latents...'
print(msg)
if speaker in default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'].keys():
settings['gpt_cond_latent'], settings['speaker_embedding'] = xtts_builtin_speakers_list[default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'][speaker]].values()
else:
settings['gpt_cond_latent'], settings['speaker_embedding'] = self.engine.get_conditioning_latents(audio_path=[settings['voice_path']])
settings['latent_embedding'][settings['voice_path']] = settings['gpt_cond_latent'], settings['speaker_embedding']
fine_tuned_params = {
key.removeprefix("xxx_"): cast_type(self.session[key])
for key, cast_type in {
"xxx_temperature": float,
"xxx_length_penalty": float,
"xxx_num_beams": int,
"xxx_repetition_penalty": float,
"xxx_top_k": int,
"xxx_top_p": float,
"xxx_speed": float,
"xxx_enable_text_splitting": bool
}.items()
if self.session.get(key) is not None
}
with torch.no_grad():
result = self.engine.inference(
text=sentence.replace('.', ''),
language=self.session['language_iso1'],
gpt_cond_latent=settings['gpt_cond_latent'],
speaker_embedding=settings['speaker_embedding'],
**fine_tuned_params
)
audio_sentence = result.get('wav')
if is_audio_data_valid(audio_sentence):
audio_sentence = audio_sentence.tolist()
if is_audio_data_valid(audio_sentence):
sourceTensor = self._tensor_type(audio_sentence)
audio_tensor = sourceTensor.clone().detach().unsqueeze(0).cpu()
if sentence[-1].isalnum() or sentence[-1] == '':
audio_tensor = trim_audio(audio_tensor.squeeze(), settings['samplerate'], 0.003, trim_audio_buffer).unsqueeze(0)
self.audio_segments.append(audio_tensor)
if not re.search(r'\w$', sentence, flags=re.UNICODE):
break_tensor = torch.zeros(1, int(settings['samplerate'] * (int(np.random.uniform(0.3, 0.6) * 100) / 100)))
self.audio_segments.append(break_tensor.clone())
if self.audio_segments:
audio_tensor = torch.cat(self.audio_segments, dim=-1)
start_time = self.sentences_total_time
duration = audio_tensor.shape[-1] / settings['samplerate']
end_time = start_time + duration
self.sentences_total_time = end_time
sentence_obj = {
"start": start_time,
"end": end_time,
"text": sentence,
"resume_check": self.sentence_idx
}
self.sentence_idx = append_sentence2vtt(sentence_obj, self.vtt_path)
if self.sentence_idx:
torchaudio.save(final_sentence_file, audio_tensor, settings['samplerate'], format=default_audio_proc_format)
del audio_tensor
self.audio_segments = []
if os.path.exists(final_sentence_file):
return True
else:
error = f"Cannot create {final_sentence_file}"
print(error)
audio_tensor = trim_audio(audio_tensor.squeeze(), settings['samplerate'], 0.001, trim_audio_buffer).unsqueeze(0)
if audio_tensor is not None and audio_tensor.numel() > 0:
self.audio_segments.append(audio_tensor)
if not re.search(r'\w$', sentence, flags=re.UNICODE) and sentence[-1] != '':
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(settings['samplerate'] * silence_time))
self.audio_segments.append(break_tensor.clone())
if self.audio_segments:
audio_tensor = torch.cat(self.audio_segments, dim=-1)
start_time = self.sentences_total_time
duration = round((audio_tensor.shape[-1] / settings['samplerate']), 2)
end_time = start_time + duration
self.sentences_total_time = end_time
sentence_obj = {
"start": start_time,
"end": end_time,
"text": sentence,
"resume_check": self.sentence_idx
}
self.sentence_idx = append_sentence2vtt(sentence_obj, self.vtt_path)
if self.sentence_idx:
torchaudio.save(final_sentence_file, audio_tensor, settings['samplerate'], format=default_audio_proc_format)
del audio_tensor
cleanup_garbage()
self.audio_segments = []
if os.path.exists(final_sentence_file):
return True
else:
error = f"Cannot create {final_sentence_file}"
print(error)
return False
else:
error = f"audio_sentence not valide"
print(error)
return False
else:
error = f"convert() error: {self.session['tts_engine']} is None"
error = f"TTS engine {self.session['tts_engine']} could not be loaded!\nPossible reason can be not enough VRAM/RAM memory"
print(error)
return False
except Exception as e:
error = f'Coquit.convert(): {e}'
error = f'XXX.convert(): {e}'
raise ValueError(e)
return False
return False

View File

@@ -2,13 +2,16 @@ import numpy as np
import torch
import subprocess
import shutil
import json
from torch import Tensor
from typing import Any, Optional, Union, Callable
from typing import Any, Union
from scipy.io import wavfile as wav
from scipy.signal import find_peaks
def detect_gender(voice_path:str)->Optional[str]:
from lib.classes.subprocess_pipe import SubprocessPipe
def detect_gender(voice_path:str)->str|None:
try:
samplerate, signal = wav.read(voice_path)
# Ensure mono
@@ -57,7 +60,29 @@ def trim_audio(audio_data: Union[list[float], Tensor], samplerate: int, silence_
raise TypeError(error)
return torch.tensor([], dtype=torch.float32)
def normalize_audio(input_file:str, output_file:str, samplerate:int)->bool:
def get_audio_duration(filepath:str)->float:
try:
ffprobe_cmd = [
shutil.which('ffprobe'),
'-v', 'error',
'-show_entries', 'format=duration',
'-of', 'json',
filepath
]
result = subprocess.run(ffprobe_cmd, capture_output=True, text=True)
try:
return float(json.loads(result.stdout)['format']['duration'])
except Exception:
return 0
except subprocess.CalledProcessError as e:
DependencyError(e)
return 0
except Exception as e:
error = f"get_audio_duration() Error: Failed to process {txt_file}{out_file}: {e}"
print(error)
return 0
def normalize_audio(input_file:str, output_file:str, samplerate:int, is_gui_process:bool)->bool:
filter_complex = (
'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
'afftdn=nf=-70,'
@@ -70,24 +95,17 @@ def normalize_audio(input_file:str, output_file:str, samplerate:int)->bool:
'equalizer=f=9000:t=q:w=2:g=-2,'
'highpass=f=63[audio]'
)
ffmpeg_cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', input_file]
ffmpeg_cmd += [
cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', input_file]
cmd += [
'-filter_complex', filter_complex,
'-map', '[audio]',
'-ar', str(samplerate),
'-y', output_file
]
try:
subprocess.run(
ffmpeg_cmd,
env={},
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
encoding='utf-8',
errors='ignore'
)
proc_pipe = SubprocessPipe(cmd, is_gui_process=is_gui_process, total_duration=get_audio_duration(input_file), msg='Normalize')
if proc_pipe:
return True
except subprocess.CalledProcessError as e:
else:
error = f"normalize_audio() error: {input_file}: {e}"
print(error)
return False

View File

@@ -1,31 +1,35 @@
import os
import gc
import torch
import regex as re
import stanza
from typing import Any, Optional, Union, Callable
from lib.models import loaded_tts, max_tts_in_memory, TTS_ENGINES
from typing import Any, Union
from lib.models import loaded_tts, TTS_ENGINES
from lib.functions import context
def unload_tts(device:str, reserved_keys:Optional[list[str]] = None, tts_key:Optional[str] = None)->bool:
def cleanup_garbage():
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
torch.cuda.synchronize()
def unload_tts()->None:
try:
if len(loaded_tts) >= max_tts_in_memory:
if reserved_keys is None:
reserved_keys = []
if tts_key is not None:
if tts_key in loaded_tts:
del loaded_tts[tts_key]
if device == "cuda":
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
else:
for key in list(loaded_tts.keys()):
if key not in reserved_keys:
del loaded_tts[key]
return True
active_models = {
cache
for session in context.sessions.values()
for cache in (session.get('model_cache'), session.get('model_zs_cache'), session.get('stanza_cache'))
if cache is not None
}
for key in list(loaded_tts.keys()):
if key not in active_models:
del loaded_tts[key]
cleanup_garbage()
except Exception as e:
error = f"unload_tts() error: {e}"
print(error)
return False
def append_sentence2vtt(sentence_obj:dict[str, Any], path:str)->Union[int, bool]:

View File

@@ -1,74 +1,170 @@
import torch
from typing import Any, Optional, Union, Callable
_original_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs.setdefault("weights_only", False)
return _original_load(*args, **kwargs)
torch.load = patched_torch_load
import hashlib, math, os, shutil, subprocess, tempfile, threading, uuid
import numpy as np, regex as re, soundfile as sf, torchaudio
import gc
from typing import Any
from multiprocessing.managers import DictProxy
from torch import Tensor
from huggingface_hub import hf_hub_download
from pathlib import Path
from pprint import pprint
from lib import *
from lib.classes.tts_engines.common.utils import unload_tts, append_sentence2vtt
from lib.classes.tts_engines.common.utils import cleanup_garbage, unload_tts, append_sentence2vtt
from lib.classes.tts_engines.common.audio_filters import detect_gender, trim_audio, normalize_audio, is_audio_data_valid
#import logging
#logging.basicConfig(level=logging.DEBUG)
lock = threading.Lock()
xtts_builtin_speakers_list = None
class Coqui:
def __init__(self,session:Any):
def __init__(self,session:DictProxy):
try:
global xtts_builtin_speakers_list
self.session = session
self.cache_dir = tts_dir
self.speakers_path = None
self.tts = None
self.tts_key = f"{self.session['tts_engine']}-{self.session['fine_tuned']}"
self.tts_vc_key = default_vc_model.rsplit('/',1)[-1]
self.is_bf16 = True if self.session['device'] == 'cuda' and torch.cuda.is_bf16_supported()==True else False
self.npz_path = None
self.npz_data = None
self.engine = None
self.tts_zs_key = default_vc_model.rsplit('/',1)[-1]
self.engine_zs = None
self.pth_voice_file = None
self.sentences_total_time = 0.0
self.sentence_idx = 1
self.params={TTS_ENGINES['XTTSv2']:{"latent_embedding":{}},TTS_ENGINES['BARK']:{},TTS_ENGINES['VITS']:{"semitones":{}},TTS_ENGINES['FAIRSEQ']:{"semitones":{}},TTS_ENGINES['TACOTRON2']:{"semitones":{}},TTS_ENGINES['YOURTTS']:{}}
self.params[self.session['tts_engine']]['samplerate']=models[self.session['tts_engine']][self.session['fine_tuned']]['samplerate']
self.params[self.session['tts_engine']]['samplerate'] = models[self.session['tts_engine']][self.session['fine_tuned']]['samplerate']
self.vtt_path = os.path.join(self.session['process_dir'],Path(self.session['final_name']).stem+'.vtt')
self.resampler_cache={}
self.audio_segments=[]
self._build()
self.resampler_cache = {}
self.audio_segments = []
if not xtts_builtin_speakers_list:
self.speakers_path = hf_hub_download(repo_id=models[TTS_ENGINES['XTTSv2']]['internal']['repo'], filename=default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][4], cache_dir=self.cache_dir)
xtts_builtin_speakers_list = torch.load(self.speakers_path)
using_gpu = self.session['device'] != devices['CPU']['proc']
enough_vram = self.session['free_vram_gb'] > 4.0
if using_gpu and enough_vram:
if devices['CUDA']['found'] or devices['ROCM']['found']:
torch.cuda.set_per_process_memory_fraction(0.95)
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
else:
if devices['CUDA']['found'] or devices['ROCM']['found']:
torch.cuda.set_per_process_memory_fraction(0.7)
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.allow_tf32 = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
self._load_engine()
self._load_engine_zs()
except Exception as e:
error = f'__init__() error: {e}'
print(error)
def _build(self)->bool:
def _load_api(self, key:str, model_path:str, device:str)->Any:
global lock
try:
global xtts_builtin_speakers_list
load_zeroshot = True if self.session['tts_engine'] in [TTS_ENGINES['VITS'], TTS_ENGINES['FAIRSEQ'], TTS_ENGINES['TACOTRON2']] else False
self.tts = (loaded_tts.get(self.tts_key) or {}).get('engine', False)
if not self.tts:
if xtts_builtin_speakers_list is None:
self.speakers_path = hf_hub_download(repo_id=models[TTS_ENGINES['XTTSv2']]['internal']['repo'], filename=default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][4], cache_dir=self.cache_dir)
xtts_builtin_speakers_list = torch.load(self.speakers_path)
with lock:
unload_tts()
from TTS.api import TTS as TTSEngine
engine = loaded_tts.get(key, False)
if not engine:
engine = TTSEngine(model_path)
if engine:
loaded_tts[key] = engine
return engine
except Exception as e:
error = f"_load_api() error: {e}"
print(error)
return None
def _load_checkpoint(self,**kwargs:Any)->Any:
global lock
try:
with lock:
key = kwargs.get('key')
device = kwargs.get('device')
unload_tts()
engine = loaded_tts.get(key, False)
if not engine:
engine_name = kwargs.get('tts_engine', None)
if engine_name == TTS_ENGINES['XTTSv2']:
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
checkpoint_path = kwargs.get('checkpoint_path')
config_path = kwargs.get('config_path',None)
vocab_path = kwargs.get('vocab_path',None)
if not checkpoint_path or not os.path.exists(checkpoint_path):
raise FileNotFoundError(f"Missing or invalid checkpoint_path: {checkpoint_path}")
return False
if not config_path or not os.path.exists(config_path):
raise FileNotFoundError(f"Missing or invalid config_path: {config_path}")
return False
config = XttsConfig()
config.models_dir = os.path.join("models","tts")
config.load_json(config_path)
engine = Xtts.init_from_config(config)
engine.load_checkpoint(
config,
checkpoint_path = checkpoint_path,
vocab_path = vocab_path,
use_deepspeed = default_engine_settings[TTS_ENGINES['XTTSv2']]['use_deepspeed'] if self.session['device'] in [devices['CUDA']['proc'], devices['XPU']['proc'], devices['ROCM']['proc']] else False,
eval = True
)
elif engine_name == TTS_ENGINES['BARK']:
from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark
checkpoint_dir = kwargs.get('checkpoint_dir')
if not checkpoint_dir or not os.path.exists(checkpoint_dir):
raise FileNotFoundError(f"Missing or invalid checkpoint_dir: {checkpoint_dir}")
return False
config = BarkConfig()
config.CACHE_DIR = self.cache_dir
config.USE_SMALLER_MODELS = True if os.environ['SUNO_USE_SMALL_MODELS'] == 'True' else False
engine = Bark.init_from_config(config)
engine.load_checkpoint(
config,
checkpoint_dir = checkpoint_dir,
eval = True
)
if engine:
loaded_tts[key] = engine
return engine
except Exception as e:
error = f'_load_checkpoint() error: {e}'
print(error)
return None
def _load_engine(self)->None:
try:
msg = f"Loading TTS {self.tts_key} model, it takes a while, please be patient..."
print(msg)
cleanup_garbage()
self.engine = loaded_tts.get(self.tts_key, False)
if not self.engine:
if self.session['tts_engine'] == TTS_ENGINES['XTTSv2']:
msg = f"Loading TTS {self.session['tts_engine']} model, it takes a while, please be patient..."
print(msg)
if self.session['custom_model'] is not None:
config_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][0])
checkpoint_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][1])
vocab_path = os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'],default_engine_settings[TTS_ENGINES['XTTSv2']]['files'][2])
self.tts_key = f"{self.session['tts_engine']}-{self.session['custom_model']}"
self.tts = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=self.session['device'])
self.engine = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=self.session['device'])
else:
hf_repo = models[self.session['tts_engine']][self.session['fine_tuned']]['repo']
if self.session['fine_tuned'] == 'internal':
@@ -80,12 +176,11 @@ class Coqui:
config_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[self.session['tts_engine']][self.session['fine_tuned']]['files'][0]}", cache_dir=self.cache_dir)
checkpoint_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[self.session['tts_engine']][self.session['fine_tuned']]['files'][1]}", cache_dir=self.cache_dir)
vocab_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[self.session['tts_engine']][self.session['fine_tuned']]['files'][2]}", cache_dir=self.cache_dir)
self.tts = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=self.session['device'])
self.engine = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=self.session['device'])
elif self.session['tts_engine'] == TTS_ENGINES['BARK']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
else:
hf_repo = models[self.session['tts_engine']][self.session['fine_tuned']]['repo']
hf_sub = models[self.session['tts_engine']][self.session['fine_tuned']]['sub']
@@ -93,12 +188,11 @@ class Coqui:
coarse_model_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[self.session['tts_engine']][self.session['fine_tuned']]['files'][1]}", cache_dir=self.cache_dir)
fine_model_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[self.session['tts_engine']][self.session['fine_tuned']]['files'][2]}", cache_dir=self.cache_dir)
checkpoint_dir = os.path.dirname(text_model_path)
self.tts = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_dir=checkpoint_dir, device=self.session['device'])
self.engine = self._load_checkpoint(tts_engine=self.session['tts_engine'], key=self.tts_key, checkpoint_dir=checkpoint_dir, device=self.session['device'])
elif self.session['tts_engine'] == TTS_ENGINES['VITS']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
print(msg)
else:
iso_dir = language_tts[self.session['tts_engine']][self.session['language']]
sub_dict = models[self.session['tts_engine']][self.session['fine_tuned']]['sub']
@@ -106,28 +200,23 @@ class Coqui:
if sub is not None:
self.params[self.session['tts_engine']]['samplerate'] = models[TTS_ENGINES['VITS']][self.session['fine_tuned']]['samplerate'][sub]
model_path = models[self.session['tts_engine']][self.session['fine_tuned']]['repo'].replace("[lang_iso1]", iso_dir).replace("[xxx]", sub)
msg = f"Loading TTS {model_path} model, it takes a while, please be patient..."
print(msg)
self.tts_key = model_path
self.tts = self._load_api(self.tts_key, model_path, self.session['device'])
self.engine = self._load_api(self.tts_key, model_path, self.session['device'])
else:
msg = f"{self.session['tts_engine']} checkpoint for {self.session['language']} not found!"
print(msg)
return False
elif self.session['tts_engine'] == TTS_ENGINES['FAIRSEQ']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
else:
model_path = models[self.session['tts_engine']][self.session['fine_tuned']]['repo'].replace("[lang]", self.session['language'])
self.tts_key = model_path
self.tts = self._load_api(self.tts_key, model_path, self.session['device'])
self.engine = self._load_api(self.tts_key, model_path, self.session['device'])
elif self.session['tts_engine'] == TTS_ENGINES['TACOTRON2']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
print(msg)
else:
iso_dir = language_tts[self.session['tts_engine']][self.session['language']]
sub_dict = models[self.session['tts_engine']][self.session['fine_tuned']]['sub']
@@ -138,126 +227,39 @@ class Coqui:
sub = next((key for key, lang_list in sub_dict.items() if iso_dir in lang_list), None)
if sub is not None:
model_path = models[self.session['tts_engine']][self.session['fine_tuned']]['repo'].replace("[lang_iso1]", iso_dir).replace("[xxx]", sub)
msg = f"Loading TTS {model_path} model, it takes a while, please be patient..."
print(msg)
self.tts_key = model_path
self.tts = self._load_api(self.tts_key, model_path, self.session['device'])
self.engine = self._load_api(self.tts_key, model_path, self.session['device'])
else:
msg = f"{self.session['tts_engine']} checkpoint for {self.session['language']} not found!"
print(msg)
return False
elif self.session['tts_engine'] == TTS_ENGINES['YOURTTS']:
if self.session['custom_model'] is not None:
msg = f"{self.session['tts_engine']} custom model not implemented yet!"
print(msg)
return False
else:
model_path = models[self.session['tts_engine']][self.session['fine_tuned']]['repo']
self.tts = self._load_api(self.tts_key, model_path, self.session['device'])
if load_zeroshot:
tts_vc = (loaded_tts.get(self.tts_vc_key) or {}).get('engine', False)
if not tts_vc:
if self.session['voice'] is not None:
msg = f"Loading TTS {self.tts_vc_key} zeroshot model, it takes a while, please be patient..."
print(msg)
tts_vc = self._load_api(self.tts_vc_key, default_vc_model, self.session['device'])
return (loaded_tts.get(self.tts_key) or {}).get('engine', False)
self.engine = self._load_api(self.tts_key, model_path, self.session['device'])
if self.engine:
self.session['model_cache'] = self.tts_key
msg = f'TTS {key} Loaded!'
except Exception as e:
error = f'build() error: {e}'
print(error)
return False
error = f'_load_engine() error: {e}'
def _load_api(self, key: str, model_path: str, device: str) -> bool | Any:
global lock
def _load_engine_zs(self)->Any:
try:
if key in loaded_tts:
print(f"Reusing cached TTS engine for key: {key}")
tts = loaded_tts[key]['engine']
return tts
unload_tts(device, [self.tts_key, self.tts_vc_key])
from TTS.api import TTS as CoquiAPI
with lock:
print(f"Loading Coqui model from: {model_path}")
tts = CoquiAPI(model_path)
if not tts:
return False
if device == "cuda" and torch.cuda.is_available():
tts.cuda()
elif device == "mps" and torch.backends.mps.is_available():
tts.to(torch.device("mps"))
else:
tts.to(device)
loaded_tts[key] = {"engine": tts, "config": None}
msg = f"Model loaded successfully: {model_path} ({device})"
print(msg)
return tts
msg = f"Loading ZeroShot {self.tts_zs_key} model, it takes a while, please be patient..."
print(msg)
cleanup_garbage()
self.engine_zs = loaded_tts.get(self.tts_zs_key, False)
if not self.engine_zs:
self.engine_zs = self._load_api(self.tts_zs_key, default_vc_model, self.session['device'])
if self.engine_zs:
self.session['model_zs_cache'] = self.tts_zs_key
msg = f'ZeroShot {key} Loaded!'
except Exception as e:
error = f"_load_api() error: {e}"
print(error)
return False
def _load_checkpoint(self,**kwargs:Any)->bool|Any:
global lock
try:
key = kwargs.get('key')
if key in loaded_tts.keys():
return loaded_tts[key]['engine']
tts_engine = kwargs.get('tts_engine')
device = kwargs.get('device')
unload_tts(device,[self.tts_key,self.tts_vc_key])
with lock:
if tts_engine==TTS_ENGINES['XTTSv2']:
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
checkpoint_path = kwargs.get('checkpoint_path')
config_path = kwargs.get('config_path',None)
vocab_path = kwargs.get('vocab_path',None)
config = XttsConfig()
config.models_dir = os.path.join("models","tts")
config.load_json(config_path)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(
config,
checkpoint_path = checkpoint_path,
vocab_path = vocab_path,
use_deepspeed = default_engine_settings[TTS_ENGINES['XTTSv2']]['use_deepspeed'],
eval = True
)
elif tts_engine==TTS_ENGINES['BARK']:
from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark
checkpoint_dir = kwargs.get('checkpoint_dir')
config = BarkConfig()
config.CACHE_DIR = self.cache_dir
config.USE_SMALLER_MODELS = os.environ.get('SUNO_USE_SMALL_MODELS','').lower()=='true'
tts = Bark.init_from_config(config)
tts.load_checkpoint(
config,
checkpoint_dir = checkpoint_dir,
eval = True
)
if tts:
if device=='cuda':
tts.cuda()
else:
if device=='mps':
tts.to(torch.device('mps'))
else:
tts.to(device)
loaded_tts[key]={"engine":tts,"config":config}
msg = f'{tts_engine} Loaded!'
print(msg)
return tts
else:
error='TTS engine could not be created!'
print(error)
except Exception as e:
error = f'_load_checkpoint() error: {e}'
return False
error = f'_load_engine_zs() error: {e}'
def _check_xtts_builtin_speakers(self, voice_path:str, speaker:str, device:str)->str|bool:
def _valid_tensor(t:Any):
return isinstance(t, torch.Tensor) and not (torch.isnan(t).any() or torch.isinf(t).any())
try:
voice_parts = Path(voice_path).parts
if(self.session['language'] not in voice_parts and speaker not in default_engine_settings[TTS_ENGINES['BARK']]['voices'].keys() and self.session['language'] != 'eng'):
@@ -266,23 +268,22 @@ class Coqui:
if os.path.exists(default_text_file):
msg = f"Converting builtin eng voice to {self.session['language']}..."
print(msg)
tts_internal_key = f"{TTS_ENGINES['XTTSv2']}-internal"
key = f"{TTS_ENGINES['XTTSv2']}-internal"
default_text = Path(default_text_file).read_text(encoding="utf-8")
hf_repo = models[TTS_ENGINES['XTTSv2']]['internal']['repo']
hf_sub = ''
self.tts = (loaded_tts.get(tts_internal_key) or {}).get('engine', False)
if not self.tts:
for key in list(loaded_tts.keys()):
unload_tts(device, None, key)
cleanup_garbage()
engine = loaded_tts.get(key, False)
if not engine:
hf_repo = models[TTS_ENGINES['XTTSv2']]['internal']['repo']
hf_sub = ''
config_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][0]}", cache_dir=self.cache_dir)
checkpoint_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][1]}", cache_dir=self.cache_dir)
vocab_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[TTS_ENGINES['XTTSv2']]['internal']['files'][2]}", cache_dir=self.cache_dir)
self.tts = self._load_checkpoint(tts_engine=TTS_ENGINES['XTTSv2'], key=tts_internal_key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=device)
if self.tts:
engine = self._load_checkpoint(tts_engine=TTS_ENGINES['XTTSv2'], key=key, checkpoint_path=checkpoint_path, config_path=config_path, vocab_path=vocab_path, device=device)
if engine:
if speaker in default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'].keys():
gpt_cond_latent, speaker_embedding = xtts_builtin_speakers_list[default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'][speaker]].values()
else:
gpt_cond_latent, speaker_embedding = self.tts.get_conditioning_latents(audio_path=[voice_path])
gpt_cond_latent, speaker_embedding = engine.get_conditioning_latents(audio_path=[voice_path])
fine_tuned_params = {
key.removeprefix("xtts_"): cast_type(self.session[key])
for key, cast_type in {
@@ -298,27 +299,27 @@ class Coqui:
if self.session.get(key) is not None
}
with torch.no_grad():
result = self.tts.inference(
result = engine.inference(
text=default_text.strip(),
language=self.session['language_iso1'],
gpt_cond_latent=gpt_cond_latent,
speaker_embedding=speaker_embedding,
**fine_tuned_params,
)
audio_data = result.get('wav') if isinstance(result, dict) else None
if audio_data is not None:
audio_data = audio_data.tolist()
sourceTensor = self._tensor_type(audio_data)
audio_sentence = result.get('wav') if isinstance(result, dict) else None
if audio_sentence is not None:
audio_sentence = audio_sentence.tolist()
sourceTensor = self._tensor_type(audio_sentence)
audio_tensor = sourceTensor.clone().detach().unsqueeze(0).cpu()
# CON is a reserved name on windows
lang_dir = 'con-' if self.session['language'] == 'con' else self.session['language']
new_voice_path = re.sub(r'([\\/])eng([\\/])', rf'\1{lang_dir}\2', voice_path)
proc_voice_path = new_voice_path.replace('.wav', '_temp.wav')
torchaudio.save(proc_voice_path, audio_tensor, default_engine_settings[TTS_ENGINES['XTTSv2']]['samplerate'], format='wav')
if normalize_audio(proc_voice_path, new_voice_path, default_audio_proc_samplerate):
del audio_data, sourceTensor, audio_tensor
if self.session['tts_engine'] != TTS_ENGINES['XTTSv2']:
del self.tts
unload_tts(device, None, tts_internal_key)
if normalize_audio(proc_voice_path, new_voice_path, default_audio_proc_samplerate, self.session['is_gui_process']):
del audio_sentence, sourceTensor, audio_tensor
Path(proc_voice_path).unlink(missing_ok=True)
gc.collect()
return new_voice_path
else:
error = 'normalize_audio() error:'
@@ -329,6 +330,7 @@ class Coqui:
else:
error = f'The translated {default_text_file} could not be found! Voice cloning file will stay in English.'
print(error)
return False
else:
return voice_path
else:
@@ -336,68 +338,52 @@ class Coqui:
except Exception as e:
error = f'_check_xtts_builtin_speakers() error: {e}'
print(error)
return False
return False
def _check_bark_npz(self,voice_path:str,bark_dir:str,speaker:str,device:str)->bool:
def _check_bark_npz(self, voice_path:str, bark_dir:str, speaker:str, device:str)->bool:
try:
if self.session['language'] in language_tts[TTS_ENGINES['BARK']].keys():
npz_dir = os.path.join(bark_dir,speaker)
npz_file = os.path.join(npz_dir,f'{speaker}.npz')
if os.path.exists(npz_file):
pth_voice_dir = os.path.join(bark_dir, speaker)
pth_voice_file = os.path.join(pth_voice_dir,f'{speaker}.pth')
if os.path.exists(pth_voice_file):
return True
else:
os.makedirs(npz_dir,exist_ok=True)
tts_internal_key = f"{TTS_ENGINES['BARK']}-internal"
hf_repo = models[TTS_ENGINES['BARK']]['internal']['repo']
hf_sub = models[TTS_ENGINES['BARK']]['internal']['sub']
self.tts = (loaded_tts.get(tts_internal_key) or {}).get('engine',False)
if not self.tts:
for key in list(loaded_tts.keys()):unload_tts(device,None,key)
text_model_path = hf_hub_download(repo_id=hf_repo,filename=f"{hf_sub}{models[TTS_ENGINES['BARK']]['internal']['files'][0]}",cache_dir=self.cache_dir)
coarse_model_path = hf_hub_download(repo_id=hf_repo,filename=f"{hf_sub}{models[TTS_ENGINES['BARK']]['internal']['files'][1]}",cache_dir=self.cache_dir)
fine_model_path = hf_hub_download(repo_id=hf_repo,filename=f"{hf_sub}{models[TTS_ENGINES['BARK']]['internal']['files'][2]}",cache_dir=self.cache_dir)
checkpoint_dir = os.path.dirname(text_model_path)
self.tts = self._load_checkpoint(tts_engine=TTS_ENGINES['BARK'],key=tts_internal_key,checkpoint_dir=checkpoint_dir,device=device)
if self.tts:
voice_temp=os.path.splitext(npz_file)[0]+'.wav'
shutil.copy(voice_path,voice_temp)
default_text_file = os.path.join(voices_dir,self.session['language'],'default.txt')
default_text = Path(default_text_file).read_text(encoding="utf-8")
fine_tuned_params={
key.removeprefix("bark_"):cast_type(self.session[key])
for key,cast_type in{
"bark_text_temp":float,
"bark_waveform_temp":float
}.items()
if self.session.get(key) is not None
}
with torch.no_grad():
torch.manual_seed(67878789)
audio_data = self.tts.synthesize(
default_text,
loaded_tts[tts_internal_key]['config'],
speaker_id=speaker,
voice_dirs=bark_dir,
silent=True,
**fine_tuned_params
)
os.remove(voice_temp)
del audio_data
if self.session['tts_engine']!=TTS_ENGINES['BARK']:
del self.tts
unload_tts(device,None,tts_internal_key)
msg = f"Saved NPZ file: {npz_file}"
print(msg)
return True
else:
error = f'_check_bark_npz() error: {tts_internal_key} is False'
print(error)
os.makedirs(pth_voice_dir,exist_ok=True)
key = f"{TTS_ENGINES['BARK']}-internal"
voice_temp = os.path.splitext(pth_voice_file)[0]+'.wav'
shutil.copy(voice_path,voice_temp)
default_text_file = os.path.join(voices_dir, self.session['language'], 'default.txt')
default_text = Path(default_text_file).read_text(encoding="utf-8")
fine_tuned_params = {
key.removeprefix("bark_"):cast_type(self.session[key])
for key,cast_type in{
"bark_text_temp":float,
"bark_waveform_temp":float
}.items()
if self.session.get(key) is not None
}
with torch.no_grad():
#torch.manual_seed(67878789)
audio_sentence = self.engine.synthesize(
default_text,
speaker_wav=voice_path,
speaker=speaker,
voice_dir=pth_voice_dir,
silent=True,
**fine_tuned_params
)
os.remove(voice_temp)
del audio_sentence
msg = f"Saved file: {pth_voice_file}"
print(msg)
gc.collect()
return True
else:
return True
except Exception as e:
error = f'_check_bark_npz() error: {e}'
print(error)
return False
return False
def _tensor_type(self,audio_data:Any)->torch.Tensor:
if isinstance(audio_data,torch.Tensor):
@@ -428,22 +414,19 @@ class Coqui:
waveform = resampler(waveform)
wav_tensor = waveform.squeeze(0)
wav_numpy = wav_tensor.cpu().numpy()
tmp_fh = tempfile.NamedTemporaryFile(suffix=".wav",delete=False)
os.path.join(self.session['process_dir'], 'tmp')
os.makedirs(tmp_dir, exist_ok=True)
tmp_fh = tempfile.NamedTemporaryFile(dir=tmp_dir, suffix=".wav", delete=False)
tmp_path = tmp_fh.name
tmp_fh.close()
sf.write(tmp_path,wav_numpy,expected_sr,subtype="PCM_16")
return tmp_path
def convert(self, s_n:int, s:str)->bool:
global xtts_builtin_speakers_list
def convert(self, sentence_index:int, sentence:str)->bool:
try:
sentence_number = s_n
sentence = s
speaker = None
audio_data = False
trim_audio_buffer = 0.004
audio_sentence = False
settings = self.params[self.session['tts_engine']]
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_number}.{default_audio_proc_format}')
settings['voice_path'] = (
self.session['voice'] if self.session['voice'] is not None
else os.path.join(self.session['custom_model_dir'], self.session['tts_engine'], self.session['custom_model'], 'ref.wav') if self.session['custom_model'] is not None
@@ -457,8 +440,10 @@ class Coqui:
msg = f"Could not create the builtin speaker selected voice in {self.session['language']}"
print(msg)
return False
self.tts = (loaded_tts.get(self.tts_key) or {}).get('engine', False)
if self.tts:
if self.engine:
self.engine.to(self.session['device'])
trim_audio_buffer = 0.004
final_sentence_file = os.path.join(self.session['chapters_dir_sentences'], f'{sentence_index}.{default_audio_proc_format}')
if sentence == TTS_SML['break']:
silence_time = int(np.random.uniform(0.3, 0.6) * 100) / 100
break_tensor = torch.zeros(1, int(settings['samplerate'] * silence_time)) # 0.4 to 0.7 seconds
@@ -484,7 +469,7 @@ class Coqui:
if speaker in default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'].keys():
settings['gpt_cond_latent'], settings['speaker_embedding'] = xtts_builtin_speakers_list[default_engine_settings[TTS_ENGINES['XTTSv2']]['voices'][speaker]].values()
else:
settings['gpt_cond_latent'], settings['speaker_embedding'] = self.tts.get_conditioning_latents(audio_path=[settings['voice_path']])
settings['gpt_cond_latent'], settings['speaker_embedding'] = self.engine.get_conditioning_latents(audio_path=[settings['voice_path']])
settings['latent_embedding'][settings['voice_path']] = settings['gpt_cond_latent'], settings['speaker_embedding']
fine_tuned_params = {
key.removeprefix("xtts_"): cast_type(self.session[key])
@@ -501,7 +486,7 @@ class Coqui:
if self.session.get(key) is not None
}
with torch.no_grad():
result = self.tts.inference(
result = self.engine.inference(
text=sentence.replace('.', ''),
language=self.session['language_iso1'],
gpt_cond_latent=settings['gpt_cond_latent'],
@@ -530,10 +515,11 @@ class Coqui:
else:
bark_dir = os.path.join(os.path.dirname(settings['voice_path']), 'bark')
if not self._check_bark_npz(settings['voice_path'], bark_dir, speaker, self.session['device']):
error = 'Could not create npz file!'
error = 'Could not create pth file!'
print(error)
return False
npz_file = os.path.join(bark_dir, speaker, f'{speaker}.npz')
pth_voice_dir = os.path.join(bark_dir, speaker)
pth_voice_file = os.path.join(bark_dir, speaker, f'{speaker}.pth')
fine_tuned_params = {
key.removeprefix("bark_"): cast_type(self.session[key])
for key, cast_type in {
@@ -542,22 +528,16 @@ class Coqui:
}.items()
if self.session.get(key) is not None
}
if self.npz_path is None or self.npz_path != npz_file:
self.npz_path = npz_file
self.npz_data = np.load(self.npz_path, allow_pickle=True)
history_prompt = [
self.npz_data["semantic_prompt"],
self.npz_data["coarse_prompt"],
self.npz_data["fine_prompt"]
]
with torch.no_grad():
torch.manual_seed(67878789)
audio_sentence, _ = self.tts.generate_audio(
#torch.manual_seed(67878789)
result = self.engine.synthesize(
sentence,
history_prompt=history_prompt,
speaker=speaker,
voice_dir=pth_voice_dir,
silent=True,
**fine_tuned_params
)
audio_sentence = result.get('wav')
if is_audio_data_valid(audio_sentence):
audio_sentence = audio_sentence.tolist()
elif self.session['tts_engine'] == TTS_ENGINES['VITS']:
@@ -573,11 +553,12 @@ class Coqui:
os.makedirs(proc_dir, exist_ok=True)
tmp_in_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
tmp_out_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
self.tts.tts_to_file(
text=sentence,
file_path=tmp_in_wav,
**speaker_argument
)
with torch.no_grad():
self.engine.tts_to_file(
text=sentence,
file_path=tmp_in_wav,
**speaker_argument
)
if settings['voice_path'] in settings['semitones'].keys():
semitones = settings['semitones'][settings['voice_path']]
else:
@@ -612,17 +593,16 @@ class Coqui:
return False
else:
tmp_out_wav = tmp_in_wav
tts_vc = (loaded_tts.get(self.tts_vc_key) or {}).get('engine', False)
if tts_vc:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_vc_key]['samplerate']
if self.engine_zs:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_zs_key]['samplerate']
source_wav = self._resample_wav(tmp_out_wav, settings['samplerate'])
target_wav = self._resample_wav(settings['voice_path'], settings['samplerate'])
audio_sentence = tts_vc.voice_conversion(
audio_sentence = self.engine_zs.voice_conversion(
source_wav=source_wav,
target_wav=target_wav
)
else:
error = f'Engine {self.tts_vc_key} is None'
error = f'Engine {self.tts_zs_key} is None'
print(error)
return False
if os.path.exists(tmp_in_wav):
@@ -632,10 +612,11 @@ class Coqui:
if os.path.exists(source_wav):
os.remove(source_wav)
else:
audio_sentence = self.tts.tts(
text=sentence,
**speaker_argument
)
with torch.no_grad():
audio_sentence = self.engine.tts(
text=sentence,
**speaker_argument
)
elif self.session['tts_engine'] == TTS_ENGINES['FAIRSEQ']:
speaker_argument = {}
not_supported_punc_pattern = re.compile(r"[.:—]")
@@ -644,11 +625,12 @@ class Coqui:
os.makedirs(proc_dir, exist_ok=True)
tmp_in_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
tmp_out_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
self.tts.tts_to_file(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
file_path=tmp_in_wav,
**speaker_argument
)
with torch.no_grad():
self.engine.tts_to_file(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
file_path=tmp_in_wav,
**speaker_argument
)
if settings['voice_path'] in settings['semitones'].keys():
semitones = settings['semitones'][settings['voice_path']]
else:
@@ -672,26 +654,27 @@ class Coqui:
]
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except subprocess.CalledProcessError as e:
print(f"Subprocess error: {e.stderr}")
error = f'Subprocess error: {e.stderr}'
print(error)
DependencyError(e)
return False
except FileNotFoundError as e:
print(f"File not found: {e}")
error = f'File not found: {e}'
print(error)
DependencyError(e)
return False
else:
tmp_out_wav = tmp_in_wav
tts_vc = (loaded_tts.get(self.tts_vc_key) or {}).get('engine', False)
if tts_vc:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_vc_key]['samplerate']
if self.engine_zs:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_zs_key]['samplerate']
source_wav = self._resample_wav(tmp_out_wav, settings['samplerate'])
target_wav = self._resample_wav(settings['voice_path'], settings['samplerate'])
audio_sentence = tts_vc.voice_conversion(
audio_sentence = self.engine_zs.voice_conversion(
source_wav=source_wav,
target_wav=target_wav
)
else:
error = f'Engine {self.tts_vc_key} is None'
error = f'Engine {self.tts_zs_key} is None'
print(error)
return False
if os.path.exists(tmp_in_wav):
@@ -701,23 +684,28 @@ class Coqui:
if os.path.exists(source_wav):
os.remove(source_wav)
else:
audio_sentence = self.tts.tts(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
**speaker_argument
)
with torch.no_grad():
audio_sentence = self.engine.tts(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
**speaker_argument
)
elif self.session['tts_engine'] == TTS_ENGINES['TACOTRON2']:
speaker_argument = {}
not_supported_punc_pattern = re.compile(r'["—…¡¿]')
if self.session['language'] in ['zho', 'jpn', 'kor', 'tha', 'lao', 'mya', 'khm']:
not_supported_punc_pattern = re.compile(r'\p{P}+')
else:
not_supported_punc_pattern = re.compile(r'["—…¡¿]')
if settings['voice_path'] is not None:
proc_dir = os.path.join(self.session['voice_dir'], 'proc')
os.makedirs(proc_dir, exist_ok=True)
tmp_in_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
tmp_out_wav = os.path.join(proc_dir, f"{uuid.uuid4()}.wav")
self.tts.tts_to_file(
text=re.sub(not_supported_punc_pattern, '', sentence),
file_path=tmp_in_wav,
**speaker_argument
)
with torch.no_grad():
self.engine.tts_to_file(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
file_path=tmp_in_wav,
**speaker_argument
)
if settings['voice_path'] in settings['semitones'].keys():
semitones = settings['semitones'][settings['voice_path']]
else:
@@ -752,17 +740,16 @@ class Coqui:
return False
else:
tmp_out_wav = tmp_in_wav
tts_vc = (loaded_tts.get(self.tts_vc_key) or {}).get('engine', False)
if tts_vc:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_vc_key]['samplerate']
if self.engine_zs:
settings['samplerate'] = TTS_VOICE_CONVERSION[self.tts_zs_key]['samplerate']
source_wav = self._resample_wav(tmp_out_wav, settings['samplerate'])
target_wav = self._resample_wav(settings['voice_path'], settings['samplerate'])
audio_sentence = tts_vc.voice_conversion(
audio_sentence = self.engine_zs.voice_conversion(
source_wav=source_wav,
target_wav=target_wav
)
else:
error = f'Engine {self.tts_vc_key} is None'
error = f'Engine {self.tts_zs_key} is None'
print(error)
return False
if os.path.exists(tmp_in_wav):
@@ -772,10 +759,11 @@ class Coqui:
if os.path.exists(source_wav):
os.remove(source_wav)
else:
audio_sentence = self.tts.tts(
text=re.sub(not_supported_punc_pattern, '', sentence),
**speaker_argument
)
with torch.no_grad():
audio_sentence = self.engine.tts(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
**speaker_argument
)
elif self.session['tts_engine'] == TTS_ENGINES['YOURTTS']:
trim_audio_buffer = 0.002
speaker_argument = {}
@@ -788,8 +776,8 @@ class Coqui:
voice_key = default_engine_settings[TTS_ENGINES['YOURTTS']]['voices']['ElectroMale-2']
speaker_argument = {"speaker": voice_key}
with torch.no_grad():
audio_sentence = self.tts.tts(
text=re.sub(not_supported_punc_pattern, '', sentence),
audio_sentence = self.engine.tts(
text=re.sub(not_supported_punc_pattern, ' ', sentence),
language=language,
**speaker_argument
)
@@ -820,16 +808,23 @@ class Coqui:
if self.sentence_idx:
torchaudio.save(final_sentence_file, audio_tensor, settings['samplerate'], format=default_audio_proc_format)
del audio_tensor
cleanup_garbage()
self.audio_segments = []
if os.path.exists(final_sentence_file):
return True
else:
error = f"Cannot create {final_sentence_file}"
print(error)
return False
else:
error = f"audio_sentence not valide"
print(error)
return False
else:
error = f"convert() error: {self.session['tts_engine']} is None"
error = f"TTS engine {self.session['tts_engine']} could not be loaded!\nPossible reason can be not enough VRAM/RAM memory"
print(error)
return False
except Exception as e:
error = f'Coquit.convert(): {e}'
raise ValueError(e)
return False
return False

View File

@@ -1,12 +1,12 @@
import os
from typing import Any, Optional, Union, Callable
from typing import Any
from lib.models import TTS_ENGINES
class TTSManager:
def __init__(self, session:Any):
self.session = session
self.engine = None
self.engine = False
self._build()
def _build(self)->None:
@@ -17,9 +17,6 @@ class TTSManager:
#elif self.session['tts_engine'] in [TTS_ENGINES['NEW_TTS']]:
# from lib.classes.tts_engines.new_tts import NewTts
# self.engine = NewTts(self.session)
if not self.engine:
error='TTS engine could not be created!'
print(error)
else:
print('Other TTS engines coming soon!')
@@ -32,4 +29,3 @@ class TTSManager:
except Exception as e:
error=f'convert_sentence2audio(): {e}'
raise ValueError(e)
return False

View File

@@ -5,8 +5,9 @@ import scipy.fftpack
import soundfile as sf
import subprocess
import shutil
import json
from typing import Any, Optional, Union, Callable
from typing import Any
from io import BytesIO
from pydub import AudioSegment, silence
from pydub.silence import detect_silence
@@ -14,6 +15,7 @@ from pydub.silence import detect_silence
from lib.conf import voice_formats, default_audio_proc_samplerate
from lib.models import TTS_ENGINES, models
from lib.classes.background_detector import BackgroundDetector
from lib.classes.subprocess_pipe import SubprocessPipe
class VoiceExtractor:
def __init__(self, session:Any, voice_file:str, voice_name:str):
@@ -30,7 +32,7 @@ class VoiceExtractor:
def _validate_format(self)->tuple[bool,str]:
file_extension = os.path.splitext(self.voice_file)[1].lower()
if file_extension in voice_formats:
msg = 'Input file valid'
msg = 'Input file is valid'
return True,msg
error = f'Unsupported file format: {file_extension}. Supported formats are: {", ".join(voice_formats)}'
return False,error
@@ -38,33 +40,21 @@ class VoiceExtractor:
def _convert2wav(self)->tuple[bool, str]:
try:
self.wav_file = os.path.join(self.session['voice_dir'], f'{self.voice_name}.wav')
ffmpeg_cmd = [
cmd = [
shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', self.voice_file,
'-ac', '1', '-y', self.wav_file
]
process = subprocess.Popen(
ffmpeg_cmd,
env={},
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=False # <── raw bytes mode (no implicit UTF-8 decoding)
)
# Decode safely line by line
for raw_line in iter(process.stdout.readline, b''):
try:
line = raw_line.decode('utf-8', errors='replace') # <── replaces invalid bytes
except Exception:
line = raw_line.decode('latin-1', errors='replace')
print(line, end='')
process.wait()
if process.returncode != 0:
error = f'_convert2wav(): process.returncode: {process.returncode}'
elif not os.path.exists(self.wav_file) or os.path.getsize(self.wav_file) == 0:
error = f'_convert2wav output error: {self.wav_file} was not created or is empty.'
]
proc_pipe = SubprocessPipe(cmd, is_gui_process=self.session['is_gui_process'], total_duration=self._get_audio_duration(self.voice_file), msg='Convert')
if proc_pipe:
if not os.path.exists(self.wav_file) or os.path.getsize(self.wav_file) == 0:
error = f'_convert2wav output error: {self.wav_file} was not created or is empty.'
return False, error
else:
msg = 'Conversion to .wav format for processing successful'
return True, msg
else:
msg = 'Conversion to .wav format for processing successful'
return True, msg
error = f'_convert2wav() error:: {self.wav_file}'
return False, error
except subprocess.CalledProcessError as e:
try:
stderr_text = e.stderr.decode('utf-8', errors='replace')
@@ -201,12 +191,35 @@ class VoiceExtractor:
error = f'_trim_and_clean() error: {e}'
raise ValueError(error)
def _get_audio_duration(self, filepath:str)->float:
try:
cmd = [
shutil.which('ffprobe'),
'-v', 'error',
'-show_entries', 'format=duration',
'-of', 'json',
filepath
]
result = subprocess.run(cmd, capture_output=True, text=True)
try:
duration = json.loads(result.stdout)['format']['duration']
return float(duration)
except Exception:
return 0
except subprocess.CalledProcessError as e:
DependencyError(e)
return 0
except Exception as e:
error = f"get_audio_duration() Error: Failed to process {filepath}: {e}"
print(error)
return 0
def _normalize_audio(self)->tuple[bool, str]:
error = ''
try:
proc_voice_file = os.path.join(self.session['voice_dir'], f'{self.voice_name}_proc.wav')
final_voice_file = os.path.join(self.session['voice_dir'], f'{self.voice_name}.wav')
ffmpeg_cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', self.voice_track]
cmd = [shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-i', self.voice_track]
filter_complex = (
'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
'afftdn=nf=-70,'
@@ -219,33 +232,26 @@ class VoiceExtractor:
'equalizer=f=9000:t=q:w=2:g=-2,'
'highpass=f=63[audio]'
)
ffmpeg_cmd += [
cmd += [
'-filter_complex', filter_complex,
'-map', '[audio]',
'-ar', f'{default_audio_proc_samplerate}',
'-y', proc_voice_file
]
try:
process = subprocess.Popen(
ffmpeg_cmd,
env = {},
stdout = subprocess.PIPE,
stderr = subprocess.PIPE,
encoding = 'utf-8',
errors = 'ignore'
)
for line in process.stdout:
print(line, end = '')
process.wait()
if process.returncode != 0:
error = f'_normalize_audio(): process.returncode: {process.returncode}'
elif not os.path.exists(proc_voice_file) or os.path.getsize(proc_voice_file) == 0:
error = f'_normalize_audio() error: {proc_voice_file} was not created or is empty.'
proc_pipe = SubprocessPipe(cmd, is_gui_process=self.session['is_gui_process'], total_duration=self._get_audio_duration(self.voice_track), msg='Normalize')
if proc_pipe:
if not os.path.exists(proc_voice_file) or os.path.getsize(proc_voice_file) == 0:
error = f'_normalize_audio() error: {proc_voice_file} was not created or is empty.'
return False, error
else:
os.replace(proc_voice_file, final_voice_file)
shutil.rmtree(self.demucs_dir, ignore_errors = True)
msg = 'Audio normalization successful!'
return True, msg
else:
os.replace(proc_voice_file, final_voice_file)
shutil.rmtree(self.demucs_dir, ignore_errors = True)
msg = 'Audio normalization successful!'
return True, msg
error = f'normalize_audio() error: {final_voice_file}'
return False, error
except subprocess.CalledProcessError as e:
error = f'_normalize_audio() ffmpeg.Error: {e.stderr.decode()}'
except FileNotFoundError as e:

View File

@@ -1,145 +1,110 @@
import os, platform, subprocess, re, json, psutil, tempfile, time
from typing import Any, Optional, Union, Callable
import os, platform, json, psutil, subprocess, re
from typing import Any
class VRAMDetector:
def __init__(self):
self.system:str = platform.system().lower()
self.system = platform.system().lower()
def _run(self, cmd:list[str], timeout:int = 3)->str:
@staticmethod
def _fmt(b:int)->str:
if not b: return 'Unknown'
if b >= 1024**3: return f'{b/1024**3:.2f} GB'
if b >= 1024**2: return f'{b/1024**2:.2f} MB'
if b >= 1024: return f'{b/1024:.2f} KB'
return f'{b} B'
def detect_vram(self, device:str, as_json:bool=False)->Any:
info = {}
# ───────────────────────────── CUDA (NVIDIA)
try:
result = subprocess.run(cmd, stdout = subprocess.PIPE, stderr = subprocess.DEVNULL, text = True, timeout = timeout)
return result.stdout.strip()
import torch
if device == 'cuda':
if torch.cuda.is_available():
free, total = torch.cuda.mem_get_info()
alloc = torch.cuda.memory_allocated()
resv = torch.cuda.memory_reserved()
info = {
"os": self.system,
"device_type": "cuda",
"device_name": torch.cuda.get_device_name(0),
"free_bytes": free,
"total_bytes": total,
"allocated_bytes": alloc,
"reserved_bytes": resv,
"free_human": self._fmt(free),
"total_human": self._fmt(total),
"allocated_human": self._fmt(alloc),
"reserved_human": self._fmt(resv),
}
return json.dumps(info, indent=2) if as_json else info
# ─────────────────────────── ROCm (AMD)
if hasattr(torch, 'hip') and torch.hip.is_available():
free, total = torch.hip.mem_get_info()
alloc = torch.hip.memory_allocated()
resv = torch.hip.memory_reserved()
info = {
"os": self.system,
"device_type": "rocm",
"device_name": torch.hip.get_device_name(0),
"free_bytes": free,
"total_bytes": total,
"allocated_bytes": alloc,
"reserved_bytes": resv,
"free_human": self._fmt(free),
"total_human": self._fmt(total),
"allocated_human": self._fmt(alloc),
"reserved_human": self._fmt(resv),
}
return json.dumps(info, indent=2) if as_json else info
# ─────────────────────────── Intel XPU (oneAPI)
if hasattr(torch, 'xpu') and torch.xpu.is_available():
free, total = torch.xpu.mem_get_info()
alloc = torch.xpu.memory_allocated()
resv = torch.xpu.memory_reserved()
info = {
"os": self.system,
"device_type": "xpu",
"device_name": torch.xpu.get_device_name(0),
"free_bytes": free,
"total_bytes": total,
"allocated_bytes": alloc,
"reserved_bytes": resv,
"free_human": self._fmt(free),
"total_human": self._fmt(total),
"allocated_human": self._fmt(alloc),
"reserved_human": self._fmt(resv),
}
return json.dumps(info, indent=2) if as_json else info
# ─────────────────────────── Apple MPS (Metal)
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
info = {
"os": self.system,
"device_type": "mps",
"device_name": "Apple GPU (Metal)",
"note": "PyTorch MPS does not expose memory info; reporting system RAM",
}
mem = psutil.virtual_memory()
info['free_bytes'] = mem.available
info['total_bytes'] = mem.total
info['free_human'] = self._fmt(mem.available)
info['total_human'] = self._fmt(mem.total)
return json.dumps(info, indent=2) if as_json else info
except Exception:
return ""
pass
def _parse_bytes(self, val:str)->int:
if not val:
return 0
val = val.strip().upper()
m = re.findall(r"([\d.]+)", val)
if not m:
return 0
n = float(m[0])
if "GB" in val: return int(n*1024**3)
if "MB" in val: return int(n*1024**2)
if "KB" in val: return int(n*1024)
return int(n)
def _fmt(self, b:int)->str:
if not b: return "Unknown"
if b >= 1024**3: return f"{b/1024**3:.1f} GB"
if b >= 1024**2: return f"{b/1024**2:.1f} MB"
return f"{b} B"
# ---- Windows GPU detection ----
def _get_windows_vram(self)->list[dict[str,Any]]:
gpus = []
out = self._run(["wmic","path","win32_VideoController","get","Name,AdapterRAM","/format:list"])
for block in out.split("\n\n"):
if "Name = " not in block: continue
name = re.search(r"Name = (.*)", block)
vram = re.search(r"AdapterRAM = (\d+)", block)
if name:
val = int(vram.group(1)) if vram else 0
gpus.append({"name":name.group(1).strip(),"vram_bytes":val,"vram":self._fmt(val)})
if any(g["vram_bytes"]>0 for g in gpus):
return gpus
with tempfile.NamedTemporaryFile(delete = False, suffix = ".txt") as tf:
path = tf.name
try:
subprocess.Popen(["dxdiag","/t",path],stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL)
for _ in range(30):
if os.path.exists(path) and os.path.getsize(path)>0:
break
time.sleep(0.1)
with open(path,encoding = "utf-16",errors = "ignore") as f:
data = f.read()
except Exception:
data = ""
finally:
try: os.remove(path)
except: pass
for m in re.finditer(r"Card name:\s*(.*?)\r?\n.*?(?:Dedicated Memory|Display Memory):\s*([^\r\n]+)", data, re.S):
name,mem = m.groups()
vb = self._parse_bytes(mem)
if vb:
gpus.append({"name":name.strip(),"vram_bytes":vb,"vram":self._fmt(vb)})
return gpus
def _get_windows_shared(self)->int:
try:
with tempfile.NamedTemporaryFile(delete = False, suffix = ".txt") as tf:
path = tf.name
subprocess.Popen(["dxdiag","/t",path],stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL)
for _ in range(30):
if os.path.exists(path) and os.path.getsize(path)>0:
break
time.sleep(0.1)
with open(path,encoding = "utf-16",errors = "ignore") as f:
data = f.read()
except Exception:
data = ""
finally:
try: os.remove(path)
except: pass
m = re.search(r"Shared Memory:\s*([^\r\n]+)", data)
return self._parse_bytes(m.group(1)) if m else 0
# ---- Linux/macOS simplified ----
def _get_linux_vram(self)->list[dict[str,Any]]:
out = self._run(["nvidia-smi","--query-gpu = name,memory.total","--format = csv,noheader,nounits"])
gpus = []
for line in out.splitlines():
if "," not in line: continue
name,mem = line.split(",",1)
vb = int(mem.strip())*1024**2
gpus.append({"name":name.strip(),"vram_bytes":vb,"vram":self._fmt(vb)})
return gpus
def _get_linux_shared(self)->int:
return psutil.virtual_memory().total//4 if hasattr(psutil,"virtual_memory") else 0
def _get_macos_vram(self)->list[dict[str,Any]]:
out = self._run(["system_profiler","SPDisplaysDataType","-json"])
try:data = json.loads(out)
except: return []
g = []
for gpu in data.get("SPDisplaysDataType",[]):
v = self._parse_bytes(gpu.get("spdisplays_vram",""))
g.append({"name":gpu.get("_name","GPU"),"vram_bytes":v,"vram":self._fmt(v)})
return g
def _get_macos_shared(self)->int:
out = self._run(["system_profiler","SPDisplaysDataType","-json"])
try:data = json.loads(out)
except:return 0
for gpu in data.get("SPDisplaysDataType",[]):
for key in ("spdisplays_vram_shared","spdisplays_vram_dynamic"):
if key in gpu:
return self._parse_bytes(gpu[key])
return 0
# ---- main API ----
def detect_vram(self,as_json:bool = False)->Any:
sys = self.system
if sys == "windows":
g = self._get_windows_vram(); s = self._get_windows_shared()
elif sys == "linux":
g = self._get_linux_vram(); s = self._get_linux_shared()
elif sys == "darwin":
g = self._get_macos_vram(); s = self._get_macos_shared()
else:
g = []; s = 0
total = sum(x.get("vram_bytes",0) for x in g)
res = {
"os":sys,
"gpu_count":len(g),
"gpus":g,
"total_vram_bytes":total,
"total_vram_human":self._fmt(total),
"shared_memory_bytes":s,
"shared_memory_human":self._fmt(s),
"total_combined_human":self._fmt(total+s)
# ─────────────────────────── CPU fallback
mem = psutil.virtual_memory()
info = {
"os": self.system,
"device_type": "cpu",
"device_name": "System RAM",
"free_bytes": mem.available,
"total_bytes": mem.total,
"free_human": self._fmt(mem.available),
"total_human": self._fmt(mem.total),
}
return json.dumps(res,indent = 2) if as_json else res
return json.dumps(info, indent=2) if as_json else info

View File

@@ -1,7 +1,12 @@
import os
import platform
import tempfile
min_python_version = (3,10)
max_python_version = (3,13)
tmp_dir = os.path.abspath('tmp')
tempfile.tempdir = tmp_dir
tmp_expire = 7 # days
models_dir = os.path.abspath('models')
@@ -14,10 +19,10 @@ os.environ['PYTHONIOENCODING'] = 'utf-8'
os.environ['COQUI_TOS_AGREED'] = '1'
os.environ['PYTHONIOENCODING'] = 'utf-8'
os.environ['CALIBRE_NO_NATIVE_FILEDIALOGS'] = '1'
os.environ['GRADIO_DEBUG'] = '1'
os.environ['DO_NOT_TRACK'] = 'true'
os.environ['CALIBRE_TEMP_DIR'] = tmp_dir
os.environ['CALIBRE_CACHE_DIRECTORY'] = tmp_dir
os.environ['GRADIO_DEBUG'] = '0'
os.environ['DO_NOT_TRACK'] = 'True'
os.environ['HUGGINGFACE_HUB_CACHE'] = tts_dir
os.environ['HF_HOME'] = tts_dir
os.environ['HF_DATASETS_CACHE'] = tts_dir
@@ -30,25 +35,27 @@ os.environ['STANZA_RESOURCES_DIR'] = os.path.join(models_dir, 'stanza')
os.environ['ARGOS_TRANSLATE_PACKAGE_PATH'] = os.path.join(models_dir, 'argostranslate')
os.environ['TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD'] = '1'
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
os.environ['SUNO_OFFLOAD_CPU'] = 'False' # BARK option: False needs A GPU
os.environ['SUNO_USE_SMALL_MODELS'] = 'False' # BARK option: False needs a GPU with VRAM > 4GB
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:32,garbage_collection_threshold:0.6,expandable_segments:True'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["CUDA_CACHE_MAXSIZE"] = "2147483648"
os.environ['SUNO_OFFLOAD_CPU'] = 'False'
os.environ['SUNO_USE_SMALL_MODELS'] = 'False'
if platform.system() == 'Windows':
os.environ['ESPEAK_DATA_PATH'] = os.path.expandvars(r"%USERPROFILE%\scoop\apps\espeak-ng\current\eSpeak NG\espeak-ng-data")
prog_version = (lambda: open('VERSION.txt').read().strip())()
min_python_version = (3,10)
max_python_version = (3,12)
NATIVE = 'native'
FULL_DOCKER = 'full_docker'
debug_mode = True
debug_mode = False
device_list = ['cpu', 'gpu', 'mps']
default_device = 'cpu'
default_gpu_wiki = '<a href="https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES">howto wiki</a>'
default_chapters_control = False
devices = {"CPU": {"proc": "cpu", "found": True}, "CUDA": {"proc": "cuda", "found": False}, "MPS": {"proc": "mps", "found": False}, "ROCM": {"proc": "rocm", "found": False}, "XPU": {"proc": "xpu", "found": False}}
default_device = devices['CPU']['proc']
default_gpu_wiki = '<a href="https://github.com/DrewThomasson/ebook2audiobook/wiki/GPU-ISSUES">GPU howto wiki</a>'
default_chapters_preview = False
python_env_dir = os.path.abspath(os.path.join('.','python_env'))
requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))
@@ -56,7 +63,7 @@ requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))
interface_host = '0.0.0.0'
interface_port = 7860
interface_shared_tmp_expire = 3 # in days
interface_concurrency_limit = 1 # or None for unlimited
interface_concurrency_limit = 1 # or None for unlimited multiple parallel user conversions
interface_component_options = {
"gr_tab_xtts_params": True,

File diff suppressed because it is too large Load Diff

View File

@@ -834,7 +834,6 @@ language_mapping = {
"ben": {"name": "Bengali", "native_name": "বাংলা", "max_chars": 142},
"zho": {"name": "Chinese", "native_name": "中文", "max_chars": 82},
"eng": {"name": "English", "native_name": "English", "max_chars": 250},
"fas": {"name": "Persian", "native_name": "فارسی", "max_chars": 182},
"fra": {"name": "French", "native_name": "Français", "max_chars": 273},
"deu": {"name": "German, Standard", "native_name": "Deutsch", "max_chars": 253},
"hin": {"name": "Hindi", "native_name": "हिन्दी", "max_chars": 142},
@@ -844,6 +843,7 @@ language_mapping = {
"jav": {"name": "Javanese", "native_name": "Basa Jawa", "max_chars": 182},
"jpn": {"name": "Japanese", "native_name": "日本語", "max_chars": 71},
"kor": {"name": "Korean", "native_name": "한국어", "max_chars": 95},
"fas": {"name": "Persian", "native_name": "فارسی", "max_chars": 182},
"pol": {"name": "Polish", "native_name": "Polski", "max_chars": 224},
"por": {"name": "Portuguese", "native_name": "Português", "max_chars": 203},
"rus": {"name": "Russian", "native_name": "Русский", "max_chars": 182},

View File

@@ -1,7 +1,8 @@
import os
from lib.conf import tts_dir, voices_dir
loaded_tts = {}
xtts_builtin_speakers_list = []
TTS_ENGINES = {
"XTTSv2": "xtts",
@@ -30,7 +31,6 @@ default_fine_tuned = 'internal'
default_vc_model = TTS_VOICE_CONVERSION['knnvc']['path']
default_voice_detection_model = 'drewThomasson/segmentation'
max_tts_in_memory = 2 # TTS engines to keep in memory (1 tts engine ~= 4GB to 8GB RAM).
max_custom_model = 100
max_custom_voices = 1000
max_upload_size = '6GB'
@@ -46,10 +46,6 @@ default_engine_settings = {
"top_p": 0.85,
"speed": 1.0,
"enable_text_splitting": False,
# to enable deepspeed, you must install it first:
# conda activate ./python_env (linux/mac) or .\python_env (windows)
# pip install deepspeed
# conda deactivate
"use_deepspeed": False,
"files": ['config.json', 'model.pth', 'vocab.json', 'ref.wav', 'speakers_xtts.pth'],
"voices": {
@@ -74,12 +70,12 @@ default_engine_settings = {
"FerranSimen": "Ferran Simen", "XavierHayasaka": "Xavier Hayasaka", "LuisMoray": "Luis Moray",
"MarcosRudaski": "Marcos Rudaski"
},
"rating": {"GPU VRAM": 4, "CPU": 3, "RAM": 8, "Realism": 5}
"rating": {"VRAM": 2, "CPU": 2, "RAM": 4, "Realism": 5}
},
TTS_ENGINES['BARK']: {
"samplerate": 24000,
"text_temp": 0.50,
"waveform_temp": 0.50,
"text_temp": 0.4,
"waveform_temp": 0.6,
"files": ["text_2.pt", "coarse_2.pt", "fine_2.pt"],
"speakers_path": os.path.join(voices_dir, '__bark'),
"voices": {
@@ -128,31 +124,31 @@ default_engine_settings = {
"zh_speaker_6": "Speaker 6", "zh_speaker_7": "Speaker 7", "zh_speaker_8": "Speaker 8",
"zh_speaker_9": "Speaker 9"
},
"rating": {"GPU VRAM": 4, "CPU": 1, "RAM": 16, "Realism": 4}
"rating": {"VRAM": 6, "CPU": 1, "RAM": 8, "Realism": 5}
},
TTS_ENGINES['VITS']: {
"samplerate": 22050,
"files": ['config.json', 'model_file.pth', 'language_ids.json'],
"voices": {},
"rating": {"GPU VRAM": 2, "CPU": 3, "RAM": 4, "Realism": 3}
"rating": {"VRAM": 2, "CPU": 4, "RAM": 4, "Realism": 4}
},
TTS_ENGINES['FAIRSEQ']: {
"samplerate": 16000,
"files": ['config.json', 'G_100000.pth', 'vocab.json'],
"voices": {},
"rating": {"GPU VRAM": 2, "CPU": 3, "RAM": 4, "Realism": 3}
"rating": {"VRAM": 2, "CPU": 4, "RAM": 4, "Realism": 4}
},
TTS_ENGINES['TACOTRON2']: {
"samplerate": 22050,
"files": ['config.json', 'best_model.pth', 'vocoder_config.json', 'vocoder_model.pth'],
"voices": {},
"rating": {"GPU VRAM": 2, "CPU": 3, "RAM": 4, "Realism": 3}
"rating": {"VRAM": 1, "CPU": 5, "RAM": 2, "Realism": 3}
},
TTS_ENGINES['YOURTTS']: {
"samplerate": 16000,
"files": ['config.json', 'model_file.pth'],
"voices": {"Machinella-5": "female-en-5", "ElectroMale-2": "male-en-2", 'Machinella-4': 'female-pt-4\n', 'ElectroMale-3': 'male-pt-3\n'},
"rating": {"GPU VRAM": 1, "CPU": 5, "RAM": 4, "Realism": 2}
"rating": {"VRAM": 0, "CPU": 5, "RAM": 1, "Realism": 2}
}
}
models = {
@@ -333,6 +329,14 @@ models = {
"files": default_engine_settings[TTS_ENGINES['XTTSv2']]['files'],
"samplerate": default_engine_settings[TTS_ENGINES['XTTSv2']]['samplerate']
},
"PeterGriffinFamilyGuy": {
"lang": "eng",
"repo": "drewThomasson/fineTunedTTSModels",
"sub": "xtts-v2/eng/PeterGriffinFamilyGuy/",
"voice": os.path.join(voices_dir, 'eng', 'adult', 'male', 'PeterGriffinFamilyGuy.wav'),
"files": default_engine_settings[TTS_ENGINES['XTTSv2']]['files'],
"samplerate": default_engine_settings[TTS_ENGINES['XTTSv2']]['samplerate']
},
"RafeBeckley": {
"lang": "eng",
"repo": "drewThomasson/fineTunedTTSModels",

View File

@@ -1,69 +1,67 @@
[build-system]
name = "ebook2audiobook"
requires = ["setuptools >= 64"]
build-backend = "setuptools.build_meta"
[tool.poetry]
name = "ebook2audiobook"
version = "25.10.25"
[tool.setuptools.dynamic]
version = {file = "VERSION.txt"}
[project]
name = "ebook2audiobook"
description = "Convert eBooks to audiobooks with chapters and metadata"
authors = [
{ name = "Drew Thomasson" }
]
dependencies = [
"regex",
"tqdm",
"cutlet",
"deep_translator",
"docker",
"ebooklib",
"fastapi",
"num2words",
"argostranslate",
"beautifulsoup4",
"fugashi",
"sudachipy",
"sudachidict_core",
"ray",
"unidic",
"pymupdf4llm",
"translate",
"hangul-romanize",
"indic-nlp-library",
"iso639-lang",
"jieba",
"pycantonese",
"soynlp",
"pypinyin",
"pythainlp",
"mutagen",
"PyOpenGL",
"nvidia-ml-py",
"phonemizer-fork",
"pydub",
"pyannote-audio==3.4.0",
"demucs==4.0.1",
"gradio>=5.49",
"transformers==4.51.3",
"coqui-tts[languages]==0.26.0",
"torch>=2.8.0,<2.9",
"torchaudio>=2.8.0,<2.9",
"torchvggish"
]
readme = "README.md"
requires-python = ">3.9,<3.13"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
scripts = { "ebook2audiobook" = "app:main" }
[project.urls]
"Homepage" = "https://github.com/DrewThomasson/ebook2audiobook"
[build-system]
name = "ebook2audiobook"
requires = ["setuptools >= 64"]
build-backend = "setuptools.build_meta"
[tool.poetry]
name = "ebook2audiobook"
version = "25.10.30"
[tool.setuptools.dynamic]
version = {file = "VERSION.txt"}
[project]
name = "ebook2audiobook"
description = "Convert eBooks to audiobooks with chapters and metadata"
authors = [
{ name = "Drew Thomasson" }
]
dependencies = [
"torchvggish",
"numpy<2",
"num2words @ git+https://github.com/savoirfairelinux/num2words.git",
"regex",
"tqdm",
"docker",
"ebooklib",
"fastapi",
"beautifulsoup4",
"fugashi",
"sudachipy",
"sudachidict_core",
"PyMuPDF",
"pytesseract",
"unidic",
"hangul-romanize",
"indic-nlp-library",
"iso639-lang",
"jieba",
"pycantonese",
"soynlp",
"pypinyin",
"pythainlp",
"mutagen",
"PyOpenGL",
"phonemizer-fork",
"pydub",
"demucs",
"deepspeed",
"pyannote-audio<=3.4.0",
"stanza<=1.10.1",
"argostranslate<=1.10.0",
"gradio>=5.49.1",
"torch<=2.7.1",
"torchaudio<=2.7.1",
"coqui-tts[languages]==0.27.2"
]
readme = "README.md"
requires-python = ">3.9,<3.14"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
scripts = { "ebook2audiobook" = "app:main" }
[project.urls]
"Homepage" = "https://github.com/DrewThomasson/ebook2audiobook"

View File

@@ -1,20 +1,18 @@
torchvggish
numpy<2
num2words @ git+https://github.com/savoirfairelinux/num2words.git
regex
tqdm
cutlet
deep_translator
docker
ebooklib
fastapi
num2words
argostranslate
beautifulsoup4
fugashi
sudachipy
sudachidict_core
ray
PyMuPDF
pytesseract
unidic
pymupdf4llm
translate
hangul-romanize
indic-nlp-library
iso639-lang
@@ -25,14 +23,14 @@ pypinyin
pythainlp
mutagen
PyOpenGL
nvidia-ml-py
phonemizer-fork
pydub
pyannote-audio==3.4.0
demucs==4.0.1
gradio>=5.49
transformers==4.51.3
coqui-tts[languages]==0.26.0
torch>=2.8.0,<2.9
torchaudio>=2.8.0,<2.9
torchvggish
demucs
deepspeed
pyannote-audio<=3.4.0
stanza<=1.10.1
argostranslate<=1.10.0
gradio>=5.49.1
torch<=2.7.1
torchaudio<=2.7.1
coqui-tts[languages]==0.27.2

0
tmp/.gitkeep Executable file → Normal file
View File

BIN
tools/icons/appLogo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 461 KiB

View File

@@ -0,0 +1,173 @@
#!/usr/bin/env python3
"""
Multi-platform icon generator
Converts appLogo.png into platform-specific formats and sizes
Requires: Pillow (PIL), cairosvg (optional for SVG)
Installation:
pip install Pillow cairosvg
"""
import os
import sys
from PIL import Image
# Icon sizes for each platform
ICON_SIZES = {
'windows': [16, 24, 32, 48, 256],
'mac': [16, 32, 64, 128, 256, 512, 1024],
'linux': [16, 24, 32, 48, 64, 128, 256]
}
def create_directories():
    """Ensure an output folder exists under icons/ for every target platform."""
    for target in ICON_SIZES:
        # exist_ok makes repeated runs idempotent.
        os.makedirs(f'icons/{target}', exist_ok=True)
    print("✓ Directories created")
def resize_image(source_path, output_dir, sizes):
    """Render the source image at each requested square size.

    Args:
        source_path: path to the source image (PNG).
        output_dir: directory that receives the ``icon-<size>.png`` files.
        sizes: iterable of pixel edge lengths to render.

    Returns:
        True on success, False if the image could not be processed.
    """
    try:
        # Context manager releases the underlying file handle promptly;
        # the original left the opened image file un-closed.
        with Image.open(source_path) as src:
            # RGBA guarantees an alpha channel so transparency survives
            # resizing; convert() fully loads pixel data, so using `img`
            # after the `with` block is safe.
            img = src.convert('RGBA')
        for size in sizes:
            resized = img.resize((size, size), Image.Resampling.LANCZOS)
            resized.save(f'{output_dir}/icon-{size}.png', 'PNG')
            print(f"  ✓ Generated {size}x{size} icon")
        return True
    except Exception as e:
        print(f"✗ Error resizing image: {e}")
        return False
def create_windows_ico(output_dir):
    """Create a multi-resolution Windows ICO file from the generated PNGs.

    Pillow's ICO writer ignores any requested size larger than the source
    image, so the ICO must be built from the LARGEST rendered PNG. The
    original code saved from ``images[0]`` (the 16x16 icon), which silently
    dropped every other resolution; it also never closed the opened images.

    Returns:
        True on success, False on any error.
    """
    try:
        sizes = ICON_SIZES['windows']
        largest = max(sizes)
        with Image.open(f'{output_dir}/icon-{largest}.png') as img:
            # Pillow downscales the largest image to produce each entry.
            img.save(
                f'{output_dir}/appIcon.ico',
                format='ICO',
                sizes=[(size, size) for size in sizes]
            )
        print("✓ Windows ICO file created: icons/windows/appIcon.ico")
        return True
    except Exception as e:
        print(f"✗ Error creating ICO: {e}")
        return False
def create_mac_icns(output_dir):
    """Create a macOS ICNS file from the generated PNGs.

    Builds an ``appIcon.iconset`` directory using Apple's naming convention,
    then invokes ``iconutil`` (macOS only) to compile it into appIcon.icns.
    On other platforms the iconset is still produced and instructions for
    manual conversion are printed.

    Returns:
        True if the ICNS was created, False otherwise (including when
        iconutil is unavailable).
    """
    try:
        import shutil
        import subprocess
        sizes = ICON_SIZES['mac']
        # Create iconset directory
        iconset_dir = f'{output_dir}/appIcon.iconset'
        os.makedirs(iconset_dir, exist_ok=True)
        for size in sizes:
            img_path = f'{output_dir}/icon-{size}.png'
            # Apple iconsets name files icon_<pts>x<pts>[@2x].png; sizes
            # above 256px are expressed as retina (@2x) variants.
            scale = 2 if size > 256 else 1
            icon_name = f'icon_{size // scale}x{size // scale}'
            if scale == 2:
                icon_name += '@2x'
            # shutil.copy is portable and safe with spaces in paths,
            # unlike the previous shelled-out `os.system('cp ...')`.
            shutil.copy(img_path, f'{iconset_dir}/{icon_name}.png')
        # Try to create ICNS using iconutil (macOS only)
        try:
            subprocess.run(['iconutil', '-c', 'icns', '-o',
                           f'{output_dir}/appIcon.icns', iconset_dir],
                          check=True, capture_output=True)
            print("✓ macOS ICNS file created: icons/mac/appIcon.icns")
        except (subprocess.CalledProcessError, FileNotFoundError):
            print("⚠ Note: iconutil not found. ICNS not created.")
            print("  On macOS, run: iconutil -c icns -o icons/mac/appIcon.icns icons/mac/appIcon.iconset")
            return False
        return True
    except Exception as e:
        print(f"✗ Error creating ICNS: {e}")
        return False
def create_svg_copy(source_path, output_dir):
    """Copy an SVG counterpart of the source image into output_dir.

    Linux desktops prefer vector icons, but the SVG is optional: a missing
    vector source is reported as a warning and still treated as success.
    """
    try:
        import shutil
        vector_path = source_path.replace('.png', '.svg')
        # Guard clause: nothing to do when no vector source is present.
        if not os.path.exists(vector_path):
            print("⚠ No SVG source found (optional for Linux)")
            return True
        shutil.copy(vector_path, f'{output_dir}/appIcon.svg')
        print(f"✓ SVG icon copied: icons/linux/appIcon.svg")
        return True
    except Exception as e:
        print(f"✗ Error copying SVG: {e}")
        return False
def main():
    """Entry point: render per-platform icon sets from appLogo.png, then
    build the Windows ICO, the macOS ICNS, and an optional Linux SVG copy.
    Exits with status 1 when the source logo is missing or resizing fails."""
    print("🎨 Multi-Platform Icon Generator\n")
    source_image = 'appLogo.png'
    # The script must be run from the directory containing the logo.
    if not os.path.exists(source_image):
        print(f"✗ Error: {source_image} not found in current directory")
        sys.exit(1)
    print(f"Source: {source_image}\n")
    create_directories()
    print()
    # Render every platform's raster sizes; a failure here is fatal.
    for platform, sizes in ICON_SIZES.items():
        print(f"Generating {platform.upper()} icons...")
        if not resize_image(source_image, f'icons/{platform}', sizes):
            sys.exit(1)
        print()
    # Platform container formats are best-effort: warn and continue.
    print("Creating platform-specific formats...\n")
    if not create_windows_ico('icons/windows'):
        print("⚠ Continuing despite ICO creation issue\n")
    if not create_mac_icns('icons/mac'):
        print("⚠ Continuing despite ICNS creation issue\n")
    if not create_svg_copy(source_image, 'icons/linux'):
        print("⚠ Continuing despite SVG copy issue\n")
    print("✅ Icon generation complete!")
    print("\nOutput structure:")
    print("  icons/")
    print("  ├── windows/")
    print("  │   ├── appIcon.ico")
    print("  │   └── icon-*.png")
    print("  ├── mac/")
    print("  │   ├── appIcon.icns (if created)")
    print("  │   └── icon-*.png")
    print("  └── linux/")
    print("      ├── appIcon.svg (if available)")
    print("      └── icon-*.png")
if __name__ == '__main__':
main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 591 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 465 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 465 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

BIN
tools/icons/mac/icon-16.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

BIN
tools/icons/mac/icon-32.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

BIN
tools/icons/mac/icon-64.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 367 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 591 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 876 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.