# lollms_hub/app/core/binary_manager.py

import os
import platform
import httpx
import shutil
import zipfile
import tarfile
import logging
from pathlib import Path

# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Root folder (relative to the working directory) holding bundled binaries,
# and the sub-folder dedicated to the llama.cpp engine.
BIN_DIR = Path(".lollms") / "bin"
LLAMACPP_DIR = BIN_DIR.joinpath("llamacpp")

class BinaryManager:
    """Download and install prebuilt engine binaries (currently llama.cpp).

    Every method is static; the class only groups related helpers. Archives
    are downloaded into ``BIN_DIR`` and unpacked into ``LLAMACPP_DIR``.
    """

    @staticmethod
    def ensure_dirs():
        """Create the llama.cpp install directory (and parents) if missing."""
        LLAMACPP_DIR.mkdir(parents=True, exist_ok=True)

    @staticmethod
    async def get_latest_llamacpp_release():
        """Fetch the latest llama.cpp release metadata from the GitHub API.

        Returns:
            The decoded JSON body of the ``releases/latest`` endpoint.

        Raises:
            httpx.HTTPStatusError: if GitHub answers with a 4xx/5xx status.
        """
        url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            return resp.json()

    @staticmethod
    def get_os_asset_name(sys_name=None, arch=None):
        """Return the release-asset name template for a platform.

        The template contains a single ``{}`` placeholder for the build number.

        Args:
            sys_name: Operating system name as reported by
                ``platform.system()``; defaults to the current system.
            arch: Machine architecture as reported by ``platform.machine()``;
                defaults to the current machine.

        Returns:
            The asset name template, or ``None`` for an unsupported OS.
        """
        sys_name = (sys_name or platform.system()).lower()
        arch = (arch or platform.machine()).lower()

        if sys_name == "windows":
            # Target the avx2 or cuda version for gamer PCs
            return "llama-b{}-bin-win-cuda-cu12.4-x64.zip"
        if sys_name == "linux":
            return "llama-b{}-bin-ubuntu-x64.tar.gz"
        if sys_name == "darwin":
            # Only switch away from arm64 when the machine is known to be
            # Intel; an unknown/empty architecture keeps the previous default.
            mac_arch = "x64" if arch in ("x86_64", "amd64", "x64", "i386") else "arm64"
            return "llama-b{}-bin-macos-" + mac_arch + ".tar.gz"
        return None

    @staticmethod
    def _version_from_tag(tag_name):
        """Strip the single leading ``b`` build prefix from a release tag."""
        # Only the prefix is removed; a plain replace() would also eat any
        # other "b" occurring later in the tag.
        return tag_name[1:] if tag_name.startswith("b") else tag_name

    @staticmethod
    def _select_asset(assets, target_name):
        """Pick the asset named *target_name*, else the first asset, else None."""
        for asset in assets:
            if asset['name'] == target_name:
                return asset
        return assets[0] if assets else None

    @staticmethod
    def _extract_archive(archive_path, dest_dir):
        """Unpack a ``.zip`` or gzip-compressed tar archive into *dest_dir*."""
        if str(archive_path).endswith(".zip"):
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(dest_dir)
            return

        with tarfile.open(archive_path, 'r:gz') as tar_ref:
            # The archive comes from the network: use the "data" extraction
            # filter when this Python provides it so members cannot escape
            # dest_dir. Older interpreters fall back to the plain call.
            if hasattr(tarfile, "data_filter"):
                tar_ref.extractall(dest_dir, filter="data")
            else:
                tar_ref.extractall(dest_dir)

    @staticmethod
    async def download_engine(engine_type="llamacpp"):
        """Download and extract the engine binary for the current platform.

        Args:
            engine_type: Engine to install. Only ``"llamacpp"`` is handled;
                any other value creates the directories and returns ``None``.

        Returns:
            The installed build number as a string (release tag without the
            leading ``b``), or ``None`` for an unhandled *engine_type*.

        Raises:
            RuntimeError: if the OS is unsupported or the release lists no
                downloadable assets.
            httpx.HTTPStatusError: if the metadata or download request fails.
        """
        BinaryManager.ensure_dirs()

        if engine_type != "llamacpp":
            return None

        release = await BinaryManager.get_latest_llamacpp_release()
        version = BinaryManager._version_from_tag(release['tag_name'])
        asset_template = BinaryManager.get_os_asset_name()

        if not asset_template:
            raise RuntimeError(f"Unsupported OS: {platform.system()}")

        target_asset = asset_template.format(version)
        asset = BinaryManager._select_asset(release.get('assets') or [], target_asset)
        if asset is None:
            raise RuntimeError(f"Release {release['tag_name']} has no downloadable assets")

        if asset['name'] != target_asset:
            # Best-effort fallback kept from the original behaviour.
            # NOTE(review): the first asset may target another platform.
            logger.warning(
                "Asset %s not found in release; falling back to %s",
                target_asset,
                asset['name'],
            )

        download_url = asset['browser_download_url']
        # Name the local file after what is actually downloaded so the
        # extractor is chosen from the real archive type; keep only the final
        # path component so the name cannot point outside BIN_DIR.
        archive_name = Path(asset['name']).name or target_asset

        logger.info("Downloading %s from %s...", engine_type, download_url)

        archive_path = BIN_DIR / archive_name
        try:
            async with httpx.AsyncClient(follow_redirects=True, timeout=600.0) as client:
                async with client.stream("GET", download_url) as response:
                    # Fail before writing an HTTP error body to disk.
                    response.raise_for_status()
                    with open(archive_path, "wb") as f:
                        async for chunk in response.aiter_bytes():
                            f.write(chunk)

            BinaryManager._extract_archive(archive_path, LLAMACPP_DIR)
        finally:
            # Remove the archive even when the download or extraction failed.
            if archive_path.exists():
                os.remove(archive_path)

        # Make binary executable on Linux/Mac. Search recursively because the
        # archive may unpack its binaries into a nested folder.
        if platform.system() != "Windows":
            for bin_file in LLAMACPP_DIR.rglob("llama-*"):
                if bin_file.is_file():
                    bin_file.chmod(0o755)

        return version

# Shared module-level instance. Every BinaryManager method is static, so this
# object carries no state of its own.
binary_manager = BinaryManager()