mirror of
https://github.com/ParisNeo/lollms_hub.git
synced 2026-05-04 03:01:01 -04:00
- Extend admin API with settings endpoints and metadata controls - Update proxy routing for improved request handling - Refactor instance manager with async networking capabilities - Enhance server CRUD for active model queries across all servers - Update database migrations and models schema - Refresh admin UI templates (agents, pools, ensembles, routers, models, instances, dashboard, settings, help)
96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
import os
|
|
import platform
|
|
import httpx
|
|
import shutil
|
|
import zipfile
|
|
import tarfile
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Base directory for internal binaries
|
|
BIN_DIR = Path(".lollms/bin")
|
|
LLAMACPP_DIR = BIN_DIR / "llamacpp"
|
|
|
|
class BinaryManager:
|
|
@staticmethod
|
|
def ensure_dirs():
|
|
LLAMACPP_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
@staticmethod
|
|
async def get_latest_llamacpp_release():
|
|
"""Fetches the latest release metadata from GitHub."""
|
|
url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
|
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
|
resp = await client.get(url)
|
|
resp.raise_for_status()
|
|
return resp.json()
|
|
|
|
@staticmethod
|
|
def get_os_asset_name():
|
|
"""Determines the correct zip/tar name for the current system."""
|
|
sys_name = platform.system().lower()
|
|
arch = platform.machine().lower()
|
|
|
|
if sys_name == "windows":
|
|
# Target the avx2 or cuda version for gamer PCs
|
|
return "llama-b{}-bin-win-cuda-cu12.4-x64.zip"
|
|
elif sys_name == "linux":
|
|
return "llama-b{}-bin-ubuntu-x64.tar.gz"
|
|
elif sys_name == "darwin":
|
|
return "llama-b{}-bin-macos-arm64.tar.gz"
|
|
return None
|
|
|
|
@staticmethod
|
|
async def download_engine(engine_type="llamacpp"):
|
|
"""Downloads and extracts the engine binary."""
|
|
BinaryManager.ensure_dirs()
|
|
|
|
if engine_type == "llamacpp":
|
|
release = await BinaryManager.get_latest_llamacpp_release()
|
|
version = release['tag_name'].replace('b', '')
|
|
asset_template = BinaryManager.get_os_asset_name()
|
|
|
|
if not asset_template:
|
|
raise RuntimeError(f"Unsupported OS: {platform.system()}")
|
|
|
|
target_asset = asset_template.format(version)
|
|
download_url = None
|
|
|
|
for asset in release['assets']:
|
|
if asset['name'] == target_asset:
|
|
download_url = asset['browser_download_url']
|
|
break
|
|
|
|
if not download_url:
|
|
# Fallback to a simpler asset if the specific CUDA one isn't found
|
|
download_url = release['assets'][0]['browser_download_url']
|
|
|
|
logger.info(f"Downloading {engine_type} from {download_url}...")
|
|
|
|
archive_path = BIN_DIR / target_asset
|
|
async with httpx.AsyncClient(follow_redirects=True, timeout=600.0) as client:
|
|
async with client.stream("GET", download_url) as response:
|
|
with open(archive_path, "wb") as f:
|
|
async for chunk in response.aiter_bytes():
|
|
f.write(chunk)
|
|
|
|
# Extraction
|
|
if target_asset.endswith(".zip"):
|
|
with zipfile.ZipFile(archive_path, 'r') as zip_ref:
|
|
zip_ref.extractall(LLAMACPP_DIR)
|
|
else:
|
|
with tarfile.open(archive_path, 'r:gz') as tar_ref:
|
|
tar_ref.extractall(LLAMACPP_DIR)
|
|
|
|
os.remove(archive_path)
|
|
|
|
# Make binary executable on Linux/Mac
|
|
if platform.system() != "Windows":
|
|
for bin_file in LLAMACPP_DIR.glob("llama-*"):
|
|
bin_file.chmod(0o755)
|
|
|
|
return version
|
|
|
|
binary_manager = BinaryManager() |