Build llama.cpp wheels in forked repo + support reinstallation
.github/workflows/create-release.yml (vendored): 126 lines removed
@@ -1,126 +0,0 @@
name: Create Release

on:
  workflow_dispatch:
    inputs:
      release_notes:
        description: "Release Notes"
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_wheels:
    name: Build wheels for ${{ matrix.arch }} (HA ${{ matrix.home_assistant_image }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # ARM64
          - home_assistant_image: "aarch64-homeassistant:2025.4.1"
            arch: "aarch64"

          # 32bit ARM (Raspberry pis)
          - home_assistant_image: "armhf-homeassistant:2025.4.1"
            arch: "armhf"

          # x64
          - home_assistant_image: "amd64-homeassistant:2025.4.1"
            arch: "x86_64"

          # 32 bit for older processors
          - home_assistant_image: "i386-homeassistant:2025.4.1"
            arch: "i386"

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Verify version match
        if: startsWith(github.event.ref, 'refs/tags/v')
        run: |
          tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
          component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
          component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')

          if [ "$tag_version" != "$component_version_manifest" ]; then
            echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
            exit 1
          fi

          if [ "$tag_version" != "$component_version_const" ]; then
            echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
            exit 1
          fi

          echo "All required versions match."

      - name: Read llama-cpp-python version
        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV

      - name: Build artifact
        uses: uraimo/run-on-arch-action@v2
        id: build
        with:
          arch: none
          distro: none
          base_image: homeassistant/${{ matrix.home_assistant_image }}

          # Create an artifacts directory
          setup: |
            mkdir -p "${PWD}/artifacts"

          # Mount the artifacts directory as /artifacts in the container
          dockerRunArgs: |
            --volume "${PWD}/artifacts:/artifacts"

          # The shell to run commands with in the container
          shell: /bin/bash

          # Produce a binary artifact and place it in the mounted volume
          run: |
            apk update
            apk add build-base python3-dev cmake
            pip3 install build

            cd /tmp
            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
            cd llama-cpp-python

            tag="homellm"
            sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py

            export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"
            python3 -m build --wheel

            mv ./dist/*.whl /artifacts
            ls -la /artifacts/

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: ./artifacts/*.whl
          name: artifact_${{ matrix.arch }}

  release:
    name: Create Release
    needs: [ build_wheels ]
    runs-on: ubuntu-latest
    if: startsWith(github.event.ref, 'refs/tags/v')

    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: Create GitHub release
        uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          body: ${{ inputs.release_notes }}
          make_latest: true

TODO.md: 1 line added
@@ -2,6 +2,7 @@
- [x] proper tool calling support
- [ ] fix old GGUFs to support tool calling
- [x] home assistant component text streaming support
- [x] move llama-cpp build to forked repo + add support for multi backend builds (no more -noavx)
- [ ] new model based on qwen3 0.6b
- [ ] new model based on gemma3 270m
- [ ] support AI task API

@@ -44,7 +44,6 @@ from .const import (
    BACKEND_TYPE_OLLAMA,
    BACKEND_TYPE_LLAMA_EXISTING_OLD,
    BACKEND_TYPE_LLAMA_HF_OLD,
    EMBEDDED_LLAMA_CPP_PYTHON_VERSION
)
from .entity import LocalLLMClient, LocalLLMConfigEntry
from .backends.llamacpp import LlamaCppClient

@@ -141,7 +140,7 @@ async def async_migrate_entry(hass: HomeAssistant, config_entry: LocalLLMConfigE
    if backend == BACKEND_TYPE_LLAMA_EXISTING_OLD or backend == BACKEND_TYPE_LLAMA_HF_OLD:
        backend = BACKEND_TYPE_LLAMA_CPP
        entry_data[CONF_BACKEND_TYPE] = BACKEND_TYPE_LLAMA_CPP
        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version) or EMBEDDED_LLAMA_CPP_PYTHON_VERSION
        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version)
    else:
        # ensure all remote backends have a path set
        entry_options[CONF_GENERIC_OPENAI_PATH] = entry_options.get(CONF_GENERIC_OPENAI_PATH, "")

@@ -39,6 +39,7 @@ from custom_components.llama_conversation.const import (
    CONF_LLAMACPP_BATCH_SIZE,
    CONF_LLAMACPP_THREAD_COUNT,
    CONF_LLAMACPP_BATCH_THREAD_COUNT,
    CONF_INSTALLED_LLAMACPP_VERSION,
    DEFAULT_MAX_TOKENS,
    DEFAULT_PROMPT,
    DEFAULT_TEMPERATURE,

@@ -78,6 +79,7 @@ def snapshot_settings(options: dict[str, Any]) -> dict[str, Any]:
        CONF_LLAMACPP_THREAD_COUNT: options.get(CONF_LLAMACPP_THREAD_COUNT, DEFAULT_LLAMACPP_THREAD_COUNT),
        CONF_LLAMACPP_BATCH_THREAD_COUNT: options.get(CONF_LLAMACPP_BATCH_THREAD_COUNT, DEFAULT_LLAMACPP_BATCH_THREAD_COUNT),
        CONF_LLAMACPP_ENABLE_FLASH_ATTENTION: options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION),
        CONF_INSTALLED_LLAMACPP_VERSION: options.get(CONF_INSTALLED_LLAMACPP_VERSION, ""),
        CONF_GBNF_GRAMMAR_FILE: options.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE),
        CONF_PROMPT_CACHING_ENABLED: options.get(CONF_PROMPT_CACHING_ENABLED, DEFAULT_PROMPT_CACHING_ENABLED),
    }

@@ -115,7 +117,7 @@ class LlamaCppClient(LocalLLMClient):

    @staticmethod
    def get_name(client_options: dict[str, Any]):
        return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})"
        return "Llama.cpp"

    async def async_get_available_models(self) -> List[str]:
        return [] # TODO: find available "huggingface_hub" models that have been downloaded

@@ -215,6 +217,11 @@ class LlamaCppClient(LocalLLMClient):
            should_reload = True
        elif loaded_options[CONF_LLAMACPP_ENABLE_FLASH_ATTENTION] != entity_options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION):
            should_reload = True
        elif loaded_options[CONF_INSTALLED_LLAMACPP_VERSION] != entity_options.get(CONF_INSTALLED_LLAMACPP_VERSION):
            should_reload = True
            _LOGGER.debug(f"Reloading llama.cpp...")
            if self.llama_cpp_module:
                self.llama_cpp_module = importlib.reload(self.llama_cpp_module)

        model_path = entity_options.get(CONF_DOWNLOADED_MODEL_FILE, "")
        model_name = entity_options.get(CONF_CHAT_MODEL, "")

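Aside, not part of the diff: the reload path above relies on Python's importlib. A minimal sketch (module name taken from the diff; running it requires llama-cpp-python to already be installed) of how a reinstalled build is picked up without restarting the interpreter:

# Sketch only: re-import llama_cpp after pip has swapped the installed wheel.
import importlib

llama_cpp_module = importlib.import_module("llama_cpp")
# ... a different llama-cpp-python wheel gets installed here ...
llama_cpp_module = importlib.reload(llama_cpp_module)
print(llama_cpp_module.__version__)  # now reports the freshly installed version
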
@@ -1,6 +1,7 @@
"""Config flow for Local LLM Conversation integration."""
from __future__ import annotations

from asyncio import Task
import logging
import os
from typing import Any

@@ -39,7 +40,8 @@ from homeassistant.helpers.selector import (
    BooleanSelectorConfig,
)

from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, is_valid_hostname, MissingQuantizationException
from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, \
    is_valid_hostname, get_available_llama_cpp_versions, MissingQuantizationException
from .const import (
    CONF_CHAT_MODEL,
    CONF_MAX_TOKENS,

@@ -87,6 +89,7 @@ from .const import (
    CONF_LLAMACPP_BATCH_SIZE,
    CONF_LLAMACPP_THREAD_COUNT,
    CONF_LLAMACPP_BATCH_THREAD_COUNT,
    CONF_LLAMACPP_REINSTALL,
    DEFAULT_CHAT_MODEL,
    DEFAULT_PORT,
    DEFAULT_SSL,

@@ -258,14 +261,14 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
        if backend == BACKEND_TYPE_LLAMA_CPP:
            installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
            _LOGGER.debug(f"installed version: {installed_version}")
            if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
            if installed_version and installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
                self.client_config[CONF_INSTALLED_LLAMACPP_VERSION] = installed_version
                return await self.async_step_finish()
            else:
                self.internal_step = "install_local_wheels"
                _LOGGER.debug("Queuing install task...")
                async def install_task():
                    await self.hass.async_add_executor_job(
                    return await self.hass.async_add_executor_job(
                        install_llama_cpp_python, self.hass.config.config_dir
                    )

@@ -376,7 +379,7 @@

    @classmethod
    def async_supports_options_flow(cls, config_entry: ConfigEntry) -> bool:
        return config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_LLAMA_CPP
        return True

    @staticmethod
    def async_get_options_flow(

@@ -399,6 +402,9 @@ class OptionsFlow(BaseOptionsFlow):
    """Local LLM config flow options handler."""

    model_config: dict[str, Any] | None = None
    reinstall_task: Task[Any] | None = None
    wheel_install_error: str | None = None
    wheel_install_successful: bool = False

    async def async_step_init(
        self, user_input: dict[str, Any] | None = None

@@ -410,32 +416,112 @@
        backend_type = self.config_entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
        client_config = dict(self.config_entry.options)

        if self.wheel_install_error:
            _LOGGER.warning("Failed to install wheel: %s", repr(self.wheel_install_error))
            return self.async_abort(reason="pip_wheel_error")

        if self.wheel_install_successful:
            client_config[CONF_INSTALLED_LLAMACPP_VERSION] = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
            _LOGGER.debug(f"new version is: {client_config[CONF_INSTALLED_LLAMACPP_VERSION]}")
            return self.async_create_entry(data=client_config)

        if backend_type == BACKEND_TYPE_LLAMA_CPP:
            potential_versions = await get_available_llama_cpp_versions(self.hass)

            schema = vol.Schema({
                vol.Required(CONF_LLAMACPP_REINSTALL, default=False): BooleanSelector(BooleanSelectorConfig()),
                vol.Required(CONF_INSTALLED_LLAMACPP_VERSION, default=client_config.get(CONF_INSTALLED_LLAMACPP_VERSION, "not installed")): SelectSelector(
                    SelectSelectorConfig(
                        options=[ SelectOptionDict(value=x[0], label=x[0] if not x[1] else f"{x[0]} (local)") for x in potential_versions ],
                        mode=SelectSelectorMode.DROPDOWN,
                    )
                )
            })

            return self.async_show_form(
                step_id="reinstall",
                data_schema=schema,
            )
        else:

            if user_input is not None:
                client_config.update(user_input)

                # validate remote connections
                connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)

                if not connect_err:
                    return self.async_create_entry(data=client_config)
                else:
                    errors["base"] = "failed_to_connect"
                    description_placeholders["exception"] = str(connect_err)

            schema = remote_connection_schema(
                backend_type=backend_type,
                host=client_config.get(CONF_HOST),
                port=client_config.get(CONF_PORT),
                ssl=client_config.get(CONF_SSL),
                selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
            )

            return self.async_show_form(
                step_id="init",
                data_schema=schema,
                errors=errors,
                description_placeholders=description_placeholders,
            )

    async def async_step_reinstall(self, user_input: dict[str, Any] | None = None) -> ConfigFlowResult:
        client_config = dict(self.config_entry.options)

        if user_input is not None:
            client_config.update(user_input)

            # validate remote connections
            connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)

            if not connect_err:
            if not user_input[CONF_LLAMACPP_REINSTALL]:
                _LOGGER.debug("Reinstall was not selected, finishing")
                return self.async_create_entry(data=client_config)

        if not self.reinstall_task:
            if not user_input:
                return self.async_abort(reason="unknown")

            desired_version = user_input.get(CONF_INSTALLED_LLAMACPP_VERSION)
            async def install_task():
                return await self.hass.async_add_executor_job(
                    install_llama_cpp_python, self.hass.config.config_dir, True, desired_version
                )

            self.reinstall_task = self.hass.async_create_background_task(
                install_task(), name="llama_cpp_python_installation")

            _LOGGER.debug("Queuing reinstall task...")
            return self.async_show_progress(
                progress_task=self.reinstall_task,
                step_id="reinstall",
                progress_action="install_local_wheels",
            )

        if not self.reinstall_task.done():
            return self.async_show_progress(
                progress_task=self.reinstall_task,
                step_id="reinstall",
                progress_action="install_local_wheels",
            )

        _LOGGER.debug("done... checking result")
        install_exception = self.reinstall_task.exception()
        if install_exception:
            self.wheel_install_error = repr(install_exception)
            _LOGGER.debug(f"Hit error: {self.wheel_install_error}")
            return self.async_show_progress_done(next_step_id="init")
        else:
            wheel_install_result = self.reinstall_task.result()
            if not wheel_install_result:
                self.wheel_install_error = "Pip returned false"
                _LOGGER.debug(f"Hit error: {self.wheel_install_error} ({wheel_install_result})")
                return self.async_show_progress_done(next_step_id="init")
            else:
                errors["base"] = "failed_to_connect"
                description_placeholders["exception"] = str(connect_err)

                schema = remote_connection_schema(
                    backend_type=backend_type,
                    host=client_config.get(CONF_HOST),
                    port=client_config.get(CONF_PORT),
                    ssl=client_config.get(CONF_SSL),
                    selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
                )

                return self.async_show_form(
                    step_id="init",
                    data_schema=schema,
                    errors=errors,
                    description_placeholders=description_placeholders,
                )
            _LOGGER.debug(f"Finished install: {wheel_install_result}")
            self.wheel_install_successful = True
            return self.async_show_progress_done(next_step_id="init")


def STEP_LOCAL_MODEL_SELECTION_DATA_SCHEMA(model_file=None, chat_model=None, downloaded_model_quantization=None, available_quantizations=None):

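Aside, not part of the diff: the reinstall step above follows Home Assistant's progress-flow pattern. A stripped-down sketch of that pattern (this is a fragment of the options flow class, not standalone code, and `_do_install` is a hypothetical stand-in for the executor job shown in the diff):

# Sketch only: start the background install once, keep showing progress while it
# runs, then hand control back to async_step_init when the task completes.
async def async_step_reinstall(self, user_input=None):
    if not self.reinstall_task:
        self.reinstall_task = self.hass.async_create_background_task(
            self._do_install(), name="llama_cpp_python_installation"
        )

    if not self.reinstall_task.done():
        return self.async_show_progress(
            progress_task=self.reinstall_task,
            step_id="reinstall",
            progress_action="install_local_wheels",
        )

    # async_step_init then inspects wheel_install_error / wheel_install_successful
    return self.async_show_progress_done(next_step_id="init")
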
@@ -191,6 +191,7 @@ CONF_LLAMACPP_THREAD_COUNT = "n_threads"
|
||||
DEFAULT_LLAMACPP_THREAD_COUNT = os.cpu_count()
|
||||
CONF_LLAMACPP_BATCH_THREAD_COUNT = "n_batch_threads"
|
||||
DEFAULT_LLAMACPP_BATCH_THREAD_COUNT = os.cpu_count()
|
||||
CONF_LLAMACPP_REINSTALL = "reinstall_llama_cpp"
|
||||
|
||||
DEFAULT_OPTIONS = types.MappingProxyType(
|
||||
{
|
||||
@@ -318,4 +319,4 @@ OPTIONS_OVERRIDES = {
|
||||
|
||||
# INTEGRATION_VERSION = "0.4.0"
|
||||
INTEGRATION_VERSION = "0.3.11"
|
||||
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16"
|
||||
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
|
||||
|
||||
@@ -187,13 +187,27 @@
          "text_generation_webui_admin_key": "Admin Key",
          "text_generation_webui_preset": "Generation Preset/Character Name",
          "text_generation_webui_chat_mode": "Chat Mode"
        }
      },
      "description": "Please provide the connection details to connect to the API that is hosting the model.",
      "title": "Configure Connection"
    },
    "reinstall": {
      "data": {
        "reinstall_llama_cpp": "Reinstall Llama.cpp",
        "installed_llama_cpp_version": "Version to (re)install"
      },
      "description": "__If you are experiencing issues with Llama.cpp__, you can force a reinstall of the package here. This will attempt to re-install or upgrade the llama-cpp-python package from GitHub *or* a local wheel file placed in the `/config/custom_components/llama_conversation/` directory.",
      "title": "Reinstall Llama.cpp"
    }
  },
  "error": {
    "failed_to_connect": "Failed to connect to the remote API: {exception}",
    "invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.",
    "unknown": "Unexpected error"
    "unknown": "Unexpected error",
    "pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
  },
  "progress": {
    "install_local_wheels": "Please wait while Llama.cpp is installed..."
  }
},
"selector": {

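Aside, not part of the diff: the "local wheel file" mentioned in the reinstall description above is matched by the helper added in utils.py below. A minimal sketch of that matching logic (the folder path and the example filename in the comment are assumptions, not real release assets):

# Sketch only: a wheel dropped into the integration folder is picked up when its
# name contains the running interpreter tag (e.g. cp313) and ends with the
# machine suffix (e.g. x86_64.whl).
import os
import platform
import sys

folder = "/config/custom_components/llama_conversation"  # assumed HA config path
runtime = f"cp{sys.version_info.major}{sys.version_info.minor}"
suffix = platform.machine()

wheels = [
    name for name in os.listdir(folder)
    if "llama_cpp_python" in name and runtime in name and name.endswith(f"{suffix}.whl")
]
print(wheels)  # e.g. ['llama_cpp_python-0.3.16+b6153-cp313-cp313-linux_x86_64.whl']
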
@@ -9,13 +9,14 @@ import multiprocessing
import voluptuous as vol
import webcolors
import json
from typing import Any, Dict, List, Sequence, cast
from typing import Any, Dict, List, Sequence, Tuple, cast
from webcolors import CSS3
from importlib.metadata import version

from homeassistant.core import HomeAssistant
from homeassistant.components import conversation
from homeassistant.helpers import config_validation as cv
from homeassistant.helpers import intent, llm
from homeassistant.helpers import intent, llm, aiohttp_client
from homeassistant.requirements import pip_kwargs
from homeassistant.util import color
from homeassistant.util.package import install_package, is_installed

@@ -191,18 +192,11 @@ def validate_llama_cpp_python_installation():
def get_llama_cpp_python_version():
    if not is_installed("llama-cpp-python"):
        return None
    return version("llama-cpp-python").split("+")[0]
    return version("llama-cpp-python")

def install_llama_cpp_python(config_dir: str):
def get_runtime_and_platform_suffix() -> Tuple[str, str]:
    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"

    installed_wrong_version = False
    if is_installed("llama-cpp-python"):
        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
            installed_wrong_version = True
        else:
            time.sleep(0.5) # I still don't know why this is required
            return True

    platform_suffix = platform.machine()
    # remap other names for architectures to the names we use
    if platform_suffix == "arm64":

@@ -210,42 +204,65 @@ def install_llama_cpp_python(config_dir: str):
    if platform_suffix == "i386" or platform_suffix == "amd64":
        platform_suffix = "x86_64"

    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"

    return runtime_version, platform_suffix

async def get_available_llama_cpp_versions(hass: HomeAssistant) -> List[Tuple[str, bool]]:
    github_index_url = "https://acon96.github.io/llama-cpp-python/whl/ha/llama-cpp-python/"
    session = aiohttp_client.async_get_clientsession(hass)
    try:
        async with session.get(github_index_url) as resp:
            if resp.status != 200:
                raise Exception(f"Failed to fetch available versions from GitHub (HTTP {resp.status})")
            text = await resp.text()
            # pull version numbers out of h2 tags
            versions = re.findall(r"<h2.*>(.+)</h2>", text)
            remote = sorted([(v, False) for v in versions], reverse=True)
    except Exception as ex:
        _LOGGER.warning(f"Error fetching available versions from GitHub: {repr(ex)}")
        remote = []

    runtime_version, platform_suffix = get_runtime_and_platform_suffix()
    folder = os.path.dirname(__file__)
    potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
    potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ]
    potential_wheels = [ wheel for wheel in potential_wheels if f"{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm" in wheel ]
    local = [ (wheel, True) for wheel in potential_wheels if runtime_version in wheel and "llama_cpp_python" in wheel]

    return remote + local

    _LOGGER.debug(f"{potential_wheels=}")
    if len(potential_wheels) > 0:
def install_llama_cpp_python(config_dir: str, force_reinstall: bool = False, specific_version: str | None = None) -> bool:

        latest_wheel = potential_wheels[0]

        _LOGGER.info("Installing llama-cpp-python from local wheel")
        _LOGGER.debug(f"Wheel location: {latest_wheel}")
        return install_package(os.path.join(folder, latest_wheel), **pip_kwargs(config_dir))
    installed_wrong_version = False
    if is_installed("llama-cpp-python") and not force_reinstall:
        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
            installed_wrong_version = True
        else:
            time.sleep(0.5) # I still don't know why this is required
            return True

    # scikit-build-core v0.9.7+ doesn't recognize these builds as musllinux, and just tags them as generic linux
    # github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
    github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
    if install_package(github_release_url, **pip_kwargs(config_dir)):
        _LOGGER.info("llama-cpp-python successfully installed from GitHub release")
    runtime_version, platform_suffix = get_runtime_and_platform_suffix()

    if not specific_version:
        specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION

    if ".whl" in specific_version:
        wheel_location = os.path.join(os.path.dirname(__file__), specific_version)
    else:
        wheel_location = f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"

    if install_package(wheel_location, **pip_kwargs(config_dir)):
        _LOGGER.info("llama-cpp-python successfully installed")
        return True

    # if it is just the wrong version installed then ignore the installation error
    if not installed_wrong_version:
        _LOGGER.error(
            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
            "Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
            "Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
        )
        return False
    else:
        _LOGGER.info(
            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
            f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
        )
        time.sleep(0.5) # I still don't know why this is required

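Aside, not part of the diff: how the pieces of install_llama_cpp_python fit together when a plain version string (rather than a local .whl name) is selected. A standalone sketch of the download URL it assembles (the arm64 remap target and the example version are assumptions):

# Sketch only: derive the interpreter/platform tags and build the wheel URL that
# install_package() is handed for a plain version string.
import platform
import sys

def runtime_and_platform_suffix():
    runtime = f"cp{sys.version_info.major}{sys.version_info.minor}"
    machine = platform.machine()
    if machine == "arm64":            # assumed remap, mirroring the diff
        machine = "aarch64"
    if machine in ("i386", "amd64"):
        machine = "x86_64"
    return runtime, machine

runtime, suffix = runtime_and_platform_suffix()
version = "0.3.16+b6153"  # example tag as listed on the wheel index
print(
    "https://github.com/acon96/llama-cpp-python/releases/download/"
    f"{version}/llama_cpp_python-{version}-{runtime}-{runtime}-linux_{suffix}.whl"
)
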
@@ -2,15 +2,15 @@
# Don't run this. This is executed inside of the home assistant container to build the wheel

apk update
apk add build-base python3-dev
apk add build-base python3-dev linux-headers

tag=$1

cd /tmp
git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch $1
git clone --recurse-submodules https://github.com/acon96/llama-cpp-python --branch $tag --depth 1 --shallow-submodules
cd llama-cpp-python
pip3 install build

tag="homellm"
sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
sed -i -E "s/^(__version__ *= *\")[^\"]+\"/\1${tag}\"/" llama_cpp/__init__.py

export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=ON"
python3 -m build --wheel

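Aside, not part of the diff: the new sed invocation above overwrites the whole version string with the tag passed in as $1, instead of appending "+homellm" to it. The same rewrite expressed in Python for illustration (the tag value is an example):

# Sketch only: replace whatever is inside __version__ = "..." with the build tag.
import re

line = '__version__ = "0.3.16"'
tag = "0.3.16+b6713"
print(re.sub(r'^(__version__ *= *")[^"]+"', rf'\g<1>{tag}"', line))
# -> __version__ = "0.3.16+b6713"
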
@@ -1,6 +1,6 @@
#!/bin/bash

VERSION_TO_BUILD="v0.3.16"
VERSION_TO_BUILD="0.3.16+b6713"

# make python 11 wheels
# docker run -it --rm \