Build llama.cpp wheels in forked repo + support reinstallation

Alex O'Connell
2025-10-08 21:19:06 -04:00
parent 286cf9a888
commit 2df454985d
10 changed files with 196 additions and 197 deletions

View File

@@ -1,126 +0,0 @@
name: Create Release
on:
  workflow_dispatch:
    inputs:
      release_notes:
        description: "Release Notes"
        required: true
        type: string
permissions:
  contents: write
jobs:
  build_wheels:
    name: Build wheels for ${{ matrix.arch }} (HA ${{ matrix.home_assistant_image }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # ARM64
          - home_assistant_image: "aarch64-homeassistant:2025.4.1"
            arch: "aarch64"
          # 32bit ARM (Raspberry pis)
          - home_assistant_image: "armhf-homeassistant:2025.4.1"
            arch: "armhf"
          # x64
          - home_assistant_image: "amd64-homeassistant:2025.4.1"
            arch: "x86_64"
          # 32 bit for older processors
          - home_assistant_image: "i386-homeassistant:2025.4.1"
            arch: "i386"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Verify version match
        if: startsWith(github.event.ref, 'refs/tags/v')
        run: |
          tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
          component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
          component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
          if [ "$tag_version" != "$component_version_manifest" ]; then
            echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
            exit 1
          fi
          if [ "$tag_version" != "$component_version_const" ]; then
            echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
            exit 1
          fi
          echo "All required versions match."
      - name: Read llama-cpp-python version
        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
      - name: Build artifact
        uses: uraimo/run-on-arch-action@v2
        id: build
        with:
          arch: none
          distro: none
          base_image: homeassistant/${{ matrix.home_assistant_image }}
          # Create an artifacts directory
          setup: |
            mkdir -p "${PWD}/artifacts"
          # Mount the artifacts directory as /artifacts in the container
          dockerRunArgs: |
            --volume "${PWD}/artifacts:/artifacts"
          # The shell to run commands with in the container
          shell: /bin/bash
          # Produce a binary artifact and place it in the mounted volume
          run: |
            apk update
            apk add build-base python3-dev cmake
            pip3 install build
            cd /tmp
            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
            cd llama-cpp-python
            tag="homellm"
            sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
            export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"
            python3 -m build --wheel
            mv ./dist/*.whl /artifacts
            ls -la /artifacts/
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: ./artifacts/*.whl
          name: artifact_${{ matrix.arch }}
  release:
    name: Create Release
    needs: [ build_wheels ]
    runs-on: ubuntu-latest
    if: startsWith(github.event.ref, 'refs/tags/v')
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true
      - name: Create GitHub release
        uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          body: ${{ inputs.release_notes }}
          make_latest: true
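
For reference, the `sed` step in the removed workflow appends a PEP 440 local version tag to llama-cpp-python's `__version__` before the wheel is built. A rough Python equivalent of that substitution (illustrative values only; the actual build now lives in the forked llama-cpp-python repository's own CI):

```python
import re

# Roughly what the sed command above does: turn '0.3.16' into '0.3.16+homellm'.
line = '__version__ = "0.3.16"'
tag = "homellm"
patched = re.sub(r'^(__version__ *= *"[0-9]+\.[0-9]+\.[0-9]+)"', rf'\1+{tag}"', line)
print(patched)  # __version__ = "0.3.16+homellm"
```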

View File

@@ -2,6 +2,7 @@
 - [x] proper tool calling support
 - [ ] fix old GGUFs to support tool calling
 - [x] home assistant component text streaming support
+- [x] move llama-cpp build to forked repo + add support for multi backend builds (no more -noavx)
 - [ ] new model based on qwen3 0.6b
 - [ ] new model based on gemma3 270m
 - [ ] support AI task API

View File

@@ -44,7 +44,6 @@ from .const import (
     BACKEND_TYPE_OLLAMA,
     BACKEND_TYPE_LLAMA_EXISTING_OLD,
     BACKEND_TYPE_LLAMA_HF_OLD,
-    EMBEDDED_LLAMA_CPP_PYTHON_VERSION
 )
 from .entity import LocalLLMClient, LocalLLMConfigEntry
 from .backends.llamacpp import LlamaCppClient
@@ -141,7 +140,7 @@ async def async_migrate_entry(hass: HomeAssistant, config_entry: LocalLLMConfigE
     if backend == BACKEND_TYPE_LLAMA_EXISTING_OLD or backend == BACKEND_TYPE_LLAMA_HF_OLD:
         backend = BACKEND_TYPE_LLAMA_CPP
         entry_data[CONF_BACKEND_TYPE] = BACKEND_TYPE_LLAMA_CPP
-        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version) or EMBEDDED_LLAMA_CPP_PYTHON_VERSION
+        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version)
     else:
         # ensure all remote backends have a path set
         entry_options[CONF_GENERIC_OPENAI_PATH] = entry_options.get(CONF_GENERIC_OPENAI_PATH, "")
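
The migration now records whichever llama-cpp-python version is actually installed instead of falling back to the embedded constant. A minimal sketch of that lookup (the helper name here is hypothetical; the component's version lives in `utils.get_llama_cpp_python_version`):

```python
from importlib.metadata import PackageNotFoundError, version

# Returns the full version string, including any "+<tag>" local suffix,
# or None when llama-cpp-python is not installed.
def installed_llama_cpp_version() -> str | None:
    try:
        return version("llama-cpp-python")
    except PackageNotFoundError:
        return None
```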

View File

@@ -39,6 +39,7 @@ from custom_components.llama_conversation.const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_INSTALLED_LLAMACPP_VERSION,
     DEFAULT_MAX_TOKENS,
     DEFAULT_PROMPT,
     DEFAULT_TEMPERATURE,
@@ -78,6 +79,7 @@ def snapshot_settings(options: dict[str, Any]) -> dict[str, Any]:
         CONF_LLAMACPP_THREAD_COUNT: options.get(CONF_LLAMACPP_THREAD_COUNT, DEFAULT_LLAMACPP_THREAD_COUNT),
         CONF_LLAMACPP_BATCH_THREAD_COUNT: options.get(CONF_LLAMACPP_BATCH_THREAD_COUNT, DEFAULT_LLAMACPP_BATCH_THREAD_COUNT),
         CONF_LLAMACPP_ENABLE_FLASH_ATTENTION: options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION),
+        CONF_INSTALLED_LLAMACPP_VERSION: options.get(CONF_INSTALLED_LLAMACPP_VERSION, ""),
         CONF_GBNF_GRAMMAR_FILE: options.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE),
         CONF_PROMPT_CACHING_ENABLED: options.get(CONF_PROMPT_CACHING_ENABLED, DEFAULT_PROMPT_CACHING_ENABLED),
     }
@@ -115,7 +117,7 @@ class LlamaCppClient(LocalLLMClient):
     @staticmethod
     def get_name(client_options: dict[str, Any]):
-        return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})"
+        return "Llama.cpp"
 
     async def async_get_available_models(self) -> List[str]:
         return [] # TODO: find available "huggingface_hub" models that have been downloaded
@@ -215,6 +217,11 @@ class LlamaCppClient(LocalLLMClient):
             should_reload = True
         elif loaded_options[CONF_LLAMACPP_ENABLE_FLASH_ATTENTION] != entity_options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION):
             should_reload = True
+        elif loaded_options[CONF_INSTALLED_LLAMACPP_VERSION] != entity_options.get(CONF_INSTALLED_LLAMACPP_VERSION):
+            should_reload = True
+            _LOGGER.debug(f"Reloading llama.cpp...")
+            if self.llama_cpp_module:
+                self.llama_cpp_module = importlib.reload(self.llama_cpp_module)
 
         model_path = entity_options.get(CONF_DOWNLOADED_MODEL_FILE, "")
         model_name = entity_options.get(CONF_CHAT_MODEL, "")
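
The new `elif` branch reloads the already-imported `llama_cpp` module when the recorded version changes, because upgrading the package with pip does not affect module objects that are already loaded. A minimal standalone sketch of that pattern (assumes llama-cpp-python is installed):

```python
import importlib

import llama_cpp  # assumes llama-cpp-python is installed

# After a new wheel is installed into the running interpreter, the module
# imported earlier still points at the old code, so reload it explicitly.
llama_cpp = importlib.reload(llama_cpp)
print(llama_cpp.__version__)  # reports the freshly installed version
```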

View File

@@ -1,6 +1,7 @@
"""Config flow for Local LLM Conversation integration.""" """Config flow for Local LLM Conversation integration."""
from __future__ import annotations from __future__ import annotations
from asyncio import Task
import logging import logging
import os import os
from typing import Any from typing import Any
@@ -39,7 +40,8 @@ from homeassistant.helpers.selector import (
     BooleanSelectorConfig,
 )
 
-from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, is_valid_hostname, MissingQuantizationException
+from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, \
+    is_valid_hostname, get_available_llama_cpp_versions, MissingQuantizationException
 from .const import (
     CONF_CHAT_MODEL,
     CONF_MAX_TOKENS,
@@ -87,6 +89,7 @@ from .const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_LLAMACPP_REINSTALL,
     DEFAULT_CHAT_MODEL,
     DEFAULT_PORT,
     DEFAULT_SSL,
@@ -258,14 +261,14 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
         if backend == BACKEND_TYPE_LLAMA_CPP:
             installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
             _LOGGER.debug(f"installed version: {installed_version}")
-            if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            if installed_version and installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
                 self.client_config[CONF_INSTALLED_LLAMACPP_VERSION] = installed_version
                 return await self.async_step_finish()
             else:
                 self.internal_step = "install_local_wheels"
                 _LOGGER.debug("Queuing install task...")
 
                 async def install_task():
-                    await self.hass.async_add_executor_job(
+                    return await self.hass.async_add_executor_job(
                         install_llama_cpp_python, self.hass.config.config_dir
                     )
@@ -376,7 +379,7 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
     @classmethod
     def async_supports_options_flow(cls, config_entry: ConfigEntry) -> bool:
-        return config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_LLAMA_CPP
+        return True
 
     @staticmethod
     def async_get_options_flow(
@@ -399,6 +402,9 @@ class OptionsFlow(BaseOptionsFlow):
"""Local LLM config flow options handler.""" """Local LLM config flow options handler."""
model_config: dict[str, Any] | None = None model_config: dict[str, Any] | None = None
reinstall_task: Task[Any] | None = None
wheel_install_error: str | None = None
wheel_install_successful: bool = False
async def async_step_init( async def async_step_init(
self, user_input: dict[str, Any] | None = None self, user_input: dict[str, Any] | None = None
@@ -410,32 +416,112 @@ class OptionsFlow(BaseOptionsFlow):
         backend_type = self.config_entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
         client_config = dict(self.config_entry.options)
-        if user_input is not None:
-            client_config.update(user_input)
-
-            # validate remote connections
-            connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
-            if not connect_err:
-                return self.async_create_entry(data=client_config)
-            else:
-                errors["base"] = "failed_to_connect"
-                description_placeholders["exception"] = str(connect_err)
-
-        schema = remote_connection_schema(
-            backend_type=backend_type,
-            host=client_config.get(CONF_HOST),
-            port=client_config.get(CONF_PORT),
-            ssl=client_config.get(CONF_SSL),
-            selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
-        )
-
-        return self.async_show_form(
-            step_id="init",
-            data_schema=schema,
-            errors=errors,
-            description_placeholders=description_placeholders,
-        )
+        if self.wheel_install_error:
+            _LOGGER.warning("Failed to install wheel: %s", repr(self.wheel_install_error))
+            return self.async_abort(reason="pip_wheel_error")
+
+        if self.wheel_install_successful:
+            client_config[CONF_INSTALLED_LLAMACPP_VERSION] = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
+            _LOGGER.debug(f"new version is: {client_config[CONF_INSTALLED_LLAMACPP_VERSION]}")
+            return self.async_create_entry(data=client_config)
+
+        if backend_type == BACKEND_TYPE_LLAMA_CPP:
+            potential_versions = await get_available_llama_cpp_versions(self.hass)
+
+            schema = vol.Schema({
+                vol.Required(CONF_LLAMACPP_REINSTALL, default=False): BooleanSelector(BooleanSelectorConfig()),
+                vol.Required(CONF_INSTALLED_LLAMACPP_VERSION, default=client_config.get(CONF_INSTALLED_LLAMACPP_VERSION, "not installed")): SelectSelector(
+                    SelectSelectorConfig(
+                        options=[ SelectOptionDict(value=x[0], label=x[0] if not x[1] else f"{x[0]} (local)") for x in potential_versions ],
+                        mode=SelectSelectorMode.DROPDOWN,
+                    )
+                )
+            })
+
+            return self.async_show_form(
+                step_id="reinstall",
+                data_schema=schema,
+            )
+        else:
+            if user_input is not None:
+                client_config.update(user_input)
+
+                # validate remote connections
+                connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
+                if not connect_err:
+                    return self.async_create_entry(data=client_config)
+                else:
+                    errors["base"] = "failed_to_connect"
+                    description_placeholders["exception"] = str(connect_err)
+
+            schema = remote_connection_schema(
+                backend_type=backend_type,
+                host=client_config.get(CONF_HOST),
+                port=client_config.get(CONF_PORT),
+                ssl=client_config.get(CONF_SSL),
+                selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
+            )
+
+            return self.async_show_form(
+                step_id="init",
+                data_schema=schema,
+                errors=errors,
+                description_placeholders=description_placeholders,
+            )
+
+    async def async_step_reinstall(self, user_input: dict[str, Any] | None = None) -> ConfigFlowResult:
+        client_config = dict(self.config_entry.options)
+
+        if user_input is not None:
+            if not user_input[CONF_LLAMACPP_REINSTALL]:
+                _LOGGER.debug("Reinstall was not selected, finishing")
+                return self.async_create_entry(data=client_config)
+
+        if not self.reinstall_task:
+            if not user_input:
+                return self.async_abort(reason="unknown")
+
+            desired_version = user_input.get(CONF_INSTALLED_LLAMACPP_VERSION)
+            async def install_task():
+                return await self.hass.async_add_executor_job(
+                    install_llama_cpp_python, self.hass.config.config_dir, True, desired_version
+                )
+            self.reinstall_task = self.hass.async_create_background_task(
+                install_task(), name="llama_cpp_python_installation")
+
+            _LOGGER.debug("Queuing reinstall task...")
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        if not self.reinstall_task.done():
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        _LOGGER.debug("done... checking result")
+        install_exception = self.reinstall_task.exception()
+        if install_exception:
+            self.wheel_install_error = repr(install_exception)
+            _LOGGER.debug(f"Hit error: {self.wheel_install_error}")
+            return self.async_show_progress_done(next_step_id="init")
+        else:
+            wheel_install_result = self.reinstall_task.result()
+            if not wheel_install_result:
+                self.wheel_install_error = "Pip returned false"
+                _LOGGER.debug(f"Hit error: {self.wheel_install_error} ({wheel_install_result})")
+                return self.async_show_progress_done(next_step_id="init")
+            else:
+                _LOGGER.debug(f"Finished install: {wheel_install_result}")
+                self.wheel_install_successful = True
+                return self.async_show_progress_done(next_step_id="init")
 
 def STEP_LOCAL_MODEL_SELECTION_DATA_SCHEMA(model_file=None, chat_model=None, downloaded_model_quantization=None, available_quantizations=None):
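
The reinstall form above is driven by the `(version, is_local)` tuples returned by `get_available_llama_cpp_versions`: remote index versions are listed plainly and local wheel files get a `(local)` suffix. A small sketch of how those dropdown labels come out (the tuple values below are made up for illustration):

```python
# Hypothetical return value: remote index versions first, then local wheels.
potential_versions = [
    ("0.3.16+b6153", False),
    ("llama_cpp_python-0.3.16+b6153-cp313-cp313-linux_x86_64.whl", True),
]

labels = [v if not is_local else f"{v} (local)" for v, is_local in potential_versions]
print(labels)
# ['0.3.16+b6153', 'llama_cpp_python-0.3.16+b6153-cp313-cp313-linux_x86_64.whl (local)']
```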

View File

@@ -191,6 +191,7 @@ CONF_LLAMACPP_THREAD_COUNT = "n_threads"
 DEFAULT_LLAMACPP_THREAD_COUNT = os.cpu_count()
 CONF_LLAMACPP_BATCH_THREAD_COUNT = "n_batch_threads"
 DEFAULT_LLAMACPP_BATCH_THREAD_COUNT = os.cpu_count()
+CONF_LLAMACPP_REINSTALL = "reinstall_llama_cpp"
 
 DEFAULT_OPTIONS = types.MappingProxyType(
     {
@@ -318,4 +319,4 @@ OPTIONS_OVERRIDES = {
 # INTEGRATION_VERSION = "0.4.0"
 INTEGRATION_VERSION = "0.3.11"
-EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16"
+EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
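
Note that the embedded version string now carries a build tag, and the installer in utils.py compares the installed version against it with a plain string comparison, so an existing plain `0.3.16` install no longer matches and is flagged as the wrong version. A trivial sketch of that check:

```python
# Same comparison style as install_llama_cpp_python in utils.py.
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"

installed = "0.3.16"
print(installed != EMBEDDED_LLAMA_CPP_PYTHON_VERSION)  # True -> treated as wrong version
```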

View File

@@ -187,13 +187,27 @@
"text_generation_webui_admin_key": "Admin Key", "text_generation_webui_admin_key": "Admin Key",
"text_generation_webui_preset": "Generation Preset/Character Name", "text_generation_webui_preset": "Generation Preset/Character Name",
"text_generation_webui_chat_mode": "Chat Mode" "text_generation_webui_chat_mode": "Chat Mode"
} },
"description": "Please provide the connection details to connect to the API that is hosting the model.",
"title": "Configure Connection"
},
"reinstall": {
"data": {
"reinstall_llama_cpp": "Reinstall Llama.cpp",
"installed_llama_cpp_version": "Version to (re)install"
},
"description": "__If you are experiencing issues with Llama.cpp__, you can force a reinstall of the package here. This will attempt to re-install or upgrade the llama-cpp-python package from GitHub *or* a local wheel file placed in the `/config/custom_components/llama_conversation/` directory.",
"title": "Reinstall Llama.cpp"
} }
}, },
"error": { "error": {
"failed_to_connect": "Failed to connect to the remote API: {exception}", "failed_to_connect": "Failed to connect to the remote API: {exception}",
"invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.", "invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.",
"unknown": "Unexpected error" "unknown": "Unexpected error",
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
},
"progress": {
"install_local_wheels": "Please wait while Llama.cpp is installed..."
} }
}, },
"selector": { "selector": {

View File

@@ -9,13 +9,14 @@ import multiprocessing
 import voluptuous as vol
 import webcolors
 import json
-from typing import Any, Dict, List, Sequence, cast
+from typing import Any, Dict, List, Sequence, Tuple, cast
 from webcolors import CSS3
 from importlib.metadata import version
 
+from homeassistant.core import HomeAssistant
 from homeassistant.components import conversation
 from homeassistant.helpers import config_validation as cv
-from homeassistant.helpers import intent, llm
+from homeassistant.helpers import intent, llm, aiohttp_client
 from homeassistant.requirements import pip_kwargs
 from homeassistant.util import color
 from homeassistant.util.package import install_package, is_installed
@@ -191,18 +192,11 @@ def validate_llama_cpp_python_installation():
 def get_llama_cpp_python_version():
     if not is_installed("llama-cpp-python"):
         return None
-    return version("llama-cpp-python").split("+")[0]
+    return version("llama-cpp-python")
 
-def install_llama_cpp_python(config_dir: str):
-    installed_wrong_version = False
-    if is_installed("llama-cpp-python"):
-        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
-            installed_wrong_version = True
-        else:
-            time.sleep(0.5) # I still don't know why this is required
-            return True
-
+def get_runtime_and_platform_suffix() -> Tuple[str, str]:
+    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
     platform_suffix = platform.machine()
     # remap other names for architectures to the names we use
     if platform_suffix == "arm64":
@@ -210,42 +204,65 @@ def install_llama_cpp_python(config_dir: str):
if platform_suffix == "i386" or platform_suffix == "amd64": if platform_suffix == "i386" or platform_suffix == "amd64":
platform_suffix = "x86_64" platform_suffix = "x86_64"
runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}" return runtime_version, platform_suffix
async def get_available_llama_cpp_versions(hass: HomeAssistant) -> List[Tuple[str, bool]]:
github_index_url = "https://acon96.github.io/llama-cpp-python/whl/ha/llama-cpp-python/"
session = aiohttp_client.async_get_clientsession(hass)
try:
async with session.get(github_index_url) as resp:
if resp.status != 200:
raise Exception(f"Failed to fetch available versions from GitHub (HTTP {resp.status})")
text = await resp.text()
# pull version numbers out of h2 tags
versions = re.findall(r"<h2.*>(.+)</h2>", text)
remote = sorted([(v, False) for v in versions], reverse=True)
except Exception as ex:
_LOGGER.warning(f"Error fetching available versions from GitHub: {repr(ex)}")
remote = []
runtime_version, platform_suffix = get_runtime_and_platform_suffix()
folder = os.path.dirname(__file__) folder = os.path.dirname(__file__)
potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True) potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ] local = [ (wheel, True) for wheel in potential_wheels if runtime_version in wheel and "llama_cpp_python" in wheel]
potential_wheels = [ wheel for wheel in potential_wheels if f"{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm" in wheel ]
return remote + local
_LOGGER.debug(f"{potential_wheels=}") def install_llama_cpp_python(config_dir: str, force_reinstall: bool = False, specific_version: str | None = None) -> bool:
if len(potential_wheels) > 0:
latest_wheel = potential_wheels[0] installed_wrong_version = False
if is_installed("llama-cpp-python") and not force_reinstall:
_LOGGER.info("Installing llama-cpp-python from local wheel") if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
_LOGGER.debug(f"Wheel location: {latest_wheel}") installed_wrong_version = True
return install_package(os.path.join(folder, latest_wheel), **pip_kwargs(config_dir)) else:
time.sleep(0.5) # I still don't know why this is required
return True
# scikit-build-core v0.9.7+ doesn't recognize these builds as musllinux, and just tags them as generic linux runtime_version, platform_suffix = get_runtime_and_platform_suffix()
# github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl" if not specific_version:
if install_package(github_release_url, **pip_kwargs(config_dir)): specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION
_LOGGER.info("llama-cpp-python successfully installed from GitHub release")
if ".whl" in specific_version:
wheel_location = os.path.join(os.path.dirname(__file__), specific_version)
else:
wheel_location = f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
if install_package(wheel_location, **pip_kwargs(config_dir)):
_LOGGER.info("llama-cpp-python successfully installed")
return True return True
# if it is just the wrong version installed then ignore the installation error # if it is just the wrong version installed then ignore the installation error
if not installed_wrong_version: if not installed_wrong_version:
_LOGGER.error( _LOGGER.error(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \ "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
"Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \ "Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
"Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page." "Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
) )
return False return False
else: else:
_LOGGER.info( _LOGGER.info(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \ "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!" f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
) )
time.sleep(0.5) # I still don't know why this is required time.sleep(0.5) # I still don't know why this is required
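
For a concrete sense of what `install_llama_cpp_python` now downloads, here is the wheel URL the f-string above produces for one assumed combination (Python 3.13 on x86_64 with the embedded default version); the real code derives these values at runtime via `get_runtime_and_platform_suffix()`:

```python
# Assumed example values; actual values depend on the host machine.
specific_version = "0.3.16+b6153"
runtime_version = "cp313"
platform_suffix = "x86_64"

wheel_location = (
    "https://github.com/acon96/llama-cpp-python/releases/download/"
    f"{specific_version}/llama_cpp_python-{specific_version}-"
    f"{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
)
print(wheel_location)
```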

View File

@@ -2,15 +2,15 @@
 # Don't run this. This is executed inside of the home assistant container to build the wheel
 apk update
-apk add build-base python3-dev
+apk add build-base python3-dev linux-headers
+
+tag=$1
 
 cd /tmp
-git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch $1
+git clone --recurse-submodules https://github.com/acon96/llama-cpp-python --branch $tag --depth 1 --shallow-submodules
 cd llama-cpp-python
 pip3 install build
 
-tag="homellm"
-sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
+sed -i -E "s/^(__version__ *= *\")[^\"]+\"/\1${tag}\"/" llama_cpp/__init__.py
 
 export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=ON"
 python3 -m build --wheel

View File

@@ -1,6 +1,6 @@
 #!/bin/bash
-VERSION_TO_BUILD="v0.3.16"
+VERSION_TO_BUILD="0.3.16+b6713"
 
 # make python 11 wheels
 # docker run -it --rm \