Build llama.cpp wheels in forked repo + support reinstallation

Alex O'Connell
2025-10-08 21:19:06 -04:00
parent 286cf9a888
commit 2df454985d
10 changed files with 196 additions and 197 deletions

View File

@@ -1,126 +0,0 @@
name: Create Release
on:
workflow_dispatch:
inputs:
release_notes:
description: "Release Notes"
required: true
type: string
permissions:
contents: write
jobs:
build_wheels:
name: Build wheels for ${{ matrix.arch }} (HA ${{ matrix.home_assistant_image }})
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
# ARM64
- home_assistant_image: "aarch64-homeassistant:2025.4.1"
arch: "aarch64"
# 32bit ARM (Raspberry pis)
- home_assistant_image: "armhf-homeassistant:2025.4.1"
arch: "armhf"
# x64
- home_assistant_image: "amd64-homeassistant:2025.4.1"
arch: "x86_64"
# 32 bit for older processors
- home_assistant_image: "i386-homeassistant:2025.4.1"
arch: "i386"
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Verify version match
if: startsWith(github.event.ref, 'refs/tags/v')
run: |
tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
if [ "$tag_version" != "$component_version_manifest" ]; then
echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
exit 1
fi
if [ "$tag_version" != "$component_version_const" ]; then
echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
exit 1
fi
echo "All required versions match."
- name: Read llama-cpp-python version
run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
- name: Build artifact
uses: uraimo/run-on-arch-action@v2
id: build
with:
arch: none
distro: none
base_image: homeassistant/${{ matrix.home_assistant_image }}
# Create an artifacts directory
setup: |
mkdir -p "${PWD}/artifacts"
# Mount the artifacts directory as /artifacts in the container
dockerRunArgs: |
--volume "${PWD}/artifacts:/artifacts"
# The shell to run commands with in the container
shell: /bin/bash
# Produce a binary artifact and place it in the mounted volume
run: |
apk update
apk add build-base python3-dev cmake
pip3 install build
cd /tmp
git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
cd llama-cpp-python
tag="homellm"
sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"
python3 -m build --wheel
mv ./dist/*.whl /artifacts
ls -la /artifacts/
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
path: ./artifacts/*.whl
name: artifact_${{ matrix.arch }}
release:
name: Create Release
needs: [ build_wheels ]
runs-on: ubuntu-latest
if: startsWith(github.event.ref, 'refs/tags/v')
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
merge-multiple: true
- name: Create GitHub release
uses: softprops/action-gh-release@v2
with:
files: dist/*
body: ${{ inputs.release_notes }}
make_latest: true

View File

@@ -2,6 +2,7 @@
- [x] proper tool calling support
- [ ] fix old GGUFs to support tool calling
- [x] home assistant component text streaming support
- [x] move llama-cpp build to forked repo + add support for multi backend builds (no more -noavx; see the sketch after this list)
- [ ] new model based on qwen3 0.6b
- [ ] new model based on gemma3 270m
- [ ] support AI task API
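
For context on the multi-backend item above: the build now compiles every CPU variant into a single wheel and picks the best one at runtime, which is what removes the need for a separate -noavx build. A minimal sketch of that configuration, mirroring the CMAKE_ARGS from the removed workflow; the checkout path and the installed "build" package are assumptions, not part of this commit:

```python
# Minimal sketch: build a multi-backend llama-cpp-python wheel the way the
# removed workflow did. Assumes a llama-cpp-python checkout in ./llama-cpp-python
# and the "build" package installed.
import os
import subprocess

env = dict(os.environ)
env["CMAKE_ARGS"] = (
    "-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF "
    "-DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"  # all CPU variants, selected at runtime
)
subprocess.run(
    ["python3", "-m", "build", "--wheel"],
    cwd="llama-cpp-python",
    env=env,
    check=True,
)
```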

View File

@@ -44,7 +44,6 @@ from .const import (
BACKEND_TYPE_OLLAMA,
BACKEND_TYPE_LLAMA_EXISTING_OLD,
BACKEND_TYPE_LLAMA_HF_OLD,
EMBEDDED_LLAMA_CPP_PYTHON_VERSION
)
from .entity import LocalLLMClient, LocalLLMConfigEntry
from .backends.llamacpp import LlamaCppClient
@@ -141,7 +140,7 @@ async def async_migrate_entry(hass: HomeAssistant, config_entry: LocalLLMConfigE
if backend == BACKEND_TYPE_LLAMA_EXISTING_OLD or backend == BACKEND_TYPE_LLAMA_HF_OLD:
backend = BACKEND_TYPE_LLAMA_CPP
entry_data[CONF_BACKEND_TYPE] = BACKEND_TYPE_LLAMA_CPP
entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version) or EMBEDDED_LLAMA_CPP_PYTHON_VERSION
entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version)
else:
# ensure all remote backends have a path set
entry_options[CONF_GENERIC_OPENAI_PATH] = entry_options.get(CONF_GENERIC_OPENAI_PATH, "")

View File

@@ -39,6 +39,7 @@ from custom_components.llama_conversation.const import (
CONF_LLAMACPP_BATCH_SIZE,
CONF_LLAMACPP_THREAD_COUNT,
CONF_LLAMACPP_BATCH_THREAD_COUNT,
CONF_INSTALLED_LLAMACPP_VERSION,
DEFAULT_MAX_TOKENS,
DEFAULT_PROMPT,
DEFAULT_TEMPERATURE,
@@ -78,6 +79,7 @@ def snapshot_settings(options: dict[str, Any]) -> dict[str, Any]:
CONF_LLAMACPP_THREAD_COUNT: options.get(CONF_LLAMACPP_THREAD_COUNT, DEFAULT_LLAMACPP_THREAD_COUNT),
CONF_LLAMACPP_BATCH_THREAD_COUNT: options.get(CONF_LLAMACPP_BATCH_THREAD_COUNT, DEFAULT_LLAMACPP_BATCH_THREAD_COUNT),
CONF_LLAMACPP_ENABLE_FLASH_ATTENTION: options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION),
CONF_INSTALLED_LLAMACPP_VERSION: options.get(CONF_INSTALLED_LLAMACPP_VERSION, ""),
CONF_GBNF_GRAMMAR_FILE: options.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE),
CONF_PROMPT_CACHING_ENABLED: options.get(CONF_PROMPT_CACHING_ENABLED, DEFAULT_PROMPT_CACHING_ENABLED),
}
@@ -115,7 +117,7 @@ class LlamaCppClient(LocalLLMClient):
@staticmethod
def get_name(client_options: dict[str, Any]):
return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})"
return "Llama.cpp"
async def async_get_available_models(self) -> List[str]:
return [] # TODO: find available "huggingface_hub" models that have been downloaded
@@ -215,6 +217,11 @@ class LlamaCppClient(LocalLLMClient):
should_reload = True
elif loaded_options[CONF_LLAMACPP_ENABLE_FLASH_ATTENTION] != entity_options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION):
should_reload = True
elif loaded_options[CONF_INSTALLED_LLAMACPP_VERSION] != entity_options.get(CONF_INSTALLED_LLAMACPP_VERSION):
should_reload = True
_LOGGER.debug(f"Reloading llama.cpp...")
if self.llama_cpp_module:
self.llama_cpp_module = importlib.reload(self.llama_cpp_module)
model_path = entity_options.get(CONF_DOWNLOADED_MODEL_FILE, "")
model_name = entity_options.get(CONF_CHAT_MODEL, "")

View File

@@ -1,6 +1,7 @@
"""Config flow for Local LLM Conversation integration."""
from __future__ import annotations
from asyncio import Task
import logging
import os
from typing import Any
@@ -39,7 +40,8 @@ from homeassistant.helpers.selector import (
BooleanSelectorConfig,
)
from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, is_valid_hostname, MissingQuantizationException
from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, \
is_valid_hostname, get_available_llama_cpp_versions, MissingQuantizationException
from .const import (
CONF_CHAT_MODEL,
CONF_MAX_TOKENS,
@@ -87,6 +89,7 @@ from .const import (
CONF_LLAMACPP_BATCH_SIZE,
CONF_LLAMACPP_THREAD_COUNT,
CONF_LLAMACPP_BATCH_THREAD_COUNT,
CONF_LLAMACPP_REINSTALL,
DEFAULT_CHAT_MODEL,
DEFAULT_PORT,
DEFAULT_SSL,
@@ -258,14 +261,14 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
if backend == BACKEND_TYPE_LLAMA_CPP:
installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
_LOGGER.debug(f"installed version: {installed_version}")
if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
if installed_version and installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
self.client_config[CONF_INSTALLED_LLAMACPP_VERSION] = installed_version
return await self.async_step_finish()
else:
self.internal_step = "install_local_wheels"
_LOGGER.debug("Queuing install task...")
async def install_task():
await self.hass.async_add_executor_job(
return await self.hass.async_add_executor_job(
install_llama_cpp_python, self.hass.config.config_dir
)
@@ -376,7 +379,7 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
@classmethod
def async_supports_options_flow(cls, config_entry: ConfigEntry) -> bool:
return config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_LLAMA_CPP
return True
@staticmethod
def async_get_options_flow(
@@ -399,6 +402,9 @@ class OptionsFlow(BaseOptionsFlow):
"""Local LLM config flow options handler."""
model_config: dict[str, Any] | None = None
reinstall_task: Task[Any] | None = None
wheel_install_error: str | None = None
wheel_install_successful: bool = False
async def async_step_init(
self, user_input: dict[str, Any] | None = None
@@ -410,32 +416,112 @@ class OptionsFlow(BaseOptionsFlow):
backend_type = self.config_entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
client_config = dict(self.config_entry.options)
if self.wheel_install_error:
_LOGGER.warning("Failed to install wheel: %s", repr(self.wheel_install_error))
return self.async_abort(reason="pip_wheel_error")
if self.wheel_install_successful:
client_config[CONF_INSTALLED_LLAMACPP_VERSION] = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
_LOGGER.debug(f"new version is: {client_config[CONF_INSTALLED_LLAMACPP_VERSION]}")
return self.async_create_entry(data=client_config)
if backend_type == BACKEND_TYPE_LLAMA_CPP:
potential_versions = await get_available_llama_cpp_versions(self.hass)
schema = vol.Schema({
vol.Required(CONF_LLAMACPP_REINSTALL, default=False): BooleanSelector(BooleanSelectorConfig()),
vol.Required(CONF_INSTALLED_LLAMACPP_VERSION, default=client_config.get(CONF_INSTALLED_LLAMACPP_VERSION, "not installed")): SelectSelector(
SelectSelectorConfig(
options=[ SelectOptionDict(value=x[0], label=x[0] if not x[1] else f"{x[0]} (local)") for x in potential_versions ],
mode=SelectSelectorMode.DROPDOWN,
)
)
})
return self.async_show_form(
step_id="reinstall",
data_schema=schema,
)
else:
if user_input is not None:
client_config.update(user_input)
# validate remote connections
connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
if not connect_err:
return self.async_create_entry(data=client_config)
else:
errors["base"] = "failed_to_connect"
description_placeholders["exception"] = str(connect_err)
schema = remote_connection_schema(
backend_type=backend_type,
host=client_config.get(CONF_HOST),
port=client_config.get(CONF_PORT),
ssl=client_config.get(CONF_SSL),
selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
)
return self.async_show_form(
step_id="init",
data_schema=schema,
errors=errors,
description_placeholders=description_placeholders,
)
async def async_step_reinstall(self, user_input: dict[str, Any] | None = None) -> ConfigFlowResult:
client_config = dict(self.config_entry.options)
if user_input is not None:
client_config.update(user_input)
# validate remote connections
connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
if not connect_err:
if not user_input[CONF_LLAMACPP_REINSTALL]:
_LOGGER.debug("Reinstall was not selected, finishing")
return self.async_create_entry(data=client_config)
if not self.reinstall_task:
if not user_input:
return self.async_abort(reason="unknown")
desired_version = user_input.get(CONF_INSTALLED_LLAMACPP_VERSION)
async def install_task():
return await self.hass.async_add_executor_job(
install_llama_cpp_python, self.hass.config.config_dir, True, desired_version
)
self.reinstall_task = self.hass.async_create_background_task(
install_task(), name="llama_cpp_python_installation")
_LOGGER.debug("Queuing reinstall task...")
return self.async_show_progress(
progress_task=self.reinstall_task,
step_id="reinstall",
progress_action="install_local_wheels",
)
if not self.reinstall_task.done():
return self.async_show_progress(
progress_task=self.reinstall_task,
step_id="reinstall",
progress_action="install_local_wheels",
)
_LOGGER.debug("done... checking result")
install_exception = self.reinstall_task.exception()
if install_exception:
self.wheel_install_error = repr(install_exception)
_LOGGER.debug(f"Hit error: {self.wheel_install_error}")
return self.async_show_progress_done(next_step_id="init")
else:
wheel_install_result = self.reinstall_task.result()
if not wheel_install_result:
self.wheel_install_error = "Pip returned false"
_LOGGER.debug(f"Hit error: {self.wheel_install_error} ({wheel_install_result})")
return self.async_show_progress_done(next_step_id="init")
else:
errors["base"] = "failed_to_connect"
description_placeholders["exception"] = str(connect_err)
schema = remote_connection_schema(
backend_type=backend_type,
host=client_config.get(CONF_HOST),
port=client_config.get(CONF_PORT),
ssl=client_config.get(CONF_SSL),
selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
)
return self.async_show_form(
step_id="init",
data_schema=schema,
errors=errors,
description_placeholders=description_placeholders,
)
_LOGGER.debug(f"Finished install: {wheel_install_result}")
self.wheel_install_successful = True
return self.async_show_progress_done(next_step_id="init")
def STEP_LOCAL_MODEL_SELECTION_DATA_SCHEMA(model_file=None, chat_model=None, downloaded_model_quantization=None, available_quantizations=None):

View File

@@ -191,6 +191,7 @@ CONF_LLAMACPP_THREAD_COUNT = "n_threads"
DEFAULT_LLAMACPP_THREAD_COUNT = os.cpu_count()
CONF_LLAMACPP_BATCH_THREAD_COUNT = "n_batch_threads"
DEFAULT_LLAMACPP_BATCH_THREAD_COUNT = os.cpu_count()
CONF_LLAMACPP_REINSTALL = "reinstall_llama_cpp"
DEFAULT_OPTIONS = types.MappingProxyType(
{
@@ -318,4 +319,4 @@ OPTIONS_OVERRIDES = {
# INTEGRATION_VERSION = "0.4.0"
INTEGRATION_VERSION = "0.3.11"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"

View File

@@ -187,13 +187,27 @@
"text_generation_webui_admin_key": "Admin Key",
"text_generation_webui_preset": "Generation Preset/Character Name",
"text_generation_webui_chat_mode": "Chat Mode"
}
},
"description": "Please provide the connection details to connect to the API that is hosting the model.",
"title": "Configure Connection"
},
"reinstall": {
"data": {
"reinstall_llama_cpp": "Reinstall Llama.cpp",
"installed_llama_cpp_version": "Version to (re)install"
},
"description": "__If you are experiencing issues with Llama.cpp__, you can force a reinstall of the package here. This will attempt to re-install or upgrade the llama-cpp-python package from GitHub *or* a local wheel file placed in the `/config/custom_components/llama_conversation/` directory.",
"title": "Reinstall Llama.cpp"
}
},
"error": {
"failed_to_connect": "Failed to connect to the remote API: {exception}",
"invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.",
"unknown": "Unexpected error"
"unknown": "Unexpected error",
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
},
"progress": {
"install_local_wheels": "Please wait while Llama.cpp is installed..."
}
},
"selector": {

View File

@@ -9,13 +9,14 @@ import multiprocessing
import voluptuous as vol
import webcolors
import json
from typing import Any, Dict, List, Sequence, cast
from typing import Any, Dict, List, Sequence, Tuple, cast
from webcolors import CSS3
from importlib.metadata import version
from homeassistant.core import HomeAssistant
from homeassistant.components import conversation
from homeassistant.helpers import config_validation as cv
from homeassistant.helpers import intent, llm
from homeassistant.helpers import intent, llm, aiohttp_client
from homeassistant.requirements import pip_kwargs
from homeassistant.util import color
from homeassistant.util.package import install_package, is_installed
@@ -191,18 +192,11 @@ def validate_llama_cpp_python_installation():
def get_llama_cpp_python_version():
if not is_installed("llama-cpp-python"):
return None
return version("llama-cpp-python").split("+")[0]
return version("llama-cpp-python")
def install_llama_cpp_python(config_dir: str):
def get_runtime_and_platform_suffix() -> Tuple[str, str]:
runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
installed_wrong_version = False
if is_installed("llama-cpp-python"):
if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
installed_wrong_version = True
else:
time.sleep(0.5) # I still don't know why this is required
return True
platform_suffix = platform.machine()
# remap other names for architectures to the names we use
if platform_suffix == "arm64":
@@ -210,42 +204,65 @@ def install_llama_cpp_python(config_dir: str):
if platform_suffix == "i386" or platform_suffix == "amd64":
platform_suffix = "x86_64"
runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
return runtime_version, platform_suffix
async def get_available_llama_cpp_versions(hass: HomeAssistant) -> List[Tuple[str, bool]]:
github_index_url = "https://acon96.github.io/llama-cpp-python/whl/ha/llama-cpp-python/"
session = aiohttp_client.async_get_clientsession(hass)
try:
async with session.get(github_index_url) as resp:
if resp.status != 200:
raise Exception(f"Failed to fetch available versions from GitHub (HTTP {resp.status})")
text = await resp.text()
# pull version numbers out of h2 tags
versions = re.findall(r"<h2.*>(.+)</h2>", text)
remote = sorted([(v, False) for v in versions], reverse=True)
except Exception as ex:
_LOGGER.warning(f"Error fetching available versions from GitHub: {repr(ex)}")
remote = []
runtime_version, platform_suffix = get_runtime_and_platform_suffix()
folder = os.path.dirname(__file__)
potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ]
potential_wheels = [ wheel for wheel in potential_wheels if f"{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm" in wheel ]
local = [ (wheel, True) for wheel in potential_wheels if runtime_version in wheel and "llama_cpp_python" in wheel]
return remote + local
_LOGGER.debug(f"{potential_wheels=}")
if len(potential_wheels) > 0:
def install_llama_cpp_python(config_dir: str, force_reinstall: bool = False, specific_version: str | None = None) -> bool:
latest_wheel = potential_wheels[0]
_LOGGER.info("Installing llama-cpp-python from local wheel")
_LOGGER.debug(f"Wheel location: {latest_wheel}")
return install_package(os.path.join(folder, latest_wheel), **pip_kwargs(config_dir))
installed_wrong_version = False
if is_installed("llama-cpp-python") and not force_reinstall:
if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
installed_wrong_version = True
else:
time.sleep(0.5) # I still don't know why this is required
return True
# scikit-build-core v0.9.7+ doesn't recognize these builds as musllinux, and just tags them as generic linux
# github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
if install_package(github_release_url, **pip_kwargs(config_dir)):
_LOGGER.info("llama-cpp-python successfully installed from GitHub release")
runtime_version, platform_suffix = get_runtime_and_platform_suffix()
if not specific_version:
specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION
if ".whl" in specific_version:
wheel_location = os.path.join(os.path.dirname(__file__), specific_version)
else:
wheel_location = f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
if install_package(wheel_location, **pip_kwargs(config_dir)):
_LOGGER.info("llama-cpp-python successfully installed")
return True
# if it is just the wrong version installed then ignore the installation error
if not installed_wrong_version:
_LOGGER.error(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
"Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
"Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
)
return False
else:
_LOGGER.info(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
)
time.sleep(0.5) # I still don't know why this is required
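
To make the download path above concrete: because the wheels are tagged as generic linux rather than musllinux (see the scikit-build-core note above), install_llama_cpp_python assembles a release URL of the following shape. A worked example, where the Python version (3.13) and architecture (aarch64) are illustrative assumptions and the version string comes from const.py:

```python
# Worked example of the release URL assembled by install_llama_cpp_python() above.
# runtime_version and platform_suffix would normally come from
# get_runtime_and_platform_suffix(); the values here are assumptions.
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"

specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION
runtime_version = "cp313"
platform_suffix = "aarch64"

url = (
    "https://github.com/acon96/llama-cpp-python/releases/download/"
    f"{specific_version}/llama_cpp_python-{specific_version}-"
    f"{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
)
print(url)
# https://github.com/acon96/llama-cpp-python/releases/download/0.3.16+b6153/llama_cpp_python-0.3.16+b6153-cp313-cp313-linux_aarch64.whl
```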

View File

@@ -2,15 +2,15 @@
# Don't run this directly. It is executed inside the Home Assistant container to build the wheel
apk update
apk add build-base python3-dev
apk add build-base python3-dev linux-headers
tag=$1
cd /tmp
git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch $1
git clone --recurse-submodules https://github.com/acon96/llama-cpp-python --branch $tag --depth 1 --shallow-submodules
cd llama-cpp-python
pip3 install build
tag="homellm"
sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
sed -i -E "s/^(__version__ *= *\")[^\"]+\"/\1${tag}\"/" llama_cpp/__init__.py
export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=ON"
python3 -m build --wheel

View File

@@ -1,6 +1,6 @@
#!/bin/bash
VERSION_TO_BUILD="v0.3.16"
VERSION_TO_BUILD="0.3.16+b6713"
# make python 3.11 wheels
# docker run -it --rm \