Mirror of https://github.com/acon96/home-llm.git (synced 2026-01-09 21:58:00 -05:00)
Build llama.cpp wheels in forked repo + support reinstallation

.github/workflows/create-release.yml (vendored): 126 lines changed
@@ -1,126 +0,0 @@
-name: Create Release
-
-on:
-  workflow_dispatch:
-    inputs:
-      release_notes:
-        description: "Release Notes"
-        required: true
-        type: string
-
-permissions:
-  contents: write
-
-jobs:
-  build_wheels:
-    name: Build wheels for ${{ matrix.arch }} (HA ${{ matrix.home_assistant_image }})
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          # ARM64
-          - home_assistant_image: "aarch64-homeassistant:2025.4.1"
-            arch: "aarch64"
-
-          # 32bit ARM (Raspberry pis)
-          - home_assistant_image: "armhf-homeassistant:2025.4.1"
-            arch: "armhf"
-
-          # x64
-          - home_assistant_image: "amd64-homeassistant:2025.4.1"
-            arch: "x86_64"
-
-          # 32 bit for older processors
-          - home_assistant_image: "i386-homeassistant:2025.4.1"
-            arch: "i386"
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Verify version match
-        if: startsWith(github.event.ref, 'refs/tags/v')
-        run: |
-          tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
-          component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
-          component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
-
-          if [ "$tag_version" != "$component_version_manifest" ]; then
-            echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
-            exit 1
-          fi
-
-          if [ "$tag_version" != "$component_version_const" ]; then
-            echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
-            exit 1
-          fi
-
-          echo "All required versions match."
-
-      - name: Read llama-cpp-python version
-        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
-
-      - name: Build artifact
-        uses: uraimo/run-on-arch-action@v2
-        id: build
-        with:
-          arch: none
-          distro: none
-          base_image: homeassistant/${{ matrix.home_assistant_image }}
-
-          # Create an artifacts directory
-          setup: |
-            mkdir -p "${PWD}/artifacts"
-
-          # Mount the artifacts directory as /artifacts in the container
-          dockerRunArgs: |
-            --volume "${PWD}/artifacts:/artifacts"
-
-          # The shell to run commands with in the container
-          shell: /bin/bash
-
-          # Produce a binary artifact and place it in the mounted volume
-          run: |
-            apk update
-            apk add build-base python3-dev cmake
-            pip3 install build
-
-            cd /tmp
-            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
-            cd llama-cpp-python
-
-            tag="homellm"
-            sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
-
-            export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"
-            python3 -m build --wheel
-
-            mv ./dist/*.whl /artifacts
-            ls -la /artifacts/
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          path: ./artifacts/*.whl
-          name: artifact_${{ matrix.arch }}
-
-  release:
-    name: Create Release
-    needs: [ build_wheels ]
-    runs-on: ubuntu-latest
-    if: startsWith(github.event.ref, 'refs/tags/v')
-
-    steps:
-      - name: Download artifacts
-        uses: actions/download-artifact@v4
-        with:
-          path: dist
-          merge-multiple: true
-
-      - name: Create GitHub release
-        uses: softprops/action-gh-release@v2
-        with:
-          files: dist/*
-          body: ${{ inputs.release_notes }}
-          make_latest: true

TODO.md: 1 line changed
@@ -2,6 +2,7 @@
 - [x] proper tool calling support
 - [ ] fix old GGUFs to support tool calling
 - [x] home assistant component text streaming support
+- [x] move llama-cpp build to forked repo + add support for multi backend builds (no more -noavx)
 - [ ] new model based on qwen3 0.6b
 - [ ] new model based on gemma3 270m
 - [ ] support AI task API

custom_components/llama_conversation/__init__.py
@@ -44,7 +44,6 @@ from .const import (
     BACKEND_TYPE_OLLAMA,
     BACKEND_TYPE_LLAMA_EXISTING_OLD,
     BACKEND_TYPE_LLAMA_HF_OLD,
-    EMBEDDED_LLAMA_CPP_PYTHON_VERSION
 )
 from .entity import LocalLLMClient, LocalLLMConfigEntry
 from .backends.llamacpp import LlamaCppClient
@@ -141,7 +140,7 @@ async def async_migrate_entry(hass: HomeAssistant, config_entry: LocalLLMConfigE
     if backend == BACKEND_TYPE_LLAMA_EXISTING_OLD or backend == BACKEND_TYPE_LLAMA_HF_OLD:
         backend = BACKEND_TYPE_LLAMA_CPP
         entry_data[CONF_BACKEND_TYPE] = BACKEND_TYPE_LLAMA_CPP
-        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version) or EMBEDDED_LLAMA_CPP_PYTHON_VERSION
+        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version)
     else:
         # ensure all remote backends have a path set
         entry_options[CONF_GENERIC_OPENAI_PATH] = entry_options.get(CONF_GENERIC_OPENAI_PATH, "")

custom_components/llama_conversation/backends/llamacpp.py
@@ -39,6 +39,7 @@ from custom_components.llama_conversation.const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_INSTALLED_LLAMACPP_VERSION,
     DEFAULT_MAX_TOKENS,
     DEFAULT_PROMPT,
     DEFAULT_TEMPERATURE,
@@ -78,6 +79,7 @@ def snapshot_settings(options: dict[str, Any]) -> dict[str, Any]:
         CONF_LLAMACPP_THREAD_COUNT: options.get(CONF_LLAMACPP_THREAD_COUNT, DEFAULT_LLAMACPP_THREAD_COUNT),
         CONF_LLAMACPP_BATCH_THREAD_COUNT: options.get(CONF_LLAMACPP_BATCH_THREAD_COUNT, DEFAULT_LLAMACPP_BATCH_THREAD_COUNT),
         CONF_LLAMACPP_ENABLE_FLASH_ATTENTION: options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION),
+        CONF_INSTALLED_LLAMACPP_VERSION: options.get(CONF_INSTALLED_LLAMACPP_VERSION, ""),
         CONF_GBNF_GRAMMAR_FILE: options.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE),
         CONF_PROMPT_CACHING_ENABLED: options.get(CONF_PROMPT_CACHING_ENABLED, DEFAULT_PROMPT_CACHING_ENABLED),
     }
@@ -115,7 +117,7 @@ class LlamaCppClient(LocalLLMClient):

     @staticmethod
     def get_name(client_options: dict[str, Any]):
-        return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})"
+        return "Llama.cpp"

     async def async_get_available_models(self) -> List[str]:
         return [] # TODO: find available "huggingface_hub" models that have been downloaded
@@ -215,6 +217,11 @@ class LlamaCppClient(LocalLLMClient):
             should_reload = True
         elif loaded_options[CONF_LLAMACPP_ENABLE_FLASH_ATTENTION] != entity_options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION):
             should_reload = True
+        elif loaded_options[CONF_INSTALLED_LLAMACPP_VERSION] != entity_options.get(CONF_INSTALLED_LLAMACPP_VERSION):
+            should_reload = True
+        _LOGGER.debug(f"Reloading llama.cpp...")
+        if self.llama_cpp_module:
+            self.llama_cpp_module = importlib.reload(self.llama_cpp_module)

         model_path = entity_options.get(CONF_DOWNLOADED_MODEL_FILE, "")
         model_name = entity_options.get(CONF_CHAT_MODEL, "")
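
Note: the reload added above is needed because pip replaces llama-cpp-python on disk while Home Assistant keeps running; the already-imported module object still points at the old build until it is reloaded. A minimal standalone sketch of the same idea, assuming llama-cpp-python is installed in the current environment:

    import importlib

    import llama_cpp  # assumption: llama-cpp-python is already installed

    # ... pip installs a different llama-cpp-python wheel in-place here ...

    # The running process still holds the old module object; reload picks up
    # the newly installed code and its reported version.
    llama_cpp = importlib.reload(llama_cpp)
    print(llama_cpp.__version__)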

custom_components/llama_conversation/config_flow.py
@@ -1,6 +1,7 @@
 """Config flow for Local LLM Conversation integration."""
 from __future__ import annotations

+from asyncio import Task
 import logging
 import os
 from typing import Any
@@ -39,7 +40,8 @@ from homeassistant.helpers.selector import (
     BooleanSelectorConfig,
 )

-from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, is_valid_hostname, MissingQuantizationException
+from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, \
+    is_valid_hostname, get_available_llama_cpp_versions, MissingQuantizationException
 from .const import (
     CONF_CHAT_MODEL,
     CONF_MAX_TOKENS,
@@ -87,6 +89,7 @@ from .const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_LLAMACPP_REINSTALL,
     DEFAULT_CHAT_MODEL,
     DEFAULT_PORT,
     DEFAULT_SSL,
@@ -258,14 +261,14 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
         if backend == BACKEND_TYPE_LLAMA_CPP:
             installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
             _LOGGER.debug(f"installed version: {installed_version}")
-            if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            if installed_version and installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
                 self.client_config[CONF_INSTALLED_LLAMACPP_VERSION] = installed_version
                 return await self.async_step_finish()
             else:
                 self.internal_step = "install_local_wheels"
                 _LOGGER.debug("Queuing install task...")
                 async def install_task():
-                    await self.hass.async_add_executor_job(
+                    return await self.hass.async_add_executor_job(
                         install_llama_cpp_python, self.hass.config.config_dir
                     )

@@ -376,7 +379,7 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):

     @classmethod
     def async_supports_options_flow(cls, config_entry: ConfigEntry) -> bool:
-        return config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_LLAMA_CPP
+        return True

     @staticmethod
     def async_get_options_flow(
@@ -399,6 +402,9 @@ class OptionsFlow(BaseOptionsFlow):
     """Local LLM config flow options handler."""

     model_config: dict[str, Any] | None = None
+    reinstall_task: Task[Any] | None = None
+    wheel_install_error: str | None = None
+    wheel_install_successful: bool = False

     async def async_step_init(
         self, user_input: dict[str, Any] | None = None
@@ -410,32 +416,112 @@ class OptionsFlow(BaseOptionsFlow):
         backend_type = self.config_entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
         client_config = dict(self.config_entry.options)

-        if user_input is not None:
-            client_config.update(user_input)
-
-            # validate remote connections
-            connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
-
-            if not connect_err:
-                return self.async_create_entry(data=client_config)
-            else:
-                errors["base"] = "failed_to_connect"
-                description_placeholders["exception"] = str(connect_err)
-
-        schema = remote_connection_schema(
-            backend_type=backend_type,
-            host=client_config.get(CONF_HOST),
-            port=client_config.get(CONF_PORT),
-            ssl=client_config.get(CONF_SSL),
-            selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
-        )
-
-        return self.async_show_form(
-            step_id="init",
-            data_schema=schema,
-            errors=errors,
-            description_placeholders=description_placeholders,
-        )
+        if self.wheel_install_error:
+            _LOGGER.warning("Failed to install wheel: %s", repr(self.wheel_install_error))
+            return self.async_abort(reason="pip_wheel_error")
+
+        if self.wheel_install_successful:
+            client_config[CONF_INSTALLED_LLAMACPP_VERSION] = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
+            _LOGGER.debug(f"new version is: {client_config[CONF_INSTALLED_LLAMACPP_VERSION]}")
+            return self.async_create_entry(data=client_config)
+
+        if backend_type == BACKEND_TYPE_LLAMA_CPP:
+            potential_versions = await get_available_llama_cpp_versions(self.hass)
+
+            schema = vol.Schema({
+                vol.Required(CONF_LLAMACPP_REINSTALL, default=False): BooleanSelector(BooleanSelectorConfig()),
+                vol.Required(CONF_INSTALLED_LLAMACPP_VERSION, default=client_config.get(CONF_INSTALLED_LLAMACPP_VERSION, "not installed")): SelectSelector(
+                    SelectSelectorConfig(
+                        options=[ SelectOptionDict(value=x[0], label=x[0] if not x[1] else f"{x[0]} (local)") for x in potential_versions ],
+                        mode=SelectSelectorMode.DROPDOWN,
+                    )
+                )
+            })
+
+            return self.async_show_form(
+                step_id="reinstall",
+                data_schema=schema,
+            )
+        else:
+
+            if user_input is not None:
+                client_config.update(user_input)
+
+                # validate remote connections
+                connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
+
+                if not connect_err:
+                    return self.async_create_entry(data=client_config)
+                else:
+                    errors["base"] = "failed_to_connect"
+                    description_placeholders["exception"] = str(connect_err)
+
+            schema = remote_connection_schema(
+                backend_type=backend_type,
+                host=client_config.get(CONF_HOST),
+                port=client_config.get(CONF_PORT),
+                ssl=client_config.get(CONF_SSL),
+                selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
+            )
+
+            return self.async_show_form(
+                step_id="init",
+                data_schema=schema,
+                errors=errors,
+                description_placeholders=description_placeholders,
+            )
+
+    async def async_step_reinstall(self, user_input: dict[str, Any] | None = None) -> ConfigFlowResult:
+        client_config = dict(self.config_entry.options)
+
+        if user_input is not None:
+            if not user_input[CONF_LLAMACPP_REINSTALL]:
+                _LOGGER.debug("Reinstall was not selected, finishing")
+                return self.async_create_entry(data=client_config)
+
+        if not self.reinstall_task:
+            if not user_input:
+                return self.async_abort(reason="unknown")
+
+            desired_version = user_input.get(CONF_INSTALLED_LLAMACPP_VERSION)
+            async def install_task():
+                return await self.hass.async_add_executor_job(
+                    install_llama_cpp_python, self.hass.config.config_dir, True, desired_version
+                )
+
+            self.reinstall_task = self.hass.async_create_background_task(
+                install_task(), name="llama_cpp_python_installation")
+
+            _LOGGER.debug("Queuing reinstall task...")
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        if not self.reinstall_task.done():
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        _LOGGER.debug("done... checking result")
+        install_exception = self.reinstall_task.exception()
+        if install_exception:
+            self.wheel_install_error = repr(install_exception)
+            _LOGGER.debug(f"Hit error: {self.wheel_install_error}")
+            return self.async_show_progress_done(next_step_id="init")
+        else:
+            wheel_install_result = self.reinstall_task.result()
+            if not wheel_install_result:
+                self.wheel_install_error = "Pip returned false"
+                _LOGGER.debug(f"Hit error: {self.wheel_install_error} ({wheel_install_result})")
+                return self.async_show_progress_done(next_step_id="init")
+            else:
+                _LOGGER.debug(f"Finished install: {wheel_install_result}")
+                self.wheel_install_successful = True
+                return self.async_show_progress_done(next_step_id="init")


 def STEP_LOCAL_MODEL_SELECTION_DATA_SCHEMA(model_file=None, chat_model=None, downloaded_model_quantization=None, available_quantizations=None):

custom_components/llama_conversation/const.py
@@ -191,6 +191,7 @@ CONF_LLAMACPP_THREAD_COUNT = "n_threads"
 DEFAULT_LLAMACPP_THREAD_COUNT = os.cpu_count()
 CONF_LLAMACPP_BATCH_THREAD_COUNT = "n_batch_threads"
 DEFAULT_LLAMACPP_BATCH_THREAD_COUNT = os.cpu_count()
+CONF_LLAMACPP_REINSTALL = "reinstall_llama_cpp"

 DEFAULT_OPTIONS = types.MappingProxyType(
     {
@@ -318,4 +319,4 @@ OPTIONS_OVERRIDES = {

 # INTEGRATION_VERSION = "0.4.0"
 INTEGRATION_VERSION = "0.3.11"
-EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16"
+EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
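
Note: the new pin is a PEP 440 version with a local label ("+b6153", which appears to be the llama.cpp build the forked wheels were compiled against). The local segment keeps the forked wheels distinguishable from a plain upstream 0.3.16 install; a quick illustration, assuming the packaging library is available:

    from packaging.version import Version  # assumption: 'packaging' is installed

    v = Version("0.3.16+b6153")
    print(v.public)                 # 0.3.16  (upstream llama-cpp-python release)
    print(v.local)                  # b6153   (local build label)
    print(v == Version("0.3.16"))   # False: the local label makes the two pins distinct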

custom_components/llama_conversation/translations/en.json
@@ -187,13 +187,27 @@
           "text_generation_webui_admin_key": "Admin Key",
           "text_generation_webui_preset": "Generation Preset/Character Name",
           "text_generation_webui_chat_mode": "Chat Mode"
-        }
+        },
+        "description": "Please provide the connection details to connect to the API that is hosting the model.",
+        "title": "Configure Connection"
+      },
+      "reinstall": {
+        "data": {
+          "reinstall_llama_cpp": "Reinstall Llama.cpp",
+          "installed_llama_cpp_version": "Version to (re)install"
+        },
+        "description": "__If you are experiencing issues with Llama.cpp__, you can force a reinstall of the package here. This will attempt to re-install or upgrade the llama-cpp-python package from GitHub *or* a local wheel file placed in the `/config/custom_components/llama_conversation/` directory.",
+        "title": "Reinstall Llama.cpp"
       }
     },
     "error": {
       "failed_to_connect": "Failed to connect to the remote API: {exception}",
       "invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.",
-      "unknown": "Unexpected error"
+      "unknown": "Unexpected error",
+      "pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
+    },
+    "progress": {
+      "install_local_wheels": "Please wait while Llama.cpp is installed..."
     }
   },
   "selector": {

custom_components/llama_conversation/utils.py
@@ -9,13 +9,14 @@ import multiprocessing
 import voluptuous as vol
 import webcolors
 import json
-from typing import Any, Dict, List, Sequence, cast
+from typing import Any, Dict, List, Sequence, Tuple, cast
 from webcolors import CSS3
 from importlib.metadata import version

+from homeassistant.core import HomeAssistant
 from homeassistant.components import conversation
 from homeassistant.helpers import config_validation as cv
-from homeassistant.helpers import intent, llm
+from homeassistant.helpers import intent, llm, aiohttp_client
 from homeassistant.requirements import pip_kwargs
 from homeassistant.util import color
 from homeassistant.util.package import install_package, is_installed
@@ -191,18 +192,11 @@ def validate_llama_cpp_python_installation():
 def get_llama_cpp_python_version():
     if not is_installed("llama-cpp-python"):
         return None
-    return version("llama-cpp-python").split("+")[0]
+    return version("llama-cpp-python")

-def install_llama_cpp_python(config_dir: str):
-
-    installed_wrong_version = False
-    if is_installed("llama-cpp-python"):
-        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
-            installed_wrong_version = True
-        else:
-            time.sleep(0.5) # I still don't know why this is required
-            return True
+def get_runtime_and_platform_suffix() -> Tuple[str, str]:
+    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"

     platform_suffix = platform.machine()
     # remap other names for architectures to the names we use
     if platform_suffix == "arm64":
@@ -210,42 +204,65 @@ def install_llama_cpp_python(config_dir: str):
     if platform_suffix == "i386" or platform_suffix == "amd64":
         platform_suffix = "x86_64"

-    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
+    return runtime_version, platform_suffix

+async def get_available_llama_cpp_versions(hass: HomeAssistant) -> List[Tuple[str, bool]]:
+    github_index_url = "https://acon96.github.io/llama-cpp-python/whl/ha/llama-cpp-python/"
+    session = aiohttp_client.async_get_clientsession(hass)
+    try:
+        async with session.get(github_index_url) as resp:
+            if resp.status != 200:
+                raise Exception(f"Failed to fetch available versions from GitHub (HTTP {resp.status})")
+            text = await resp.text()
+            # pull version numbers out of h2 tags
+            versions = re.findall(r"<h2.*>(.+)</h2>", text)
+            remote = sorted([(v, False) for v in versions], reverse=True)
+    except Exception as ex:
+        _LOGGER.warning(f"Error fetching available versions from GitHub: {repr(ex)}")
+        remote = []
+
+    runtime_version, platform_suffix = get_runtime_and_platform_suffix()
     folder = os.path.dirname(__file__)
     potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
-    potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ]
-    potential_wheels = [ wheel for wheel in potential_wheels if f"{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm" in wheel ]
+    local = [ (wheel, True) for wheel in potential_wheels if runtime_version in wheel and "llama_cpp_python" in wheel]
+
+    return remote + local

-    _LOGGER.debug(f"{potential_wheels=}")
-    if len(potential_wheels) > 0:
-        latest_wheel = potential_wheels[0]
-
-        _LOGGER.info("Installing llama-cpp-python from local wheel")
-        _LOGGER.debug(f"Wheel location: {latest_wheel}")
-        return install_package(os.path.join(folder, latest_wheel), **pip_kwargs(config_dir))
-
-    # scikit-build-core v0.9.7+ doesn't recognize these builds as musllinux, and just tags them as generic linux
-    # github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
-    github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
-    if install_package(github_release_url, **pip_kwargs(config_dir)):
-        _LOGGER.info("llama-cpp-python successfully installed from GitHub release")
+def install_llama_cpp_python(config_dir: str, force_reinstall: bool = False, specific_version: str | None = None) -> bool:
+
+    installed_wrong_version = False
+    if is_installed("llama-cpp-python") and not force_reinstall:
+        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            installed_wrong_version = True
+        else:
+            time.sleep(0.5) # I still don't know why this is required
+            return True
+
+    runtime_version, platform_suffix = get_runtime_and_platform_suffix()
+
+    if not specific_version:
+        specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION
+
+    if ".whl" in specific_version:
+        wheel_location = os.path.join(os.path.dirname(__file__), specific_version)
+    else:
+        wheel_location = f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
+
+    if install_package(wheel_location, **pip_kwargs(config_dir)):
+        _LOGGER.info("llama-cpp-python successfully installed")
         return True

     # if it is just the wrong version installed then ignore the installation error
     if not installed_wrong_version:
         _LOGGER.error(
-            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
-            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
+            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
             "Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
             "Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
         )
         return False
     else:
         _LOGGER.info(
-            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
-            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
+            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
             f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
         )
         time.sleep(0.5) # I still don't know why this is required
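
Note: when debugging a failed install it can help to print the exact wheel name the code above will look for. A standalone sketch (not part of the commit) that mirrors get_runtime_and_platform_suffix() and the release-URL construction; the arm64 remap target is assumed to be aarch64, matching the wheel architectures built for the Home Assistant images:

    import platform
    import sys

    # CPython tag (e.g. cp313) plus a normalized machine name, mirroring the helper above.
    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
    platform_suffix = platform.machine()
    if platform_suffix == "arm64":        # assumption: remapped to aarch64
        platform_suffix = "aarch64"
    if platform_suffix in ("i386", "amd64"):
        platform_suffix = "x86_64"

    specific_version = "0.3.16+b6153"     # example; the integration defaults to EMBEDDED_LLAMA_CPP_PYTHON_VERSION
    wheel_url = (
        f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/"
        f"llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
    )
    print(wheel_url)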

(wheel build script, executed inside the Home Assistant container)
@@ -2,15 +2,15 @@
 # Don't run this. This is executed inside of the home assistant container to build the wheel

 apk update
-apk add build-base python3-dev
+apk add build-base python3-dev linux-headers

+tag=$1

 cd /tmp
-git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch $1
+git clone --recurse-submodules https://github.com/acon96/llama-cpp-python --branch $tag --depth 1 --shallow-submodules
 cd llama-cpp-python
 pip3 install build
-tag="homellm"
-sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
+sed -i -E "s/^(__version__ *= *\")[^\"]+\"/\1${tag}\"/" llama_cpp/__init__.py

 export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=ON"
 python3 -m build --wheel
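
Note: the new sed call stamps whatever tag was checked out into llama_cpp/__init__.py before the wheel is built, so the wheel's version matches the release tag instead of just gaining a "+homellm" suffix. A small Python equivalent of that substitution (the tag value here is only an example):

    import re

    tag = "0.3.16+b6713"  # example; in the script this is the $1 argument / git tag
    line = '__version__ = "0.3.15"'

    # Same pattern as: sed -E 's/^(__version__ *= *")[^"]+"/\1<tag>"/'
    new_line = re.sub(r'^(__version__ *= *")[^"]+"', rf'\g<1>{tag}"', line)
    print(new_line)  # __version__ = "0.3.16+b6713"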

(local wheel build driver script)
@@ -1,6 +1,6 @@
 #!/bin/bash

-VERSION_TO_BUILD="v0.3.16"
+VERSION_TO_BUILD="0.3.16+b6713"

 # make python 11 wheels
 # docker run -it --rm \