From 2df454985d3feabb3d4898c44812ab0506889496 Mon Sep 17 00:00:00 2001
From: Alex O'Connell
Date: Wed, 8 Oct 2025 21:19:06 -0400
Subject: [PATCH] Build llama.cpp wheels in forked repo + support reinstallation

---
 .github/workflows/create-release.yml          | 126 ----------------
 TODO.md                                       |   1 +
 .../llama_conversation/__init__.py            |   3 +-
 .../llama_conversation/backends/llamacpp.py   |   9 +-
 .../llama_conversation/config_flow.py         | 140 ++++++++++++++----
 custom_components/llama_conversation/const.py |   3 +-
 .../llama_conversation/translations/en.json   |  18 ++-
 custom_components/llama_conversation/utils.py |  81 ++++++----
 scripts/make_wheel.sh                         |  10 +-
 scripts/run_docker_to_make_wheels.sh          |   2 +-
 10 files changed, 196 insertions(+), 197 deletions(-)
 delete mode 100644 .github/workflows/create-release.yml

diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
deleted file mode 100644
index a8cab75..0000000
--- a/.github/workflows/create-release.yml
+++ /dev/null
@@ -1,126 +0,0 @@
-name: Create Release
-
-on:
-  workflow_dispatch:
-    inputs:
-      release_notes:
-        description: "Release Notes"
-        required: true
-        type: string
-
-permissions:
-  contents: write
-
-jobs:
-  build_wheels:
-    name: Build wheels for ${{ matrix.arch }} (HA ${{ matrix.home_assistant_image }})
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          # ARM64
-          - home_assistant_image: "aarch64-homeassistant:2025.4.1"
-            arch: "aarch64"
-
-          # 32bit ARM (Raspberry pis)
-          - home_assistant_image: "armhf-homeassistant:2025.4.1"
-            arch: "armhf"
-
-          # x64
-          - home_assistant_image: "amd64-homeassistant:2025.4.1"
-            arch: "x86_64"
-
-          # 32 bit for older processors
-          - home_assistant_image: "i386-homeassistant:2025.4.1"
-            arch: "i386"
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Verify version match
-        if: startsWith(github.event.ref, 'refs/tags/v')
-        run: |
-          tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
-          component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
-          component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
-
-          if [ "$tag_version" != "$component_version_manifest" ]; then
-            echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
-            exit 1
-          fi
-
-          if [ "$tag_version" != "$component_version_const" ]; then
-            echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
-            exit 1
-          fi
-
-          echo "All required versions match."
-
-      - name: Read llama-cpp-python version
-        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
-
-      - name: Build artifact
-        uses: uraimo/run-on-arch-action@v2
-        id: build
-        with:
-          arch: none
-          distro: none
-          base_image: homeassistant/${{ matrix.home_assistant_image }}
-
-          # Create an artifacts directory
-          setup: |
-            mkdir -p "${PWD}/artifacts"
-
-          # Mount the artifacts directory as /artifacts in the container
-          dockerRunArgs: |
-            --volume "${PWD}/artifacts:/artifacts"
-
-          # The shell to run commands with in the container
-          shell: /bin/bash
-
-          # Produce a binary artifact and place it in the mounted volume
-          run: |
-            apk update
-            apk add build-base python3-dev cmake
-            pip3 install build
-
-            cd /tmp
-            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
-            cd llama-cpp-python
-
-            tag="homellm"
-            sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
-
-            export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON"
-            python3 -m build --wheel
-
-            mv ./dist/*.whl /artifacts
-            ls -la /artifacts/
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          path: ./artifacts/*.whl
-          name: artifact_${{ matrix.arch }}
-
-  release:
-    name: Create Release
-    needs: [ build_wheels ]
-    runs-on: ubuntu-latest
-    if: startsWith(github.event.ref, 'refs/tags/v')
-
-    steps:
-      - name: Download artifacts
-        uses: actions/download-artifact@v4
-        with:
-          path: dist
-          merge-multiple: true
-
-      - name: Create GitHub release
-        uses: softprops/action-gh-release@v2
-        with:
-          files: dist/*
-          body: ${{ inputs.release_notes }}
-          make_latest: true
\ No newline at end of file
diff --git a/TODO.md b/TODO.md
index c421433..508f1db 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,6 +2,7 @@
 - [x] proper tool calling support
 - [ ] fix old GGUFs to support tool calling
 - [x] home assistant component text streaming support
+- [x] move llama-cpp build to forked repo + add support for multi backend builds (no more -noavx)
 - [ ] new model based on qwen3 0.6b
 - [ ] new model based on gemma3 270m
 - [ ] support AI task API
diff --git a/custom_components/llama_conversation/__init__.py b/custom_components/llama_conversation/__init__.py
index 8d2cf81..ef4eaf6 100644
--- a/custom_components/llama_conversation/__init__.py
+++ b/custom_components/llama_conversation/__init__.py
@@ -44,7 +44,6 @@ from .const import (
     BACKEND_TYPE_OLLAMA,
     BACKEND_TYPE_LLAMA_EXISTING_OLD,
     BACKEND_TYPE_LLAMA_HF_OLD,
-    EMBEDDED_LLAMA_CPP_PYTHON_VERSION
 )
 from .entity import LocalLLMClient, LocalLLMConfigEntry
 from .backends.llamacpp import LlamaCppClient
@@ -141,7 +140,7 @@ async def async_migrate_entry(hass: HomeAssistant, config_entry: LocalLLMConfigE
     if backend == BACKEND_TYPE_LLAMA_EXISTING_OLD or backend == BACKEND_TYPE_LLAMA_HF_OLD:
         backend = BACKEND_TYPE_LLAMA_CPP
         entry_data[CONF_BACKEND_TYPE] = BACKEND_TYPE_LLAMA_CPP
-        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version) or EMBEDDED_LLAMA_CPP_PYTHON_VERSION
+        entry_options[CONF_INSTALLED_LLAMACPP_VERSION] = await hass.async_add_executor_job(get_llama_cpp_python_version)
     else:
         # ensure all remote backends have a path set
         entry_options[CONF_GENERIC_OPENAI_PATH] = entry_options.get(CONF_GENERIC_OPENAI_PATH, "")
diff --git a/custom_components/llama_conversation/backends/llamacpp.py b/custom_components/llama_conversation/backends/llamacpp.py
index 0fcb3a5..68b1902 100644
--- a/custom_components/llama_conversation/backends/llamacpp.py
+++ b/custom_components/llama_conversation/backends/llamacpp.py
@@ -39,6 +39,7 @@ from custom_components.llama_conversation.const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_INSTALLED_LLAMACPP_VERSION,
     DEFAULT_MAX_TOKENS,
     DEFAULT_PROMPT,
     DEFAULT_TEMPERATURE,
@@ -78,6 +79,7 @@ def snapshot_settings(options: dict[str, Any]) -> dict[str, Any]:
         CONF_LLAMACPP_THREAD_COUNT: options.get(CONF_LLAMACPP_THREAD_COUNT, DEFAULT_LLAMACPP_THREAD_COUNT),
         CONF_LLAMACPP_BATCH_THREAD_COUNT: options.get(CONF_LLAMACPP_BATCH_THREAD_COUNT, DEFAULT_LLAMACPP_BATCH_THREAD_COUNT),
         CONF_LLAMACPP_ENABLE_FLASH_ATTENTION: options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION),
+        CONF_INSTALLED_LLAMACPP_VERSION: options.get(CONF_INSTALLED_LLAMACPP_VERSION, ""),
         CONF_GBNF_GRAMMAR_FILE: options.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE),
         CONF_PROMPT_CACHING_ENABLED: options.get(CONF_PROMPT_CACHING_ENABLED, DEFAULT_PROMPT_CACHING_ENABLED),
     }
@@ -115,7 +117,7 @@ class LlamaCppClient(LocalLLMClient):
 
     @staticmethod
     def get_name(client_options: dict[str, Any]):
-        return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})"
+        return "Llama.cpp"
 
     async def async_get_available_models(self) -> List[str]:
         return [] # TODO: find available "huggingface_hub" models that have been downloaded
@@ -215,6 +217,11 @@ class LlamaCppClient(LocalLLMClient):
             should_reload = True
         elif loaded_options[CONF_LLAMACPP_ENABLE_FLASH_ATTENTION] != entity_options.get(CONF_LLAMACPP_ENABLE_FLASH_ATTENTION, DEFAULT_LLAMACPP_ENABLE_FLASH_ATTENTION):
             should_reload = True
+        elif loaded_options[CONF_INSTALLED_LLAMACPP_VERSION] != entity_options.get(CONF_INSTALLED_LLAMACPP_VERSION):
+            should_reload = True
+            _LOGGER.debug(f"Reloading llama.cpp...")
+            if self.llama_cpp_module:
+                self.llama_cpp_module = importlib.reload(self.llama_cpp_module)
 
         model_path = entity_options.get(CONF_DOWNLOADED_MODEL_FILE, "")
         model_name = entity_options.get(CONF_CHAT_MODEL, "")
diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py
index d456f4e..b2baf19 100644
--- a/custom_components/llama_conversation/config_flow.py
+++ b/custom_components/llama_conversation/config_flow.py
@@ -1,6 +1,7 @@
 """Config flow for Local LLM Conversation integration."""
 from __future__ import annotations
 
+from asyncio import Task
 import logging
 import os
 from typing import Any
@@ -39,7 +40,8 @@ from homeassistant.helpers.selector import (
     BooleanSelectorConfig,
 )
 
-from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, is_valid_hostname, MissingQuantizationException
+from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, \
+    is_valid_hostname, get_available_llama_cpp_versions, MissingQuantizationException
 from .const import (
     CONF_CHAT_MODEL,
     CONF_MAX_TOKENS,
@@ -87,6 +89,7 @@ from .const import (
     CONF_LLAMACPP_BATCH_SIZE,
     CONF_LLAMACPP_THREAD_COUNT,
     CONF_LLAMACPP_BATCH_THREAD_COUNT,
+    CONF_LLAMACPP_REINSTALL,
     DEFAULT_CHAT_MODEL,
     DEFAULT_PORT,
     DEFAULT_SSL,
@@ -258,14 +261,14 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
         if backend == BACKEND_TYPE_LLAMA_CPP:
             installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
             _LOGGER.debug(f"installed version: {installed_version}")
-            if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            if installed_version and installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
                 self.client_config[CONF_INSTALLED_LLAMACPP_VERSION] = installed_version
                 return await self.async_step_finish()
             else:
                 self.internal_step = "install_local_wheels"
                 _LOGGER.debug("Queuing install task...")
                 async def install_task():
-                    await self.hass.async_add_executor_job(
+                    return await self.hass.async_add_executor_job(
                         install_llama_cpp_python, self.hass.config.config_dir
                     )
 
@@ -376,7 +379,7 @@ class ConfigFlow(BaseConfigFlow, domain=DOMAIN):
 
     @classmethod
     def async_supports_options_flow(cls, config_entry: ConfigEntry) -> bool:
-        return config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_LLAMA_CPP
+        return True
 
     @staticmethod
     def async_get_options_flow(
@@ -399,6 +402,9 @@ class OptionsFlow(BaseOptionsFlow):
     """Local LLM config flow options handler."""
 
     model_config: dict[str, Any] | None = None
+    reinstall_task: Task[Any] | None = None
+    wheel_install_error: str | None = None
+    wheel_install_successful: bool = False
 
     async def async_step_init(
         self, user_input: dict[str, Any] | None = None
@@ -410,32 +416,112 @@ class OptionsFlow(BaseOptionsFlow):
         backend_type = self.config_entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
         client_config = dict(self.config_entry.options)
 
+        if self.wheel_install_error:
+            _LOGGER.warning("Failed to install wheel: %s", repr(self.wheel_install_error))
+            return self.async_abort(reason="pip_wheel_error")
+
+        if self.wheel_install_successful:
+            client_config[CONF_INSTALLED_LLAMACPP_VERSION] = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
+            _LOGGER.debug(f"new version is: {client_config[CONF_INSTALLED_LLAMACPP_VERSION]}")
+            return self.async_create_entry(data=client_config)
+
+        if backend_type == BACKEND_TYPE_LLAMA_CPP:
+            potential_versions = await get_available_llama_cpp_versions(self.hass)
+
+            schema = vol.Schema({
+                vol.Required(CONF_LLAMACPP_REINSTALL, default=False): BooleanSelector(BooleanSelectorConfig()),
+                vol.Required(CONF_INSTALLED_LLAMACPP_VERSION, default=client_config.get(CONF_INSTALLED_LLAMACPP_VERSION, "not installed")): SelectSelector(
+                    SelectSelectorConfig(
+                        options=[ SelectOptionDict(value=x[0], label=x[0] if not x[1] else f"{x[0]} (local)") for x in potential_versions ],
+                        mode=SelectSelectorMode.DROPDOWN,
+                    )
+                )
+            })
+
+            return self.async_show_form(
+                step_id="reinstall",
+                data_schema=schema,
+            )
+        else:
+
+            if user_input is not None:
+                client_config.update(user_input)
+
+                # validate remote connections
+                connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
+
+                if not connect_err:
+                    return self.async_create_entry(data=client_config)
+                else:
+                    errors["base"] = "failed_to_connect"
+                    description_placeholders["exception"] = str(connect_err)
+
+            schema = remote_connection_schema(
+                backend_type=backend_type,
+                host=client_config.get(CONF_HOST),
+                port=client_config.get(CONF_PORT),
+                ssl=client_config.get(CONF_SSL),
+                selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
+            )
+
+            return self.async_show_form(
+                step_id="init",
+                data_schema=schema,
+                errors=errors,
+                description_placeholders=description_placeholders,
+            )
+
+    async def async_step_reinstall(self, user_input: dict[str, Any] | None = None) -> ConfigFlowResult:
+        client_config = dict(self.config_entry.options)
+
         if user_input is not None:
-            client_config.update(user_input)
-
-            # validate remote connections
-            connect_err = await BACKEND_TO_CLS[backend_type].async_validate_connection(self.hass, client_config)
-
-            if not connect_err:
+            if not user_input[CONF_LLAMACPP_REINSTALL]:
+                _LOGGER.debug("Reinstall was not selected, finishing")
                 return self.async_create_entry(data=client_config)
+
+        if not self.reinstall_task:
+            if not user_input:
+                return self.async_abort(reason="unknown")
+
+            desired_version = user_input.get(CONF_INSTALLED_LLAMACPP_VERSION)
+            async def install_task():
+                return await self.hass.async_add_executor_job(
+                    install_llama_cpp_python, self.hass.config.config_dir, True, desired_version
+                )
+
+            self.reinstall_task = self.hass.async_create_background_task(
+                install_task(), name="llama_cpp_python_installation")
+
+            _LOGGER.debug("Queuing reinstall task...")
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        if not self.reinstall_task.done():
+            return self.async_show_progress(
+                progress_task=self.reinstall_task,
+                step_id="reinstall",
+                progress_action="install_local_wheels",
+            )
+
+        _LOGGER.debug("done... checking result")
+        install_exception = self.reinstall_task.exception()
+        if install_exception:
+            self.wheel_install_error = repr(install_exception)
+            _LOGGER.debug(f"Hit error: {self.wheel_install_error}")
+            return self.async_show_progress_done(next_step_id="init")
+        else:
+            wheel_install_result = self.reinstall_task.result()
+            if not wheel_install_result:
+                self.wheel_install_error = "Pip returned false"
+                _LOGGER.debug(f"Hit error: {self.wheel_install_error} ({wheel_install_result})")
+                return self.async_show_progress_done(next_step_id="init")
             else:
-                errors["base"] = "failed_to_connect"
-                description_placeholders["exception"] = str(connect_err)
-
-        schema = remote_connection_schema(
-            backend_type=backend_type,
-            host=client_config.get(CONF_HOST),
-            port=client_config.get(CONF_PORT),
-            ssl=client_config.get(CONF_SSL),
-            selected_path=client_config.get(CONF_GENERIC_OPENAI_PATH)
-        )
-
-        return self.async_show_form(
-            step_id="init",
-            data_schema=schema,
-            errors=errors,
-            description_placeholders=description_placeholders,
-        )
+                _LOGGER.debug(f"Finished install: {wheel_install_result}")
+                self.wheel_install_successful = True
+                return self.async_show_progress_done(next_step_id="init")
 
 
 def STEP_LOCAL_MODEL_SELECTION_DATA_SCHEMA(model_file=None, chat_model=None, downloaded_model_quantization=None, available_quantizations=None):
diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py
index 42cf340..9029b94 100644
--- a/custom_components/llama_conversation/const.py
+++ b/custom_components/llama_conversation/const.py
@@ -191,6 +191,7 @@ CONF_LLAMACPP_THREAD_COUNT = "n_threads"
 DEFAULT_LLAMACPP_THREAD_COUNT = os.cpu_count()
 CONF_LLAMACPP_BATCH_THREAD_COUNT = "n_batch_threads"
 DEFAULT_LLAMACPP_BATCH_THREAD_COUNT = os.cpu_count()
+CONF_LLAMACPP_REINSTALL = "reinstall_llama_cpp"
 
 DEFAULT_OPTIONS = types.MappingProxyType(
     {
@@ -318,4 +319,4 @@ OPTIONS_OVERRIDES = {
 
 # INTEGRATION_VERSION = "0.4.0"
 INTEGRATION_VERSION = "0.3.11"
-EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16"
+EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
diff --git a/custom_components/llama_conversation/translations/en.json b/custom_components/llama_conversation/translations/en.json
index c1ab991..4c3f1da 100644
--- a/custom_components/llama_conversation/translations/en.json
+++ b/custom_components/llama_conversation/translations/en.json
@@ -187,13 +187,27 @@
           "text_generation_webui_admin_key": "Admin Key",
"Admin Key", "text_generation_webui_preset": "Generation Preset/Character Name", "text_generation_webui_chat_mode": "Chat Mode" - } + }, + "description": "Please provide the connection details to connect to the API that is hosting the model.", + "title": "Configure Connection" + }, + "reinstall": { + "data": { + "reinstall_llama_cpp": "Reinstall Llama.cpp", + "installed_llama_cpp_version": "Version to (re)install" + }, + "description": "__If you are experiencing issues with Llama.cpp__, you can force a reinstall of the package here. This will attempt to re-install or upgrade the llama-cpp-python package from GitHub *or* a local wheel file placed in the `/config/custom_components/llama_conversation/` directory.", + "title": "Reinstall Llama.cpp" } }, "error": { "failed_to_connect": "Failed to connect to the remote API: {exception}", "invalid_hostname": "The provided hostname was invalid. Please ensure you only provide the domain or IP address and not the full API endpoint.", - "unknown": "Unexpected error" + "unknown": "Unexpected error", + "pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details." + }, + "progress": { + "install_local_wheels": "Please wait while Llama.cpp is installed..." } }, "selector": { diff --git a/custom_components/llama_conversation/utils.py b/custom_components/llama_conversation/utils.py index f9dd084..bc1cf31 100644 --- a/custom_components/llama_conversation/utils.py +++ b/custom_components/llama_conversation/utils.py @@ -9,13 +9,14 @@ import multiprocessing import voluptuous as vol import webcolors import json -from typing import Any, Dict, List, Sequence, cast +from typing import Any, Dict, List, Sequence, Tuple, cast from webcolors import CSS3 from importlib.metadata import version +from homeassistant.core import HomeAssistant from homeassistant.components import conversation from homeassistant.helpers import config_validation as cv -from homeassistant.helpers import intent, llm +from homeassistant.helpers import intent, llm, aiohttp_client from homeassistant.requirements import pip_kwargs from homeassistant.util import color from homeassistant.util.package import install_package, is_installed @@ -191,18 +192,11 @@ def validate_llama_cpp_python_installation(): def get_llama_cpp_python_version(): if not is_installed("llama-cpp-python"): return None - return version("llama-cpp-python").split("+")[0] + return version("llama-cpp-python") -def install_llama_cpp_python(config_dir: str): +def get_runtime_and_platform_suffix() -> Tuple[str, str]: + runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}" - installed_wrong_version = False - if is_installed("llama-cpp-python"): - if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION: - installed_wrong_version = True - else: - time.sleep(0.5) # I still don't know why this is required - return True - platform_suffix = platform.machine() # remap other names for architectures to the names we use if platform_suffix == "arm64": @@ -210,42 +204,65 @@ def install_llama_cpp_python(config_dir: str): if platform_suffix == "i386" or platform_suffix == "amd64": platform_suffix = "x86_64" - runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}" - + return runtime_version, platform_suffix + +async def get_available_llama_cpp_versions(hass: HomeAssistant) -> List[Tuple[str, bool]]: + github_index_url = "https://acon96.github.io/llama-cpp-python/whl/ha/llama-cpp-python/" + session = aiohttp_client.async_get_clientsession(hass) 
+    try:
+        async with session.get(github_index_url) as resp:
+            if resp.status != 200:
+                raise Exception(f"Failed to fetch available versions from GitHub (HTTP {resp.status})")
+            text = await resp.text()
+            # pull version numbers out of h2 tags
+            versions = re.findall(r"(.+)", text)
+            remote = sorted([(v, False) for v in versions], reverse=True)
+    except Exception as ex:
+        _LOGGER.warning(f"Error fetching available versions from GitHub: {repr(ex)}")
+        remote = []
+
+    runtime_version, platform_suffix = get_runtime_and_platform_suffix()
     folder = os.path.dirname(__file__)
     potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
-    potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ]
-    potential_wheels = [ wheel for wheel in potential_wheels if f"{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm" in wheel ]
+    local = [ (wheel, True) for wheel in potential_wheels if runtime_version in wheel and "llama_cpp_python" in wheel]
+
+    return remote + local
 
-    _LOGGER.debug(f"{potential_wheels=}")
-    if len(potential_wheels) > 0:
+def install_llama_cpp_python(config_dir: str, force_reinstall: bool = False, specific_version: str | None = None) -> bool:
 
-        latest_wheel = potential_wheels[0]
-
-        _LOGGER.info("Installing llama-cpp-python from local wheel")
-        _LOGGER.debug(f"Wheel location: {latest_wheel}")
-        return install_package(os.path.join(folder, latest_wheel), **pip_kwargs(config_dir))
+    installed_wrong_version = False
+    if is_installed("llama-cpp-python") and not force_reinstall:
+        if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            installed_wrong_version = True
+        else:
+            time.sleep(0.5) # I still don't know why this is required
+            return True
 
-    # scikit-build-core v0.9.7+ doesn't recognize these builds as musllinux, and just tags them as generic linux
-    # github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
-    github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}+homellm-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
-    if install_package(github_release_url, **pip_kwargs(config_dir)):
-        _LOGGER.info("llama-cpp-python successfully installed from GitHub release")
+    runtime_version, platform_suffix = get_runtime_and_platform_suffix()
+
+    if not specific_version:
+        specific_version = EMBEDDED_LLAMA_CPP_PYTHON_VERSION
+
+    if ".whl" in specific_version:
+        wheel_location = os.path.join(os.path.dirname(__file__), specific_version)
+    else:
+        wheel_location = f"https://github.com/acon96/llama-cpp-python/releases/download/{specific_version}/llama_cpp_python-{specific_version}-{runtime_version}-{runtime_version}-linux_{platform_suffix}.whl"
+
+    if install_package(wheel_location, **pip_kwargs(config_dir)):
+        _LOGGER.info("llama-cpp-python successfully installed")
         return True
 
     # if it is just the wrong version installed then ignore the installation error
     if not installed_wrong_version:
         _LOGGER.error(
-            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
-            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
+            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
             "Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
             "Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
         )
         return False
     else:
         _LOGGER.info(
-            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
-            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
+            "Error installing llama-cpp-python. Could not install the binary wheels from GitHub." + \
             f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
         )
         time.sleep(0.5) # I still don't know why this is required
diff --git a/scripts/make_wheel.sh b/scripts/make_wheel.sh
index 71e2e17..a65e18e 100644
--- a/scripts/make_wheel.sh
+++ b/scripts/make_wheel.sh
@@ -2,15 +2,15 @@
 # Don't run this. This is executed inside of the home assistant container to build the wheel
 
 apk update
-apk add build-base python3-dev
+apk add build-base python3-dev linux-headers
+
+tag=$1
 
 cd /tmp
-git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch $1
+git clone --recurse-submodules https://github.com/acon96/llama-cpp-python --branch $tag --depth 1 --shallow-submodules
 cd llama-cpp-python
 pip3 install build
-
-tag="homellm"
-sed -i -E "s/^(__version__ *= *\"[0-9]+\.[0-9]+\.[0-9]+)\"/\1+${tag}\"/" llama_cpp/__init__.py
+sed -i -E "s/^(__version__ *= *\")[^\"]+\"/\1${tag}\"/" llama_cpp/__init__.py
 
 export CMAKE_ARGS="-DLLAVA_BUILD=OFF -DGGML_NATIVE=ON"
 python3 -m build --wheel
diff --git a/scripts/run_docker_to_make_wheels.sh b/scripts/run_docker_to_make_wheels.sh
index 007a0f3..3dda311 100755
--- a/scripts/run_docker_to_make_wheels.sh
+++ b/scripts/run_docker_to_make_wheels.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-VERSION_TO_BUILD="v0.3.16"
+VERSION_TO_BUILD="0.3.16+b6713"
 
 # make python 11 wheels
 # docker run -it --rm \