Mirror of https://github.com/acon96/home-llm.git
Synced 2026-01-08 21:28:05 -05:00
fix wheel install + huggingface quant detection
.github/workflows/create-release.yml
@@ -20,33 +20,33 @@ jobs:
       matrix:
         include:
           # ARM variants
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             arch: "aarch64"
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             arch: "armhf"
 
           # Base x86
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             suffix: "-noavx"
             arch: "amd64"
             extra_defines: "-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF"
-          - home_assistant_version: "2024.12.1"
+          - home_assistant_version: "2024.12.3.1"
             arch: "i386"
             suffix: "-noavx"
             extra_defines: "-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF"
 
           # AVX2 and AVX512
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             arch: "amd64"
             extra_defines: "-DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_FMA=ON -DGGML_F16C=ON"
-          - home_assistant_version: "2024.12.1"
+          - home_assistant_version: "2024.12.3.1"
             arch: "amd64"
             suffix: "-avx512"
             extra_defines: "-DGGML_AVX512=ON -DGGML_FMA=ON -DGGML_F16C=ON"
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             arch: "i386"
             extra_defines: "-DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_FMA=ON -DGGML_F16C=ON"
-          - home_assistant_version: "2024.12"
+          - home_assistant_version: "2024.12.3"
             arch: "i386"
             suffix: "-avx512"
             extra_defines: "-DGGML_AVX512=ON -DGGML_FMA=ON -DGGML_F16C=ON"
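Note: each matrix entry builds a llama-cpp-python wheel variant for one
architecture/instruction-set combination; the change itself only bumps the
Home Assistant base-image tags. As a rough illustration of why the -noavx
and -avx512 variants exist, an installer might pick a suffix from the CPU
flags in /proc/cpuinfo, along these lines (a sketch only; this helper and
its fallbacks are not code from the repository):

    import platform

    def pick_wheel_suffix() -> str:
        # ARM builds in the matrix above carry no instruction-set suffix.
        if platform.machine() in ("aarch64", "armhf", "armv7l"):
            return ""
        try:
            with open("/proc/cpuinfo") as f:
                flags = f.read().split()
        except OSError:
            return "-noavx"  # unknown CPU: safest variant
        if "avx512f" in flags:
            return "-avx512"
        if "avx2" in flags:
            return ""  # default x86 build enables AVX/AVX2/FMA/F16C
        return "-noavx"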
@@ -37,7 +37,7 @@ from homeassistant.helpers.selector import (
 from homeassistant.util.package import is_installed
 from importlib.metadata import version
 
-from .utils import download_model_from_hf, install_llama_cpp_python, format_url, MissingQuantizationException
+from .utils import download_model_from_hf, get_llama_cpp_python_version, install_llama_cpp_python, format_url, MissingQuantizationException
 from .const import (
     CONF_CHAT_MODEL,
     CONF_MAX_TOKENS,
@@ -352,7 +352,9 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
         local_backend = is_local_backend(user_input[CONF_BACKEND_TYPE])
         self.model_config.update(user_input)
         if local_backend:
-            if is_installed("llama-cpp-python") and version("llama-cpp-python") == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
+            installed_version = await self.hass.async_add_executor_job(get_llama_cpp_python_version)
+            _LOGGER.debug(f"installed version: {installed_version}")
+            if installed_version == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
                 return await self.async_step_local_model()
             else:
                 return await self.async_step_install_local_wheels()
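Note: the replaced check called importlib.metadata's version() directly from
the async config-flow step; that lookup reads package metadata from disk and
would block Home Assistant's event loop, so it now runs via
hass.async_add_executor_job. A standalone sketch of the same pattern with
plain asyncio (the package name and prints are illustrative):

    import asyncio
    from importlib.metadata import PackageNotFoundError, version

    def blocking_version_lookup(package: str):
        # Disk-bound metadata read; keep it off the event loop.
        try:
            return version(package)
        except PackageNotFoundError:
            return None

    async def check_installed(package: str):
        loop = asyncio.get_running_loop()
        # Equivalent of hass.async_add_executor_job: run the blocking
        # call in the default thread pool and await its result.
        return await loop.run_in_executor(None, blocking_version_lookup, package)

    print(asyncio.run(check_installed("pip")))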
@@ -1,5 +1,6 @@
 import time
 import os
+import re
 import sys
 import platform
 import logging
@@ -105,10 +106,13 @@ def download_model_from_hf(model_name: str, quantization_type: str, storage_fold
 
     fs = HfFileSystem()
     potential_files = [ f for f in fs.glob(f"{model_name}/*.gguf") ]
-    wanted_file = [f for f in potential_files if (f".{quantization_type.lower()}." in f or f".{quantization_type.upper()}." in f)]
+    wanted_file = [f for f in potential_files if (f"{quantization_type.lower()}.gguf" in f or f"{quantization_type.upper()}.gguf" in f)]
 
     if len(wanted_file) != 1:
-        available_quants = [file.split(".")[-2].upper() for file in potential_files]
+        available_quants = [
+            re.split(r"\.|-", file.removesuffix(".gguf"))[-1].upper()
+            for file in potential_files
+        ]
         raise MissingQuantizationException(quantization_type, available_quants)
     try:
         os.makedirs(storage_folder, exist_ok=True)
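Note: the old filter only matched quantization labels surrounded by dots
(".q4_k_m."), so dash-separated names like "model-Q4_K_M.gguf" were never
found, and the old error path (file.split(".")[-2]) reported the whole stem
for such names. The new suffix match and re.split handle both separators.
A quick check against made-up filenames (illustrative names, not files from
any particular repository):

    import re

    files = ["Home-3B-v3.q4_k_m.gguf", "Home-3B-v3-Q4_K_M.gguf"]
    quant = "Q4_K_M"

    # Old filter: only the dot-separated name matches.
    old = [f for f in files if f".{quant.lower()}." in f or f".{quant.upper()}." in f]
    # New filter: the label just has to sit immediately before ".gguf".
    new = [f for f in files if f"{quant.lower()}.gguf" in f or f"{quant.upper()}.gguf" in f]
    print(old)  # ['Home-3B-v3.q4_k_m.gguf']
    print(new)  # ['Home-3B-v3.q4_k_m.gguf', 'Home-3B-v3-Q4_K_M.gguf']

    # New error path: split the stem on dots or dashes to recover the label.
    print([re.split(r"\.|-", f.removesuffix(".gguf"))[-1].upper() for f in files])
    # ['Q4_K_M', 'Q4_K_M']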
@@ -146,6 +150,11 @@ def validate_llama_cpp_python_installation():
     if process.exitcode != 0:
         raise Exception(f"Failed to properly initialize llama-cpp-python. (Exit code {process.exitcode}.)")
 
+def get_llama_cpp_python_version():
+    if not is_installed("llama-cpp-python"):
+        return None
+    return version("llama-cpp-python")
+
 def install_llama_cpp_python(config_dir: str):
 
     installed_wrong_version = False
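Note: returning None when the package is absent folds the previous two-step
"is_installed(...) and version(...)" test into a single value that the config
flow can compare against the pinned embedded version. Roughly how the caller
uses it (the pinned version string here is made up for illustration):

    EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.5"  # hypothetical pin

    def needs_wheel_install(installed_version):
        # None (not installed) and a stale version both route the flow to
        # the wheel-install step; only an exact match skips it.
        return installed_version != EMBEDDED_LLAMA_CPP_PYTHON_VERSION

    assert needs_wheel_install(None)
    assert needs_wheel_install("0.2.90")
    assert not needs_wheel_install("0.3.5")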
@@ -18,4 +18,4 @@ VERSION_TO_BUILD="v0.3.5"
 docker run -it --rm \
     --entrypoint bash \
     -v $(pwd):/tmp/dist \
-    homeassistant/home-assistant:2024.12 /tmp/dist/make_wheel.sh $VERSION_TO_BUILD
+    homeassistant/home-assistant:2024.12.3 /tmp/dist/make_wheel.sh $VERSION_TO_BUILD