working with wheel install + grammar

Alex O'Connell
2024-01-13 20:05:53 -05:00
parent 70a02d68f9
commit 4aab796cd3
5 changed files with 61 additions and 26 deletions

View File

@@ -67,9 +67,6 @@ CONFIG_SCHEMA = cv.config_entry_only_config_schema(DOMAIN)
async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
"""Set up Local LLaMA Conversation from a config entry."""
hass.data.setdefault(DOMAIN, {})
hass.data[DOMAIN][entry.entry_id] = entry
use_local_backend = entry.data.get(
CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE
) != BACKEND_TYPE_REMOTE
@@ -86,6 +83,9 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
agent = await hass.async_add_executor_job(create_agent)
conversation.async_set_agent(hass, entry, agent)
hass.data.setdefault(DOMAIN, {})
hass.data[DOMAIN][entry.entry_id] = entry
return True
@@ -130,12 +130,14 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
if self.use_local_backend:
if not model_path:
raise Exception(f"Model was not found at '{model_path}'!")
# don't import it until now because the wheel is installed by config_flow.py
module = importlib.import_module("llama_cpp")
Llama = getattr(module, "Llama")
LlamaGrammar = getattr(module, "LlamaGrammar")
_LOGGER.debug("Loading model...")
self.llm = Llama(
model_path=model_path,
n_ctx=2048,
@@ -144,9 +146,11 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
# n_threads_batch=4,
)
with open(os.path.join(os.path.dirname(__file__), GBNF_GRAMMAR_FILE)) as f:
grammar_str = "".join(f.readlines())
self.grammar = LlamaGrammar.from_string(grammar_str)
# _LOGGER.debug("Loading grammar...")
# with open(os.path.join(os.path.dirname(__file__), GBNF_GRAMMAR_FILE)) as f:
# grammar_str = "".join(f.readlines())
# self.grammar = LlamaGrammar.from_string(grammar_str)
_LOGGER.info("Model loaded")
else:
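For context, the hunk above defers importing llama_cpp until the agent is constructed, since the wheel is only installed later by config_flow.py. A minimal sketch of that pattern, using the same Llama and LlamaGrammar calls shown in the diff (the standalone function and its arguments are illustrative):

import importlib

def load_local_model(model_path: str, grammar_path: str):
    # llama_cpp is imported at call time because the wheel may only have been
    # installed after Home Assistant started (see config_flow.py).
    module = importlib.import_module("llama_cpp")
    Llama = getattr(module, "Llama")
    LlamaGrammar = getattr(module, "LlamaGrammar")

    llm = Llama(model_path=model_path, n_ctx=2048)

    # The GBNF grammar constrains generation to the format defined in output.gbnf.
    with open(grammar_path) as f:
        grammar = LlamaGrammar.from_string(f.read())

    return llm, grammar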
@@ -276,7 +280,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
return conversation.ConversationResult(
response=intent_response, conversation_id=conversation_id
)
def _load_remote_model(self):
try:
currently_loaded_result = requests.get(f"{self.api_host}/v1/internal/model/info")
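The remote backend probes text-generation-webui for the currently loaded model via /v1/internal/model/info, as shown above. A hedged sketch of that check; the response shape (a JSON body with a model_name key) is an assumption, since it is not part of this diff:

import requests

def get_loaded_model_name(api_host: str):
    # Ask text-generation-webui which model is currently loaded.
    # Assumes the endpoint returns JSON like {"model_name": "..."}.
    try:
        resp = requests.get(f"{api_host}/v1/internal/model/info", timeout=10)
        resp.raise_for_status()
        return resp.json().get("model_name")
    except requests.RequestException:
        return None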
@@ -396,7 +400,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
if include_generation_prompt:
formatted_prompt = formatted_prompt + template_desc["generation_prompt"]
return formatted_prompt
def _async_generate_prompt(self, prompt_template: str) -> str:
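The prompt formatter appends template_desc["generation_prompt"] so the model continues as the assistant. A rough sketch of how such a formatter fits together; every key other than "generation_prompt" is an assumption, not taken from this diff:

# Illustrative template description; only "generation_prompt" appears in the diff.
TEMPLATE_DESC = {
    "user": {"prefix": "<|user|>\n", "suffix": "\n"},
    "assistant": {"prefix": "<|assistant|>\n", "suffix": "\n"},
    "generation_prompt": "<|assistant|>\n",
}

def format_prompt(messages, template_desc, include_generation_prompt=True):
    # Wrap each chat turn in its role markers, then optionally append the
    # generation prompt so the model answers as the assistant.
    formatted_prompt = ""
    for message in messages:
        role = template_desc[message["role"]]
        formatted_prompt += role["prefix"] + message["content"] + role["suffix"]
    if include_generation_prompt:
        formatted_prompt += template_desc["generation_prompt"]
    return formatted_prompt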
@@ -423,7 +427,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
value = F"{closest_color(value)} {value}"
elif attribute_name == "volume_level":
value = f"{int(value*100)}%"
result = result + ";" + str(value)
return result
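The hunk above serializes exposed entity attributes into a compact, semicolon-separated string for the prompt, converting colors to a human-readable name and volume to a percentage. A sketch of the surrounding loop; the function name and the attribute names that trigger each conversion are assumptions (only the two conversions themselves appear in the diff), and closest_color is assumed to be defined elsewhere in the component:

def format_entity_attributes(state: str, attributes: dict) -> str:
    # Flatten selected attributes into 'state;attr1;attr2;...' for the prompt.
    result = state
    for attribute_name, value in attributes.items():
        if attribute_name == "rgb_color":
            # Prepend a human-readable color name to the raw RGB value.
            value = f"{closest_color(value)} {value}"
        elif attribute_name == "volume_level":
            value = f"{int(value * 100)}%"
        result = result + ";" + str(value)
    return result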

View File

@@ -1,6 +1,7 @@
"""Config flow for Local LLaMA Conversation integration."""
from __future__ import annotations
import time
import os
import logging
import types
@@ -75,7 +76,7 @@ def STEP_INIT_DATA_SCHEMA(backend_type=None):
return vol.Schema(
{
vol.Required(
CONF_BACKEND_TYPE,
CONF_BACKEND_TYPE,
default=backend_type if backend_type else DEFAULT_BACKEND_TYPE
): SelectSelector(SelectSelectorConfig(
options=[ BACKEND_TYPE_LLAMA_HF, BACKEND_TYPE_LLAMA_EXISTING, BACKEND_TYPE_REMOTE ],
@@ -140,7 +141,7 @@ def download_model_from_hf(
)
except Exception as ex:
return ex
def install_llama_cpp_python(config_dir: str):
try:
if not is_installed("llama-cpp-python"):
@@ -150,16 +151,22 @@ def install_llama_cpp_python(config_dir: str):
platform_suffix = "aarch64"
folder = os.path.dirname(__file__)
potential_wheels = [ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ]
if len(potential_wheels) == 1:
if len(potential_wheels) == 0:
# someone who is better at async can figure out why this is necessary
time.sleep(0.5)
return Exception("missing_wheels")
elif len(potential_wheels) == 1:
wheel_to_install = potential_wheels[0]
else:
_LOGGER.info("There are multiple potential wheels to install... Using the latest one")
wheel_to_install = sorted(potential_wheels, reverse=True)[0]
_LOGGER.debug("Wheel location: ", wheel_to_install)
_LOGGER.debug(f"Wheel location: {wheel_to_install}")
return install_package(os.path.join(folder, wheel_to_install), pip_kwargs(config_dir))
else:
_LOGGER.info("llama-cpp-python is already installed")
# someone who is better at async can figure out why this is necessary
time.sleep(0.5)
return True
except Exception as ex:
_LOGGER.exception("Install failed!")
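When more than one bundled wheel matches the platform suffix, the code above picks "the latest one" with a reverse lexical sort of the filenames. A small sketch of just that selection step (the filenames in the comment are illustrative); note that a plain string sort only approximates real version ordering:

import os

def pick_wheel(folder: str, platform_suffix: str):
    # Find prebuilt llama-cpp-python wheels shipped alongside the component.
    potential_wheels = [
        path for path in os.listdir(folder)
        if path.endswith(f"{platform_suffix}.whl")
    ]
    if not potential_wheels:
        return None  # config_flow surfaces this as a "missing wheel" error
    # Reverse lexical sort puts the highest version string first, e.g.
    # 'llama_cpp_python-0.2.26-...-aarch64.whl' before the 0.2.24 build.
    return sorted(potential_wheels, reverse=True)[0]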
@@ -174,12 +181,18 @@ class BaseLlamaConversationConfigFlow(FlowHandler, ABC):
def flow_manager(self) -> FlowManager:
"""Return the flow manager of the flow."""
@abstractmethod
async def async_step_pick_backend(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
""" Select backend """
@abstractmethod
async def async_step_install_local_wheels(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
""" Install pre-built wheels """
@abstractmethod
async def async_step_local_model(
self, user_input: dict[str, Any] | None = None
@@ -236,6 +249,12 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
async def async_step_user(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
"""Handle the initial step."""
return await self.async_step_pick_backend()
async def async_step_pick_backend(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
"""Handle the initial step."""
errors = {}
@@ -262,15 +281,17 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
)
if "base" not in errors:
# return await self.async_step_install_local_wheels()
return await self.async_step_local_model()
return await self.async_step_install_local_wheels()
else:
return await self.async_step_remote_model()
elif self.install_wheel_error:
errors["base"] = str(self.install_wheel_error)
self.install_wheel_error = None
return self.async_show_form(
step_id="user", data_schema=schema, errors=errors
step_id="pick_backend", data_schema=schema, errors=errors
)
async def async_step_install_local_wheels(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
@@ -296,10 +317,17 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
wheel_install_result = user_input["result"]
if isinstance(wheel_install_result, Exception):
_LOGGER.warning("Failed to install wheel: %s", repr(wheel_install_result))
self.wheel_install_error = wheel_install_result
return self.async_show_progress_done(next_step_id="user")
self.install_wheel_error = wheel_install_result
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="pick_backend")
elif wheel_install_result == False:
_LOGGER.warning("Failed to install wheel: %s", repr(wheel_install_result))
self.install_wheel_error = "pip_wheel_error"
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="pick_backend")
else:
_LOGGER.debug(f"Finished install: {wheel_install_result}")
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="local_model")
async def async_step_local_model(
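The result handled above arrives in user_input["result"] because the install runs as a background executor job that reports back into the flow. A rough sketch of how the task side of that pattern typically looks; the exact Home Assistant progress-flow calls and the way the result is fed back are assumptions, since the launching code is not part of this hunk:

async def async_step_install_local_wheels(self, user_input=None):
    if not self.install_wheel_task:
        # Run the blocking pip install off the event loop.
        self.install_wheel_task = self.hass.async_add_executor_job(
            install_llama_cpp_python, self.hass.config.config_dir
        )

        def _finished(task):
            # Re-enter this step with the outcome as user_input["result"].
            self.hass.async_create_task(
                self.hass.config_entries.flow.async_configure(
                    flow_id=self.flow_id, user_input={"result": task.result()}
                )
            )

        self.install_wheel_task.add_done_callback(_finished)
        # Exact async_show_progress arguments vary across Home Assistant versions.
        return self.async_show_progress(
            step_id="install_local_wheels",
            progress_action="install_local_wheels",
        )
    # ...then the Exception / False / success handling shown in the diff above.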

View File

@@ -1,7 +1,7 @@
root ::= (ws tosay)+ ws functioncalls?
root ::= (tosay "\n")+ functioncalls?
tosay ::= [0-9a-zA-Z .#%]*
functioncalls ::=
tosay ::= [0-9a-zA-Z #%.?!]*
functioncalls ::=
"```homeassistant\n" (object ws)* "```"
value ::= object | array | string | number | ("true" | "false" | "null") ws
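With the updated rules, the model emits one or more newline-terminated sentences, optionally followed by a fenced homeassistant block of JSON objects. Conforming output looks roughly like this (the JSON keys are illustrative; the grammar only requires valid objects):

I am turning on the kitchen lights.
```homeassistant
{"service": "light.turn_on", "entity_id": "light.kitchen"}
```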

View File

@@ -6,7 +6,9 @@
"missing_model_api": "The selected model is not provided by this API.",
"missing_model_file": "The provided file does not exist.",
"other_existing_local": "Another model is already loaded locally. Please unload it or configure a remote model.",
"unknown": "Unexpected error"
"unknown": "Unexpected error",
"missing_wheel": "Llama.cpp is not installed and could not find any wheels to install!",
"pip_wheel_error": "Pip returned an error while installing the wheel!"
},
"progress": {
"download": "Please wait while the model is being downloaded from HuggingFace. This can take a few minutes.",
@@ -31,7 +33,7 @@
"description": "Provide the connection details for an instance of text-generation-webui that is hosting the model.",
"title": "Configure connection to remote API"
},
"user": {
"pick_backend": {
"data": {
"download_model_from_hf": "Download model from HuggingFace",
"use_local_backend": "Use Llama.cpp"

View File

@@ -10,5 +10,6 @@ if [[ ! -d "./models/$MODEL_NAME" ]]; then
exit -1
fi
dos2unix $PROMPT_SRC
PROMPT=$(cat $PROMPT_SRC)
$LLAMA_CPP/build/bin/main --model "./models/$MODEL_NAME/$MODEL_NAME.$QUANT_TYPE.gguf" --temp 0.1 --ctx-size 2048 --prompt "$PROMPT" --grammar-file ./custom_components/llama_conversation/output.gbnf