Fix model default value detection, fix model tool usage with the Home LLM API, and add clearer descriptions for certain settings

This commit is contained in:
Alex O'Connell
2025-10-25 22:42:47 -04:00
parent f50997d1a3
commit 050a539f72
7 changed files with 147 additions and 117 deletions

View File

@@ -131,12 +131,9 @@ class OllamaAPIClient(LocalLLMClient):
tool_calls = [ llm.ToolInput(tool_name=x["function"]["name"], tool_args=x["function"]["arguments"]) for x in raw_tool_calls] if raw_tool_calls else None
stop_reason = response_json.get("done_reason")
_LOGGER.debug(f"{response=} {tool_calls=}")
# _LOGGER.debug(f"{response=} {tool_calls=}")
return response, tool_calls
# return TextGenerationResult(
# response=response, tool_calls=tool_calls, stop_reason=stop_reason, response_streamed=True
# )
def _generate_stream(self, conversation: List[conversation.Content], llm_api: llm.APIInstance | None, user_input: conversation.ConversationInput, entity_options: Dict[str, Any]) -> AsyncGenerator[TextGenerationResult, None]:
model_name = entity_options.get(CONF_CHAT_MODEL, "")
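For context, a minimal sketch of the mapping performed above, using a hypothetical Ollama chat response (the JSON values are illustrative assumptions; only the field names match what the diff references):

# hypothetical /api/chat response body; values are for illustration only
response_json = {
    "message": {
        "content": "",
        "tool_calls": [
            {"function": {"name": "HassTurnOn", "arguments": {"name": "kitchen light"}}}
        ],
    },
    "done_reason": "stop",
}
raw_tool_calls = response_json["message"].get("tool_calls")
tool_calls = [
    llm.ToolInput(tool_name=x["function"]["name"], tool_args=x["function"]["arguments"])
    for x in raw_tool_calls
] if raw_tool_calls else None
# -> [ToolInput(tool_name="HassTurnOn", tool_args={"name": "kitchen light"})]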

View File

@@ -148,7 +148,7 @@ from .const import (
DOMAIN,
HOME_LLM_API_ID,
DEFAULT_OPTIONS,
OPTIONS_OVERRIDES,
option_overrides,
RECOMMENDED_CHAT_MODELS,
EMBEDDED_LLAMA_CPP_PYTHON_VERSION
)
@@ -1120,13 +1120,15 @@ class LocalLLMSubentryFlowHandler(ConfigSubentryFlow):
entry = self._get_entry()
backend_type = entry.data[CONF_BACKEND_TYPE]
if not self.model_config:
if CONF_PROMPT not in self.model_config:
# determine selected language from model config or parent options
selected_language = self.model_config.get(
CONF_SELECTED_LANGUAGE, entry.options.get(CONF_SELECTED_LANGUAGE, "en")
)
model_name = self.model_config.get(CONF_CHAT_MODEL, "").lower()
OPTIONS_OVERRIDES = option_overrides(backend_type)
selected_default_options = {**DEFAULT_OPTIONS}
for key in OPTIONS_OVERRIDES.keys():
if key in model_name:
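The hunk is cut off above, but the intent of the loop is presumably to merge any override set whose key is a substring of the model name into the defaults. A rough sketch of that idea (the update call and the example filename are assumptions, not shown in the diff):

model_name = self.model_config.get(CONF_CHAT_MODEL, "").lower()
overrides = option_overrides(backend_type)
selected_default_options = {**DEFAULT_OPTIONS}
for key, values in overrides.items():
    # e.g. "home-llama-3.2" would match a hypothetical "home-llama-3.2-3b.q4_k_m.gguf"
    if key in model_name:
        selected_default_options.update(values)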

View File

@@ -1,5 +1,6 @@
"""Constants for the Local LLM Conversation integration."""
import types, os
from typing import Any
DOMAIN = "llama_conversation"
HOME_LLM_API_ID = "home-llm-service-api"
@@ -224,98 +225,106 @@ DEFAULT_OPTIONS = types.MappingProxyType(
}
)
OPTIONS_OVERRIDES = {
"home-llama-3.2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_CONTEXT_LENGTH: 131072,
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"mistral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"mixtral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"llama-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"llama3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"zephyr": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"phi-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"command-r": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"stablehome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
"tinyhome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
}
def option_overrides(backend_type: str) -> dict[str, Any]:
return {
"home-llama-3.2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_CONTEXT_LENGTH: 131072,
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# the llama.cpp server doesn't support custom tool calling formats, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: backend_type == BACKEND_TYPE_LLAMA_CPP_SERVER
},
"home-3b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# the llama.cpp server doesn't support custom tool calling formats, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: backend_type == BACKEND_TYPE_LLAMA_CPP_SERVER
},
"home-3b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"mistral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"mixtral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"llama-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"llama3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"zephyr": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"phi-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"command-r": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"stablehome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
"tinyhome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
}
INTEGRATION_VERSION = "0.4.1"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
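Turning the module-level OPTIONS_OVERRIDES dict into an option_overrides(backend_type) function lets a per-model default depend on the selected backend. A minimal sketch of the effect (BACKEND_TYPE_OLLAMA is assumed here as a second backend constant):

overrides = option_overrides(BACKEND_TYPE_LLAMA_CPP_SERVER)
assert overrides["home-llama-3.2"][CONF_ENABLE_LEGACY_TOOL_CALLING] is True

overrides = option_overrides(BACKEND_TYPE_OLLAMA)  # any backend with native tool calling support
assert overrides["home-llama-3.2"][CONF_ENABLE_LEGACY_TOOL_CALLING] is False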

View File

@@ -53,6 +53,8 @@ from .const import (
DEFAULT_TOOL_CALL_PREFIX,
DEFAULT_TOOL_CALL_SUFFIX,
DEFAULT_ENABLE_LEGACY_TOOL_CALLING,
HOME_LLM_API_ID,
SERVICE_TOOL_NAME,
)
_LOGGER = logging.getLogger(__name__)
@@ -187,10 +189,20 @@ class LocalLLMClient:
):
async def async_iterator():
async for input_chunk in result:
# _LOGGER.debug("Received chunk: %s", input_chunk)
_LOGGER.debug("Received chunk: %s", input_chunk)
tool_calls = input_chunk.tool_calls
# remap tool calls to the single service tool expected by the Home LLM API
if tool_calls and chat_log.llm_api and chat_log.llm_api.api.id == HOME_LLM_API_ID:
tool_calls = [
llm.ToolInput(
tool_name=SERVICE_TOOL_NAME,
tool_args={**tc.tool_args, "service": tc.tool_name}
) for tc in tool_calls
]
yield conversation.AssistantContentDeltaDict(
content=input_chunk.response,
tool_calls=input_chunk.tool_calls
tool_calls=tool_calls
)
return chat_log.async_add_delta_content_stream(user_input.agent_id, stream=async_iterator())
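To illustrate the remapping above with hypothetical values (only the shape is taken from the code): a tool call whose name is the target service is rewritten so that the single Home LLM service tool receives the service as an argument.

# as emitted by the model (hypothetical)
llm.ToolInput(tool_name="light.turn_on", tool_args={"name": "kitchen light"})
# after remapping for the Home LLM API
llm.ToolInput(tool_name=SERVICE_TOOL_NAME, tool_args={"name": "kitchen light", "service": "light.turn_on"})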
@@ -263,6 +275,7 @@ class LocalLLMClient:
tool_calls: List[Tuple[llm.ToolInput, Any]] = []
# if max tool calls is 0 then we expect to generate the response & tool call in one go
for idx in range(max(1, max_tool_call_iterations)):
_LOGGER.debug(f"Generating response for {user_input.text=}, iteration {idx+1}/{max_tool_call_iterations}")
generation_result = await self._async_generate(message_history, user_input, chat_log, entity_options)
last_generation_had_tool_calls = False
@@ -270,6 +283,7 @@ class LocalLLMClient:
try:
message = await anext(generation_result)
message_history.append(message)
_LOGGER.debug("Added message to history: %s", message)
if message.role == "assistant":
if message.tool_calls and len(message.tool_calls) > 0:
last_generation_had_tool_calls = True
@@ -367,10 +381,10 @@ class LocalLLMClient:
in_thinking = False
in_tool_call = False
tool_content = ""
last_5_tokens = [] # FIXME: this still returns the first few tokens of the tool call if the prefix is split across chunks
last_5_tokens = []
cur_match_length = 0
async for chunk in token_generator:
_LOGGER.debug(f"Handling chunk: {chunk} {in_thinking=} {in_tool_call=} {last_5_tokens=}")
# _LOGGER.debug(f"Handling chunk: {chunk} {in_thinking=} {in_tool_call=} {last_5_tokens=}")
tool_calls: Optional[List[str | llm.ToolInput | dict]]
content, tool_calls = chunk
@@ -578,7 +592,7 @@ class LocalLLMClient:
if attribute_name not in attributes:
continue
_LOGGER.debug(f"{attribute_name} = {attributes[attribute_name]}")
# _LOGGER.debug(f"{attribute_name} = {attributes[attribute_name]}")
value = attributes[attribute_name]
if value is not None:

View File

@@ -114,7 +114,9 @@
"in_context_examples": "If you are using a model that is not specifically fine-tuned for use with this integration: enable this",
"extra_attributes_to_expose": "This is the list of Home Assistant 'attributes' that are exposed to the model. This limits how much information the model is able to see and answer questions on.",
"gbnf_grammar": "Forces the model to output properly formatted responses. Ensure the file specified below exists in the integration directory.",
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below."
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below.",
"enable_legacy_tool_calling": "Prefer to process tool calls locally rather than relying on the backend to handle the tool calling format. Can be more reliable, however it requires properly setting the tool call prefix and suffix.",
"max_tool_call_iterations": "Set to 0 to generate the response and tool call in one attempt, without looping (use this for Home models v1-v3)."
},
"description": "Please configure the model according to how it should be prompted. There are many different options and selecting the correct ones for your model is essential to getting optimal performance. See [here](https://github.com/acon96/home-llm/blob/develop/docs/Backend%20Configuration.md) for more information about the options on this page.\n\n**Some defaults may have been chosen for you based on the name of the selected model name or filename.** If you renamed a file or are using a fine-tuning of a supported model, then the defaults may not have been detected.",
"title": "Configure the selected model"
@@ -166,7 +168,9 @@
"in_context_examples": "If you are using a model that is not specifically fine-tuned for use with this integration: enable this",
"extra_attributes_to_expose": "This is the list of Home Assistant 'attributes' that are exposed to the model. This limits how much information the model is able to see and answer questions on.",
"gbnf_grammar": "Forces the model to output properly formatted responses. Ensure the file specified below exists in the integration directory.",
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below."
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below.",
"enable_legacy_tool_calling": "Prefer to process tool calls locally rather than relying on the backend to handle the tool calling format. Can be more reliable, however it requires properly setting the tool call prefix and suffix.",
"max_tool_call_iterations": "Set to 0 to generate the response and tool call in one attempt, without looping (use this for Home models v1-v3)."
},
"description": "Please configure the model according to how it should be prompted. There are many different options and selecting the correct ones for your model is essential to getting optimal performance. See [here](https://github.com/acon96/home-llm/blob/develop/docs/Backend%20Configuration.md) for more information about the options on this page.\n\n**Some defaults may have been chosen for you based on the name of the selected model name or filename.** If you renamed a file or are using a fine-tuning of a supported model, then the defaults may not have been detected.",
"title": "Configure the selected model"

View File

@@ -430,18 +430,11 @@ def parse_raw_tool_call(raw_block: str | dict, llm_api: llm.APIInstance, user_in
if "rgb_color" in args_dict and isinstance(args_dict["rgb_color"], str):
args_dict["rgb_color"] = [ int(x) for x in args_dict["rgb_color"][1:-1].split(",") ]
if llm_api.api.id == HOME_LLM_API_ID:
to_say = parsed_tool_call.pop("to_say", "")
tool_input = llm.ToolInput(
tool_name=SERVICE_TOOL_NAME,
tool_args=args_dict,
)
else:
to_say = ""
tool_input = llm.ToolInput(
tool_name=tool_name,
tool_args=args_dict,
)
to_say = args_dict.pop("to_say", "")
tool_input = llm.ToolInput(
tool_name=tool_name,
tool_args=args_dict,
)
return tool_input, to_say
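With this change, "to_say" is popped from the parsed arguments regardless of which LLM API is active, and the tool name is passed through unchanged (the remapping to SERVICE_TOOL_NAME for the Home LLM API now happens in the streaming path shown earlier). A hypothetical example, assuming the model emits "to_say" inside the arguments object:

raw_block = '{"name": "light.turn_on", "arguments": {"name": "kitchen light", "to_say": "Turning on the kitchen light."}}'
tool_input, to_say = parse_raw_tool_call(raw_block, llm_api, user_input)
# tool_input.tool_name == "light.turn_on"
# tool_input.tool_args == {"name": "kitchen light"}   ("to_say" removed)
# to_say == "Turning on the kitchen light."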