Fix model default value detection, fix model tool usage with the Home LLM API, and add clearer descriptions for certain settings

This commit is contained in:
Alex O'Connell
2025-10-25 22:42:47 -04:00
parent f50997d1a3
commit 050a539f72
7 changed files with 147 additions and 117 deletions

View File

@@ -131,12 +131,9 @@ class OllamaAPIClient(LocalLLMClient):
tool_calls = [ llm.ToolInput(tool_name=x["function"]["name"], tool_args=x["function"]["arguments"]) for x in raw_tool_calls] if raw_tool_calls else None
stop_reason = response_json.get("done_reason")
_LOGGER.debug(f"{response=} {tool_calls=}")
# _LOGGER.debug(f"{response=} {tool_calls=}")
return response, tool_calls
# return TextGenerationResult(
# response=response, tool_calls=tool_calls, stop_reason=stop_reason, response_streamed=True
# )
def _generate_stream(self, conversation: List[conversation.Content], llm_api: llm.APIInstance | None, user_input: conversation.ConversationInput, entity_options: Dict[str, Any]) -> AsyncGenerator[TextGenerationResult, None]:
model_name = entity_options.get(CONF_CHAT_MODEL, "")
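For context, a minimal sketch of the mapping performed above, using a hypothetical Ollama chat response (the JSON values are illustrative assumptions; only the field names match what the diff references):

# hypothetical /api/chat response body; values are for illustration only
response_json = {
    "message": {
        "content": "",
        "tool_calls": [
            {"function": {"name": "HassTurnOn", "arguments": {"name": "kitchen light"}}}
        ],
    },
    "done_reason": "stop",
}
raw_tool_calls = response_json["message"].get("tool_calls")
tool_calls = [
    llm.ToolInput(tool_name=x["function"]["name"], tool_args=x["function"]["arguments"])
    for x in raw_tool_calls
] if raw_tool_calls else None
# -> [ToolInput(tool_name="HassTurnOn", tool_args={"name": "kitchen light"})]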

View File

@@ -148,7 +148,7 @@ from .const import (
DOMAIN,
HOME_LLM_API_ID,
DEFAULT_OPTIONS,
OPTIONS_OVERRIDES,
option_overrides,
RECOMMENDED_CHAT_MODELS,
EMBEDDED_LLAMA_CPP_PYTHON_VERSION
)
@@ -1120,13 +1120,15 @@ class LocalLLMSubentryFlowHandler(ConfigSubentryFlow):
entry = self._get_entry()
backend_type = entry.data[CONF_BACKEND_TYPE]
if not self.model_config:
if CONF_PROMPT not in self.model_config:
# determine selected language from model config or parent options
selected_language = self.model_config.get(
CONF_SELECTED_LANGUAGE, entry.options.get(CONF_SELECTED_LANGUAGE, "en")
)
model_name = self.model_config.get(CONF_CHAT_MODEL, "").lower()
OPTIONS_OVERRIDES = option_overrides(backend_type)
selected_default_options = {**DEFAULT_OPTIONS}
for key in OPTIONS_OVERRIDES.keys():
if key in model_name:
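The hunk is cut off above, but the intent of the loop is presumably to merge any override set whose key is a substring of the model name into the defaults. A rough sketch of that idea (the update call and the example filename are assumptions, not shown in the diff):

model_name = self.model_config.get(CONF_CHAT_MODEL, "").lower()
overrides = option_overrides(backend_type)
selected_default_options = {**DEFAULT_OPTIONS}
for key, values in overrides.items():
    # e.g. "home-llama-3.2" would match a hypothetical "home-llama-3.2-3b.q4_k_m.gguf"
    if key in model_name:
        selected_default_options.update(values)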

View File

@@ -1,5 +1,6 @@
"""Constants for the Local LLM Conversation integration."""
import types, os
from typing import Any
DOMAIN = "llama_conversation"
HOME_LLM_API_ID = "home-llm-service-api"
@@ -224,98 +225,106 @@ DEFAULT_OPTIONS = types.MappingProxyType(
}
)
OPTIONS_OVERRIDES = {
"home-llama-3.2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_CONTEXT_LENGTH: 131072,
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 1,
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"mistral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"mixtral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"llama-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"llama3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"zephyr": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"phi-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"command-r": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"stablehome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
"tinyhome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
}
def option_overrides(backend_type: str) -> dict[str, Any]:
return {
"home-llama-3.2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_CONTEXT_LENGTH: 131072,
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# the llama.cpp server doesn't support custom tool calling formats, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: backend_type == BACKEND_TYPE_LLAMA_CPP_SERVER
},
"home-3b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# the llama.cpp server doesn't support custom tool calling formats, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: backend_type == BACKEND_TYPE_LLAMA_CPP_SERVER
},
"home-3b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-3b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"home-1b-v1": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_TOOL_CALL_PREFIX: "```homeassistant",
CONF_TOOL_CALL_SUFFIX: "```",
CONF_MAX_TOOL_CALL_ITERATIONS: 0,
# no prompt formats with tool calling support, so just use legacy tool calling
CONF_ENABLE_LEGACY_TOOL_CALLING: True
},
"mistral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"mixtral": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_NO_SYSTEM_PROMPT_EXTRAS,
CONF_MIN_P: 0.1,
CONF_TYPICAL_P: 0.9,
},
"llama-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"llama3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"zephyr": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"phi-3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"command-r": {
CONF_PROMPT: DEFAULT_PROMPT_BASE + ICL_EXTRAS,
},
"stablehome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
"tinyhome": {
CONF_PROMPT: DEFAULT_PROMPT_BASE_LEGACY,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
},
}
INTEGRATION_VERSION = "0.4.1"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.3.16+b6153"
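Turning the module-level OPTIONS_OVERRIDES dict into an option_overrides(backend_type) function lets a per-model default depend on the selected backend. A minimal sketch of the effect (BACKEND_TYPE_OLLAMA is assumed here as a second backend constant):

overrides = option_overrides(BACKEND_TYPE_LLAMA_CPP_SERVER)
assert overrides["home-llama-3.2"][CONF_ENABLE_LEGACY_TOOL_CALLING] is True

overrides = option_overrides(BACKEND_TYPE_OLLAMA)  # any backend with native tool calling support
assert overrides["home-llama-3.2"][CONF_ENABLE_LEGACY_TOOL_CALLING] is False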

View File

@@ -53,6 +53,8 @@ from .const import (
DEFAULT_TOOL_CALL_PREFIX,
DEFAULT_TOOL_CALL_SUFFIX,
DEFAULT_ENABLE_LEGACY_TOOL_CALLING,
HOME_LLM_API_ID,
SERVICE_TOOL_NAME,
)
_LOGGER = logging.getLogger(__name__)
@@ -187,10 +189,20 @@ class LocalLLMClient:
):
async def async_iterator():
async for input_chunk in result:
# _LOGGER.debug("Received chunk: %s", input_chunk)
_LOGGER.debug("Received chunk: %s", input_chunk)
tool_calls = input_chunk.tool_calls
# remap tool calls to the single service tool expected by the Home LLM API
if tool_calls and chat_log.llm_api and chat_log.llm_api.api.id == HOME_LLM_API_ID:
tool_calls = [
llm.ToolInput(
tool_name=SERVICE_TOOL_NAME,
tool_args={**tc.tool_args, "service": tc.tool_name}
) for tc in tool_calls
]
yield conversation.AssistantContentDeltaDict(
content=input_chunk.response,
tool_calls=input_chunk.tool_calls
tool_calls=tool_calls
)
return chat_log.async_add_delta_content_stream(user_input.agent_id, stream=async_iterator())
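To illustrate the remapping above with hypothetical values (only the shape is taken from the code): a tool call whose name is the target service is rewritten so that the single Home LLM service tool receives the service as an argument.

# as emitted by the model (hypothetical)
llm.ToolInput(tool_name="light.turn_on", tool_args={"name": "kitchen light"})
# after remapping for the Home LLM API
llm.ToolInput(tool_name=SERVICE_TOOL_NAME, tool_args={"name": "kitchen light", "service": "light.turn_on"})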
@@ -263,6 +275,7 @@ class LocalLLMClient:
tool_calls: List[Tuple[llm.ToolInput, Any]] = []
# if max tool calls is 0 then we expect to generate the response & tool call in one go
for idx in range(max(1, max_tool_call_iterations)):
_LOGGER.debug(f"Generating response for {user_input.text=}, iteration {idx+1}/{max_tool_call_iterations}")
generation_result = await self._async_generate(message_history, user_input, chat_log, entity_options)
last_generation_had_tool_calls = False
@@ -270,6 +283,7 @@ class LocalLLMClient:
try:
message = await anext(generation_result)
message_history.append(message)
_LOGGER.debug("Added message to history: %s", message)
if message.role == "assistant":
if message.tool_calls and len(message.tool_calls) > 0:
last_generation_had_tool_calls = True
@@ -367,10 +381,10 @@ class LocalLLMClient:
in_thinking = False
in_tool_call = False
tool_content = ""
last_5_tokens = [] # FIXME: this still returns the first few tokens of the tool call if the prefix is split across chunks
last_5_tokens = []
cur_match_length = 0
async for chunk in token_generator:
_LOGGER.debug(f"Handling chunk: {chunk} {in_thinking=} {in_tool_call=} {last_5_tokens=}")
# _LOGGER.debug(f"Handling chunk: {chunk} {in_thinking=} {in_tool_call=} {last_5_tokens=}")
tool_calls: Optional[List[str | llm.ToolInput | dict]]
content, tool_calls = chunk
@@ -578,7 +592,7 @@ class LocalLLMClient:
if attribute_name not in attributes:
continue
_LOGGER.debug(f"{attribute_name} = {attributes[attribute_name]}")
# _LOGGER.debug(f"{attribute_name} = {attributes[attribute_name]}")
value = attributes[attribute_name]
if value is not None:

View File

@@ -114,7 +114,9 @@
"in_context_examples": "If you are using a model that is not specifically fine-tuned for use with this integration: enable this",
"extra_attributes_to_expose": "This is the list of Home Assistant 'attributes' that are exposed to the model. This limits how much information the model is able to see and answer questions on.",
"gbnf_grammar": "Forces the model to output properly formatted responses. Ensure the file specified below exists in the integration directory.",
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below."
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below.",
"enable_legacy_tool_calling": "Prefer to process tool calls locally rather than relying on the backend to handle the tool calling format. Can be more reliable, however it requires properly setting the tool call prefix and suffix.",
"max_tool_call_iterations": "Set to 0 to generate the response and tool call in one attempt, without looping (use this for Home models v1-v3)."
},
"description": "Please configure the model according to how it should be prompted. There are many different options and selecting the correct ones for your model is essential to getting optimal performance. See [here](https://github.com/acon96/home-llm/blob/develop/docs/Backend%20Configuration.md) for more information about the options on this page.\n\n**Some defaults may have been chosen for you based on the name of the selected model name or filename.** If you renamed a file or are using a fine-tuning of a supported model, then the defaults may not have been detected.",
"title": "Configure the selected model"
@@ -166,7 +168,9 @@
"in_context_examples": "If you are using a model that is not specifically fine-tuned for use with this integration: enable this",
"extra_attributes_to_expose": "This is the list of Home Assistant 'attributes' that are exposed to the model. This limits how much information the model is able to see and answer questions on.",
"gbnf_grammar": "Forces the model to output properly formatted responses. Ensure the file specified below exists in the integration directory.",
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below."
"prompt_caching": "Prompt caching attempts to pre-process the prompt (house state) and cache the processing that needs to be done to understand the prompt. Enabling this will cause the model to re-process the prompt any time an entity state changes in the house, restricted by the interval below.",
"enable_legacy_tool_calling": "Prefer to process tool calls locally rather than relying on the backend to handle the tool calling format. Can be more reliable, however it requires properly setting the tool call prefix and suffix.",
"max_tool_call_iterations": "Set to 0 to generate the response and tool call in one attempt, without looping (use this for Home models v1-v3)."
},
"description": "Please configure the model according to how it should be prompted. There are many different options and selecting the correct ones for your model is essential to getting optimal performance. See [here](https://github.com/acon96/home-llm/blob/develop/docs/Backend%20Configuration.md) for more information about the options on this page.\n\n**Some defaults may have been chosen for you based on the name of the selected model name or filename.** If you renamed a file or are using a fine-tuning of a supported model, then the defaults may not have been detected.",
"title": "Configure the selected model"

View File

@@ -430,18 +430,11 @@ def parse_raw_tool_call(raw_block: str | dict, llm_api: llm.APIInstance, user_in
if "rgb_color" in args_dict and isinstance(args_dict["rgb_color"], str):
args_dict["rgb_color"] = [ int(x) for x in args_dict["rgb_color"][1:-1].split(",") ]
if llm_api.api.id == HOME_LLM_API_ID:
to_say = parsed_tool_call.pop("to_say", "")
tool_input = llm.ToolInput(
tool_name=SERVICE_TOOL_NAME,
tool_args=args_dict,
)
else:
to_say = ""
tool_input = llm.ToolInput(
tool_name=tool_name,
tool_args=args_dict,
)
to_say = args_dict.pop("to_say", "")
tool_input = llm.ToolInput(
tool_name=tool_name,
tool_args=args_dict,
)
return tool_input, to_say
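With this change, "to_say" is popped from the parsed arguments regardless of which LLM API is active, and the tool name is passed through unchanged (the remapping to SERVICE_TOOL_NAME for the Home LLM API now happens in the streaming path shown earlier). A hypothetical example, assuming the model emits "to_say" inside the arguments object:

raw_block = '{"name": "light.turn_on", "arguments": {"name": "kitchen light", "to_say": "Turning on the kitchen light."}}'
tool_input, to_say = parse_raw_tool_call(raw_block, llm_api, user_input)
# tool_input.tool_name == "light.turn_on"
# tool_input.tool_args == {"name": "kitchen light"}   ("to_say" removed)
# to_say == "Turning on the kitchen light."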