Merge pull request #162 from acon96/release/v0.3.2

Release v0.3.2
Alex O'Connell
2024-06-08 16:41:42 -04:00
committed by GitHub
11 changed files with 192 additions and 73 deletions


@@ -7,7 +7,13 @@ assignees: ''
---
***Please do not report issues with the model generating incorrect output. This includes any instance where the model responds with `Failed to run: ...` or outputs badly formatted responses. If you are having trouble getting the correct output from the model, please open a Discussion thread instead.***
<!--
Please do not report issues with the model generating incorrect output. This includes any instance where the model responds with `Failed to run: ...` or outputs badly formatted responses. If you are having trouble getting the correct output from the model, please open a Discussion thread instead.
If you recently updated Home Assistant to a newly released version, please indicate that in your report.
-->
**Describe the bug**
A clear and concise description of what the bug is.


@@ -132,6 +132,7 @@ In order to facilitate running the project entirely on the system where Home Ass
## Version History
| Version | Description |
|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| v0.3.2 | Fix for exposed script entities causing errors, fix missing GBNF error, trim whitespace from model output |
| v0.3.1 | Adds basic area support in prompting, Fix for broken requirements, fix for issue with formatted tools, fix custom API not registering on startup properly |
| v0.3 | Adds support for Home Assistant LLM APIs, improved model prompting and tool formatting options, and automatic detection of GGUF quantization levels on HuggingFace |
| v0.2.17 | Disable native llama.cpp wheel optimizations, add Command R prompt format |


@@ -394,10 +394,12 @@ class LocalLLMAgent(AbstractConversationAgent):
try:
tool_response = await llm_api.async_call_tool(tool_input)
_LOGGER.debug("Tool response: %s", tool_response)
except (HomeAssistantError, vol.Invalid) as e:
tool_response = {"error": type(e).__name__}
if str(e):
tool_response["error_text"] = str(e)
_LOGGER.debug("Tool response: %s", tool_response)
intent_response = intent.IntentResponse(language=user_input.language)
intent_response.async_set_error(
@@ -408,8 +410,6 @@ class LocalLLMAgent(AbstractConversationAgent):
response=intent_response, conversation_id=conversation_id
)
_LOGGER.debug("Tool response: %s", tool_response)
# handle models that generate a function call and wait for the result before providing a response
if self.entry.options.get(CONF_TOOL_MULTI_TURN_CHAT, DEFAULT_TOOL_MULTI_TURN_CHAT):
conversation.append({"role": "tool", "message": json.dumps(tool_response)})
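For context, here is a minimal sketch (an assumption for illustration, not code from this commit) of what the conversation history might contain after a successful tool call when multi-turn tool chat is enabled, following the `{"role": ..., "message": ...}` shape used above:

```python
import json

# Illustrative only: the tool result is serialized to JSON and appended as a
# "tool" message so the model can produce its final response on the next turn.
tool_response = {"result": "success"}
conversation = [
    {"role": "system", "message": "<rendered system prompt>"},
    {"role": "user", "message": "turn on the kitchen light"},
    {"role": "assistant", "message": '{"name": "HassTurnOn", "arguments": {"name": "light.kitchen_light"}}'},
    {"role": "tool", "message": json.dumps(tool_response)},
]
```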
@@ -436,7 +436,7 @@ class LocalLLMAgent(AbstractConversationAgent):
# generate intent response to Home Assistant
intent_response = intent.IntentResponse(language=user_input.language)
intent_response.async_set_speech(to_say)
intent_response.async_set_speech(to_say.strip())
return ConversationResult(
response=intent_response, conversation_id=conversation_id
)
@@ -672,7 +672,8 @@ class LocalLLMAgent(AbstractConversationAgent):
"state": state,
"attributes": exposed_attributes,
"area_name": attributes.get("area_name"),
"area_id": attributes.get("area_id")
"area_id": attributes.get("area_id"),
"is_alias": False
})
if "aliases" in attributes:
for alias in attributes["aliases"]:
@@ -683,17 +684,25 @@ class LocalLLMAgent(AbstractConversationAgent):
"state": state,
"attributes": exposed_attributes,
"area_name": attributes.get("area_name"),
"area_id": attributes.get("area_id")
"area_id": attributes.get("area_id"),
"is_alias": True
})
if llm_api:
if llm_api.api.id == HOME_LLM_API_ID:
service_dict = self.hass.services.async_services()
all_services = []
scripts_added = False
for domain in domains:
# scripts show up as individual services
if domain == "script":
all_services.extend(["script.reload()", "script.turn_on()", "script.turn_off()", "script.toggle()"])
if domain == "script" and not scripts_added:
all_services.extend([
("script.reload", vol.Schema({}), ""),
("script.turn_on", vol.Schema({}), ""),
("script.turn_off", vol.Schema({}), ""),
("script.toggle", vol.Schema({}), ""),
])
scripts_added = True
continue
for name, service in service_dict.get(domain, {}).items():


@@ -636,18 +636,34 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
schema = vol.Schema(local_llama_config_option_schema(self.hass, selected_default_options, backend_type))
if user_input:
if not user_input.get(CONF_REFRESH_SYSTEM_PROMPT) and user_input.get(CONF_PROMPT_CACHING_ENABLED):
errors["base"] = "sys_refresh_caching_enabled"
if user_input.get(CONF_USE_GBNF_GRAMMAR):
filename = user_input.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE)
if not os.path.isfile(os.path.join(os.path.dirname(__file__), filename)):
errors["base"] = "missing_gbnf_file"
description_placeholders["filename"] = filename
if user_input.get(CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES):
filename = user_input.get(CONF_IN_CONTEXT_EXAMPLES_FILE, DEFAULT_IN_CONTEXT_EXAMPLES_FILE)
if not os.path.isfile(os.path.join(os.path.dirname(__file__), filename)):
errors["base"] = "missing_icl_file"
description_placeholders["filename"] = filename
if user_input[CONF_LLM_HASS_API] == "none":
user_input.pop(CONF_LLM_HASS_API)
try:
# validate input
schema(user_input)
if len(errors) == 0:
try:
# validate input
schema(user_input)
self.options = user_input
return await self.async_step_finish()
except Exception as ex:
_LOGGER.exception("An unknown error has occurred!")
errors["base"] = "unknown"
self.options = user_input
return await self.async_step_finish()
except Exception as ex:
_LOGGER.exception("An unknown error has occurred!")
errors["base"] = "unknown"
return self.async_show_form(
step_id="model_parameters", data_schema=schema, errors=errors, description_placeholders=description_placeholders,


@@ -323,5 +323,5 @@ OPTIONS_OVERRIDES = {
}
}
INTEGRATION_VERSION = "0.3.1"
INTEGRATION_VERSION = "0.3.2"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.77"


@@ -1,7 +1,7 @@
{
"domain": "llama_conversation",
"name": "Local LLM Conversation",
"version": "0.3.1",
"version": "0.3.2",
"codeowners": ["@acon96"],
"config_flow": true,
"dependencies": ["conversation"],


@@ -0,0 +1,29 @@
root ::= (tosay "\n")+ functioncalls?
tosay ::= [0-9a-zA-Z #%.?!]*
functioncalls ::=
"```homeassistant\n" (object ws)* "```"
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
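The grammar above restricts output to free-form text lines optionally followed by a fenced `homeassistant` block of JSON objects. As a rough illustration of how such a grammar can be applied with llama-cpp-python (the file name, model path, and prompt below are placeholders, not taken from this commit):

```python
from llama_cpp import Llama, LlamaGrammar

# Load the GBNF grammar; constrained sampling can then only emit strings the
# grammar accepts (plain "to say" lines plus an optional JSON tool-call block).
grammar = LlamaGrammar.from_file("output.gbnf")  # placeholder file name

llm = Llama(model_path="model.q4_k_m.gguf")  # placeholder model path
result = llm(
    "<|im_start|>user\nturn on the kitchen light<|im_end|>\n<|im_start|>assistant\n",
    grammar=grammar,
    max_tokens=128,
)
print(result["choices"][0]["text"])
```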


@@ -9,7 +9,10 @@
"missing_model_file": "The provided file does not exist.",
"other_existing_local": "Another model is already loaded locally. Please unload it or configure a remote model.",
"unknown": "Unexpected error",
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details.",
"sys_refresh_caching_enabled": "System prompt refresh must be enabled for prompt caching to work!",
"missing_gbnf_file": "The GBNF file was not found: {filename}",
"missing_icl_file": "The in context learning example CSV file was not found: {filename}"
},
"progress": {
"download": "Please wait while the model is being downloaded from HuggingFace. This can take a few minutes.",
@@ -157,8 +160,8 @@
},
"error": {
"sys_refresh_caching_enabled": "System prompt refresh must be enabled for prompt caching to work!",
"missing_gbnf_file": "The GBNF file was not found: '{filename}'",
"missing_icl_file": "The in context learning example CSV file was not found: '{filename}'"
"missing_gbnf_file": "The GBNF file was not found: {filename}",
"missing_icl_file": "The in context learning example CSV file was not found: {filename}"
}
},
"selector": {


@@ -14,6 +14,11 @@
* [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model)
* [Step 2: Connect to the Ollama API](#step-2-connect-to-the-ollama-api)
* [Step 3: Model Configuration](#step-3-model-configuration-1)
* [Path 3: Using Llama-3-8B-Instruct with LM Studio](#path-3-using-llama-3-8b-instruct-with-lm-studio)
* [Overview](#overview-2)
* [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model-1)
* [Step 2: Connect to the LM Studio API](#step-2-connect-to-the-lm-studio-api)
* [Step 3: Model Configuration](#step-3-model-configuration-2)
* [Configuring the Integration as a Conversation Agent](#configuring-the-integration-as-a-conversation-agent)
* [Finished!](#finished)
@@ -103,6 +108,41 @@ Once the desired API has been selected, scroll to the bottom and click `Submit`.
> NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on.
## Path 3: Using Llama-3-8B-Instruct with LM Studio
### Overview
Another model you can use if you have a GPU is Meta's Llama-3-8B-Instruct model. This path assumes you have a machine with a GPU that already has [LM Studio](https://lmstudio.ai/) installed on it. This path uses in-context learning examples to prompt the model to produce the output that we expect.
### Step 1: Downloading and serving the Model
Llama 3 8B can be downloaded and set up on the serving machine using LM Studio as follows:
1. Search for `lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF` in the main interface.
2. Select and download the version of the model that is recommended for your VRAM configuration.
3. Select the 'Local Server' tab on the left side of the application.
4. Load the model by selecting it from the bar in the top middle of the screen. The server should start automatically when the model finishes loading.
5. Take note of the port that the server is running on.
### Step 2: Connect to the LM Studio API
1. In Home Assistant: navigate to `Settings > Devices and Services`
2. Select the `+ Add Integration` button in the bottom right corner
3. Search for, and select `Local LLM Conversation`
4. Select `Generic OpenAI Compatible API` from the dropdown and click `Submit`
5. Set up the connection to the API:
- **IP Address**: Fill in the IP address of the machine hosting LM Studio
- **Port**: enter the port that was listed in LM Studio
- **Use HTTPS**: unchecked
- **Model Name**: This can be any value, as LM Studio uses the currently loaded model for all incoming requests.
- **API Key**: leave blank
6. Click `Submit`
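If the integration cannot connect, a quick sanity check is to query LM Studio's OpenAI-compatible models endpoint directly. This sketch assumes LM Studio's default port of 1234 and a placeholder host; substitute the address and port noted in Step 1:

```python
import requests

# LM Studio's local server speaks the OpenAI API; /v1/models returns the
# currently loaded model if the server is up and reachable.
resp = requests.get("http://192.168.1.50:1234/v1/models", timeout=10)
resp.raise_for_status()
print(resp.json())
```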
### Step 3: Model Configuration
This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works.
For now, defaults for the model should have been populated. If you would like the model to be able to control devices then you must select the `Assist` API.
Once the desired API has been selected, scroll to the bottom and click `Submit`.
> NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on.
## Configuring the Integration as a Conversation Agent
Now that the integration is configured and providing the conversation agent, we need to configure Home Assistant to use our conversation agent instead of the built-in intent recognition system.


@@ -86,8 +86,10 @@ from homeassistant.components.conversation import ConversationInput
from homeassistant.const import (
CONF_HOST,
CONF_PORT,
CONF_SSL
CONF_SSL,
CONF_LLM_HASS_API
)
from homeassistant.helpers.llm import LLM_API_ASSIST, APIInstance
_LOGGER = logging.getLogger(__name__)
@@ -122,26 +124,19 @@ def config_entry():
},
options={
**DEFAULT_OPTIONS,
CONF_LLM_HASS_API: LLM_API_ASSIST,
CONF_PROMPT: DEFAULT_PROMPT_BASE,
CONF_SERVICE_CALL_REGEX: r"({[\S \t]*})"
}
)
@pytest.fixture
def home_assistant_mock():
mock_home_assistant = MagicMock()
async def call_now(func, *args, **kwargs):
return func(*args, **kwargs)
mock_home_assistant.async_add_executor_job.side_effect = call_now
mock_home_assistant.services.async_call = AsyncMock()
yield mock_home_assistant
@pytest.fixture
def local_llama_agent_fixture(config_entry, home_assistant_mock):
def local_llama_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(LlamaCppAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(LlamaCppAgent, '_load_grammar') as load_grammar_mock, \
patch.object(LlamaCppAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(LlamaCppAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.importlib.import_module') as import_module_mock, \
patch('custom_components.llama_conversation.agent.install_llama_cpp_python') as install_llama_cpp_python_mock:
@@ -166,14 +161,17 @@ def local_llama_agent_fixture(config_entry, home_assistant_mock):
generate_mock.return_value = list(range(20))
detokenize_mock = llama_instance_mock.detokenize
detokenize_mock.return_value = json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
}).encode()
detokenize_mock.return_value = ("I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
})).encode()
call_tool_mock.return_value = {"result": "success"}
agent_obj = LlamaCppAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -268,10 +266,11 @@ async def test_local_llama_agent(local_llama_agent_fixture):
)
@pytest.fixture
def ollama_agent_fixture(config_entry, home_assistant_mock):
def ollama_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(OllamaAPIAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(OllamaAPIAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(OllamaAPIAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -291,8 +290,10 @@ def ollama_agent_fixture(config_entry, home_assistant_mock):
response_mock.json.return_value = { "models": [ {"name": config_entry.data[CONF_CHAT_MODEL] }] }
requests_get_mock.return_value = response_mock
call_tool_mock.return_value = {"result": "success"}
agent_obj = OllamaAPIAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -318,10 +319,11 @@ async def test_ollama_agent(ollama_agent_fixture):
response_mock.json.return_value = {
"model": ollama_agent.entry.data[CONF_CHAT_MODEL],
"created_at": "2023-11-09T21:07:55.186497Z",
"response": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"response": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"done": True,
"context": [1, 2, 3],
@@ -410,10 +412,11 @@ async def test_ollama_agent(ollama_agent_fixture):
@pytest.fixture
def text_generation_webui_agent_fixture(config_entry, home_assistant_mock):
def text_generation_webui_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(TextGenerationWebuiAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(TextGenerationWebuiAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(TextGenerationWebuiAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -433,8 +436,10 @@ def text_generation_webui_agent_fixture(config_entry, home_assistant_mock):
response_mock.json.return_value = { "model_name": config_entry.data[CONF_CHAT_MODEL] }
requests_get_mock.return_value = response_mock
call_tool_mock.return_value = {"result": "success"}
agent_obj = TextGenerationWebuiAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -464,10 +469,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
"model": "gpt-3.5-turbo-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"text": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"text": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"index": 0,
"logprobs": None,
@@ -559,10 +565,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
"index": 0,
"message": {
"role": "assistant",
"content": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"content": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
},
"logprobs": None,
@@ -669,10 +676,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
)
@pytest.fixture
def generic_openai_agent_fixture(config_entry, home_assistant_mock):
def generic_openai_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(GenericOpenAIAPIAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(GenericOpenAIAPIAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(GenericOpenAIAPIAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -688,8 +696,10 @@ def generic_openai_agent_fixture(config_entry, home_assistant_mock):
["light", "switch", "fan"]
)
call_tool_mock.return_value = {"result": "success"}
agent_obj = GenericOpenAIAPIAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -714,10 +724,11 @@ async def test_generic_openai_agent(generic_openai_agent_fixture):
"model": "gpt-3.5-turbo-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"text": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"text": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"index": 0,
"logprobs": None,
@@ -774,10 +785,11 @@ async def test_generic_openai_agent(generic_openai_agent_fixture):
"index": 0,
"message": {
"role": "assistant",
"content": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"content": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
},
"logprobs": None,


@@ -7,6 +7,7 @@ from homeassistant.const import (
CONF_HOST,
CONF_PORT,
CONF_SSL,
CONF_LLM_HASS_API,
)
from homeassistant.data_entry_flow import FlowResultType
@@ -25,6 +26,8 @@ from custom_components.llama_conversation.const import (
CONF_DOWNLOADED_MODEL_FILE,
CONF_EXTRA_ATTRIBUTES_TO_EXPOSE,
CONF_PROMPT_TEMPLATE,
CONF_TOOL_FORMAT,
CONF_TOOL_MULTI_TURN_CHAT,
CONF_ENABLE_FLASH_ATTENTION,
CONF_USE_GBNF_GRAMMAR,
CONF_GBNF_GRAMMAR_FILE,
@@ -290,16 +293,16 @@ async def test_validate_config_flow_ollama(mock_setup_entry, hass: HomeAssistant
# TODO: write tests for configflow setup for llama.cpp (both versions) + text-generation-webui
def test_validate_options_schema():
def test_validate_options_schema(hass: HomeAssistant):
universal_options = [
CONF_PROMPT, CONF_PROMPT_TEMPLATE,
CONF_LLM_HASS_API, CONF_PROMPT, CONF_PROMPT_TEMPLATE, CONF_TOOL_FORMAT, CONF_TOOL_MULTI_TURN_CHAT,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES, CONF_IN_CONTEXT_EXAMPLES_FILE, CONF_NUM_IN_CONTEXT_EXAMPLES,
CONF_MAX_TOKENS, CONF_EXTRA_ATTRIBUTES_TO_EXPOSE,
CONF_SERVICE_CALL_REGEX, CONF_REFRESH_SYSTEM_PROMPT, CONF_REMEMBER_CONVERSATION, CONF_REMEMBER_NUM_INTERACTIONS,
]
options_llama_hf = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_HF)
options_llama_hf = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_HF)
assert set(options_llama_hf.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_BATCH_SIZE, CONF_THREAD_COUNT, CONF_BATCH_THREAD_COUNT, CONF_ENABLE_FLASH_ATTENTION, # llama.cpp specific
@@ -308,7 +311,7 @@ def test_validate_options_schema():
CONF_PROMPT_CACHING_ENABLED, CONF_PROMPT_CACHING_INTERVAL # supports prompt caching
])
options_llama_existing = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_EXISTING)
options_llama_existing = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_EXISTING)
assert set(options_llama_existing.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_BATCH_SIZE, CONF_THREAD_COUNT, CONF_BATCH_THREAD_COUNT, CONF_ENABLE_FLASH_ATTENTION, # llama.cpp specific
@@ -317,7 +320,7 @@ def test_validate_options_schema():
CONF_PROMPT_CACHING_ENABLED, CONF_PROMPT_CACHING_INTERVAL # supports prompt caching
])
options_ollama = local_llama_config_option_schema(None, BACKEND_TYPE_OLLAMA)
options_ollama = local_llama_config_option_schema(hass, None, BACKEND_TYPE_OLLAMA)
assert set(options_ollama.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_TYPICAL_P, # supports top_k temperature, top_p and typical_p samplers
CONF_OLLAMA_KEEP_ALIVE_MIN, CONF_OLLAMA_JSON_MODE, # ollama specific
@@ -325,7 +328,7 @@ def test_validate_options_schema():
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_text_gen_webui = local_llama_config_option_schema(None, BACKEND_TYPE_TEXT_GEN_WEBUI)
options_text_gen_webui = local_llama_config_option_schema(hass, None, BACKEND_TYPE_TEXT_GEN_WEBUI)
assert set(options_text_gen_webui.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_TEXT_GEN_WEBUI_CHAT_MODE, CONF_TEXT_GEN_WEBUI_PRESET, # text-gen-webui specific
@@ -333,13 +336,13 @@ def test_validate_options_schema():
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_generic_openai = local_llama_config_option_schema(None, BACKEND_TYPE_GENERIC_OPENAI)
options_generic_openai = local_llama_config_option_schema(hass, None, BACKEND_TYPE_GENERIC_OPENAI)
assert set(options_generic_openai.keys()) == set(universal_options + [
CONF_TEMPERATURE, CONF_TOP_P, # only supports top_p and temperature sampling
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_llama_cpp_python_server = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER)
options_llama_cpp_python_server = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER)
assert set(options_llama_cpp_python_server.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, # supports top_k, temperature, and top p sampling
CONF_USE_GBNF_GRAMMAR, CONF_GBNF_GRAMMAR_FILE, # supports GBNF