Merge pull request #162 from acon96/release/v0.3.2

Release v0.3.2
Alex O'Connell
2024-06-08 16:41:42 -04:00
committed by GitHub
11 changed files with 192 additions and 73 deletions


@@ -7,7 +7,13 @@ assignees: ''
---
***Please do not report issues with the model generating incorrect output. This includes any instance where the model responds with `Failed to run: ...` or outputs badly formatted responses. If you are having trouble getting the correct output from the model, please open a Discussion thread instead.***
<!--
Please do not report issues with the model generating incorrect output. This includes any instance where the model responds with `Failed to run: ...` or outputs badly formatted responses. If you are having trouble getting the correct output from the model, please open a Discussion thread instead.
If you recently updated Home Assistant to a newly released version, please indicate that in your report.
-->
**Describe the bug**
A clear and concise description of what the bug is.


@@ -132,6 +132,7 @@ In order to facilitate running the project entirely on the system where Home Ass
## Version History
| Version | Description |
|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| v0.3.2 | Fix for exposed script entities causing errors, fix missing GBNF error, trim whitespace from model output |
| v0.3.1 | Adds basic area support in prompting, Fix for broken requirements, fix for issue with formatted tools, fix custom API not registering on startup properly |
| v0.3 | Adds support for Home Assistant LLM APIs, improved model prompting and tool formatting options, and automatic detection of GGUF quantization levels on HuggingFace |
| v0.2.17 | Disable native llama.cpp wheel optimizations, add Command R prompt format |


@@ -394,10 +394,12 @@ class LocalLLMAgent(AbstractConversationAgent):
try:
tool_response = await llm_api.async_call_tool(tool_input)
_LOGGER.debug("Tool response: %s", tool_response)
except (HomeAssistantError, vol.Invalid) as e:
tool_response = {"error": type(e).__name__}
if str(e):
tool_response["error_text"] = str(e)
_LOGGER.debug("Tool response: %s", tool_response)
intent_response = intent.IntentResponse(language=user_input.language)
intent_response.async_set_error(
@@ -408,8 +410,6 @@ class LocalLLMAgent(AbstractConversationAgent):
response=intent_response, conversation_id=conversation_id
)
_LOGGER.debug("Tool response: %s", tool_response)
# handle models that generate a function call and wait for the result before providing a response
if self.entry.options.get(CONF_TOOL_MULTI_TURN_CHAT, DEFAULT_TOOL_MULTI_TURN_CHAT):
conversation.append({"role": "tool", "message": json.dumps(tool_response)})
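For context, here is a minimal sketch (an assumption for illustration, not code from this commit) of what the conversation history might contain after a successful tool call when multi-turn tool chat is enabled, following the `{"role": ..., "message": ...}` shape used above:

```python
import json

# Illustrative only: the tool result is serialized to JSON and appended as a
# "tool" message so the model can produce its final response on the next turn.
tool_response = {"result": "success"}
conversation = [
    {"role": "system", "message": "<rendered system prompt>"},
    {"role": "user", "message": "turn on the kitchen light"},
    {"role": "assistant", "message": '{"name": "HassTurnOn", "arguments": {"name": "light.kitchen_light"}}'},
    {"role": "tool", "message": json.dumps(tool_response)},
]
```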
@@ -436,7 +436,7 @@ class LocalLLMAgent(AbstractConversationAgent):
# generate intent response to Home Assistant
intent_response = intent.IntentResponse(language=user_input.language)
intent_response.async_set_speech(to_say)
intent_response.async_set_speech(to_say.strip())
return ConversationResult(
response=intent_response, conversation_id=conversation_id
)
@@ -672,7 +672,8 @@ class LocalLLMAgent(AbstractConversationAgent):
"state": state,
"attributes": exposed_attributes,
"area_name": attributes.get("area_name"),
"area_id": attributes.get("area_id")
"area_id": attributes.get("area_id"),
"is_alias": False
})
if "aliases" in attributes:
for alias in attributes["aliases"]:
@@ -683,17 +684,25 @@ class LocalLLMAgent(AbstractConversationAgent):
"state": state,
"attributes": exposed_attributes,
"area_name": attributes.get("area_name"),
"area_id": attributes.get("area_id")
"area_id": attributes.get("area_id"),
"is_alias": True
})
if llm_api:
if llm_api.api.id == HOME_LLM_API_ID:
service_dict = self.hass.services.async_services()
all_services = []
scripts_added = False
for domain in domains:
# scripts show up as individual services
if domain == "script":
all_services.extend(["script.reload()", "script.turn_on()", "script.turn_off()", "script.toggle()"])
if domain == "script" and not scripts_added:
all_services.extend([
("script.reload", vol.Schema({}), ""),
("script.turn_on", vol.Schema({}), ""),
("script.turn_off", vol.Schema({}), ""),
("script.toggle", vol.Schema({}), ""),
])
scripts_added = True
continue
for name, service in service_dict.get(domain, {}).items():


@@ -636,18 +636,34 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
schema = vol.Schema(local_llama_config_option_schema(self.hass, selected_default_options, backend_type))
if user_input:
if not user_input.get(CONF_REFRESH_SYSTEM_PROMPT) and user_input.get(CONF_PROMPT_CACHING_ENABLED):
errors["base"] = "sys_refresh_caching_enabled"
if user_input.get(CONF_USE_GBNF_GRAMMAR):
filename = user_input.get(CONF_GBNF_GRAMMAR_FILE, DEFAULT_GBNF_GRAMMAR_FILE)
if not os.path.isfile(os.path.join(os.path.dirname(__file__), filename)):
errors["base"] = "missing_gbnf_file"
description_placeholders["filename"] = filename
if user_input.get(CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES):
filename = user_input.get(CONF_IN_CONTEXT_EXAMPLES_FILE, DEFAULT_IN_CONTEXT_EXAMPLES_FILE)
if not os.path.isfile(os.path.join(os.path.dirname(__file__), filename)):
errors["base"] = "missing_icl_file"
description_placeholders["filename"] = filename
if user_input[CONF_LLM_HASS_API] == "none":
user_input.pop(CONF_LLM_HASS_API)
try:
# validate input
schema(user_input)
if len(errors) == 0:
try:
# validate input
schema(user_input)
self.options = user_input
return await self.async_step_finish()
except Exception as ex:
_LOGGER.exception("An unknown error has occurred!")
errors["base"] = "unknown"
self.options = user_input
return await self.async_step_finish()
except Exception as ex:
_LOGGER.exception("An unknown error has occurred!")
errors["base"] = "unknown"
return self.async_show_form(
step_id="model_parameters", data_schema=schema, errors=errors, description_placeholders=description_placeholders,


@@ -323,5 +323,5 @@ OPTIONS_OVERRIDES = {
}
}
INTEGRATION_VERSION = "0.3.1"
INTEGRATION_VERSION = "0.3.2"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.77"


@@ -1,7 +1,7 @@
{
"domain": "llama_conversation",
"name": "Local LLM Conversation",
"version": "0.3.1",
"version": "0.3.2",
"codeowners": ["@acon96"],
"config_flow": true,
"dependencies": ["conversation"],


@@ -0,0 +1,29 @@
root ::= (tosay "\n")+ functioncalls?
tosay ::= [0-9a-zA-Z #%.?!]*
functioncalls ::=
"```homeassistant\n" (object ws)* "```"
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
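The grammar above restricts output to free-form text lines optionally followed by a fenced `homeassistant` block of JSON objects. As a rough illustration of how such a grammar can be applied with llama-cpp-python (the file name, model path, and prompt below are placeholders, not taken from this commit):

```python
from llama_cpp import Llama, LlamaGrammar

# Load the GBNF grammar; constrained sampling can then only emit strings the
# grammar accepts (plain "to say" lines plus an optional JSON tool-call block).
grammar = LlamaGrammar.from_file("output.gbnf")  # placeholder file name

llm = Llama(model_path="model.q4_k_m.gguf")  # placeholder model path
result = llm(
    "<|im_start|>user\nturn on the kitchen light<|im_end|>\n<|im_start|>assistant\n",
    grammar=grammar,
    max_tokens=128,
)
print(result["choices"][0]["text"])
```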


@@ -9,7 +9,10 @@
"missing_model_file": "The provided file does not exist.",
"other_existing_local": "Another model is already loaded locally. Please unload it or configure a remote model.",
"unknown": "Unexpected error",
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
"pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details.",
"sys_refresh_caching_enabled": "System prompt refresh must be enabled for prompt caching to work!",
"missing_gbnf_file": "The GBNF file was not found: {filename}",
"missing_icl_file": "The in context learning example CSV file was not found: {filename}"
},
"progress": {
"download": "Please wait while the model is being downloaded from HuggingFace. This can take a few minutes.",
@@ -157,8 +160,8 @@
},
"error": {
"sys_refresh_caching_enabled": "System prompt refresh must be enabled for prompt caching to work!",
"missing_gbnf_file": "The GBNF file was not found: '{filename}'",
"missing_icl_file": "The in context learning example CSV file was not found: '{filename}'"
"missing_gbnf_file": "The GBNF file was not found: {filename}",
"missing_icl_file": "The in context learning example CSV file was not found: {filename}"
}
},
"selector": {


@@ -14,6 +14,11 @@
* [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model)
* [Step 2: Connect to the Ollama API](#step-2-connect-to-the-ollama-api)
* [Step 3: Model Configuration](#step-3-model-configuration-1)
* [Path 3: Using Llama-3-8B-Instruct with LM Studio](#path-3-using-llama-3-8b-instruct-with-lm-studio)
* [Overview](#overview-2)
* [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model-1)
* [Step 2: Connect to the LM Studio API](#step-2-connect-to-the-lm-studio-api)
* [Step 3: Model Configuration](#step-3-model-configuration-2)
* [Configuring the Integration as a Conversation Agent](#configuring-the-integration-as-a-conversation-agent)
* [Finished!](#finished)
@@ -103,6 +108,41 @@ Once the desired API has been selected, scroll to the bottom and click `Submit`.
> NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on.
## Path 3: Using Llama-3-8B-Instruct with LM Studio
### Overview
Another model you can use if you have a GPU is Meta's Llama-3-8B-Instruct model. This path assumes you have a machine with a GPU that already has [LM Studio](https://lmstudio.ai/) installed on it. This path uses in-context learning examples to prompt the model to produce the output that we expect.
### Step 1: Downloading and serving the Model
Llama 3 8B can be downloaded and set up on the serving machine using LM Studio as follows:
1. Search for `lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF` in the main interface.
2. Select and download the version of the model that is recommended for your VRAM configuration.
3. Select the 'Local Server' tab on the left side of the application.
4. Load the model by selecting it from the bar in the top middle of the screen. The server should start automatically when the model finishes loading.
5. Take note of the port that the server is running on.
### Step 2: Connect to the LM Studio API
1. In Home Assistant: navigate to `Settings > Devices and Services`
2. Select the `+ Add Integration` button in the bottom right corner
3. Search for, and select `Local LLM Conversation`
4. Select `Generic OpenAI Compatible API` from the dropdown and click `Submit`
5. Set up the connection to the API:
- **IP Address**: Fill in the IP address of the machine hosting LM Studio
- **Port**: enter the port that was listed in LM Studio
- **Use HTTPS**: unchecked
- **Model Name**: This can be any value, as LM Studio uses the currently loaded model for all incoming requests.
- **API Key**: leave blank
6. Click `Submit`
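If the integration cannot connect, a quick sanity check is to query LM Studio's OpenAI-compatible models endpoint directly. This sketch assumes LM Studio's default port of 1234 and a placeholder host; substitute the address and port noted in Step 1:

```python
import requests

# LM Studio's local server speaks the OpenAI API; /v1/models returns the
# currently loaded model if the server is up and reachable.
resp = requests.get("http://192.168.1.50:1234/v1/models", timeout=10)
resp.raise_for_status()
print(resp.json())
```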
### Step 3: Model Configuration
This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works.
For now, defaults for the model should have been populated. If you would like the model to be able to control devices then you must select the `Assist` API.
Once the desired API has been selected, scroll to the bottom and click `Submit`.
> NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on.
## Configuring the Integration as a Conversation Agent
Now that the integration is configured and providing the conversation agent, we need to configure Home Assistant to use our conversation agent instead of the built-in intent recognition system.


@@ -86,8 +86,10 @@ from homeassistant.components.conversation import ConversationInput
from homeassistant.const import (
CONF_HOST,
CONF_PORT,
CONF_SSL
CONF_SSL,
CONF_LLM_HASS_API
)
from homeassistant.helpers.llm import LLM_API_ASSIST, APIInstance
_LOGGER = logging.getLogger(__name__)
@@ -122,26 +124,19 @@ def config_entry():
},
options={
**DEFAULT_OPTIONS,
CONF_LLM_HASS_API: LLM_API_ASSIST,
CONF_PROMPT: DEFAULT_PROMPT_BASE,
CONF_SERVICE_CALL_REGEX: r"({[\S \t]*})"
}
)
@pytest.fixture
def home_assistant_mock():
mock_home_assistant = MagicMock()
async def call_now(func, *args, **kwargs):
return func(*args, **kwargs)
mock_home_assistant.async_add_executor_job.side_effect = call_now
mock_home_assistant.services.async_call = AsyncMock()
yield mock_home_assistant
@pytest.fixture
def local_llama_agent_fixture(config_entry, home_assistant_mock):
def local_llama_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(LlamaCppAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(LlamaCppAgent, '_load_grammar') as load_grammar_mock, \
patch.object(LlamaCppAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(LlamaCppAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.importlib.import_module') as import_module_mock, \
patch('custom_components.llama_conversation.agent.install_llama_cpp_python') as install_llama_cpp_python_mock:
@@ -166,14 +161,17 @@ def local_llama_agent_fixture(config_entry, home_assistant_mock):
generate_mock.return_value = list(range(20))
detokenize_mock = llama_instance_mock.detokenize
detokenize_mock.return_value = json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
}).encode()
detokenize_mock.return_value = ("I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
})).encode()
call_tool_mock.return_value = {"result": "success"}
agent_obj = LlamaCppAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -268,10 +266,11 @@ async def test_local_llama_agent(local_llama_agent_fixture):
)
@pytest.fixture
def ollama_agent_fixture(config_entry, home_assistant_mock):
def ollama_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(OllamaAPIAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(OllamaAPIAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(OllamaAPIAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -291,8 +290,10 @@ def ollama_agent_fixture(config_entry, home_assistant_mock):
response_mock.json.return_value = { "models": [ {"name": config_entry.data[CONF_CHAT_MODEL] }] }
requests_get_mock.return_value = response_mock
call_tool_mock.return_value = {"result": "success"}
agent_obj = OllamaAPIAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -318,10 +319,11 @@ async def test_ollama_agent(ollama_agent_fixture):
response_mock.json.return_value = {
"model": ollama_agent.entry.data[CONF_CHAT_MODEL],
"created_at": "2023-11-09T21:07:55.186497Z",
"response": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"response": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"done": True,
"context": [1, 2, 3],
@@ -410,10 +412,11 @@ async def test_ollama_agent(ollama_agent_fixture):
@pytest.fixture
def text_generation_webui_agent_fixture(config_entry, home_assistant_mock):
def text_generation_webui_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(TextGenerationWebuiAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(TextGenerationWebuiAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(TextGenerationWebuiAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -433,8 +436,10 @@ def text_generation_webui_agent_fixture(config_entry, home_assistant_mock):
response_mock.json.return_value = { "model_name": config_entry.data[CONF_CHAT_MODEL] }
requests_get_mock.return_value = response_mock
call_tool_mock.return_value = {"result": "success"}
agent_obj = TextGenerationWebuiAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -464,10 +469,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
"model": "gpt-3.5-turbo-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"text": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"text": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"index": 0,
"logprobs": None,
@@ -559,10 +565,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
"index": 0,
"message": {
"role": "assistant",
"content": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"content": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
},
"logprobs": None,
@@ -669,10 +676,11 @@ async def test_text_generation_webui_agent(text_generation_webui_agent_fixture):
)
@pytest.fixture
def generic_openai_agent_fixture(config_entry, home_assistant_mock):
def generic_openai_agent_fixture(config_entry, hass, enable_custom_integrations):
with patch.object(GenericOpenAIAPIAgent, '_load_icl_examples') as load_icl_examples_mock, \
patch.object(GenericOpenAIAPIAgent, 'entry', new_callable=PropertyMock) as entry_mock, \
patch.object(GenericOpenAIAPIAgent, '_async_get_exposed_entities') as get_exposed_entities_mock, \
patch.object(APIInstance, 'async_call_tool') as call_tool_mock, \
patch('homeassistant.helpers.template.Template') as template_mock, \
patch('custom_components.llama_conversation.agent.requests.get') as requests_get_mock, \
patch('custom_components.llama_conversation.agent.requests.post') as requests_post_mock:
@@ -688,8 +696,10 @@ def generic_openai_agent_fixture(config_entry, home_assistant_mock):
["light", "switch", "fan"]
)
call_tool_mock.return_value = {"result": "success"}
agent_obj = GenericOpenAIAPIAgent(
home_assistant_mock,
hass,
config_entry
)
@@ -714,10 +724,11 @@ async def test_generic_openai_agent(generic_openai_agent_fixture):
"model": "gpt-3.5-turbo-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"text": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"text": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
"index": 0,
"logprobs": None,
@@ -774,10 +785,11 @@ async def test_generic_openai_agent(generic_openai_agent_fixture):
"index": 0,
"message": {
"role": "assistant",
"content": json.dumps({
"to_say": "I am saying something!",
"service": "light.turn_on",
"target_device": "light.kitchen_light",
"content": "I am saying something!\n" + json.dumps({
"name": "HassTurnOn",
"arguments": {
"name": "light.kitchen_light"
}
}),
},
"logprobs": None,


@@ -7,6 +7,7 @@ from homeassistant.const import (
CONF_HOST,
CONF_PORT,
CONF_SSL,
CONF_LLM_HASS_API,
)
from homeassistant.data_entry_flow import FlowResultType
@@ -25,6 +26,8 @@ from custom_components.llama_conversation.const import (
CONF_DOWNLOADED_MODEL_FILE,
CONF_EXTRA_ATTRIBUTES_TO_EXPOSE,
CONF_PROMPT_TEMPLATE,
CONF_TOOL_FORMAT,
CONF_TOOL_MULTI_TURN_CHAT,
CONF_ENABLE_FLASH_ATTENTION,
CONF_USE_GBNF_GRAMMAR,
CONF_GBNF_GRAMMAR_FILE,
@@ -290,16 +293,16 @@ async def test_validate_config_flow_ollama(mock_setup_entry, hass: HomeAssistant
# TODO: write tests for configflow setup for llama.cpp (both versions) + text-generation-webui
def test_validate_options_schema():
def test_validate_options_schema(hass: HomeAssistant):
universal_options = [
CONF_PROMPT, CONF_PROMPT_TEMPLATE,
CONF_LLM_HASS_API, CONF_PROMPT, CONF_PROMPT_TEMPLATE, CONF_TOOL_FORMAT, CONF_TOOL_MULTI_TURN_CHAT,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES, CONF_IN_CONTEXT_EXAMPLES_FILE, CONF_NUM_IN_CONTEXT_EXAMPLES,
CONF_MAX_TOKENS, CONF_EXTRA_ATTRIBUTES_TO_EXPOSE,
CONF_SERVICE_CALL_REGEX, CONF_REFRESH_SYSTEM_PROMPT, CONF_REMEMBER_CONVERSATION, CONF_REMEMBER_NUM_INTERACTIONS,
]
options_llama_hf = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_HF)
options_llama_hf = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_HF)
assert set(options_llama_hf.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_BATCH_SIZE, CONF_THREAD_COUNT, CONF_BATCH_THREAD_COUNT, CONF_ENABLE_FLASH_ATTENTION, # llama.cpp specific
@@ -308,7 +311,7 @@ def test_validate_options_schema():
CONF_PROMPT_CACHING_ENABLED, CONF_PROMPT_CACHING_INTERVAL # supports prompt caching
])
options_llama_existing = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_EXISTING)
options_llama_existing = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_EXISTING)
assert set(options_llama_existing.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_BATCH_SIZE, CONF_THREAD_COUNT, CONF_BATCH_THREAD_COUNT, CONF_ENABLE_FLASH_ATTENTION, # llama.cpp specific
@@ -317,7 +320,7 @@ def test_validate_options_schema():
CONF_PROMPT_CACHING_ENABLED, CONF_PROMPT_CACHING_INTERVAL # supports prompt caching
])
options_ollama = local_llama_config_option_schema(None, BACKEND_TYPE_OLLAMA)
options_ollama = local_llama_config_option_schema(hass, None, BACKEND_TYPE_OLLAMA)
assert set(options_ollama.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_TYPICAL_P, # supports top_k temperature, top_p and typical_p samplers
CONF_OLLAMA_KEEP_ALIVE_MIN, CONF_OLLAMA_JSON_MODE, # ollama specific
@@ -325,7 +328,7 @@ def test_validate_options_schema():
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_text_gen_webui = local_llama_config_option_schema(None, BACKEND_TYPE_TEXT_GEN_WEBUI)
options_text_gen_webui = local_llama_config_option_schema(hass, None, BACKEND_TYPE_TEXT_GEN_WEBUI)
assert set(options_text_gen_webui.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, CONF_MIN_P, CONF_TYPICAL_P, # supports all sampling parameters
CONF_TEXT_GEN_WEBUI_CHAT_MODE, CONF_TEXT_GEN_WEBUI_PRESET, # text-gen-webui specific
@@ -333,13 +336,13 @@ def test_validate_options_schema():
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_generic_openai = local_llama_config_option_schema(None, BACKEND_TYPE_GENERIC_OPENAI)
options_generic_openai = local_llama_config_option_schema(hass, None, BACKEND_TYPE_GENERIC_OPENAI)
assert set(options_generic_openai.keys()) == set(universal_options + [
CONF_TEMPERATURE, CONF_TOP_P, # only supports top_p and temperature sampling
CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_REQUEST_TIMEOUT, # is a remote backend
])
options_llama_cpp_python_server = local_llama_config_option_schema(None, BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER)
options_llama_cpp_python_server = local_llama_config_option_schema(hass, None, BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER)
assert set(options_llama_cpp_python_server.keys()) == set(universal_options + [
CONF_TOP_K, CONF_TEMPERATURE, CONF_TOP_P, # supports top_k, temperature, and top p sampling
CONF_USE_GBNF_GRAMMAR, CONF_GBNF_GRAMMAR_FILE, # supports GBNF