working with wheel install + grammar

Alex O'Connell
2024-01-13 20:05:53 -05:00
parent 70a02d68f9
commit 4aab796cd3
5 changed files with 61 additions and 26 deletions

View File

@@ -67,9 +67,6 @@ CONFIG_SCHEMA = cv.config_entry_only_config_schema(DOMAIN)
async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
"""Set up Local LLaMA Conversation from a config entry."""
hass.data.setdefault(DOMAIN, {})
hass.data[DOMAIN][entry.entry_id] = entry
use_local_backend = entry.data.get(
CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE
) != BACKEND_TYPE_REMOTE
@@ -86,6 +83,9 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
agent = await hass.async_add_executor_job(create_agent)
conversation.async_set_agent(hass, entry, agent)
hass.data.setdefault(DOMAIN, {})
hass.data[DOMAIN][entry.entry_id] = entry
return True
@@ -130,12 +130,14 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
if self.use_local_backend:
if not model_path:
raise Exception(f"Model was not found at '{model_path}'!")
# don't import it until now because the wheel is installed by config_flow.py
module = importlib.import_module("llama_cpp")
Llama = getattr(module, "Llama")
LlamaGrammar = getattr(module, "LlamaGrammar")
_LOGGER.debug("Loading model...")
self.llm = Llama(
model_path=model_path,
n_ctx=2048,
@@ -144,9 +146,11 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
# n_threads_batch=4,
)
with open(os.path.join(os.path.dirname(__file__), GBNF_GRAMMAR_FILE)) as f:
grammar_str = "".join(f.readlines())
self.grammar = LlamaGrammar.from_string(grammar_str)
# _LOGGER.debug("Loading grammar...")
# with open(os.path.join(os.path.dirname(__file__), GBNF_GRAMMAR_FILE)) as f:
# grammar_str = "".join(f.readlines())
# self.grammar = LlamaGrammar.from_string(grammar_str)
_LOGGER.info("Model loaded")
else:
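For context, the hunk above defers importing llama_cpp until the agent is constructed, since the wheel is only installed later by config_flow.py. A minimal sketch of that pattern, using the same Llama and LlamaGrammar calls shown in the diff (the standalone function and its arguments are illustrative):

import importlib

def load_local_model(model_path: str, grammar_path: str):
    # llama_cpp is imported at call time because the wheel may only have been
    # installed after Home Assistant started (see config_flow.py).
    module = importlib.import_module("llama_cpp")
    Llama = getattr(module, "Llama")
    LlamaGrammar = getattr(module, "LlamaGrammar")

    llm = Llama(model_path=model_path, n_ctx=2048)

    # The GBNF grammar constrains generation to the format defined in output.gbnf.
    with open(grammar_path) as f:
        grammar = LlamaGrammar.from_string(f.read())

    return llm, grammar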
@@ -276,7 +280,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
return conversation.ConversationResult(
response=intent_response, conversation_id=conversation_id
)
def _load_remote_model(self):
try:
currently_loaded_result = requests.get(f"{self.api_host}/v1/internal/model/info")
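The remote backend probes text-generation-webui for the currently loaded model via /v1/internal/model/info, as shown above. A hedged sketch of that check; the response shape (a JSON body with a model_name key) is an assumption, since it is not part of this diff:

import requests

def get_loaded_model_name(api_host: str):
    # Ask text-generation-webui which model is currently loaded.
    # Assumes the endpoint returns JSON like {"model_name": "..."}.
    try:
        resp = requests.get(f"{api_host}/v1/internal/model/info", timeout=10)
        resp.raise_for_status()
        return resp.json().get("model_name")
    except requests.RequestException:
        return None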
@@ -396,7 +400,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
if include_generation_prompt:
formatted_prompt = formatted_prompt + template_desc["generation_prompt"]
return formatted_prompt
def _async_generate_prompt(self, prompt_template: str) -> str:
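The prompt formatter appends template_desc["generation_prompt"] so the model continues as the assistant. A rough sketch of how such a formatter fits together; every key other than "generation_prompt" is an assumption, not taken from this diff:

# Illustrative template description; only "generation_prompt" appears in the diff.
TEMPLATE_DESC = {
    "user": {"prefix": "<|user|>\n", "suffix": "\n"},
    "assistant": {"prefix": "<|assistant|>\n", "suffix": "\n"},
    "generation_prompt": "<|assistant|>\n",
}

def format_prompt(messages, template_desc, include_generation_prompt=True):
    # Wrap each chat turn in its role markers, then optionally append the
    # generation prompt so the model answers as the assistant.
    formatted_prompt = ""
    for message in messages:
        role = template_desc[message["role"]]
        formatted_prompt += role["prefix"] + message["content"] + role["suffix"]
    if include_generation_prompt:
        formatted_prompt += template_desc["generation_prompt"]
    return formatted_prompt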
@@ -423,7 +427,7 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
value = F"{closest_color(value)} {value}"
elif attribute_name == "volume_level":
value = f"{int(value*100)}%"
result = result + ";" + str(value)
return result
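The hunk above serializes exposed entity attributes into a compact, semicolon-separated string for the prompt, converting colors to a human-readable name and volume to a percentage. A sketch of the surrounding loop; the function name and the attribute names that trigger each conversion are assumptions (only the two conversions themselves appear in the diff), and closest_color is assumed to be defined elsewhere in the component:

def format_entity_attributes(state: str, attributes: dict) -> str:
    # Flatten selected attributes into 'state;attr1;attr2;...' for the prompt.
    result = state
    for attribute_name, value in attributes.items():
        if attribute_name == "rgb_color":
            # Prepend a human-readable color name to the raw RGB value.
            value = f"{closest_color(value)} {value}"
        elif attribute_name == "volume_level":
            value = f"{int(value * 100)}%"
        result = result + ";" + str(value)
    return result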

View File

@@ -1,6 +1,7 @@
"""Config flow for Local LLaMA Conversation integration."""
from __future__ import annotations
import time
import os
import logging
import types
@@ -75,7 +76,7 @@ def STEP_INIT_DATA_SCHEMA(backend_type=None):
return vol.Schema(
{
vol.Required(
CONF_BACKEND_TYPE,
CONF_BACKEND_TYPE,
default=backend_type if backend_type else DEFAULT_BACKEND_TYPE
): SelectSelector(SelectSelectorConfig(
options=[ BACKEND_TYPE_LLAMA_HF, BACKEND_TYPE_LLAMA_EXISTING, BACKEND_TYPE_REMOTE ],
@@ -140,7 +141,7 @@ def download_model_from_hf(
)
except Exception as ex:
return ex
def install_llama_cpp_python(config_dir: str):
try:
if not is_installed("llama-cpp-python"):
@@ -150,16 +151,22 @@ def install_llama_cpp_python(config_dir: str):
platform_suffix = "aarch64"
folder = os.path.dirname(__file__)
potential_wheels = [ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ]
if len(potential_wheels) == 1:
if len(potential_wheels) == 0:
# someone who is better at async can figure out why this is necessary
time.sleep(0.5)
return Exception("missing_wheels")
elif len(potential_wheels) == 1:
wheel_to_install = potential_wheels[0]
else:
_LOGGER.info("There are multiple potential wheels to install... Using the latest one")
wheel_to_install = sorted(potential_wheels, reverse=True)[0]
_LOGGER.debug("Wheel location: ", wheel_to_install)
_LOGGER.debug(f"Wheel location: {wheel_to_install}")
return install_package(os.path.join(folder, wheel_to_install), pip_kwargs(config_dir))
else:
_LOGGER.info("llama-cpp-python is already installed")
# someone who is better at async can figure out why this is necessary
time.sleep(0.5)
return True
except Exception as ex:
_LOGGER.exception("Install failed!")
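When more than one bundled wheel matches the platform suffix, the code above picks "the latest one" with a reverse lexical sort of the filenames. A small sketch of just that selection step (the filenames in the comment are illustrative); note that a plain string sort only approximates real version ordering:

import os

def pick_wheel(folder: str, platform_suffix: str):
    # Find prebuilt llama-cpp-python wheels shipped alongside the component.
    potential_wheels = [
        path for path in os.listdir(folder)
        if path.endswith(f"{platform_suffix}.whl")
    ]
    if not potential_wheels:
        return None  # config_flow surfaces this as a "missing wheel" error
    # Reverse lexical sort puts the highest version string first, e.g.
    # 'llama_cpp_python-0.2.26-...-aarch64.whl' before the 0.2.24 build.
    return sorted(potential_wheels, reverse=True)[0]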
@@ -174,12 +181,18 @@ class BaseLlamaConversationConfigFlow(FlowHandler, ABC):
def flow_manager(self) -> FlowManager:
"""Return the flow manager of the flow."""
@abstractmethod
async def async_step_pick_backend(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
""" Select backend """
@abstractmethod
async def async_step_install_local_wheels(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
""" Install pre-built wheels """
@abstractmethod
async def async_step_local_model(
self, user_input: dict[str, Any] | None = None
@@ -236,6 +249,12 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
async def async_step_user(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
"""Handle the initial step."""
return await self.async_step_pick_backend()
async def async_step_pick_backend(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
"""Handle the initial step."""
errors = {}
@@ -262,15 +281,17 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
)
if "base" not in errors:
# return await self.async_step_install_local_wheels()
return await self.async_step_local_model()
return await self.async_step_install_local_wheels()
else:
return await self.async_step_remote_model()
elif self.install_wheel_error:
errors["base"] = str(self.install_wheel_error)
self.install_wheel_error = None
return self.async_show_form(
step_id="user", data_schema=schema, errors=errors
step_id="pick_backend", data_schema=schema, errors=errors
)
async def async_step_install_local_wheels(
self, user_input: dict[str, Any] | None = None
) -> FlowResult:
@@ -296,10 +317,17 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
wheel_install_result = user_input["result"]
if isinstance(wheel_install_result, Exception):
_LOGGER.warning("Failed to install wheel: %s", repr(wheel_install_result))
self.wheel_install_error = wheel_install_result
return self.async_show_progress_done(next_step_id="user")
self.install_wheel_error = wheel_install_result
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="pick_backend")
elif wheel_install_result == False:
_LOGGER.warning("Failed to install wheel: %s", repr(wheel_install_result))
self.install_wheel_error = "pip_wheel_error"
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="pick_backend")
else:
_LOGGER.debug(f"Finished install: {wheel_install_result}")
self.install_wheel_task = None
return self.async_show_progress_done(next_step_id="local_model")
async def async_step_local_model(
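The result handled above arrives in user_input["result"] because the install runs as a background executor job that reports back into the flow. A rough sketch of how the task side of that pattern typically looks; the exact Home Assistant progress-flow calls and the way the result is fed back are assumptions, since the launching code is not part of this hunk:

async def async_step_install_local_wheels(self, user_input=None):
    if not self.install_wheel_task:
        # Run the blocking pip install off the event loop.
        self.install_wheel_task = self.hass.async_add_executor_job(
            install_llama_cpp_python, self.hass.config.config_dir
        )

        def _finished(task):
            # Re-enter this step with the outcome as user_input["result"].
            self.hass.async_create_task(
                self.hass.config_entries.flow.async_configure(
                    flow_id=self.flow_id, user_input={"result": task.result()}
                )
            )

        self.install_wheel_task.add_done_callback(_finished)
        # Exact async_show_progress arguments vary across Home Assistant versions.
        return self.async_show_progress(
            step_id="install_local_wheels",
            progress_action="install_local_wheels",
        )
    # ...then the Exception / False / success handling shown in the diff above.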

View File

@@ -1,7 +1,7 @@
root ::= (ws tosay)+ ws functioncalls?
root ::= (tosay "\n")+ functioncalls?
tosay ::= [0-9a-zA-Z .#%]*
functioncalls ::=
tosay ::= [0-9a-zA-Z #%.?!]*
functioncalls ::=
"```homeassistant\n" (object ws)* "```"
value ::= object | array | string | number | ("true" | "false" | "null") ws
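With the updated rules, the model emits one or more newline-terminated sentences, optionally followed by a fenced homeassistant block of JSON objects. Conforming output looks roughly like this (the JSON keys are illustrative; the grammar only requires valid objects):

I am turning on the kitchen lights.
```homeassistant
{"service": "light.turn_on", "entity_id": "light.kitchen"}
```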

View File

@@ -6,7 +6,9 @@
"missing_model_api": "The selected model is not provided by this API.",
"missing_model_file": "The provided file does not exist.",
"other_existing_local": "Another model is already loaded locally. Please unload it or configure a remote model.",
"unknown": "Unexpected error"
"unknown": "Unexpected error",
"missing_wheel": "Llama.cpp is not installed and could not find any wheels to install!",
"pip_wheel_error": "Pip returned an error while installing the wheel!"
},
"progress": {
"download": "Please wait while the model is being downloaded from HuggingFace. This can take a few minutes.",
@@ -31,7 +33,7 @@
"description": "Provide the connection details for an instance of text-generation-webui that is hosting the model.",
"title": "Configure connection to remote API"
},
"user": {
"pick_backend": {
"data": {
"download_model_from_hf": "Download model from HuggingFace",
"use_local_backend": "Use Llama.cpp"

View File

@@ -10,5 +10,6 @@ if [[ ! -d "./models/$MODEL_NAME" ]]; then
exit -1
fi
dos2unix $PROMPT_SRC
PROMPT=$(cat $PROMPT_SRC)
$LLAMA_CPP/build/bin/main --model "./models/$MODEL_NAME/$MODEL_NAME.$QUANT_TYPE.gguf" --temp 0.1 --ctx-size 2048 --prompt "$PROMPT" --grammar-file ./custom_components/llama_conversation/output.gbnf