Mirror of https://github.com/acon96/home-llm.git (synced 2026-01-10 14:18:00 -05:00)
Commit: add generic openai backend type
TODO.md | 4 ++--
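
For orientation: the new generic_openai backend type targets any server that speaks the OpenAI chat-completions wire format (the TODO hunk below also checks off the LocalAI backend item, presumably because LocalAI exposes this same API). A minimal sketch of the kind of request such a backend implies is shown here; the endpoint path and payload fields follow the public OpenAI convention and are illustrative assumptions, not code taken from this commit.

import requests

# Illustrative only: host, model name, and prompt are placeholders.
def generic_openai_chat(api_host: str, model: str, prompt: str, timeout: int = 90) -> str:
    response = requests.post(
        f"{api_host}/v1/chat/completions",
        json={
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
        },
        timeout=timeout,
    )
    response.raise_for_status()
    # Standard OpenAI response shape: choices[0].message.content
    return response.json()["choices"][0]["message"]["content"]

The diff itself wires this choice into the integration by splitting the old single "remote" backend into two distinct remote backends: text-generation-webui and a generic OpenAI-compatible endpoint.
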
@@ -9,7 +9,7 @@
 [x] Finish Readme/docs for initial release
 [x] Function calling as JSON
 [ ] multi-turn prompts; better instruct dataset like dolphin/wizardlm?
-[ ] Fine tune Phi-1 and Phi-1.5 versions
+[x] Fine tune Phi-1.5 version
 [ ] "context requests"
     - basically just let the model decide what RAG/extra context it wants
     - the model predicts special tokens as the first few tokens of its output
@@ -22,6 +22,6 @@
 [x] make llama-cpp-python wheels for "llama-cpp-python>=0.2.24"
 [ ] prime kv cache with current "state" so that requests are faster
 [ ] make a proper evaluation framework to run. not just loss. should test accuracy on the function calling
-[ ] add LocalAI backend
+[x] add LocalAI backend
 [x] more config options for prompt template (allow other than chatml)
 [ ] publish snapshot of dataset on HF

@@ -54,7 +54,8 @@ from .const import (
     DEFAULT_REQUEST_TIMEOUT,
     DEFAULT_EXTRA_ATTRIBUTES_TO_EXPOSE,
     DEFAULT_PROMPT_TEMPLATE,
-    BACKEND_TYPE_REMOTE,
+    BACKEND_TYPE_TEXT_GEN_WEBUI,
+    BACKEND_TYPE_GENERIC_OPENAI,
     DOMAIN,
     GBNF_GRAMMAR_FILE,
     PROMPT_TEMPLATE_DESCRIPTIONS,
@@ -64,12 +65,15 @@ _LOGGER = logging.getLogger(__name__)
 
 CONFIG_SCHEMA = cv.config_entry_only_config_schema(DOMAIN)
 
+def is_local_backend(backend):
+    return backend not in [BACKEND_TYPE_TEXT_GEN_WEBUI, BACKEND_TYPE_GENERIC_OPENAI]
+
 async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
     """Set up Local LLaMA Conversation from a config entry."""
 
-    use_local_backend = entry.data.get(
-        CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE
-    ) != BACKEND_TYPE_REMOTE
+    use_local_backend = is_local_backend(
+        entry.data.get(CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE)
+    )
 
     if use_local_backend:
         _LOGGER.info(
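
The same helper is added again in the config-flow hunks further down. For reference, this is a standalone illustration of its behavior using the backend constants from the .const hunk (@@ -21,7 +21,8 @@) later in this diff; the asserts are illustrative and not part of the commit.

BACKEND_TYPE_LLAMA_HF = "llama_cpp_hf"
BACKEND_TYPE_LLAMA_EXISTING = "llama_cpp_existing"
BACKEND_TYPE_TEXT_GEN_WEBUI = "text-generation-webui_api"
BACKEND_TYPE_GENERIC_OPENAI = "generic_openai"

def is_local_backend(backend):
    # "local" means llama.cpp running in-process; both remote HTTP backends are excluded
    return backend not in [BACKEND_TYPE_TEXT_GEN_WEBUI, BACKEND_TYPE_GENERIC_OPENAI]

assert is_local_backend(BACKEND_TYPE_LLAMA_HF)
assert is_local_backend(BACKEND_TYPE_LLAMA_EXISTING)
assert not is_local_backend(BACKEND_TYPE_TEXT_GEN_WEBUI)
assert not is_local_backend(BACKEND_TYPE_GENERIC_OPENAI)
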
@@ -114,9 +118,10 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
         self.entry = entry
         self.history: dict[str, list[dict]] = {}
 
-        self.use_local_backend = self.entry.data.get(
+        self.backend_type = self.entry.data.get(
             CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE
-        ) != BACKEND_TYPE_REMOTE
+        )
+        self.use_local_backend = is_local_backend(self.backend_type)
 
         self.api_host = None
         self.llm = None
@@ -158,7 +163,9 @@ class LLaMAAgent(conversation.AbstractConversationAgent):
             port = entry.data[CONF_PORT]
             self.api_host = f"http://{host}:{port}"
 
-            self._load_remote_model()
+            # only load model if using text-generation-webui
+            if self.backend_type == BACKEND_TYPE_TEXT_GEN_WEBUI:
+                self._load_remote_model()
 
     @property
     def supported_languages(self) -> list[str] | Literal["*"]:
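
The gate above reflects that only text-generation-webui offers a model-management endpoint; a generic OpenAI-compatible server is assumed to already be serving a model, so there is nothing to load. A hedged sketch of the idea follows; the /api/v1/model endpoint and its payload mirror text-generation-webui's legacy API and are assumptions here, not lines from this diff.

import requests

BACKEND_TYPE_TEXT_GEN_WEBUI = "text-generation-webui_api"  # from the .const hunk below

def load_remote_model_if_needed(api_host: str, backend_type: str, model_name: str) -> None:
    # Generic OpenAI-compatible servers have no standard "load this model" call,
    # so the load step is skipped entirely for them.
    if backend_type != BACKEND_TYPE_TEXT_GEN_WEBUI:
        return
    # Assumed text-generation-webui legacy API: POST a load action to /api/v1/model.
    requests.post(
        f"{api_host}/api/v1/model",
        json={"action": "load", "model_name": model_name},
        timeout=90,
    )
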

@@ -61,7 +61,8 @@ from .const import (
     DEFAULT_PROMPT_TEMPLATE,
     BACKEND_TYPE_LLAMA_HF,
     BACKEND_TYPE_LLAMA_EXISTING,
-    BACKEND_TYPE_REMOTE,
+    BACKEND_TYPE_TEXT_GEN_WEBUI,
+    BACKEND_TYPE_GENERIC_OPENAI,
     PROMPT_TEMPLATE_CHATML,
     PROMPT_TEMPLATE_ALPACA,
     PROMPT_TEMPLATE_VICUNA,
@@ -72,6 +73,9 @@ from .const import (
 
 _LOGGER = logging.getLogger(__name__)
 
+def is_local_backend(backend):
+    return backend not in [BACKEND_TYPE_TEXT_GEN_WEBUI, BACKEND_TYPE_GENERIC_OPENAI]
+
 def STEP_INIT_DATA_SCHEMA(backend_type=None):
     return vol.Schema(
         {
@@ -79,7 +83,7 @@ def STEP_INIT_DATA_SCHEMA(backend_type=None):
                 CONF_BACKEND_TYPE,
                 default=backend_type if backend_type else DEFAULT_BACKEND_TYPE
             ): SelectSelector(SelectSelectorConfig(
-                options=[ BACKEND_TYPE_LLAMA_HF, BACKEND_TYPE_LLAMA_EXISTING, BACKEND_TYPE_REMOTE ],
+                options=[ BACKEND_TYPE_LLAMA_HF, BACKEND_TYPE_LLAMA_EXISTING, BACKEND_TYPE_TEXT_GEN_WEBUI, BACKEND_TYPE_GENERIC_OPENAI ],
                 translation_key=CONF_BACKEND_TYPE,
                 multiple=False,
                 mode=SelectSelectorMode.LIST,
@@ -263,7 +267,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
 
         if user_input:
             try:
-                local_backend = user_input[CONF_BACKEND_TYPE] != BACKEND_TYPE_REMOTE
+                local_backend = is_local_backend(user_input[CONF_BACKEND_TYPE])
                 self.model_options.update(user_input)
 
             except Exception: # pylint: disable=broad-except
@@ -435,14 +439,18 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
             try:
                 self.model_options.update(user_input)
 
-                error_reason = await self.hass.async_add_executor_job(self._validate_remote_api)
-                if error_reason:
-                    errors["base"] = error_reason
-                    schema = STEP_REMOTE_SETUP_DATA_SCHEMA(
-                        host=user_input[CONF_HOST],
-                        port=user_input[CONF_PORT],
-                        chat_model=user_input[CONF_CHAT_MODEL],
-                    )
+                # only validate and load when using text-generation-webui
+                if self.model_options[CONF_BACKEND_TYPE] == BACKEND_TYPE_TEXT_GEN_WEBUI:
+                    error_reason = await self.hass.async_add_executor_job(self._validate_remote_api)
+                    if error_reason:
+                        errors["base"] = error_reason
+                        schema = STEP_REMOTE_SETUP_DATA_SCHEMA(
+                            host=user_input[CONF_HOST],
+                            port=user_input[CONF_PORT],
+                            chat_model=user_input[CONF_CHAT_MODEL],
+                        )
+                    else:
+                        return await self.async_step_finish()
                 else:
                     return await self.async_step_finish()
 
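
The hunk above likewise restricts _validate_remote_api to the text-generation-webui backend, so a generic OpenAI-compatible endpoint is accepted without a connectivity check. If such a check were wanted, listing models is the usual lightweight probe; this is a hedged sketch of that idea, not something this commit adds.

import requests

def probe_openai_compatible_server(api_host: str, timeout: int = 10) -> bool:
    # GET /v1/models is part of the OpenAI API convention and is implemented by
    # most compatible servers; treat any error or non-2xx response as unreachable.
    try:
        return requests.get(f"{api_host}/v1/models", timeout=timeout).ok
    except requests.RequestException:
        return False
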
@@ -461,7 +469,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
         model_name = self.model_options.get(CONF_CHAT_MODEL)
         if not model_name:
             model_name = os.path.basename(self.model_options.get(CONF_DOWNLOADED_MODEL_FILE))
-        location = "remote" if self.model_options[CONF_BACKEND_TYPE] == BACKEND_TYPE_REMOTE else "llama.cpp"
+        location = "llama.cpp" if is_local_backend(self.model_options[CONF_BACKEND_TYPE]) else "remote"
 
         return self.async_create_entry(
             title=f"LLM Model '{model_name}' ({location})",
@@ -490,7 +498,7 @@ class OptionsFlow(config_entries.OptionsFlow):
         """Manage the options."""
         if user_input is not None:
             return self.async_create_entry(title="LLaMA Conversation", data=user_input)
-        is_local_backend = self.config_entry.data[CONF_BACKEND_TYPE] != BACKEND_TYPE_REMOTE
+        is_local_backend = is_local_backend(self.config_entry.data[CONF_BACKEND_TYPE])
         schema = local_llama_config_option_schema(self.config_entry.options, is_local_backend)
         return self.async_show_form(
             step_id="init",

@@ -21,7 +21,8 @@ DEFAULT_REQUEST_TIMEOUT = 90
 CONF_BACKEND_TYPE = "model_backend"
 BACKEND_TYPE_LLAMA_HF = "llama_cpp_hf"
 BACKEND_TYPE_LLAMA_EXISTING = "llama_cpp_existing"
-BACKEND_TYPE_REMOTE = "text-generation-webui_api"
+BACKEND_TYPE_TEXT_GEN_WEBUI = "text-generation-webui_api"
+BACKEND_TYPE_GENERIC_OPENAI = "generic_openai"
 DEFAULT_BACKEND_TYPE = BACKEND_TYPE_LLAMA_HF
 CONF_DOWNLOADED_MODEL_QUANTIZATION = "downloaded_model_quantization"
 CONF_DOWNLOADED_MODEL_QUANTIZATION_OPTIONS = ["Q8_0", "Q5_K_M", "Q4_K_M", "Q3_K_M"]
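
One detail worth noting in the hunk above: the renamed constant keeps the exact string value that BACKEND_TYPE_REMOTE had, so config entries stored before the rename still resolve to the text-generation-webui backend. A quick illustrative check (not part of the commit):

BACKEND_TYPE_TEXT_GEN_WEBUI = "text-generation-webui_api"
OLD_BACKEND_TYPE_REMOTE = "text-generation-webui_api"  # value of the removed constant
assert BACKEND_TYPE_TEXT_GEN_WEBUI == OLD_BACKEND_TYPE_REMOTE  # stored entries keep matching
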

@@ -71,7 +71,8 @@
       "options": {
         "llama_cpp_hf": "Llama.cpp (HuggingFace)",
         "llama_cpp_existing": "Llama.cpp (existing model)",
-        "text-generation-webui_api": "text-generation-webui API"
+        "text-generation-webui_api": "text-generation-webui API",
+        "generic_openai": "Generic OpenAI Compatible API"
       }
     }
   }