From 1e79426f1a39557600615f95112cde12127bf147 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 21 Sep 2025 20:28:59 -0400 Subject: [PATCH] clean up docs with new install steps + remove deprecated addon --- README.md | 6 +- addon/Dockerfile | 67 ------------------- addon/README.md | 4 -- addon/build.yaml | 4 -- addon/config.yaml | 26 ------- .../s6-rc.d/text-generation-webui/run | 33 --------- .../s6-rc.d/text-generation-webui/type | 1 - .../user/contents.d/text-generation-webui | 0 .../backends/generic_openai.py | 2 - .../llama_conversation/backends/llamacpp.py | 20 +----- .../backends/tailored_openai.py | 6 +- .../llama_conversation/config_flow.py | 20 ++++-- .../llama_conversation/entity.py | 1 + .../llama_conversation/translations/en.json | 2 +- docs/Setup.md | 42 ++++++------ 15 files changed, 47 insertions(+), 187 deletions(-) delete mode 100644 addon/Dockerfile delete mode 100644 addon/README.md delete mode 100644 addon/build.yaml delete mode 100644 addon/config.yaml delete mode 100755 addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/run delete mode 100644 addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/type delete mode 100644 addon/rootfs/etc/s6-overlay/s6-rc.d/user/contents.d/text-generation-webui diff --git a/README.md b/README.md index 6893446..bfae0dd 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ The integration can either run the model in 2 different ways: - [Ollama](https://ollama.com/) (easier) - [LocalAI](https://localai.io/) via the Generic OpenAI backend (easier) - [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) project (advanced) - - [llama.cpp example server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) (advanced) + - [llama.cpp example server](https://github.com/ggml-org/llama.cpp/tree/master/tools/server#readme) (advanced) ## Home LLM Model The "Home" models are a fine tuning of various Large Languages Models that are under 5B parameters. The models are able to control devices in the user's house as well as perform basic question and answering. The fine tuning dataset is a [custom synthetic dataset](./data) designed to teach the model function calling based on the device information in the context. @@ -155,10 +155,6 @@ python3 train.py \ -## Home Assistant Addon -In order to facilitate running the project entirely on the system where Home Assistant is installed, there is an experimental Home Assistant Add-on that runs the oobabooga/text-generation-webui to connect to using the "remote" backend options. The addon can be found in the [addon/](./addon/README.md) directory. 
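The remote options listed above generally expose an OpenAI-compatible chat completions API, which is what the integration's "remote" backends connect to. As a rough, hedged illustration only (the host, port, and model name below are placeholders, not values from this repository), such a request looks something like this:

```python
# Rough sketch only: the host, port, and model name are placeholders.
# It shows the kind of OpenAI-compatible chat completion request that the
# remote backends (LocalAI, text-generation-webui's OpenAI extension, Ollama,
# or the llama.cpp example server) typically respond to.
import requests

API_BASE = "http://192.168.1.10:8080/v1"  # placeholder address of your backend
MODEL = "home-3b-v3"                      # placeholder model name

response = requests.post(
    f"{API_BASE}/chat/completions",
    json={
        "model": MODEL,
        "messages": [
            {"role": "system", "content": "You control the user's smart home devices."},
            {"role": "user", "content": "Turn on the kitchen lights."},
        ],
        "temperature": 0.1,
    },
    timeout=90,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```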
- - ## Version History | Version | Description | |---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/addon/Dockerfile b/addon/Dockerfile deleted file mode 100644 index b75756f..0000000 --- a/addon/Dockerfile +++ /dev/null @@ -1,67 +0,0 @@ -ARG BUILD_FROM=ghcr.io/hassio-addons/ubuntu-base:9.0.2 - -# hadolint ignore=DL3006 -FROM ${BUILD_FROM} - -# Set shell -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Install text-generation-webui -ARG BUILD_ARCH=amd64 -ARG APP_DIR=/app -RUN \ - apt-get update \ - \ - && apt-get install -y --no-install-recommends \ - ca-certificates \ - curl \ - git \ - build-essential \ - cmake \ - python3.10 \ - python3-dev \ - python3-venv \ - python3-pip \ - \ - && git clone https://github.com/oobabooga/text-generation-webui.git ${APP_DIR} --branch snapshot-2024-04-28 \ - && python3 -m pip install torch torchvision torchaudio py-cpuinfo==9.0.0 \ - && python3 -m pip install -r ${APP_DIR}/requirements_cpu_only_noavx2.txt llama-cpp-python \ - && apt-get purge -y --auto-remove \ - git \ - build-essential \ - cmake \ - python3-dev \ - && apt-get clean \ - && rm -fr \ - /tmp/* \ - /var/{cache,log}/* \ - /var/lib/apt/lists/* - -# Copy root filesystem for our image -COPY rootfs / - -# Build arugments -ARG BUILD_DATE -ARG BUILD_REF -ARG BUILD_VERSION -ARG BUILD_REPOSITORY - -# Labels -LABEL \ - io.hass.name="oobabooga text-generation-webui for ${BUILD_ARCH}" \ - io.hass.description="Home Assistant Community Add-on: ${BUILD_ARCH} oobabooga text-generation-webui" \ - io.hass.arch="${BUILD_ARCH}" \ - io.hass.type="addon" \ - io.hass.version=${BUILD_VERSION} \ - maintainer="github.com/acon96" \ - org.opencontainers.image.title="oobabooga text-generation-webui for ${BUILD_ARCH}" \ - org.opencontainers.image.description="Home Assistant Community Add-on: ${BUILD_ARCH} oobabooga text-generation-webui" \ - org.opencontainers.image.vendor="Home Assistant Community Add-ons" \ - org.opencontainers.image.authors="github.com/acon96" \ - org.opencontainers.image.licenses="TBD" \ - org.opencontainers.image.url="https://addons.community" \ - org.opencontainers.image.source="https://github.com/${BUILD_REPOSITORY}" \ - org.opencontainers.image.documentation="https://github.com/${BUILD_REPOSITORY}/blob/main/README.md" \ - org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.revision=${BUILD_REF} \ - org.opencontainers.image.version=${BUILD_VERSION} diff --git a/addon/README.md b/addon/README.md deleted file mode 100644 index e76910f..0000000 --- a/addon/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# text-generation-webui - Home Assistant Addon -Installs text-generation-webui into a docker container using CPU only mode (llama.cpp) - -NOTE: This addon is not the preferred way to run LLama.cpp as part of Home Assistant and will not be updated. 
\ No newline at end of file diff --git a/addon/build.yaml b/addon/build.yaml deleted file mode 100644 index f53cc46..0000000 --- a/addon/build.yaml +++ /dev/null @@ -1,4 +0,0 @@ ---- -build_from: - aarch64: ghcr.io/hassio-addons/ubuntu-base:9.0.2 - amd64: ghcr.io/hassio-addons/ubuntu-base:9.0.2 \ No newline at end of file diff --git a/addon/config.yaml b/addon/config.yaml deleted file mode 100644 index b33a174..0000000 --- a/addon/config.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: oobabooga-text-generation-webui -version: 2024.04.28 -slug: text-generation-webui -description: "A tool for running Large Language Models" -url: "https://github.com/oobabooga/text-generation-webui" -init: false -arch: - - amd64 - - aarch64 -ports: - 7860/tcp: 7860 # ingress - 5000/tcp: 5000 # api -ports_description: - 7860/tcp: Web interface (Not required for Ingress) - 5000/tcp: OpenAI compatible API Server -ingress: true -ingress_port: 7860 -options: {} -schema: - log_level: list(trace|debug|info|notice|warning|error|fatal)? - models_directory: str? -map: - - media:rw - - share:rw - - addon_config:rw \ No newline at end of file diff --git a/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/run b/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/run deleted file mode 100755 index 09c903b..0000000 --- a/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/run +++ /dev/null @@ -1,33 +0,0 @@ -#!/command/with-contenv bashio -# ============================================================================== -# Home Assistant Community Add-on: text-generation-webui -# Runs the webui -# ============================================================================== -bashio::log.info "Starting Text Generation Webui..." - -APP_DIR="/app" -DEFAULT_MODELS_DIR="/config/models" - -if bashio::config.has_value "models_directory" && ! bashio::config.is_empty "models_directory"; then - MODELS_DIR=$(bashio::config 'models_directory') - if ! bashio::fs.directory_exists "$MODELS_DIR"; then - MODELS_DIR=$DEFAULT_MODELS_DIR - mkdir -p $MODELS_DIR - bashio::log.warning "The provided models directory '$MODELS_DIR' does not exist! Defaulting to '$DEFAULT_MODELS_DIR'" - else - bashio::log.info "Using chosen storage for models: '$MODELS_DIR'" - fi -else - MODELS_DIR=$DEFAULT_MODELS_DIR - mkdir -p $MODELS_DIR - bashio::log.info "Using default local storage for models." 
-fi - -# ensure we can access the folder -chmod 0777 $MODELS_DIR - -export GRADIO_ROOT_PATH=$(bashio::addon.ingress_entry) -bashio::log.info "Serving app from $GRADIO_ROOT_PATH" - -cd $APP_DIR -exec python3 server.py --listen --verbose --api --model-dir $MODELS_DIR diff --git a/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/type b/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/type deleted file mode 100644 index 1780f9f..0000000 --- a/addon/rootfs/etc/s6-overlay/s6-rc.d/text-generation-webui/type +++ /dev/null @@ -1 +0,0 @@ -longrun \ No newline at end of file diff --git a/addon/rootfs/etc/s6-overlay/s6-rc.d/user/contents.d/text-generation-webui b/addon/rootfs/etc/s6-overlay/s6-rc.d/user/contents.d/text-generation-webui deleted file mode 100644 index e69de29..0000000 diff --git a/custom_components/llama_conversation/backends/generic_openai.py b/custom_components/llama_conversation/backends/generic_openai.py index d12c16b..c4d2a8b 100644 --- a/custom_components/llama_conversation/backends/generic_openai.py +++ b/custom_components/llama_conversation/backends/generic_openai.py @@ -122,7 +122,6 @@ class GenericOpenAIAPIClient(LocalLLMClient): user_input: conversation.ConversationInput, entity_options: dict[str, Any]) -> AsyncGenerator[TextGenerationResult, None]: model_name = entity_options[CONF_CHAT_MODEL] - max_tokens = entity_options.get(CONF_MAX_TOKENS, DEFAULT_MAX_TOKENS) temperature = entity_options.get(CONF_TEMPERATURE, DEFAULT_TEMPERATURE) top_p = entity_options.get(CONF_TOP_P, DEFAULT_TOP_P) timeout = entity_options.get(CONF_REQUEST_TIMEOUT, DEFAULT_REQUEST_TIMEOUT) @@ -134,7 +133,6 @@ class GenericOpenAIAPIClient(LocalLLMClient): request_params = { "model": model_name, "stream": True, - "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "messages": messages diff --git a/custom_components/llama_conversation/backends/llamacpp.py b/custom_components/llama_conversation/backends/llamacpp.py index 2ba30ed..0fcb3a5 100644 --- a/custom_components/llama_conversation/backends/llamacpp.py +++ b/custom_components/llama_conversation/backends/llamacpp.py @@ -115,7 +115,7 @@ class LlamaCppClient(LocalLLMClient): @staticmethod def get_name(client_options: dict[str, Any]): - return f"LLama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})" + return f"Llama.cpp (llama-cpp-python v{client_options[CONF_INSTALLED_LLAMACPP_VERSION]})" async def async_get_available_models(self) -> List[str]: return [] # TODO: find available "huggingface_hub" models that have been downloaded @@ -391,7 +391,6 @@ class LlamaCppClient(LocalLLMClient): finally: self.model_lock.release() - # schedule a refresh using async_call_later # if the flag is set after the delay then we do another refresh @@ -428,23 +427,6 @@ class LlamaCppClient(LocalLLMClient): _LOGGER.debug(f"Options: {entity_options}") - # TODO: re-enable the context length check - # # FIXME: use the high level API so we can use the built-in prompt formatting - # input_tokens = self.llm.tokenize( - # prompt.encode(), add_bos=False - # ) - - # context_len = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH) - # if len(input_tokens) >= context_len: - # num_entities = len(self._async_get_exposed_entities()) - # context_size = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH) - # self._warn_context_size() - # raise Exception(f"The model failed to produce a result because too many devices are exposed ({num_entities} devices) for the context size ({context_size} tokens)!") - 
# if len(input_tokens) + max_tokens >= context_len: - # self._warn_context_size() - - # _LOGGER.debug(f"Processing {len(input_tokens)} input tokens...") - messages = get_oai_formatted_messages(conversation) tools = None if llm_api: diff --git a/custom_components/llama_conversation/backends/tailored_openai.py b/custom_components/llama_conversation/backends/tailored_openai.py index bf12dca..b4899a8 100644 --- a/custom_components/llama_conversation/backends/tailored_openai.py +++ b/custom_components/llama_conversation/backends/tailored_openai.py @@ -12,6 +12,7 @@ from homeassistant.helpers.aiohttp_client import async_get_clientsession from custom_components.llama_conversation.const import ( CONF_CHAT_MODEL, + CONF_MAX_TOKENS, CONF_TOP_K, CONF_TYPICAL_P, CONF_MIN_P, @@ -22,6 +23,7 @@ from custom_components.llama_conversation.const import ( CONF_TEXT_GEN_WEBUI_CHAT_MODE, CONF_CONTEXT_LENGTH, CONF_GENERIC_OPENAI_PATH, + DEFAULT_MAX_TOKENS, DEFAULT_TOP_K, DEFAULT_MIN_P, DEFAULT_TYPICAL_P, @@ -125,13 +127,15 @@ class LlamaCppServerClient(GenericOpenAIAPIClient): port = client_options[CONF_PORT] ssl = client_options[CONF_SSL] path = "/" + client_options[CONF_GENERIC_OPENAI_PATH] - return f"LLama.cpp Server at '{format_url(hostname=host, port=port, ssl=ssl, path=path)}'" + return f"Llama.cpp Server at '{format_url(hostname=host, port=port, ssl=ssl, path=path)}'" def _chat_completion_params(self, entity_options: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]: top_k = int(entity_options.get(CONF_TOP_K, DEFAULT_TOP_K)) + max_tokens = int(entity_options.get(CONF_MAX_TOKENS, DEFAULT_MAX_TOKENS)) endpoint, request_params = super()._chat_completion_params(entity_options) request_params["top_k"] = top_k + request_params["max_tokens"] = max_tokens if entity_options.get(CONF_USE_GBNF_GRAMMAR, DEFAULT_USE_GBNF_GRAMMAR): request_params["grammar"] = self.grammar diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py index 6054163..a80e649 100644 --- a/custom_components/llama_conversation/config_flow.py +++ b/custom_components/llama_conversation/config_flow.py @@ -523,11 +523,6 @@ def local_llama_config_option_schema( description={"suggested_value": options.get(CONF_NUM_IN_CONTEXT_EXAMPLES)}, default=DEFAULT_NUM_IN_CONTEXT_EXAMPLES, ): NumberSelector(NumberSelectorConfig(min=1, max=16, step=1)), - vol.Required( - CONF_MAX_TOKENS, - description={"suggested_value": options.get(CONF_MAX_TOKENS)}, - default=DEFAULT_MAX_TOKENS, - ): NumberSelector(NumberSelectorConfig(min=1, max=8192, step=1)), vol.Required( CONF_EXTRA_ATTRIBUTES_TO_EXPOSE, description={"suggested_value": options.get(CONF_EXTRA_ATTRIBUTES_TO_EXPOSE)}, @@ -562,6 +557,11 @@ def local_llama_config_option_schema( if backend_type == BACKEND_TYPE_LLAMA_CPP: result.update({ + vol.Required( + CONF_MAX_TOKENS, + description={"suggested_value": options.get(CONF_MAX_TOKENS)}, + default=DEFAULT_MAX_TOKENS, + ): NumberSelector(NumberSelectorConfig(min=1, max=8192, step=1)), vol.Required( CONF_TOP_K, description={"suggested_value": options.get(CONF_TOP_K)}, @@ -715,6 +715,11 @@ def local_llama_config_option_schema( }) elif backend_type == BACKEND_TYPE_LLAMA_CPP_SERVER: result.update({ + vol.Required( + CONF_MAX_TOKENS, + description={"suggested_value": options.get(CONF_MAX_TOKENS)}, + default=DEFAULT_MAX_TOKENS, + ): NumberSelector(NumberSelectorConfig(min=1, max=8192, step=1)), vol.Required( CONF_TOP_K, description={"suggested_value": options.get(CONF_TOP_K)}, @@ -743,6 +748,11 @@ def 
local_llama_config_option_schema( }) elif backend_type == BACKEND_TYPE_OLLAMA: result.update({ + vol.Required( + CONF_MAX_TOKENS, + description={"suggested_value": options.get(CONF_MAX_TOKENS)}, + default=DEFAULT_MAX_TOKENS, + ): NumberSelector(NumberSelectorConfig(min=1, max=8192, step=1)), vol.Required( CONF_CONTEXT_LENGTH, description={"suggested_value": options.get(CONF_CONTEXT_LENGTH)}, diff --git a/custom_components/llama_conversation/entity.py b/custom_components/llama_conversation/entity.py index 012011c..53d944b 100644 --- a/custom_components/llama_conversation/entity.py +++ b/custom_components/llama_conversation/entity.py @@ -280,6 +280,7 @@ class LocalLLMClient: except MalformedToolCallException as err: message_history.extend(err.as_tool_messages()) last_generation_had_tool_calls = True + _LOGGER.debug("Malformed tool call produced", exc_info=err) except Exception as err: _LOGGER.exception("There was a problem talking to the backend") intent_response = intent.IntentResponse(language=user_input.language) diff --git a/custom_components/llama_conversation/translations/en.json b/custom_components/llama_conversation/translations/en.json index 4afadde..a530858 100644 --- a/custom_components/llama_conversation/translations/en.json +++ b/custom_components/llama_conversation/translations/en.json @@ -57,7 +57,7 @@ "reconfigure_successful": "Successfully updated model options." }, "step": { - "init": { + "pick_model": { "data": { "huggingface_model": "Model Name", "downloaded_model_file": "Local file name", diff --git a/docs/Setup.md b/docs/Setup.md index 31c1f1f..dde2f79 100644 --- a/docs/Setup.md +++ b/docs/Setup.md @@ -13,13 +13,13 @@ * [Overview](#overview-1) * [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model) * [Step 2: Connect to the Ollama API](#step-2-connect-to-the-ollama-api) - * [Step 3: Model Configuration](#step-3-model-configuration-1) + * [Step 3: Model Selection & Configuration](#step-3-model-selection-configuration) * [Path 3: Using Llama-3-8B-Instruct with LM Studio](#path-3-using-llama-3-8b-instruct-with-lm-studio) * [Overview](#overview-2) * [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model-1) * [Step 2: Connect to the LM Studio API](#step-2-connect-to-the-lm-studio-api) - * [Step 3: Model Configuration](#step-3-model-configuration-2) -* [Configuring the Integration as a Conversation Agent](#configuring-the-integration-as-a-conversation-agent) + * [Step 3: Model Selection & Configuration](#step-3-model-selection-configuration-1) +* [Using the Integration as a Conversation Agent](#using-the-integration-as-a-conversation-agent) * [Finished!](#finished) @@ -40,7 +40,7 @@ The following link will open your Home Assistant installation and download the i [![Open your Home Assistant instance and open a repository inside the Home Assistant Community Store.](https://my.home-assistant.io/badges/hacs_repository.svg)](https://my.home-assistant.io/redirect/hacs_repository/?category=Integration&repository=home-llm&owner=acon96) -After installation, A "Local LLM Conversation" device should show up in the `Settings > Devices and Services > [Devices]` tab now. +After installation, A "Local LLM" device should show up in the `Settings > Devices and Services > [Devices]` tab now. ## Path 1: Using the Home Model with the Llama.cpp Backend ### Overview @@ -49,8 +49,8 @@ This setup path involves downloading a fine-tuned model from HuggingFace and int ### Step 1: Wheel Installation for llama-cpp-python 1. 
In Home Assistant: navigate to `Settings > Devices and Services` 2. Select the `+ Add Integration` button in the bottom right corner -3. Search for, and select `Local LLM Conversation` +3. Search for, and select `Local LLM` -4. With the `Llama.cpp (HuggingFace)` backend selected, click `Submit` +4. With the `Llama.cpp` backend selected, click `Submit` This should download and install `llama-cpp-python` from GitHub. If the installation fails for any reason, follow the manual installation instructions [here](./Backend%20Configuration.md#wheels). Once `llama-cpp-python` is installed, continue to the model selection. ### Step 2: Model Selection The next step is to specify which model will be used by the integration. You may select any repository on HuggingFace that has a model in GGUF format in it. We will use `acon96/Home-3B-v3-GGUF` for this example. If you have less than 4GB of RAM then use `acon96/Home-1B-v3-GGUF`. +1. Under the `Llama.cpp` service that you just created, select `+ Add conversation agent` **Model Name**: Use either `acon96/Home-3B-v3-GGUF` or `acon96/Home-1B-v3-GGUF` -**Quantization Level**: The model will be downloaded in the selected quantization level from the HuggingFace repository. If unsure which level to choose, select `Q4_K_M`. +**Quantization Level**: The model will be downloaded in the selected quantization level from the HuggingFace repository. If unsure which level to choose, select `Q4_K_M` or `Q4_0`. Pressing `Submit` will download the model from HuggingFace. The downloaded files will be stored by default in `/media/models/`. @@ -89,22 +90,25 @@ In order to access the model from another machine, we need to run the Ollama API 1. In Home Assistant: navigate to `Settings > Devices and Services` 2. Select the `+ Add Integration` button in the bottom right corner -3. Search for, and select `Local LLM Conversation` +3. Search for, and select `Local LLM` 4. Select `Ollama API` from the dropdown and click `Submit` 5. Set up the connection to the API: - **IP Address**: Fill out IP Address for the machine hosting Ollama - **Port**: leave on `11434` - **Use HTTPS**: unchecked - - **Model Name**: `mistral:latest` - **API Key**: leave blank + - **Path**: leave blank **UNLESS** you are using OpenWebUI to host Ollama; if so set to `/ollama` 6. Click `Submit` -### Step 3: Model Configuration -This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. +### Step 3: Model Selection & Configuration +1. You must create the conversation agent based on the model you wish to use. + Under the `Ollama at '` service that you just created, select `+ Add conversation agent` + - **Model Name**: Select the Mistral 7B model. This should be automatically populated based on the model you already downloaded +2. You can configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. For now, defaults for the model should have been populated. If you would like the model to be able to control devices then you must select the `Assist` API. -Once the desired API has been selected, scroll to the bottom and click `Submit`. +Once the desired model has been selected & configured, scroll to the bottom and click `Submit`. > NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on.
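Before adding the conversation agent, it can be worth confirming that the Ollama API is reachable over the network and that the model has finished downloading. A minimal sketch, assuming Ollama is listening on its default port at a placeholder address (adjust `OLLAMA_URL` to match your host):

```python
# Minimal reachability check for the Ollama API. The address is a placeholder;
# replace it with the IP of the machine that is running Ollama.
import requests

OLLAMA_URL = "http://192.168.1.10:11434"  # placeholder host running Ollama

resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
resp.raise_for_status()

# /api/tags lists the models that have already been pulled on that host.
models = [m["name"] for m in resp.json().get("models", [])]
print("Models visible to the integration:", models)

if not any(name.startswith("mistral") for name in models):
    print("Mistral is not available yet; run `ollama pull mistral` on that machine.")
```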
@@ -124,27 +128,27 @@ Llama 3 8B can be set up and downloaded on the serving machine using LM Studio b 1. In Home Assistant: navigate to `Settings > Devices and Services` 2. Select the `+ Add Integration` button in the bottom right corner -3. Search for, and select `Local LLM Conversation` +3. Search for, and select `Local LLM` 4. Select `Generic OpenAI Compatible API` from the dropdown and click `Submit` 5. Set up the connection to the API: - **IP Address**: Fill out IP Address for the machine hosting LM Studio - **Port**: enter the port that was listed in LM Studio - **Use HTTPS**: unchecked - - **Model Name**: Set this to the name of the model as it appears in LM Studio. If you receive an error that the model does not exist, then select the model from the dropdown list. - **API Key**: leave blank - **API Path**: leave as `/v1` 6. Click `Submit` -### Step 3: Model Configuration -This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. +### Step 3: Model Selection & Configuration +1. You must create the conversation agent based on the model you wish to use. + Under the `Generic OpenAI Compatible API` service that you just created, select `+ Add conversation agent` + - **Model Name**: Set this to the name of the model as it appears in LM Studio. The dropdown list should pre-populate with the models that are already installed. +2. You can configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. For now, defaults for the model should have been populated. If you would like the model to be able to control devices then you must select the `Assist` API. -Once the desired API has been selected, scroll to the bottom and click `Submit`. - > NOTE: The key settings in this case are that our prompt references the `{{ response_examples }}` variable and the `Enable in context learning (ICL) examples` option is turned on. -## Configuring the Integration as a Conversation Agent +## Using the Integration as a Conversation Agent Now that the integration is configured and providing the conversation agent, we need to configure Home Assistant to use our conversation agent instead of the built in intent recognition system. > 🛑 Warning 🛑