diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
new file mode 100644
index 0000000..f44c3ae
--- /dev/null
+++ b/.github/workflows/create-release.yml
@@ -0,0 +1,88 @@
+name: Create Release
+
+on:
+  workflow_dispatch:
+    inputs:
+      release_notes:
+        description: "Release Notes"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  build_wheels:
+    name: Build wheels on ${{ matrix.arch }} (HA ${{ matrix.home_assistant_version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        home_assistant_version: ["2023.12.4", "2024.2.1"]
+        arch: ["aarch64", "armhf", "amd64", "i386"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Read llama-cpp-python version
+        run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
+
+      - name: Build artifact
+        uses: uraimo/run-on-arch-action@v2
+        id: build
+        with:
+          arch: none
+          distro: none
+          base_image: homeassistant/${{ matrix.arch }}-homeassistant:${{ matrix.home_assistant_version }}
+
+          # Create an artifacts directory
+          setup: |
+            mkdir -p "${PWD}/artifacts"
+
+          # Mount the artifacts directory as /artifacts in the container
+          dockerRunArgs: |
+            --volume "${PWD}/artifacts:/artifacts"
+
+          # The shell to run commands with in the container
+          shell: /bin/bash
+
+          # Produce a binary artifact and place it in the mounted volume
+          run: |
+            apk update
+            apk add build-base python3-dev cmake
+            pip3 install build
+
+            cd /tmp
+            git clone --quiet --recurse-submodules https://github.com/abetlen/llama-cpp-python --branch "v${{ env.EMBEDDED_LLAMA_CPP_PYTHON_VERSION }}"
+            cd llama-cpp-python
+
+            export CMAKE_ARGS="-DLLAVA_BUILD=OFF"
+            python3 -m build --wheel
+            cp -f ./dist/*.whl /artifacts/
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: ./artifacts/*.whl
+          name: artifact_${{ matrix.arch }}_${{ matrix.home_assistant_version }}
+
+  release:
+    name: Create Release
+    needs: [ build_wheels ]
+    runs-on: ubuntu-latest
+    if: "startsWith(github.event.ref, 'refs/tags/v')" # only create a release if this was run on a tag
+
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: dist
+          merge-multiple: true
+
+      - name: Create GitHub release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*
+          body: ${{ inputs.release_notes }}
+          make_latest: true
\ No newline at end of file
diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py
index 60b7a4d..d5475e2 100644
--- a/custom_components/llama_conversation/config_flow.py
+++ b/custom_components/llama_conversation/config_flow.py
@@ -301,7 +301,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
         install_exception = self.install_wheel_task.exception()
         if install_exception:
             _LOGGER.warning("Failed to install wheel: %s", repr(install_exception))
-            self.install_wheel_error = install_exception
+            self.install_wheel_error = "pip_wheel_error"
             next_step = "pick_backend"
         else:
             wheel_install_result = self.install_wheel_task.result()
diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py
index da11673..fa7e46b 100644
--- a/custom_components/llama_conversation/const.py
+++ b/custom_components/llama_conversation/const.py
@@ -232,4 +232,7 @@ OPTIONS_OVERRIDES = {
 # CONFIG_SCHEMA = vol.Schema(
 #     { DOMAIN: vol.All(cv.ensure_list, [MODEL_CONFIG_SCHEMA]) },
 #     extra=vol.ALLOW_EXTRA,
-# )
\ No newline at end of file
+# )
+
+INTEGRATION_VERSION = "0.2.10"
+EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.56"
\ No newline at end of file
diff --git a/custom_components/llama_conversation/translations/en.json b/custom_components/llama_conversation/translations/en.json
index a14fecc..a0b0190 100644
--- a/custom_components/llama_conversation/translations/en.json
+++ b/custom_components/llama_conversation/translations/en.json
@@ -7,8 +7,7 @@
         "missing_model_file": "The provided file does not exist.",
         "other_existing_local": "Another model is already loaded locally. Please unload it or configure a remote model.",
         "unknown": "Unexpected error",
-        "missing_wheels": "Llama.cpp is not installed and could not find any wheels to install! See the logs for more information.",
-        "pip_wheel_error": "Pip returned an error while installing the wheel!"
+        "pip_wheel_error": "Pip returned an error while installing the wheel! Please check the Home Assistant logs for more details."
     },
     "progress": {
         "download": "Please wait while the model is being downloaded from HuggingFace. This can take a few minutes.",
diff --git a/custom_components/llama_conversation/utils.py b/custom_components/llama_conversation/utils.py
index acc3fb5..c2d825a 100644
--- a/custom_components/llama_conversation/utils.py
+++ b/custom_components/llama_conversation/utils.py
@@ -11,6 +11,11 @@ from huggingface_hub import hf_hub_download, HfFileSystem
 from homeassistant.requirements import pip_kwargs
 from homeassistant.util.package import install_package, is_installed
 
+from .const import (
+    INTEGRATION_VERSION,
+    EMBEDDED_LLAMA_CPP_PYTHON_VERSION,
+)
+
 _LOGGER = logging.getLogger(__name__)
 
 def closest_color(requested_color):
@@ -60,38 +65,37 @@
 )
 
 def install_llama_cpp_python(config_dir: str):
+
+    if is_installed("llama-cpp-python"):
+        _LOGGER.info("llama-cpp-python is already installed")
+        return True
+
     platform_suffix = platform.machine()
     if platform_suffix == "arm64":
         platform_suffix = "aarch64"
+
+    runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
+
+    github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}.whl"
+    if install_package(github_release_url, pip_kwargs(config_dir)):
+        _LOGGER.info("llama-cpp-python successfully installed from GitHub release")
+        return True
 
     folder = os.path.dirname(__file__)
     potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
     potential_wheels = [ wheel for wheel in potential_wheels if f"cp{sys.version_info.major}{sys.version_info.minor}" in wheel ]
     if len(potential_wheels) == 0:
-        # someone who is better at async can figure out why this is necessary
-        time.sleep(0.5)
-
-        if is_installed("llama-cpp-python"):
-            _LOGGER.info("llama-cpp-python is already installed")
-            return True
 
         _LOGGER.error(
            "Error installing llama-cpp-python. Could not find any wheels that match the following filters. " + \
            f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
            "If you recently updated Home Assistant, then you may need to use a different wheel than previously. " + \
" + \ - "Make sure that the correct .whl file is located in config/custom_components/llama_conversation/*" + "Make sure that you download the correct .whl file from the GitHub releases page" ) - raise Exception("missing_wheels") + return False latest_wheel = potential_wheels[0] - latest_version = latest_wheel.split("-")[1] - if not is_installed("llama-cpp-python") or version("llama-cpp-python") != latest_version: - _LOGGER.info("Installing llama-cpp-python from wheel") - _LOGGER.debug(f"Wheel location: {latest_wheel}") - return install_package(os.path.join(folder, latest_wheel), pip_kwargs(config_dir)) - else: - # someone who is better at async can figure out why this is necessary - time.sleep(0.5) - - _LOGGER.info("llama-cpp-python is already installed") - return True \ No newline at end of file + _LOGGER.info("Installing llama-cpp-python from local wheel") + _LOGGER.debug(f"Wheel location: {latest_wheel}") + return install_package(os.path.join(folder, latest_wheel), pip_kwargs(config_dir))