manually set model language, zephyr2 prompt format, upgrading llama-cpp-python

Alex O'Connell
2024-04-24 20:58:40 -04:00
parent 03c23f1a8c
commit 92058d0694
5 changed files with 107 additions and 40 deletions

View File

@@ -34,6 +34,7 @@ from homeassistant.helpers.selector import (
BooleanSelectorConfig,
)
from homeassistant.util.package import is_installed
from importlib.metadata import version
from .utils import download_model_from_hf, install_llama_cpp_python
from .const import (
@@ -47,6 +48,8 @@ from .const import (
CONF_TYPICAL_P,
CONF_REQUEST_TIMEOUT,
CONF_BACKEND_TYPE,
CONF_SELECTED_LANGUAGE,
CONF_SELECTED_LANGUAGE_OPTIONS,
CONF_DOWNLOADED_MODEL_FILE,
CONF_DOWNLOADED_MODEL_QUANTIZATION,
CONF_DOWNLOADED_MODEL_QUANTIZATION_OPTIONS,
@@ -125,6 +128,7 @@ from .const import (
DEFAULT_OPTIONS,
OPTIONS_OVERRIDES,
RECOMMENDED_CHAT_MODELS,
EMBEDDED_LLAMA_CPP_PYTHON_VERSION
)
_LOGGER = logging.getLogger(__name__)
@@ -153,14 +157,20 @@ def STEP_INIT_DATA_SCHEMA(backend_type=None):
}
)
def STEP_LOCAL_SETUP_EXISTING_DATA_SCHEMA(model_file=None):
def STEP_LOCAL_SETUP_EXISTING_DATA_SCHEMA(model_file=None, selected_language=None):
return vol.Schema(
{
vol.Required(CONF_DOWNLOADED_MODEL_FILE, default=model_file if model_file else ""): str,
vol.Required(CONF_SELECTED_LANGUAGE, default=selected_language if selected_language else "en"): SelectSelector(SelectSelectorConfig(
options=CONF_SELECTED_LANGUAGE_OPTIONS,
translation_key=CONF_SELECTED_LANGUAGE,
multiple=False,
mode=SelectSelectorMode.DROPDOWN,
)),
}
)
def STEP_LOCAL_SETUP_DOWNLOAD_DATA_SCHEMA(*, chat_model=None, downloaded_model_quantization=None):
def STEP_LOCAL_SETUP_DOWNLOAD_DATA_SCHEMA(*, chat_model=None, downloaded_model_quantization=None, selected_language=None):
return vol.Schema(
{
vol.Required(CONF_CHAT_MODEL, default=chat_model if chat_model else DEFAULT_CHAT_MODEL): SelectSelector(SelectSelectorConfig(
@@ -170,10 +180,16 @@ def STEP_LOCAL_SETUP_DOWNLOAD_DATA_SCHEMA(*, chat_model=None, downloaded_model_q
mode=SelectSelectorMode.DROPDOWN,
)),
vol.Required(CONF_DOWNLOADED_MODEL_QUANTIZATION, default=downloaded_model_quantization if downloaded_model_quantization else DEFAULT_DOWNLOADED_MODEL_QUANTIZATION): vol.In(CONF_DOWNLOADED_MODEL_QUANTIZATION_OPTIONS),
vol.Required(CONF_SELECTED_LANGUAGE, default=selected_language if selected_language else "en"): SelectSelector(SelectSelectorConfig(
options=CONF_SELECTED_LANGUAGE_OPTIONS,
translation_key=CONF_SELECTED_LANGUAGE,
multiple=False,
mode=SelectSelectorMode.DROPDOWN,
)),
}
)
def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ssl=None, chat_model=None, available_chat_models=[]):
def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ssl=None, chat_model=None, available_chat_models=[], selected_language=None):
extra1, extra2 = ({}, {})
default_port = DEFAULT_PORT
@@ -197,6 +213,12 @@ def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ss
multiple=False,
mode=SelectSelectorMode.DROPDOWN,
)),
vol.Required(CONF_SELECTED_LANGUAGE, default=selected_language if selected_language else "en"): SelectSelector(SelectSelectorConfig(
options=CONF_SELECTED_LANGUAGE_OPTIONS,
translation_key=CONF_SELECTED_LANGUAGE,
multiple=False,
mode=SelectSelectorMode.DROPDOWN,
)),
**extra1,
vol.Optional(CONF_OPENAI_API_KEY): TextSelector(TextSelectorConfig(type="password")),
**extra2
@@ -264,6 +286,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
download_error = None
model_config: dict[str, Any]
options: dict[str, Any]
selected_language: str
@property
def flow_manager(self) -> config_entries.ConfigEntriesFlowManager:
@@ -294,7 +317,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
local_backend = is_local_backend(user_input[CONF_BACKEND_TYPE])
self.model_config.update(user_input)
if local_backend:
if is_installed("llama-cpp-python"):
if is_installed("llama-cpp-python") and version("llama-cpp-python") == EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
return await self.async_step_local_model()
else:
return await self.async_step_install_local_wheels()
@@ -367,10 +390,13 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
description_placeholders["exception"] = str(self.download_error)
schema = STEP_LOCAL_SETUP_DOWNLOAD_DATA_SCHEMA(
chat_model=self.model_config[CONF_CHAT_MODEL],
downloaded_model_quantization=self.model_config[CONF_DOWNLOADED_MODEL_QUANTIZATION]
downloaded_model_quantization=self.model_config[CONF_DOWNLOADED_MODEL_QUANTIZATION],
selected_language=self.selected_language
)
if user_input and "result" not in user_input:
self.selected_language = user_input.pop(CONF_SELECTED_LANGUAGE, self.hass.config.language)
self.model_config.update(user_input)
if backend_type == BACKEND_TYPE_LLAMA_HF:
@@ -382,7 +408,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
return await self.async_step_model_parameters()
else:
errors["base"] = "missing_model_file"
schema = STEP_LOCAL_SETUP_EXISTING_DATA_SCHEMA(model_file)
schema = STEP_LOCAL_SETUP_EXISTING_DATA_SCHEMA(model_file, self.selected_language)
return self.async_show_form(
step_id="local_model", data_schema=schema, errors=errors, description_placeholders=description_placeholders, last_step=False
@@ -500,6 +526,8 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
if user_input:
try:
self.selected_language = user_input.pop(CONF_SELECTED_LANGUAGE, self.hass.config.language)
self.model_config.update(user_input)
error_message = None
@@ -526,6 +554,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
ssl=user_input[CONF_SSL],
chat_model=user_input[CONF_CHAT_MODEL],
available_chat_models=possible_models,
selected_language=self.selected_language,
)
else:
return await self.async_step_model_parameters()
@@ -550,8 +579,11 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom
for key in OPTIONS_OVERRIDES.keys():
if key in model_name:
selected_default_options.update(OPTIONS_OVERRIDES[key])
persona = PERSONA_PROMPTS.get(self.selected_language, PERSONA_PROMPTS.get("en"))
selected_default_options[CONF_PROMPT] = selected_default_options[CONF_PROMPT].replace("<persona>", persona)
schema = vol.Schema(local_llama_config_option_schema(selected_default_options, backend_type, self.hass.config.language))
schema = vol.Schema(local_llama_config_option_schema(selected_default_options, backend_type))
if user_input:
self.options = user_input
@@ -628,7 +660,6 @@ class OptionsFlow(config_entries.OptionsFlow):
schema = local_llama_config_option_schema(
self.config_entry.options,
self.config_entry.data[CONF_BACKEND_TYPE],
self.hass.config.language
)
return self.async_show_form(
step_id="init",
@@ -650,14 +681,11 @@ def insert_after_key(input_dict: dict, key_name: str, other_dict: dict):
return result
def local_llama_config_option_schema(options: MappingProxyType[str, Any], backend_type: str, language: str) -> dict:
def local_llama_config_option_schema(options: MappingProxyType[str, Any], backend_type: str) -> dict:
"""Return a schema for Local LLaMA completion options."""
if not options:
options = DEFAULT_OPTIONS
persona = PERSONA_PROMPTS.get(language, PERSONA_PROMPTS.get("en"))
options[CONF_PROMPT] = options[CONF_PROMPT].replace("<persona>", persona)
result = {
vol.Required(
CONF_PROMPT,
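
Note on the persona change in this file: the new selected_language value is only used to pick an entry from PERSONA_PROMPTS and splice it into the prompt's <persona> placeholder; the persona is no longer derived from the Home Assistant UI language inside local_llama_config_option_schema. A minimal sketch of that flow, with the contents of PERSONA_PROMPTS assumed for illustration (the real constant and its strings are defined elsewhere in the integration and are not part of this diff):

# Illustrative sketch only, not part of this commit.
# PERSONA_PROMPTS and its exact strings are assumptions for the example.
PERSONA_PROMPTS = {
    "en": "You are 'Al', a helpful AI assistant that controls the devices in a house.",
    "de": "Du bist 'Al', ein hilfreicher KI-Assistent, der die Geräte in einem Haus steuert.",
}

def apply_persona(prompt: str, selected_language: str) -> str:
    # Fall back to English when no persona prompt exists for the selected language.
    persona = PERSONA_PROMPTS.get(selected_language, PERSONA_PROMPTS["en"])
    return prompt.replace("<persona>", persona)

print(apply_persona("<persona>\nThe current time is 8:12 PM.", "de"))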

View File

@@ -25,7 +25,7 @@ User instruction:"""
DEFAULT_PROMPT = DEFAULT_PROMPT_BASE + ICL_EXTRAS
CONF_CHAT_MODEL = "huggingface_model"
DEFAULT_CHAT_MODEL = "acon96/Home-3B-v3-GGUF"
RECOMMENDED_CHAT_MODELS = [ "acon96/Home-3B-v3-GGUF", "acon96/Home-1B-v2-GGUF", "TheBloke/Mistral-7B-Instruct-v0.2-GGUF" ]
RECOMMENDED_CHAT_MODELS = [ "acon96/Home-3B-v3-GGUF", "acon96/Home-1B-v3-GGUF", "TheBloke/Mistral-7B-Instruct-v0.2-GGUF" ]
CONF_MAX_TOKENS = "max_new_tokens"
DEFAULT_MAX_TOKENS = 128
CONF_TOP_K = "top_k"
@@ -48,6 +48,8 @@ BACKEND_TYPE_GENERIC_OPENAI = "generic_openai"
BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER = "llama_cpp_python_server"
BACKEND_TYPE_OLLAMA = "ollama"
DEFAULT_BACKEND_TYPE = BACKEND_TYPE_LLAMA_HF
CONF_SELECTED_LANGUAGE = "selected_language"
CONF_SELECTED_LANGUAGE_OPTIONS = [ "en", "de", "fr", "es" ]
CONF_DOWNLOADED_MODEL_QUANTIZATION = "downloaded_model_quantization"
CONF_DOWNLOADED_MODEL_QUANTIZATION_OPTIONS = ["F16", "Q8_0", "Q5_K_M", "Q4_K_M", "Q3_K_M"]
DEFAULT_DOWNLOADED_MODEL_QUANTIZATION = "Q4_K_M"
@@ -67,6 +69,7 @@ PROMPT_TEMPLATE_MISTRAL = "mistral"
PROMPT_TEMPLATE_LLAMA3 = "llama3"
PROMPT_TEMPLATE_NONE = "no_prompt_template"
PROMPT_TEMPLATE_ZEPHYR = "zephyr"
PROMPT_TEMPLATE_ZEPHYR2 = "zephyr2"
DEFAULT_PROMPT_TEMPLATE = PROMPT_TEMPLATE_CHATML
PROMPT_TEMPLATE_DESCRIPTIONS = {
PROMPT_TEMPLATE_CHATML: {
@@ -104,6 +107,12 @@ PROMPT_TEMPLATE_DESCRIPTIONS = {
"assistant": { "prefix": "<|assistant|>\n", "suffix": "<|endoftext|>" },
"generation_prompt": "<|assistant|>\n"
},
PROMPT_TEMPLATE_ZEPHYR2: {
"system": { "prefix": "<|system|>\n", "suffix": "</s>" },
"user": { "prefix": "<|user|>\n", "suffix": "</s>" },
"assistant": { "prefix": "<|assistant|>\n", "suffix": "</s>" },
"generation_prompt": "<|assistant|>\n"
},
PROMPT_TEMPLATE_LLAMA3: {
"system": { "prefix": "<|start_header_id|>system<|end_header_id|>\n\n", "suffix": "<|eot_id|>"},
"user": { "prefix": "<|start_header_id|>user<|end_header_id|>\n\n", "suffix": "<|eot_id|>"},
@@ -221,8 +230,10 @@ OPTIONS_OVERRIDES = {
},
"home-1b-v3": {
CONF_PROMPT: DEFAULT_PROMPT_BASE,
CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_ZEPHYR2,
CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False,
CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX,
CONF_USE_GBNF_GRAMMAR: True,
},
"home-1b-v2": {
CONF_PROMPT: DEFAULT_PROMPT_BASE,
@@ -261,4 +272,4 @@ OPTIONS_OVERRIDES = {
}
INTEGRATION_VERSION = "0.2.12"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.60"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.64"
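
The new zephyr2 template differs from the existing zephyr entry only in its suffix token (</s> instead of <|endoftext|>), and it is what the home-1b-v3 override above now selects. A minimal sketch of how a PROMPT_TEMPLATE_DESCRIPTIONS entry could be applied when assembling a prompt; the integration's actual formatting code is not shown in this diff:

# Illustrative sketch only: applying a template description's
# prefix/suffix/generation_prompt fields to a list of chat messages.
ZEPHYR2 = {
    "system": {"prefix": "<|system|>\n", "suffix": "</s>"},
    "user": {"prefix": "<|user|>\n", "suffix": "</s>"},
    "assistant": {"prefix": "<|assistant|>\n", "suffix": "</s>"},
    "generation_prompt": "<|assistant|>\n",
}

def format_prompt(messages, template=ZEPHYR2):
    parts = []
    for message in messages:
        role = template[message["role"]]
        parts.append(role["prefix"] + message["content"] + role["suffix"])
    # End with the generation prompt so the model continues as the assistant.
    parts.append(template["generation_prompt"])
    return "".join(parts)

print(format_prompt([
    {"role": "system", "content": "You control the devices in this house."},
    {"role": "user", "content": "turn on the kitchen light"},
]))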

View File

@@ -18,7 +18,8 @@
"data": {
"downloaded_model_file": "Local file name",
"downloaded_model_quantization": "Downloaded model quantization",
"huggingface_model": "HuggingFace Model"
"huggingface_model": "HuggingFace Model",
"selected_language": "Model Language"
},
"description": "Please select a model to use.\n\n**Models supported out of the box:**\n1. [Home LLM](https://huggingface.co/collections/acon96/home-llm-6618762669211da33bb22c5a): Home 3B & Home 1B\n2. Mistral: [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) or [Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)\n3. Llama 3: [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)",
"title": "Select Model"
@@ -33,7 +34,8 @@
"text_generation_webui_admin_key": "Admin Key",
"text_generation_webui_preset": "Generation Preset/Character Name",
"remote_use_chat_endpoint": "Use chat completions endpoint",
"text_generation_webui_chat_mode": "Chat Mode"
"text_generation_webui_chat_mode": "Chat Mode",
"selected_language": "Model Language"
},
"description": "Provide the connection details to connect to the API that is hosting the model.\n\n**Models supported out of the box:**\n1. [Home LLM](https://huggingface.co/collections/acon96/home-llm-6618762669211da33bb22c5a): Home 3B & Home 1B\n2. Mistral: [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) or [Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)",
"title": "Configure connection to remote API"
@@ -158,7 +160,8 @@
"vicuna": "Vicuna",
"alpaca": "Alpaca",
"mistral": "Mistral",
"zephyr": "Zephyr",
"zephyr": "Zephyr (<|endoftext|>)",
"zephyr2": "Zephyr ('</s>')",
"llama3": "Llama 3",
"no_prompt_template": "None"
}
@@ -180,6 +183,14 @@
"instruct": "Instruct",
"chat-instruct": "Chat-Instruct"
}
},
"selected_language": {
"options": {
"en": "English",
"de": "German",
"fr": "French",
"es": "Spanish"
}
}
}
}

View File

@@ -69,17 +69,34 @@ def download_model_from_hf(model_name: str, quantization_type: str, storage_fold
def install_llama_cpp_python(config_dir: str):
installed_wrong_version = False
if is_installed("llama-cpp-python"):
return True
if version("llama-cpp-python") != EMBEDDED_LLAMA_CPP_PYTHON_VERSION:
installed_wrong_version = True
else:
time.sleep(0.5) # I still don't know why this is required
return True
platform_suffix = platform.machine()
if platform_suffix == "arm64":
platform_suffix = "aarch64"
runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
instruction_extensions_suffix = ""
try:
if platform_suffix == "amd64" or platform_suffix == "i386":
with open("/proc/cpuinfo") as f:
cpu_features = [ line for line in f.readlines() if line.startswith("Features") or line.startswith("flags")][0]
if "avx512f" in cpu_features and "avx512bw" in cpu_features:
instruction_extensions_suffix = "-avx512"
elif "avx" not in cpu_features:
instruction_extensions_suffix = "-noavx"
except Exception as ex:
_LOGGER.debug(f"Couldn't detect CPU features: {ex}")
folder = os.path.dirname(__file__)
potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}.whl") ], reverse=True)
potential_wheels = sorted([ path for path in os.listdir(folder) if path.endswith(f"{platform_suffix}{instruction_extensions_suffix}.whl") ], reverse=True)
potential_wheels = [ wheel for wheel in potential_wheels if runtime_version in wheel ]
if len(potential_wheels) > 0:
@@ -88,27 +105,26 @@ def install_llama_cpp_python(config_dir: str):
_LOGGER.info("Installing llama-cpp-python from local wheel")
_LOGGER.debug(f"Wheel location: {latest_wheel}")
return install_package(os.path.join(folder, latest_wheel), pip_kwargs(config_dir))
instruction_extensions_suffix = ""
if platform_suffix == "amd64" or platform_suffix == "i386":
with open("/proc/cpuinfo") as f:
cpu_features = [ line for line in f.readlines() if line.startswith("Features") or line.startswith("flags")][0]
if "avx512f" in cpu_features and "avx512bw" in cpu_features:
instruction_extensions_suffix = "-avx512"
elif "avx" not in cpu_features:
instruction_extensions_suffix = "-noavx"
github_release_url = f"https://github.com/acon96/home-llm/releases/download/v{INTEGRATION_VERSION}/llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}-{runtime_version}-{runtime_version}-musllinux_1_2_{platform_suffix}{instruction_extensions_suffix}.whl"
if install_package(github_release_url, pip_kwargs(config_dir)):
_LOGGER.info("llama-cpp-python successfully installed from GitHub release")
return True
_LOGGER.error(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
f"platform: {platform_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
"Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
"Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
)
return False
# if it is just the wrong version installed then ignore the installation error
if not installed_wrong_version:
_LOGGER.error(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
f"platform: {platform_suffix}{instruction_extensions_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
"Please manually build or download the wheels and place them in the `/config/custom_components/llama_conversation` directory." + \
"Make sure that you download the correct .whl file for your platform and python version from the GitHub releases page."
)
return False
else:
_LOGGER.info(
"Error installing llama-cpp-python. Could not install the binary wheels from GitHub for " + \
f"platform: {platform_suffix}{instruction_extensions_suffix}, python version: {sys.version_info.major}.{sys.version_info.minor}. " + \
f"You already have a version of llama-cpp-python ({version('llama-cpp-python')}) installed, however it may not be compatible!"
)
time.sleep(0.5) # I still don't know why this is required
return True
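
For reference, both the local wheel search and the GitHub release download above now key off the same filename pattern, including the new instruction-extension suffix. A small sketch of how that name is assembled, with example values (the real ones come from platform.machine(), /proc/cpuinfo, and the running interpreter):

# Illustrative sketch: the wheel filename implied by the release URL above.
import sys

EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.64"
platform_suffix = "x86_64"                 # arm64 is mapped to aarch64 earlier in the code
instruction_extensions_suffix = "-avx512"  # "", "-avx512", or "-noavx"
runtime_version = f"cp{sys.version_info.major}{sys.version_info.minor}"

wheel_name = (
    f"llama_cpp_python-{EMBEDDED_LLAMA_CPP_PYTHON_VERSION}"
    f"-{runtime_version}-{runtime_version}"
    f"-musllinux_1_2_{platform_suffix}{instruction_extensions_suffix}.whl"
)
print(wheel_name)  # e.g. llama_cpp_python-0.2.64-cp312-cp312-musllinux_1_2_x86_64-avx512.whl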

View File

@@ -44,7 +44,7 @@ from custom_components.llama_conversation.const import (
CONF_BATCH_THREAD_COUNT,
DEFAULT_CHAT_MODEL,
DEFAULT_MAX_TOKENS,
NO_ICL_PROMPT,
DEFAULT_PROMPT_BASE,
DEFAULT_TEMPERATURE,
DEFAULT_TOP_K,
DEFAULT_TOP_P,
@@ -122,7 +122,7 @@ def config_entry():
},
options={
**DEFAULT_OPTIONS,
CONF_PROMPT: NO_ICL_PROMPT,
CONF_PROMPT: DEFAULT_PROMPT_BASE,
}
)
@@ -355,7 +355,8 @@ async def test_ollama_agent(ollama_agent_fixture):
"temperature": ollama_agent.entry.options[CONF_TEMPERATURE],
"num_predict": ollama_agent.entry.options[CONF_MAX_TOKENS],
},
"prompt": ANY
"prompt": ANY,
"raw": True
},
timeout=ollama_agent.entry.options[CONF_REQUEST_TIMEOUT]
)
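
The added "raw": True in the expected Ollama request asks the /api/generate endpoint to use the prompt as-is rather than applying Ollama's own template, which matters now that the integration formats the prompt itself. A minimal sketch of the kind of payload the updated test asserts against (model name, prompt text, and option values are made up for the example; the endpoint shape is Ollama's generate API, not part of this diff):

# Illustrative sketch only: a /api/generate request with raw mode enabled.
import json
import urllib.request

payload = {
    "model": "home-1b-v3",  # illustrative model tag
    "prompt": "<|system|>\nYou control the house.</s><|user|>\nturn on the kitchen light</s><|assistant|>\n",
    "raw": True,            # send the already-templated prompt unchanged
    "stream": False,
    "options": {"temperature": 0.1, "num_predict": 128},
}
request = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
# response = json.load(urllib.request.urlopen(request, timeout=90))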