From 5989853f7ab7102fe7abf662940cb25134102386 Mon Sep 17 00:00:00 2001
From: Aleksandar
Date: Tue, 30 Apr 2024 01:26:02 +0100
Subject: [PATCH] Fix duplicate LLM_BASE_URL entry in config.toml and enable
 different ollama embeddings (#1437)

Co-authored-by: Engel Nyst
---
 Makefile                                  | 22 ++++++++++++++++------
 agenthub/monologue_agent/utils/memory.py |  7 ++++---
 opendevin/config.py                       |  4 ++++
 opendevin/schema/config.py                |  1 +
 4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 29ef766941..09e212030f 100644
--- a/Makefile
+++ b/Makefile
@@ -200,12 +200,22 @@ setup-config-prompts:
 	@read -p "Enter your LLM Base URL [mostly used for local LLMs, leave blank if not needed - example: http://localhost:5001/v1/]: " llm_base_url; \
 	 if [[ ! -z "$$llm_base_url" ]]; then echo "LLM_BASE_URL=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; fi
 
-	@echo "Enter your LLM Embedding Model\nChoices are openai, azureopenai, llama2 or leave blank to default to 'BAAI/bge-small-en-v1.5' via huggingface"; \
-	read -p "> " llm_embedding_model; \
-	echo "LLM_EMBEDDING_MODEL=\"$$llm_embedding_model\"" >> $(CONFIG_FILE).tmp; \
-	if [ "$$llm_embedding_model" = "llama2" ]; then \
-		read -p "Enter the local model URL (will overwrite LLM_BASE_URL): " llm_base_url; \
-		echo "LLM_BASE_URL=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; \
+	@echo "Enter your LLM Embedding Model"; \
+	echo "Choices are:"; \
+	echo " - openai"; \
+	echo " - azureopenai"; \
+	echo " - Embeddings available only with OllamaEmbedding:"; \
+	echo "    - llama2"; \
+	echo "    - mxbai-embed-large"; \
+	echo "    - nomic-embed-text"; \
+	echo "    - all-minilm"; \
+	echo "    - stable-code"; \
+	echo " - Leave blank to default to 'BAAI/bge-small-en-v1.5' via huggingface"; \
+	read -p "> " llm_embedding_model; \
+	echo "LLM_EMBEDDING_MODEL=\"$$llm_embedding_model\"" >> $(CONFIG_FILE).tmp; \
+	if [ "$$llm_embedding_model" = "llama2" ] || [ "$$llm_embedding_model" = "mxbai-embed-large" ] || [ "$$llm_embedding_model" = "nomic-embed-text" ] || [ "$$llm_embedding_model" = "all-minilm" ] || [ "$$llm_embedding_model" = "stable-code" ]; then \
+		read -p "Enter the local model URL for the embedding model (will set LLM_EMBEDDING_BASE_URL): " llm_embedding_base_url; \
+		echo "LLM_EMBEDDING_BASE_URL=\"$$llm_embedding_base_url\"" >> $(CONFIG_FILE).tmp; \
 	elif [ "$$llm_embedding_model" = "azureopenai" ]; then \
 		read -p "Enter the Azure endpoint URL (will overwrite LLM_BASE_URL): " llm_base_url; \
 		echo "LLM_BASE_URL=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; \
diff --git a/agenthub/monologue_agent/utils/memory.py b/agenthub/monologue_agent/utils/memory.py
index 30cff89bd7..9f5433ff73 100644
--- a/agenthub/monologue_agent/utils/memory.py
+++ b/agenthub/monologue_agent/utils/memory.py
@@ -56,11 +56,12 @@
 embedding_strategy = config.get(ConfigType.LLM_EMBEDDING_MODEL)
 # TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
 # There's probably a more programmatic way to do this.
-if embedding_strategy == 'llama2':
+supported_ollama_embed_models = ['llama2', 'mxbai-embed-large', 'nomic-embed-text', 'all-minilm', 'stable-code']
+if embedding_strategy in supported_ollama_embed_models:
     from llama_index.embeddings.ollama import OllamaEmbedding
     embed_model = OllamaEmbedding(
-        model_name='llama2',
-        base_url=config.get(ConfigType.LLM_BASE_URL, required=True),
+        model_name=embedding_strategy,
+        base_url=config.get(ConfigType.LLM_EMBEDDING_BASE_URL, required=True),
         ollama_additional_kwargs={'mirostat': 0},
     )
 elif embedding_strategy == 'openai':
diff --git a/opendevin/config.py b/opendevin/config.py
index ec095561b8..e38790a85a 100644
--- a/opendevin/config.py
+++ b/opendevin/config.py
@@ -31,6 +31,7 @@ DEFAULT_CONFIG: dict = {
     ConfigType.SANDBOX_CONTAINER_IMAGE: DEFAULT_CONTAINER_IMAGE,
     ConfigType.RUN_AS_DEVIN: 'true',
     ConfigType.LLM_EMBEDDING_MODEL: 'local',
+    ConfigType.LLM_EMBEDDING_BASE_URL: None,
     ConfigType.LLM_EMBEDDING_DEPLOYMENT_NAME: None,
     ConfigType.LLM_API_VERSION: None,
     ConfigType.LLM_NUM_RETRIES: 5,
@@ -154,6 +155,9 @@ def finalize_config():
     if config.get(ConfigType.WORKSPACE_MOUNT_PATH) is None:
         config[ConfigType.WORKSPACE_MOUNT_PATH] = os.path.abspath(config[ConfigType.WORKSPACE_BASE])
 
+    if config.get(ConfigType.LLM_EMBEDDING_BASE_URL) is None:
+        config[ConfigType.LLM_EMBEDDING_BASE_URL] = config.get(ConfigType.LLM_BASE_URL)
+
     USE_HOST_NETWORK = config[ConfigType.USE_HOST_NETWORK].lower() != 'false'
     if USE_HOST_NETWORK and platform.system() == 'Darwin':
         logger.warning(
diff --git a/opendevin/schema/config.py b/opendevin/schema/config.py
index 713ea982b7..6570ec847a 100644
--- a/opendevin/schema/config.py
+++ b/opendevin/schema/config.py
@@ -15,6 +15,7 @@ class ConfigType(str, Enum):
     SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE'
     RUN_AS_DEVIN = 'RUN_AS_DEVIN'
     LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
+    LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
     LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME'
     LLM_API_VERSION = 'LLM_API_VERSION'
     LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
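
Net effect of the patch: the embedding model can now point at a different endpoint
than the chat model, via the new LLM_EMBEDDING_BASE_URL key, and finalize_config()
falls back to LLM_BASE_URL when that key is unset, so existing single-endpoint
configs keep working unchanged. Below is a minimal, self-contained Python sketch of
that fallback and selection logic; finalize_embedding_base_url and make_embed_model
are hypothetical helper names used for illustration, while the OllamaEmbedding call
mirrors the patched memory.py.

# Hypothetical standalone sketch of the fallback + selection behavior in this
# patch; only the OllamaEmbedding call is taken directly from memory.py.
SUPPORTED_OLLAMA_EMBED_MODELS = [
    'llama2', 'mxbai-embed-large', 'nomic-embed-text', 'all-minilm', 'stable-code',
]

def finalize_embedding_base_url(config: dict) -> None:
    # LLM_EMBEDDING_BASE_URL falls back to LLM_BASE_URL when unset, so a
    # single-URL config behaves exactly as it did before this patch.
    if config.get('LLM_EMBEDDING_BASE_URL') is None:
        config['LLM_EMBEDDING_BASE_URL'] = config.get('LLM_BASE_URL')

def make_embed_model(config: dict):
    # Any of the supported Ollama models routes to the embedding endpoint;
    # the real code also has openai/azureopenai/local branches, omitted here.
    strategy = config.get('LLM_EMBEDDING_MODEL')
    if strategy in SUPPORTED_OLLAMA_EMBED_MODELS:
        from llama_index.embeddings.ollama import OllamaEmbedding
        return OllamaEmbedding(
            model_name=strategy,
            base_url=config['LLM_EMBEDDING_BASE_URL'],
            ollama_additional_kwargs={'mirostat': 0},
        )
    raise ValueError(f'unhandled embedding strategy: {strategy}')

# Example: chat traffic keeps using LLM_BASE_URL while embeddings go to a
# separate Ollama server (11434 is Ollama's default port).
config = {
    'LLM_BASE_URL': 'http://localhost:5001/v1/',
    'LLM_EMBEDDING_MODEL': 'nomic-embed-text',
    'LLM_EMBEDDING_BASE_URL': 'http://localhost:11434',
}
finalize_embedding_base_url(config)
embed_model = make_embed_model(config)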