diff --git a/README.md b/README.md
index 674cddfff2..080bd582f8 100644
--- a/README.md
+++ b/README.md
@@ -54,9 +54,19 @@ export LLM_API_KEY="your-api-key"
 export LLM_MODEL="claude-3-opus-20240229"
 ```
 
-### Running on the Command Line
-You can also run OpenDevin from your command line:
+You can also set the base URL for local/custom models:
+```bash
+export LLM_BASE_URL="http://localhost:3000"
 ```
+
+You can also choose which embedding model is used for the vector database storage:
+```bash
+export LLM_EMBEDDING_MODEL="llama2" # can be "llama2", "openai", "azureopenai", or "local" (the default)
+```
+
+### Running on the Command Line
+You can run OpenDevin from your command line:
+```bash
 PYTHONPATH=`pwd` python opendevin/main.py -d ./workspace/ -i 100 -t "Write a bash script that prints 'hello world'"
 ```
 
diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py
index a0c393b188..df27f86476 100644
--- a/agenthub/codeact_agent/__init__.py
+++ b/agenthub/codeact_agent/__init__.py
@@ -1,4 +1,3 @@
-import os
 import re
 from typing import List, Mapping
 
@@ -19,12 +18,6 @@ from opendevin.observation import (
 
 from opendevin.llm.llm import LLM
 
-assert (
-    "OPENAI_API_KEY" in os.environ
-), "Please set the OPENAI_API_KEY environment variable."
-
-
-
 SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
 You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
 
diff --git a/agenthub/langchains_agent/utils/memory.py b/agenthub/langchains_agent/utils/memory.py
index 0611a5aac6..40de0ce3cd 100644
--- a/agenthub/langchains_agent/utils/memory.py
+++ b/agenthub/langchains_agent/utils/memory.py
@@ -1,18 +1,51 @@
-from . import json
+import os
 
 import chromadb
-
 from llama_index.core import Document
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import VectorStoreIndex
 from llama_index.vector_stores.chroma import ChromaVectorStore
 
+from . import json
+
+embedding_strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
+
+# TODO: Support more embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
+# There's probably a more programmatic way to select one; note that any unrecognized value falls back to the local HuggingFace model.
+if embedding_strategy == "llama2":
+    from llama_index.embeddings.ollama import OllamaEmbedding
+    embed_model = OllamaEmbedding(
+        model_name="llama2",
+        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
+        ollama_additional_kwargs={"mirostat": 0},
+    )
+elif embedding_strategy == "openai":
+    from llama_index.embeddings.openai import OpenAIEmbedding
+    embed_model = OpenAIEmbedding(
+        base_url=os.getenv("LLM_BASE_URL"),
+    )
+elif embedding_strategy == "azureopenai":
+    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # TODO: document the env variables required here (LLM_DEPLOYMENT_NAME, LLM_API_KEY, LLM_BASE_URL, LLM_API_VERSION)
+    embed_model = AzureOpenAIEmbedding(
+        model="text-embedding-ada-002",
+        deployment_name=os.getenv("LLM_DEPLOYMENT_NAME"),
+        api_key=os.getenv("LLM_API_KEY"),
+        azure_endpoint=os.getenv("LLM_BASE_URL"),
+        api_version=os.getenv("LLM_API_VERSION"),
+    )
+else:
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    embed_model = HuggingFaceEmbedding(
+        model_name="BAAI/bge-small-en-v1.5"
+    )
+
+
 class LongTermMemory:
     def __init__(self):
         db = chromadb.Client()
         self.collection = db.get_or_create_collection(name="memories")
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
-        self.index = VectorStoreIndex.from_vector_store(vector_store)
+        self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
         self.thought_idx = 0
 
     def add_event(self, event):
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
index 7d33c1f481..e91f6ae672 100644
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -6,16 +6,19 @@ from functools import partial
 
 DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
 DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+DEFAULT_BASE_URL = os.getenv("LLM_BASE_URL")
 PROMPT_DEBUG_DIR = os.getenv("PROMPT_DEBUG_DIR", "")
 
 class LLM:
-    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, debug_dir=PROMPT_DEBUG_DIR):
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, base_url=DEFAULT_BASE_URL, debug_dir=PROMPT_DEBUG_DIR):
         self.model = model if model else DEFAULT_MODEL
         self.api_key = api_key if api_key else DEFAULT_API_KEY
-        self._debug_dir = debug_dir
+        self.base_url = base_url if base_url else DEFAULT_BASE_URL
+        self._debug_dir = debug_dir if debug_dir else PROMPT_DEBUG_DIR
         self._debug_idx = 0
         self._debug_id = uuid.uuid4().hex
-        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key, base_url=self.base_url)
 
         if self._debug_dir:
             print(f"Logging prompts to {self._debug_dir}/{self._debug_id}")
diff --git a/requirements.txt b/requirements.txt
index 3a3f2cda81..489367618c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,3 +16,6 @@ langchain-community
 llama-index
 llama-index-vector-stores-chroma
 chromadb
+llama-index-embeddings-huggingface
+llama-index-embeddings-azure-openai
+llama-index-embeddings-ollama