mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-04 03:45:12 -05:00
Convert to python module named autogpt.
Also fixed the Dockerfile. Converting to module makes development easier. Fixes coverage script in CI and test imports.
This commit is contained in:
committed by
Merwane Hamadi
parent
a17a850b25
commit
d64f866bfa
@@ -1,59 +0,0 @@
|
||||
from memory.local import LocalCache
|
||||
from memory.no_memory import NoMemory
|
||||
|
||||
# List of supported memory backends
|
||||
# Add a backend to this list if the import attempt is successful
|
||||
supported_memory = ['local', 'no_memory']
|
||||
|
||||
try:
|
||||
from memory.redismem import RedisMemory
|
||||
supported_memory.append('redis')
|
||||
except ImportError:
|
||||
print("Redis not installed. Skipping import.")
|
||||
RedisMemory = None
|
||||
|
||||
try:
|
||||
from memory.pinecone import PineconeMemory
|
||||
supported_memory.append('pinecone')
|
||||
except ImportError:
|
||||
print("Pinecone not installed. Skipping import.")
|
||||
PineconeMemory = None
|
||||
|
||||
|
||||
def get_memory(cfg, init=False):
|
||||
memory = None
|
||||
if cfg.memory_backend == "pinecone":
|
||||
if not PineconeMemory:
|
||||
print("Error: Pinecone is not installed. Please install pinecone"
|
||||
" to use Pinecone as a memory backend.")
|
||||
else:
|
||||
memory = PineconeMemory(cfg)
|
||||
if init:
|
||||
memory.clear()
|
||||
elif cfg.memory_backend == "redis":
|
||||
if not RedisMemory:
|
||||
print("Error: Redis is not installed. Please install redis-py to"
|
||||
" use Redis as a memory backend.")
|
||||
else:
|
||||
memory = RedisMemory(cfg)
|
||||
elif cfg.memory_backend == "no_memory":
|
||||
memory = NoMemory(cfg)
|
||||
|
||||
if memory is None:
|
||||
memory = LocalCache(cfg)
|
||||
if init:
|
||||
memory.clear()
|
||||
return memory
|
||||
|
||||
|
||||
def get_supported_memory_backends():
|
||||
return supported_memory
|
||||
|
||||
|
||||
__all__ = [
|
||||
"get_memory",
|
||||
"LocalCache",
|
||||
"RedisMemory",
|
||||
"PineconeMemory",
|
||||
"NoMemory"
|
||||
]
|
||||
@@ -1,36 +0,0 @@
|
||||
"""Base class for memory providers."""
|
||||
import abc
|
||||
from config import AbstractSingleton, Config
|
||||
import openai
|
||||
|
||||
cfg = Config()
|
||||
|
||||
|
||||
def get_ada_embedding(text):
|
||||
text = text.replace("\n", " ")
|
||||
if cfg.use_azure:
|
||||
return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"]
|
||||
else:
|
||||
return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
|
||||
|
||||
|
||||
class MemoryProviderSingleton(AbstractSingleton):
|
||||
@abc.abstractmethod
|
||||
def add(self, data):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get(self, data):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def clear(self):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_relevant(self, data, num_relevant=5):
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_stats(self):
|
||||
pass
|
||||
@@ -1,124 +0,0 @@
|
||||
import dataclasses
|
||||
import orjson
|
||||
from typing import Any, List, Optional
|
||||
import numpy as np
|
||||
import os
|
||||
from memory.base import MemoryProviderSingleton, get_ada_embedding
|
||||
|
||||
|
||||
EMBED_DIM = 1536
|
||||
SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS
|
||||
|
||||
|
||||
def create_default_embeddings():
|
||||
return np.zeros((0, EMBED_DIM)).astype(np.float32)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class CacheContent:
|
||||
texts: List[str] = dataclasses.field(default_factory=list)
|
||||
embeddings: np.ndarray = dataclasses.field(
|
||||
default_factory=create_default_embeddings
|
||||
)
|
||||
|
||||
|
||||
class LocalCache(MemoryProviderSingleton):
|
||||
|
||||
# on load, load our database
|
||||
def __init__(self, cfg) -> None:
|
||||
self.filename = f"{cfg.memory_index}.json"
|
||||
if os.path.exists(self.filename):
|
||||
try:
|
||||
with open(self.filename, 'w+b') as f:
|
||||
file_content = f.read()
|
||||
if not file_content.strip():
|
||||
file_content = b'{}'
|
||||
f.write(file_content)
|
||||
|
||||
loaded = orjson.loads(file_content)
|
||||
self.data = CacheContent(**loaded)
|
||||
except orjson.JSONDecodeError:
|
||||
print(f"Error: The file '{self.filename}' is not in JSON format.")
|
||||
self.data = CacheContent()
|
||||
else:
|
||||
print(f"Warning: The file '{self.filename}' does not exist. Local memory would not be saved to a file.")
|
||||
self.data = CacheContent()
|
||||
|
||||
def add(self, text: str):
|
||||
"""
|
||||
Add text to our list of texts, add embedding as row to our
|
||||
embeddings-matrix
|
||||
|
||||
Args:
|
||||
text: str
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
if 'Command Error:' in text:
|
||||
return ""
|
||||
self.data.texts.append(text)
|
||||
|
||||
embedding = get_ada_embedding(text)
|
||||
|
||||
vector = np.array(embedding).astype(np.float32)
|
||||
vector = vector[np.newaxis, :]
|
||||
self.data.embeddings = np.concatenate(
|
||||
[
|
||||
self.data.embeddings,
|
||||
vector,
|
||||
],
|
||||
axis=0,
|
||||
)
|
||||
|
||||
with open(self.filename, 'wb') as f:
|
||||
out = orjson.dumps(
|
||||
self.data,
|
||||
option=SAVE_OPTIONS
|
||||
)
|
||||
f.write(out)
|
||||
return text
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the redis server.
|
||||
|
||||
Returns: A message indicating that the memory has been cleared.
|
||||
"""
|
||||
self.data = CacheContent()
|
||||
return "Obliviated"
|
||||
|
||||
def get(self, data: str) -> Optional[List[Any]]:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: The most relevant data.
|
||||
"""
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def get_relevant(self, text: str, k: int) -> List[Any]:
|
||||
""""
|
||||
matrix-vector mult to find score-for-each-row-of-matrix
|
||||
get indices for top-k winning scores
|
||||
return texts for those indices
|
||||
Args:
|
||||
text: str
|
||||
k: int
|
||||
|
||||
Returns: List[str]
|
||||
"""
|
||||
embedding = get_ada_embedding(text)
|
||||
|
||||
scores = np.dot(self.data.embeddings, embedding)
|
||||
|
||||
top_k_indices = np.argsort(scores)[-k:][::-1]
|
||||
|
||||
return [self.data.texts[i] for i in top_k_indices]
|
||||
|
||||
def get_stats(self):
|
||||
"""
|
||||
Returns: The stats of the local cache.
|
||||
"""
|
||||
return len(self.data.texts), self.data.embeddings.shape
|
||||
@@ -1,66 +0,0 @@
|
||||
from typing import Optional, List, Any
|
||||
|
||||
from memory.base import MemoryProviderSingleton
|
||||
|
||||
|
||||
class NoMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
"""
|
||||
Initializes the NoMemory provider.
|
||||
|
||||
Args:
|
||||
cfg: The config object.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def add(self, data: str) -> str:
|
||||
"""
|
||||
Adds a data point to the memory. No action is taken in NoMemory.
|
||||
|
||||
Args:
|
||||
data: The data to add.
|
||||
|
||||
Returns: An empty string.
|
||||
"""
|
||||
return ""
|
||||
|
||||
def get(self, data: str) -> Optional[List[Any]]:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
NoMemory always returns None.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the memory. No action is taken in NoMemory.
|
||||
|
||||
Returns: An empty string.
|
||||
"""
|
||||
return ""
|
||||
|
||||
def get_relevant(self, data: str, num_relevant: int = 5) -> Optional[List[Any]]:
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
NoMemory always returns None.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
num_relevant: The number of relevant data to return.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
return None
|
||||
|
||||
def get_stats(self):
|
||||
"""
|
||||
Returns: An empty dictionary as there are no stats in NoMemory.
|
||||
"""
|
||||
return {}
|
||||
@@ -1,62 +0,0 @@
|
||||
|
||||
import pinecone
|
||||
|
||||
from memory.base import MemoryProviderSingleton, get_ada_embedding
|
||||
from logger import logger
|
||||
from colorama import Fore, Style
|
||||
|
||||
|
||||
class PineconeMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
pinecone_api_key = cfg.pinecone_api_key
|
||||
pinecone_region = cfg.pinecone_region
|
||||
pinecone.init(api_key=pinecone_api_key, environment=pinecone_region)
|
||||
dimension = 1536
|
||||
metric = "cosine"
|
||||
pod_type = "p1"
|
||||
table_name = "auto-gpt"
|
||||
# this assumes we don't start with memory.
|
||||
# for now this works.
|
||||
# we'll need a more complicated and robust system if we want to start with memory.
|
||||
self.vec_num = 0
|
||||
|
||||
try:
|
||||
pinecone.whoami()
|
||||
except Exception as e:
|
||||
logger.typewriter_log("FAILED TO CONNECT TO PINECONE", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL)
|
||||
logger.double_check("Please ensure you have setup and configured Pinecone properly for use. " +
|
||||
f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#-pinecone-api-key-setup{Style.RESET_ALL} to ensure you've set up everything correctly.")
|
||||
exit(1)
|
||||
|
||||
if table_name not in pinecone.list_indexes():
|
||||
pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type)
|
||||
self.index = pinecone.Index(table_name)
|
||||
|
||||
def add(self, data):
|
||||
vector = get_ada_embedding(data)
|
||||
# no metadata here. We may wish to change that long term.
|
||||
resp = self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
|
||||
_text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
|
||||
self.vec_num += 1
|
||||
return _text
|
||||
|
||||
def get(self, data):
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self):
|
||||
self.index.delete(deleteAll=True)
|
||||
return "Obliviated"
|
||||
|
||||
def get_relevant(self, data, num_relevant=5):
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
:param data: The data to compare to.
|
||||
:param num_relevant: The number of relevant data to return. Defaults to 5
|
||||
"""
|
||||
query_embedding = get_ada_embedding(data)
|
||||
results = self.index.query(query_embedding, top_k=num_relevant, include_metadata=True)
|
||||
sorted_results = sorted(results.matches, key=lambda x: x.score)
|
||||
return [str(item['metadata']["raw_text"]) for item in sorted_results]
|
||||
|
||||
def get_stats(self):
|
||||
return self.index.describe_index_stats()
|
||||
@@ -1,155 +0,0 @@
|
||||
"""Redis memory provider."""
|
||||
from typing import Any, List, Optional
|
||||
import redis
|
||||
from redis.commands.search.field import VectorField, TextField
|
||||
from redis.commands.search.query import Query
|
||||
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
||||
import numpy as np
|
||||
|
||||
from memory.base import MemoryProviderSingleton, get_ada_embedding
|
||||
from logger import logger
|
||||
from colorama import Fore, Style
|
||||
|
||||
|
||||
SCHEMA = [
|
||||
TextField("data"),
|
||||
VectorField(
|
||||
"embedding",
|
||||
"HNSW",
|
||||
{
|
||||
"TYPE": "FLOAT32",
|
||||
"DIM": 1536,
|
||||
"DISTANCE_METRIC": "COSINE"
|
||||
}
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class RedisMemory(MemoryProviderSingleton):
|
||||
def __init__(self, cfg):
|
||||
"""
|
||||
Initializes the Redis memory provider.
|
||||
|
||||
Args:
|
||||
cfg: The config object.
|
||||
|
||||
Returns: None
|
||||
"""
|
||||
redis_host = cfg.redis_host
|
||||
redis_port = cfg.redis_port
|
||||
redis_password = cfg.redis_password
|
||||
self.dimension = 1536
|
||||
self.redis = redis.Redis(
|
||||
host=redis_host,
|
||||
port=redis_port,
|
||||
password=redis_password,
|
||||
db=0 # Cannot be changed
|
||||
)
|
||||
self.cfg = cfg
|
||||
|
||||
# Check redis connection
|
||||
try:
|
||||
self.redis.ping()
|
||||
except redis.ConnectionError as e:
|
||||
logger.typewriter_log("FAILED TO CONNECT TO REDIS", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL)
|
||||
logger.double_check("Please ensure you have setup and configured Redis properly for use. " +
|
||||
f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL} to ensure you've set up everything correctly.")
|
||||
exit(1)
|
||||
|
||||
if cfg.wipe_redis_on_start:
|
||||
self.redis.flushall()
|
||||
try:
|
||||
self.redis.ft(f"{cfg.memory_index}").create_index(
|
||||
fields=SCHEMA,
|
||||
definition=IndexDefinition(
|
||||
prefix=[f"{cfg.memory_index}:"],
|
||||
index_type=IndexType.HASH
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
print("Error creating Redis search index: ", e)
|
||||
existing_vec_num = self.redis.get(f'{cfg.memory_index}-vec_num')
|
||||
self.vec_num = int(existing_vec_num.decode('utf-8')) if\
|
||||
existing_vec_num else 0
|
||||
|
||||
def add(self, data: str) -> str:
|
||||
"""
|
||||
Adds a data point to the memory.
|
||||
|
||||
Args:
|
||||
data: The data to add.
|
||||
|
||||
Returns: Message indicating that the data has been added.
|
||||
"""
|
||||
if 'Command Error:' in data:
|
||||
return ""
|
||||
vector = get_ada_embedding(data)
|
||||
vector = np.array(vector).astype(np.float32).tobytes()
|
||||
data_dict = {
|
||||
b"data": data,
|
||||
"embedding": vector
|
||||
}
|
||||
pipe = self.redis.pipeline()
|
||||
pipe.hset(f"{self.cfg.memory_index}:{self.vec_num}", mapping=data_dict)
|
||||
_text = f"Inserting data into memory at index: {self.vec_num}:\n"\
|
||||
f"data: {data}"
|
||||
self.vec_num += 1
|
||||
pipe.set(f'{self.cfg.memory_index}-vec_num', self.vec_num)
|
||||
pipe.execute()
|
||||
return _text
|
||||
|
||||
def get(self, data: str) -> Optional[List[Any]]:
|
||||
"""
|
||||
Gets the data from the memory that is most relevant to the given data.
|
||||
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
|
||||
Returns: The most relevant data.
|
||||
"""
|
||||
return self.get_relevant(data, 1)
|
||||
|
||||
def clear(self) -> str:
|
||||
"""
|
||||
Clears the redis server.
|
||||
|
||||
Returns: A message indicating that the memory has been cleared.
|
||||
"""
|
||||
self.redis.flushall()
|
||||
return "Obliviated"
|
||||
|
||||
def get_relevant(
|
||||
self,
|
||||
data: str,
|
||||
num_relevant: int = 5
|
||||
) -> Optional[List[Any]]:
|
||||
"""
|
||||
Returns all the data in the memory that is relevant to the given data.
|
||||
Args:
|
||||
data: The data to compare to.
|
||||
num_relevant: The number of relevant data to return.
|
||||
|
||||
Returns: A list of the most relevant data.
|
||||
"""
|
||||
query_embedding = get_ada_embedding(data)
|
||||
base_query = f"*=>[KNN {num_relevant} @embedding $vector AS vector_score]"
|
||||
query = Query(base_query).return_fields(
|
||||
"data",
|
||||
"vector_score"
|
||||
).sort_by("vector_score").dialect(2)
|
||||
query_vector = np.array(query_embedding).astype(np.float32).tobytes()
|
||||
|
||||
try:
|
||||
results = self.redis.ft(f"{self.cfg.memory_index}").search(
|
||||
query, query_params={"vector": query_vector}
|
||||
)
|
||||
except Exception as e:
|
||||
print("Error calling Redis search: ", e)
|
||||
return None
|
||||
return [result.data for result in results.docs]
|
||||
|
||||
def get_stats(self):
|
||||
"""
|
||||
Returns: The stats of the memory index.
|
||||
"""
|
||||
return self.redis.ft(f"{self.cfg.memory_index}").info()
|
||||
Reference in New Issue
Block a user