Vector memory revamp (part 1: refactoring) (#4208)

Additional changes:

* Improve typing

* Modularize message history memory & fix/refactor lots of things

* Fix summarization

* Move memory relevance calculation to MemoryItem & improve test

* Fix import warnings in web_selenium.py

* Remove `memory_add` ghost command

* Implement overlap in `split_text`

* Move memory tests into subdirectory

* Remove deprecated `get_ada_embedding()` and helpers

* Fix used token calculation in `chat_with_ai`

* Replace Message TypedDict by dataclass

* Fix AgentManager singleton issues in tests

---------

Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co>
This commit is contained in:
Reinier van der Leer
2023-05-25 20:31:11 +02:00
committed by GitHub
parent 10489e0df2
commit bfbe613960
92 changed files with 7282 additions and 7989 deletions

View File

@@ -0,0 +1,138 @@
from autogpt.config import Config
from autogpt.logs import logger
from .memory_item import MemoryItem, MemoryItemRelevance
from .providers.base import VectorMemoryProvider as VectorMemory
from .providers.json_file import JSONFileMemory
from .providers.no_memory import NoMemory
# Names of the memory backends that can currently be selected.
# Add a backend to this list if its (optional) import attempt is successful.
supported_memory = ["json_file", "no_memory"]

# The optional backends below are disabled for now: their imports are commented
# out, and `get_memory` raises NotImplementedError when one of them is requested.
# try:
#     from .providers.redis import RedisMemory
#
#     supported_memory.append("redis")
# except ImportError:
#     RedisMemory = None

# try:
#     from .providers.pinecone import PineconeMemory
#
#     supported_memory.append("pinecone")
# except ImportError:
#     PineconeMemory = None

# try:
#     from .providers.weaviate import WeaviateMemory
#
#     supported_memory.append("weaviate")
# except ImportError:
#     WeaviateMemory = None

# try:
#     from .providers.milvus import MilvusMemory
#
#     supported_memory.append("milvus")
# except ImportError:
#     MilvusMemory = None
def get_memory(cfg: Config, init=False) -> VectorMemory:
memory = None
match cfg.memory_backend:
case "json_file":
memory = JSONFileMemory(cfg)
case "pinecone":
raise NotImplementedError(
"The Pinecone memory backend has been rendered incompatible by work on "
"the memory system, and was removed. Whether support will be added back "
"in the future is subject to discussion, feel free to pitch in: "
"https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280"
)
# if not PineconeMemory:
# logger.warn(
# "Error: Pinecone is not installed. Please install pinecone"
# " to use Pinecone as a memory backend."
# )
# else:
# memory = PineconeMemory(cfg)
# if init:
# memory.clear()
case "redis":
raise NotImplementedError(
"The Redis memory backend has been rendered incompatible by work on "
"the memory system, and has been removed temporarily."
)
# if not RedisMemory:
# logger.warn(
# "Error: Redis is not installed. Please install redis-py to"
# " use Redis as a memory backend."
# )
# else:
# memory = RedisMemory(cfg)
case "weaviate":
raise NotImplementedError(
"The Weaviate memory backend has been rendered incompatible by work on "
"the memory system, and was removed. Whether support will be added back "
"in the future is subject to discussion, feel free to pitch in: "
"https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280"
)
# if not WeaviateMemory:
# logger.warn(
# "Error: Weaviate is not installed. Please install weaviate-client to"
# " use Weaviate as a memory backend."
# )
# else:
# memory = WeaviateMemory(cfg)
case "milvus":
raise NotImplementedError(
"The Milvus memory backend has been rendered incompatible by work on "
"the memory system, and was removed. Whether support will be added back "
"in the future is subject to discussion, feel free to pitch in: "
"https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280"
)
# if not MilvusMemory:
# logger.warn(
# "Error: pymilvus sdk is not installed."
# "Please install pymilvus to use Milvus or Zilliz Cloud as memory backend."
# )
# else:
# memory = MilvusMemory(cfg)
case "no_memory":
memory = NoMemory()
case _:
raise ValueError(
f"Unknown memory backend '{cfg.memory_backend}'. Please check your config."
)
if memory is None:
memory = JSONFileMemory(cfg)
return memory
def get_supported_memory_backends() -> list[str]:
    """Return the names of the memory backends that can currently be used.

    The module-level ``supported_memory`` list itself is handed out, not a copy.
    """
    return supported_memory
# Names exported by `from <this package> import *`. The provider classes of the
# disabled backends stay commented out until those backends are available again.
__all__ = [
    "get_memory",
    "MemoryItem",
    "MemoryItemRelevance",
    "JSONFileMemory",
    "NoMemory",
    "VectorMemory",
    # "RedisMemory",
    # "PineconeMemory",
    # "MilvusMemory",
    # "WeaviateMemory",
]

View File

@@ -0,0 +1,223 @@
from __future__ import annotations
import dataclasses
import json
from typing import Literal
import numpy as np
from autogpt.config import Config
from autogpt.llm import Message
from autogpt.llm.utils import count_string_tokens
from autogpt.logs import logger
from autogpt.processing.text import chunk_content, split_text, summarize_text
from .utils import Embedding, get_embedding
# Kinds of source a memory can originate from. `MemoryItem.from_text` stores the
# chosen value under the "source_type" key of the item's metadata.
MemoryDocType = Literal["webpage", "text_file", "code_file", "agent_history"]
@dataclasses.dataclass
class MemoryItem:
    """Memory object containing raw content as well as embeddings"""

    # The complete text that was memorized
    raw_content: str
    # Summary of the complete text
    summary: str
    # The text split into chunks
    chunks: list[str]
    # One summary per chunk, in chunk order
    chunk_summaries: list[str]
    # Embedding of `summary`
    e_summary: Embedding
    # Embeddings of `chunks`
    e_chunks: list[Embedding]
    # Free-form details about the memory; `from_text` always sets "source_type"
    metadata: dict

    def relevance_for(self, query: str, e_query: Embedding | None = None):
        """Score this memory against a query.

        Args:
            query: The query text.
            e_query: Embedding of `query`, if the caller already has it.

        Returns:
            The `MemoryItemRelevance` of this item for the query.
        """
        return MemoryItemRelevance.of(self, query, e_query)

    @staticmethod
    def from_text(
        text: str,
        source_type: MemoryDocType,
        metadata: dict | None = None,
        how_to_summarize: str | None = None,
        question_for_summary: str | None = None,
    ):
        """Build a MemoryItem from text: chunk it, summarize it and embed it.

        Args:
            text: The content to memorize.
            source_type: Where the text comes from; "code_file" is chunked with
                `chunk_content`, everything else with `split_text`.
            metadata: Extra details to store with the memory. The dict is
                copied, so the caller's object is never modified.
            how_to_summarize: Passed to `summarize_text` as `instruction`.
            question_for_summary: Passed to `summarize_text` as `question`.

        Returns:
            MemoryItem: the new memory, with "source_type" added to its metadata.
        """
        cfg = Config()
        logger.debug(f"Memorizing text:\n{'-'*32}\n{text}\n{'-'*32}\n")

        chunks = [
            chunk
            for chunk, _ in (
                split_text(text, cfg.embedding_model)
                if source_type != "code_file"
                else chunk_content(text, cfg.embedding_model)
            )
        ]
        logger.debug("Chunks: " + str(chunks))

        chunk_summaries = [
            summary
            for summary, _ in [
                summarize_text(
                    text_chunk,
                    instruction=how_to_summarize,
                    question=question_for_summary,
                )
                for text_chunk in chunks
            ]
        ]
        logger.debug("Chunk summaries: " + str(chunk_summaries))

        e_chunks = get_embedding(chunks)

        # A single chunk already has its summary; otherwise summarize the
        # concatenated chunk summaries.
        summary = (
            chunk_summaries[0]
            if len(chunks) == 1
            else summarize_text(
                "\n\n".join(chunk_summaries),
                instruction=how_to_summarize,
                question=question_for_summary,
            )[0]
        )
        logger.debug("Total summary: " + summary)

        # TODO: investigate search performance of weighted average vs summary
        # e_average = np.average(e_chunks, axis=0, weights=[len(c) for c in chunks])
        e_summary = get_embedding(summary)

        # Build a fresh dict: with a shared `{}` default, every item created
        # without metadata would end up pointing at (and mutating) the same dict.
        metadata = {**(metadata or {}), "source_type": source_type}

        return MemoryItem(
            text,
            summary,
            chunks,
            chunk_summaries,
            e_summary,
            e_chunks,
            metadata=metadata,
        )

    @staticmethod
    def from_text_file(content: str, path: str):
        """Memorize the content of a text file located at `path`."""
        return MemoryItem.from_text(content, "text_file", {"location": path})

    @staticmethod
    def from_code_file(content: str, path: str):
        """Memorize the content of a code file located at `path`."""
        # TODO: implement tailored code memories
        return MemoryItem.from_text(content, "code_file", {"location": path})

    @staticmethod
    def from_ai_action(ai_message: Message, result_message: Message):
        """Memorize an assistant message together with what followed it.

        Args:
            ai_message: The assistant's message; its role must be "assistant".
            result_message: Either user feedback or the result of the command
                specified in `ai_message`.

        Raises:
            ValueError: if `ai_message` does not have the "assistant" role.
        """
        # NOTE(review): both messages are read with subscript syntax. If
        # `Message` is a dataclass without `__getitem__`, this needs attribute
        # access instead -- confirm against `autogpt.llm.Message`.
        if ai_message["role"] != "assistant":
            raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}")

        # The content prefix tells a command result apart from human feedback
        result = (
            result_message["content"]
            if result_message["content"].startswith("Command")
            else "None"
        )
        user_input = (
            result_message["content"]
            if result_message["content"].startswith("Human feedback")
            else "None"
        )
        memory_content = (
            f"Assistant Reply: {ai_message['content']}"
            "\n\n"
            f"Result: {result}"
            "\n\n"
            f"Human Feedback: {user_input}"
        )

        return MemoryItem.from_text(
            text=memory_content,
            source_type="agent_history",
            how_to_summarize=(
                "if possible, also make clear the link between the command in the "
                "assistant's response and the command result. "
                "Do not mention the human feedback if there is none"
            ),
        )

    @staticmethod
    def from_webpage(content: str, url: str, question: str | None = None):
        """Memorize the content of a webpage, optionally summarized for a question."""
        return MemoryItem.from_text(
            text=content,
            source_type="webpage",
            metadata={"location": url},
            question_for_summary=question,
        )

    def dump(self) -> str:
        """Render the memory (length, metadata, summary, raw content) as text."""
        token_length = count_string_tokens(self.raw_content, Config().embedding_model)
        return f"""
=============== MemoryItem ===============
Length: {token_length} tokens in {len(self.e_chunks)} chunks
Metadata: {json.dumps(self.metadata, indent=2)}
---------------- SUMMARY -----------------
{self.summary}
------------------ RAW -------------------
{self.raw_content}
==========================================
"""
@dataclasses.dataclass
class MemoryItemRelevance:
    """
    Class that encapsulates memory relevance search functionality and data.
    Instances contain a MemoryItem and its relevance scores for a given query.
    """

    # The memory that was scored
    memory_item: MemoryItem
    # The query the scores apply to
    for_query: str
    # Score of the memory's summary
    summary_relevance_score: float
    # Scores of the memory's chunks, in chunk order
    chunk_relevance_scores: list[float]

    @staticmethod
    def of(
        memory_item: MemoryItem, for_query: str, e_query: Embedding | None = None
    ) -> MemoryItemRelevance:
        """Score a memory for a query.

        Args:
            memory_item: The memory to score.
            for_query: The query text.
            e_query: Embedding of the query; fetched with `get_embedding` when
                not given.

        Returns:
            MemoryItemRelevance: the memory together with its scores.
        """
        # Compare against None explicitly: `e_query or ...` raises ValueError
        # when the embedding is a NumPy array (ambiguous truth value).
        if e_query is None:
            e_query = get_embedding(for_query)
        _, srs, crs = MemoryItemRelevance.calculate_scores(memory_item, e_query)
        return MemoryItemRelevance(
            for_query=for_query,
            memory_item=memory_item,
            summary_relevance_score=srs,
            chunk_relevance_scores=crs,
        )

    @staticmethod
    def calculate_scores(
        memory: MemoryItem, compare_to: Embedding
    ) -> tuple[float, float, list[float]]:
        """
        Calculates similarity between given embedding and all embeddings of the memory

        Scores are dot products of the memory's embeddings with `compare_to`;
        the chunk scores are returned as produced by `np.dot`.

        Returns:
            float: the aggregate (max) relevance score of the memory
            float: the relevance score of the memory summary
            list: the relevance scores of the memory chunks
        """
        summary_relevance_score = np.dot(memory.e_summary, compare_to)
        chunk_relevance_scores = np.dot(memory.e_chunks, compare_to)
        logger.debug(f"Relevance of summary: {summary_relevance_score}")
        logger.debug(f"Relevance of chunks: {chunk_relevance_scores}")

        relevance_scores = [summary_relevance_score, *chunk_relevance_scores]
        logger.debug(f"Relevance scores: {relevance_scores}")
        return max(relevance_scores), summary_relevance_score, chunk_relevance_scores

    @property
    def score(self) -> float:
        """The aggregate relevance score of the memory item for the given query"""
        return max([self.summary_relevance_score, *self.chunk_relevance_scores])

    @property
    def most_relevant_chunk(self) -> tuple[str, float]:
        """The most relevant chunk of the memory item + its score for the given query"""
        i_relmax = int(np.argmax(self.chunk_relevance_scores))
        return self.memory_item.chunks[i_relmax], self.chunk_relevance_scores[i_relmax]

    def __str__(self):
        return (
            f"{self.memory_item.summary} ({self.summary_relevance_score}) "
            f"{self.chunk_relevance_scores}"
        )

View File

@@ -0,0 +1,7 @@
from .json_file import JSONFileMemory
from .no_memory import NoMemory
# Provider classes re-exported by this package
__all__ = [
    "JSONFileMemory",
    "NoMemory",
]

View File

@@ -0,0 +1,74 @@
import abc
import functools
from typing import MutableSet, Sequence
import numpy as np
from autogpt.config.config import Config
from autogpt.logs import logger
from autogpt.singleton import AbstractSingleton
from .. import MemoryItem, MemoryItemRelevance
from ..utils import Embedding, get_embedding
class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
    """Base class for memory backends: a mutable set of MemoryItems with
    relevance search on top.

    Subclasses supply the `MutableSet` primitives (`__contains__`, `__iter__`,
    `__len__`, `add`, `discard`); the search helpers below are built on those.
    """

    @abc.abstractmethod
    def __init__(self, config: Config):
        pass

    def get(self, query: str) -> MemoryItemRelevance | None:
        """
        Gets the data from the memory that is most relevant to the given query.

        Args:
            query: The query to compare stored memories to.

        Returns: The most relevant Memory, or None if nothing is stored
        """
        result = self.get_relevant(query, 1)
        return result[0] if result else None

    def get_relevant(self, query: str, k: int) -> Sequence[MemoryItemRelevance]:
        """
        Returns the top-k most relevant memories for the given query

        Args:
            query: the query to compare stored memories to
            k: the number of relevant memories to fetch

        Returns:
            list[MemoryItemRelevance] containing the top [k] relevant memories,
            most relevant first; empty if k < 1 or nothing is stored
        """
        # Without this guard, k == 0 would slice `[-0:]` and return everything.
        if k < 1 or len(self) < 1:
            return []

        logger.debug(
            f"Searching for {k} relevant memories for query '{query}'; "
            f"{len(self)} memories in index"
        )

        relevances = self.score_memories_for_relevance(query)
        logger.debug(f"Memory relevance scores: {[str(r) for r in relevances]}")

        # take last k items and reverse
        top_k_indices = np.argsort([r.score for r in relevances])[-k:][::-1]

        return [relevances[i] for i in top_k_indices]

    def score_memories_for_relevance(
        self, for_query: str
    ) -> Sequence[MemoryItemRelevance]:
        """
        Returns MemoryItemRelevance for every memory in the index.
        Implementations may override this function for performance purposes.
        """
        # Embed the query once and reuse it for every memory
        e_query: Embedding = get_embedding(for_query)
        return [m.relevance_for(for_query, e_query) for m in self]

    def get_stats(self) -> tuple[int, int]:
        """
        Returns:
            tuple (n_memories: int, n_chunks: int): the stats of the memory index
        """
        return len(self), sum(len(m.e_chunks) for m in self)

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
from pathlib import Path
from typing import Iterator
import orjson
from autogpt.config import Config
from autogpt.logs import logger
from ..memory_item import MemoryItem
from .base import VectorMemoryProvider
class JSONFileMemory(VectorMemoryProvider):
    """Memory backend that stores memories in a JSON file"""

    # orjson flags used when writing the index file
    SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS

    # Location of the index file
    file_path: Path
    # The stored memories, in insertion order
    memories: list[MemoryItem]

    def __init__(self, cfg: Config) -> None:
        """Initialize a class instance

        The index file is `<cfg.memory_index>.json` inside `cfg.workspace_path`.
        It is created if missing and then written with an empty index.

        Args:
            cfg: Config object

        Returns:
            None
        """
        workspace_path = Path(cfg.workspace_path)
        self.file_path = workspace_path / f"{cfg.memory_index}.json"
        self.file_path.touch()
        logger.debug(f"Initialized {__name__} with index path {self.file_path}")

        self.memories = []
        self.save_index()

    def __iter__(self) -> Iterator[MemoryItem]:
        return iter(self.memories)

    def __contains__(self, x: MemoryItem) -> bool:
        return x in self.memories

    def __len__(self) -> int:
        return len(self.memories)

    def add(self, item: MemoryItem):
        """Store a memory and persist the index.

        Returns:
            int: the number of memories stored after the addition
        """
        self.memories.append(item)
        self.save_index()
        return len(self.memories)

    def discard(self, item: MemoryItem):
        """Remove a memory if it is stored; do nothing otherwise."""
        # Remove from the list directly. `MutableSet.remove` is implemented on
        # top of `discard`, so calling it from here would recurse forever.
        try:
            self.memories.remove(item)
        except ValueError:
            return  # not stored, nothing to do
        self.save_index()

    def clear(self):
        """Clears the data in memory."""
        self.memories.clear()
        self.save_index()

    def save_index(self):
        """Write all memories to the index file.

        Returns:
            int: the number of bytes written
        """
        logger.debug(f"Saving memory index to file {self.file_path}")
        with self.file_path.open("wb") as f:
            return f.write(orjson.dumps(self.memories, option=self.SAVE_OPTIONS))

View File

@@ -0,0 +1,36 @@
"""A class that does not store any data. This is the default memory provider."""
from __future__ import annotations
from typing import Iterator, Optional
from autogpt.config.config import Config
from .. import MemoryItem
from .base import VectorMemoryProvider
class NoMemory(VectorMemoryProvider):
    """Memory provider that never holds anything: every operation is a no-op."""

    def __init__(self, config: Optional[Config] = None):
        """Accept an optional config and ignore it; there is nothing to set up."""

    def __iter__(self) -> Iterator[MemoryItem]:
        """Return an iterator that is already exhausted."""
        return iter(())

    def __contains__(self, x: MemoryItem) -> bool:
        """Report every item as absent."""
        return False

    def __len__(self) -> int:
        """Report zero stored items."""
        return 0

    def add(self, item: MemoryItem):
        """Ignore the item."""
        return None

    def discard(self, item: MemoryItem):
        """Nothing is stored, so there is nothing to remove."""
        return None

    def clear(self):
        """Nothing is stored, so there is nothing to clear."""
        return None

View File

@@ -0,0 +1,71 @@
from typing import Any, overload
import numpy as np
import numpy.typing as npt
import openai
from autogpt.config import Config
from autogpt.llm.utils import metered, retry_openai_api
from autogpt.logs import logger
# An embedding is either a plain list of float32 values or a float32 NumPy array
Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]]
"""Embedding vector"""

# Text that has already been tokenized: a list of integer token ids
TText = list[int]
"""Token array representing text"""
@overload
def get_embedding(input: str | TText) -> Embedding:
    ...


@overload
def get_embedding(input: list[str] | list[TText]) -> list[Embedding]:
    ...


@metered
@retry_openai_api()
def get_embedding(
    input: str | TText | list[str] | list[TText],
) -> Embedding | list[Embedding]:
    """Request embedding(s) for the input from the OpenAI embeddings API.

    The model is taken from `Config().embedding_model`; with Azure enabled the
    matching deployment id is passed as `engine` instead of `model`.

    Args:
        input: A single text, given as a string or as an array of tokens, or
            several of them as a list of strings or a list of token arrays.
            Newlines in string inputs are replaced by spaces.

    Returns:
        One embedding for a single input; for multiple inputs a list of
        embeddings ordered by the `index` of each API result.
    """
    cfg = Config()

    # A list whose elements are not ints is a batch; a list of ints is one
    # tokenized text.
    multiple = isinstance(input, list) and not any(isinstance(i, int) for i in input)

    if isinstance(input, str):
        input = input.replace("\n", " ")
    elif multiple and isinstance(input[0], str):
        input = [text.replace("\n", " ") for text in input]

    model = cfg.embedding_model
    use_azure = cfg.use_azure
    kwargs = (
        {"engine": cfg.get_azure_deployment_id_for_model(model)}
        if use_azure
        else {"model": model}
    )

    count_note = f"s for {len(input)} inputs" if multiple else ""
    azure_note = f" via Azure deployment '{kwargs['engine']}'" if use_azure else ""
    logger.debug(f"Getting embedding{count_note} with model '{model}'{azure_note}")

    response = openai.Embedding.create(
        input=input,
        api_key=cfg.openai_api_key,
        **kwargs,
    )
    results = response.data

    if multiple:
        # Put the results back in input order before extracting the vectors
        ordered = sorted(results, key=lambda entry: entry["index"])
        return [entry["embedding"] for entry in ordered]

    return results[0]["embedding"]