Vector memory revamp (part 1: refactoring) (#4208)

Additional changes: * Improve typing * Modularize message history memory & fix/refactor lots of things * Fix summarization * Move memory relevance calculation to MemoryItem & improve test * Fix import warnings in web_selenium.py * Remove `memory_add` ghost command * Implement overlap in `split_text` * Move memory tests into subdirectory * Remove deprecated `get_ada_embedding()` and helpers * Fix used token calculation in `chat_with_ai` * Replace Message TypedDict by dataclass * Fix AgentManager singleton issues in tests --------- Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co>
2026-04-30 03:00:41 -04:00 · 2023-05-25 20:31:11 +02:00
parent 10489e0df2
commit bfbe613960
92 changed files with 7282 additions and 7989 deletions
--- a/autogpt/memory/vector/init.py
+++ b/autogpt/memory/vector/init.py
@@ -0,0 +1,138 @@
+from autogpt.config import Config
+from autogpt.logs import logger
+
+from .memory_item import MemoryItem, MemoryItemRelevance
+from .providers.base import VectorMemoryProvider as VectorMemory
+from .providers.json_file import JSONFileMemory
+from .providers.no_memory import NoMemory
+
+# List of supported memory backends, as returned by get_supported_memory_backends().
+# Add a backend to this list if the import attempt is successful
+# (the optional-backend import attempts below are currently commented out).
+supported_memory: list[str] = ["json_file", "no_memory"]
+
+# try:
+#     from .providers.redis import RedisMemory
+
+#     supported_memory.append("redis")
+# except ImportError:
+#     RedisMemory = None
+
+# try:
+#     from .providers.pinecone import PineconeMemory
+
+#     supported_memory.append("pinecone")
+# except ImportError:
+#     PineconeMemory = None
+
+# try:
+#     from .providers.weaviate import WeaviateMemory
+
+#     supported_memory.append("weaviate")
+# except ImportError:
+#     WeaviateMemory = None
+
+# try:
+#     from .providers.milvus import MilvusMemory
+
+#     supported_memory.append("milvus")
+# except ImportError:
+#     MilvusMemory = None
+
+
+def get_memory(cfg: Config, init=False) -> VectorMemory:
+    """Instantiate the vector memory backend selected in the configuration.
+
+    Args:
+        cfg: Application config; its `memory_backend` value selects the provider.
+        init: Currently unused. Kept so existing callers keep working; the removed
+            Pinecone integration used it to clear the index on start-up.
+
+    Returns:
+        VectorMemory: a `JSONFileMemory` for "json_file",
+        or a `NoMemory` for "no_memory".
+
+    Raises:
+        NotImplementedError: If a backend that was removed during the memory
+            revamp ("pinecone", "redis", "weaviate", "milvus") is selected.
+        ValueError: If `cfg.memory_backend` is not a known backend name.
+    """
+    # Every branch either returns a provider or raises, so no fallback is needed.
+    match cfg.memory_backend:
+        case "json_file":
+            return JSONFileMemory(cfg)
+
+        case "no_memory":
+            return NoMemory()
+
+        case "redis":
+            raise NotImplementedError(
+                "The Redis memory backend has been rendered incompatible by work on "
+                "the memory system, and has been removed temporarily."
+            )
+
+        case "pinecone" | "weaviate" | "milvus" as backend:
+            # These three share the same removal notice; only the name differs.
+            raise NotImplementedError(
+                f"The {backend.capitalize()} memory backend has been rendered "
+                "incompatible by work on the memory system, and was removed. "
+                "Whether support will be added back in the future is subject to "
+                "discussion, feel free to pitch in: "
+                "https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280"
+            )
+
+        case _:
+            raise ValueError(
+                f"Unknown memory backend '{cfg.memory_backend}'. Please check your config."
+            )
+
+
+def get_supported_memory_backends():
+    """Return the module-level list of supported memory backend names.
+
+    The list object itself is returned, not a copy.
+    """
+    backends = supported_memory
+    return backends
+
+
+__all__ = [
+    "get_memory",
+    "MemoryItem",
+    "MemoryItemRelevance",
+    "JSONFileMemory",
+    "NoMemory",
+    "VectorMemory",
+    # "RedisMemory",
+    # "PineconeMemory",
+    # "MilvusMemory",
+    # "WeaviateMemory",
+]
--- a/autogpt/memory/vector/memory_item.py
+++ b/autogpt/memory/vector/memory_item.py
@@ -0,0 +1,223 @@
+from __future__ import annotations
+
+import dataclasses
+import json
+from typing import Literal
+
+import numpy as np
+
+from autogpt.config import Config
+from autogpt.llm import Message
+from autogpt.llm.utils import count_string_tokens
+from autogpt.logs import logger
+from autogpt.processing.text import chunk_content, split_text, summarize_text
+
+from .utils import Embedding, get_embedding
+
+MemoryDocType = Literal["webpage", "text_file", "code_file", "agent_history"]
+
+
+@dataclasses.dataclass
+class MemoryItem:
+    """Memory object containing raw content as well as embeddings.
+
+    Instances are normally created through one of the `from_*` factory methods,
+    which chunk, summarize and embed the given text.
+    """
+
+    # The complete text this memory was created from
+    raw_content: str
+    # Summary of the whole text
+    summary: str
+    # The text split into chunks
+    chunks: list[str]
+    # One summary per chunk, in chunk order
+    chunk_summaries: list[str]
+    # Embedding of `summary`
+    e_summary: Embedding
+    # Embeddings of `chunks` (the raw chunks, not their summaries)
+    e_chunks: list[Embedding]
+    # Free-form metadata; `from_text` always sets the "source_type" key
+    metadata: dict
+
+    def relevance_for(self, query: str, e_query: Embedding | None = None):
+        """Score this memory against `query`.
+
+        Args:
+            query: The query text.
+            e_query: Embedding of `query`, if the caller already has it.
+
+        Returns:
+            MemoryItemRelevance: the relevance of this item for the query.
+        """
+        return MemoryItemRelevance.of(self, query, e_query)
+
+    @staticmethod
+    def _message_field(message: Message, field: str):
+        """Read `field` from a message that is either subscriptable or an object."""
+        try:
+            return message[field]
+        except TypeError:
+            # Not subscriptable (e.g. a dataclass instance): use attribute access.
+            return getattr(message, field)
+
+    @staticmethod
+    def from_text(
+        text: str,
+        source_type: MemoryDocType,
+        metadata: dict | None = None,
+        how_to_summarize: str | None = None,
+        question_for_summary: str | None = None,
+    ):
+        """Build a MemoryItem by chunking, summarizing and embedding `text`.
+
+        Args:
+            text: The content to memorize.
+            source_type: Kind of source; "code_file" is chunked with
+                `chunk_content`, everything else with `split_text`.
+            metadata: Optional extra metadata. It is copied, never modified.
+            how_to_summarize: Passed to `summarize_text` as `instruction`.
+            question_for_summary: Passed to `summarize_text` as `question`.
+
+        Returns:
+            MemoryItem: the new memory, with "source_type" added to its metadata.
+        """
+        cfg = Config()
+        logger.debug(f"Memorizing text:\n{'-'*32}\n{text}\n{'-'*32}\n")
+
+        chunks = [
+            chunk
+            for chunk, _ in (
+                split_text(text, cfg.embedding_model)
+                if source_type != "code_file"
+                else chunk_content(text, cfg.embedding_model)
+            )
+        ]
+        logger.debug("Chunks: " + str(chunks))
+
+        chunk_summaries = [
+            summary
+            for summary, _ in [
+                summarize_text(
+                    text_chunk,
+                    instruction=how_to_summarize,
+                    question=question_for_summary,
+                )
+                for text_chunk in chunks
+            ]
+        ]
+        logger.debug("Chunk summaries: " + str(chunk_summaries))
+
+        e_chunks = get_embedding(chunks)
+
+        # A single chunk's summary already is the summary of the whole text;
+        # otherwise summarize the concatenated chunk summaries.
+        summary = (
+            chunk_summaries[0]
+            if len(chunks) == 1
+            else summarize_text(
+                "\n\n".join(chunk_summaries),
+                instruction=how_to_summarize,
+                question=question_for_summary,
+            )[0]
+        )
+        logger.debug("Total summary: " + summary)
+
+        # TODO: investigate search performance of weighted average vs summary
+        # e_average = np.average(e_chunks, axis=0, weights=[len(c) for c in chunks])
+        e_summary = get_embedding(summary)
+
+        # Build a fresh dict so that neither the caller's dict nor a shared
+        # default object is mutated across calls.
+        metadata = {**(metadata or {}), "source_type": source_type}
+
+        return MemoryItem(
+            text,
+            summary,
+            chunks,
+            chunk_summaries,
+            e_summary,
+            e_chunks,
+            metadata=metadata,
+        )
+
+    @staticmethod
+    def from_text_file(content: str, path: str):
+        """Create a "text_file" memory; `path` is stored as metadata "location"."""
+        return MemoryItem.from_text(content, "text_file", {"location": path})
+
+    @staticmethod
+    def from_code_file(content: str, path: str):
+        """Create a "code_file" memory; `path` is stored as metadata "location"."""
+        # TODO: implement tailored code memories
+        return MemoryItem.from_text(content, "code_file", {"location": path})
+
+    @staticmethod
+    def from_ai_action(ai_message: Message, result_message: Message):
+        """Create an "agent_history" memory from an assistant message and its outcome.
+
+        Args:
+            ai_message: The assistant's message; its role must be "assistant".
+            result_message: Contains either user feedback or the result of the
+                command specified in `ai_message`.
+
+        Raises:
+            ValueError: If `ai_message` does not have the "assistant" role.
+        """
+        ai_role = MemoryItem._message_field(ai_message, "role")
+        if ai_role != "assistant":
+            raise ValueError(f"Invalid role on 'ai_message': {ai_role}")
+
+        ai_content = MemoryItem._message_field(ai_message, "content")
+        result_content = MemoryItem._message_field(result_message, "content")
+
+        # The content prefix tells command results and human feedback apart
+        result = result_content if result_content.startswith("Command") else "None"
+        user_input = (
+            result_content if result_content.startswith("Human feedback") else "None"
+        )
+        memory_content = (
+            f"Assistant Reply: {ai_content}"
+            "\n\n"
+            f"Result: {result}"
+            "\n\n"
+            f"Human Feedback: {user_input}"
+        )
+
+        return MemoryItem.from_text(
+            text=memory_content,
+            source_type="agent_history",
+            how_to_summarize="if possible, also make clear the link between the command in the assistant's response and the command result. Do not mention the human feedback if there is none",
+        )
+
+    @staticmethod
+    def from_webpage(content: str, url: str, question: str | None = None):
+        """Create a "webpage" memory; `question`, if given, guides the summary."""
+        return MemoryItem.from_text(
+            text=content,
+            source_type="webpage",
+            metadata={"location": url},
+            question_for_summary=question,
+        )
+
+    def dump(self) -> str:
+        """Return a human-readable report: token/chunk counts, metadata, summary, raw text."""
+        token_length = count_string_tokens(self.raw_content, Config().embedding_model)
+        return f"""
+=============== MemoryItem ===============
+Length: {token_length} tokens in {len(self.e_chunks)} chunks
+Metadata: {json.dumps(self.metadata, indent=2)}
+---------------- SUMMARY -----------------
+{self.summary}
+------------------ RAW -------------------
+{self.raw_content}
+==========================================
+"""
+
+
+@dataclasses.dataclass
+class MemoryItemRelevance:
+    """
+    Class that encapsulates memory relevance search functionality and data.
+    Instances contain a MemoryItem and its relevance scores for a given query.
+    """
+
+    # The memory that was scored
+    memory_item: MemoryItem
+    # The query the scores below were calculated for
+    for_query: str
+    # Dot product of the query embedding with the memory's summary embedding
+    summary_relevance_score: float
+    # Dot products of the query embedding with each chunk embedding, in chunk order
+    chunk_relevance_scores: list[float]
+
+    @staticmethod
+    def of(
+        memory_item: MemoryItem, for_query: str, e_query: Embedding | None = None
+    ) -> MemoryItemRelevance:
+        """Score `memory_item` against `for_query`.
+
+        Args:
+            memory_item: The memory to score.
+            for_query: The query text.
+            e_query: Embedding of `for_query`; fetched with `get_embedding`
+                when not supplied.
+        """
+        # Compare against None explicitly: truth-testing a multi-element numpy
+        # array (a valid Embedding) raises ValueError.
+        if e_query is None:
+            e_query = get_embedding(for_query)
+        _, srs, crs = MemoryItemRelevance.calculate_scores(memory_item, e_query)
+        return MemoryItemRelevance(
+            for_query=for_query,
+            memory_item=memory_item,
+            summary_relevance_score=srs,
+            chunk_relevance_scores=crs,
+        )
+
+    @staticmethod
+    def calculate_scores(
+        memory: MemoryItem, compare_to: Embedding
+    ) -> tuple[float, float, list[float]]:
+        """
+        Calculates similarity between given embedding and all embeddings of the memory
+
+        Returns:
+            float: the aggregate (max) relevance score of the memory
+            float: the relevance score of the memory summary
+            list: the relevance scores of the memory chunks
+        """
+        summary_relevance_score = np.dot(memory.e_summary, compare_to)
+        chunk_relevance_scores = np.dot(memory.e_chunks, compare_to)
+        logger.debug(f"Relevance of summary: {summary_relevance_score}")
+        logger.debug(f"Relevance of chunks: {chunk_relevance_scores}")
+
+        relevance_scores = [summary_relevance_score, *chunk_relevance_scores]
+        logger.debug(f"Relevance scores: {relevance_scores}")
+        return max(relevance_scores), summary_relevance_score, chunk_relevance_scores
+
+    @property
+    def score(self) -> float:
+        """The aggregate relevance score of the memory item for the given query"""
+        return max([self.summary_relevance_score, *self.chunk_relevance_scores])
+
+    @property
+    def most_relevant_chunk(self) -> tuple[str, float]:
+        """The most relevant chunk of the memory item + its score for the given query"""
+        i_relmax = np.argmax(self.chunk_relevance_scores)
+        return self.memory_item.chunks[i_relmax], self.chunk_relevance_scores[i_relmax]
+
+    def __str__(self):
+        return (
+            f"{self.memory_item.summary} ({self.summary_relevance_score}) "
+            f"{self.chunk_relevance_scores}"
+        )
--- a/autogpt/memory/vector/providers/init.py
+++ b/autogpt/memory/vector/providers/init.py
@@ -0,0 +1,7 @@
+from .json_file import JSONFileMemory
+from .no_memory import NoMemory
+
+__all__ = [
+    "JSONFileMemory",
+    "NoMemory",
+]
--- a/autogpt/memory/vector/providers/base.py
+++ b/autogpt/memory/vector/providers/base.py
@@ -0,0 +1,74 @@
+import abc
+import functools
+from typing import MutableSet, Sequence
+
+import numpy as np
+
+from autogpt.config.config import Config
+from autogpt.logs import logger
+from autogpt.singleton import AbstractSingleton
+
+from .. import MemoryItem, MemoryItemRelevance
+from ..utils import Embedding, get_embedding
+
+
+class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
+    """Base class for vector memory backends.
+
+    Concrete providers supply the set operations; relevance search is
+    implemented here on top of iteration and `len()`.
+    """
+
+    @abc.abstractmethod
+    def __init__(self, config: Config):
+        pass
+
+    def get(self, query: str) -> MemoryItemRelevance | None:
+        """
+        Gets the data from the memory that is most relevant to the given query.
+
+        Args:
+            query: The query to compare stored memories to.
+
+        Returns: The most relevant Memory, or None if nothing is stored
+        """
+        result = self.get_relevant(query, 1)
+        return result[0] if result else None
+
+    def get_relevant(self, query: str, k: int) -> Sequence[MemoryItemRelevance]:
+        """
+        Returns the top-k most relevant memories for the given query
+
+        Args:
+            query: the query to compare stored memories to
+            k: the number of relevant memories to fetch
+
+        Returns:
+            list[MemoryItemRelevance] containing the top [k] relevant memories,
+            most relevant first; empty if k < 1 or no memories are stored
+        """
+        # Guard k < 1 explicitly: the `[-k:]` slice below would select *all*
+        # entries for k == 0 instead of none.
+        if k < 1 or len(self) < 1:
+            return []
+
+        logger.debug(
+            f"Searching for {k} relevant memories for query '{query}'; "
+            f"{len(self)} memories in index"
+        )
+
+        relevances = self.score_memories_for_relevance(query)
+        logger.debug(f"Memory relevance scores: {[str(r) for r in relevances]}")
+
+        # take last k items and reverse
+        top_k_indices = np.argsort([r.score for r in relevances])[-k:][::-1]
+
+        return [relevances[i] for i in top_k_indices]
+
+    def score_memories_for_relevance(
+        self, for_query: str
+    ) -> Sequence[MemoryItemRelevance]:
+        """
+        Returns MemoryItemRelevance for every memory in the index.
+        Implementations may override this function for performance purposes.
+        """
+        # Embed the query once and reuse the embedding for every memory
+        e_query: Embedding = get_embedding(for_query)
+        return [m.relevance_for(for_query, e_query) for m in self]
+
+    def get_stats(self) -> tuple[int, int]:
+        """
+        Returns:
+            tuple (n_memories: int, n_chunks: int): the stats of the memory index
+        """
+        return len(self), sum(len(m.e_chunks) for m in self)
--- a/autogpt/memory/vector/providers/json_file.py
+++ b/autogpt/memory/vector/providers/json_file.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Iterator
+
+import orjson
+
+from autogpt.config import Config
+from autogpt.logs import logger
+
+from ..memory_item import MemoryItem
+from .base import VectorMemoryProvider
+
+
+class JSONFileMemory(VectorMemoryProvider):
+    """Memory backend that stores memories in a JSON file"""
+
+    # orjson flags enabling serialization of numpy values and dataclass instances
+    SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS
+
+    file_path: Path
+    memories: list[MemoryItem]
+
+    def __init__(self, cfg: Config) -> None:
+        """Initialize a class instance
+
+        The index file is `<workspace_path>/<memory_index>.json`. The instance
+        starts with an empty memory list which is written out immediately, so
+        any previous content of that file is overwritten.
+
+        Args:
+            cfg: Config object
+
+        Returns:
+            None
+        """
+        workspace_path = Path(cfg.workspace_path)
+        self.file_path = workspace_path / f"{cfg.memory_index}.json"
+        self.file_path.touch()
+        logger.debug(f"Initialized {__name__} with index path {self.file_path}")
+
+        self.memories = []
+        self.save_index()
+
+    def __iter__(self) -> Iterator[MemoryItem]:
+        return iter(self.memories)
+
+    def __contains__(self, x: MemoryItem) -> bool:
+        return x in self.memories
+
+    def __len__(self) -> int:
+        return len(self.memories)
+
+    def add(self, item: MemoryItem):
+        """Append `item`, persist the index and return the new number of memories."""
+        self.memories.append(item)
+        self.save_index()
+        return len(self.memories)
+
+    def discard(self, item: MemoryItem):
+        """Remove `item` if it is stored and persist the index; otherwise do nothing."""
+        # Remove from the backing list directly: the inherited MutableSet.remove()
+        # is itself implemented via discard(), so calling it from here would
+        # recurse without ever removing anything.
+        try:
+            self.memories.remove(item)
+        except ValueError:
+            # Item is not stored: discarding is a no-op by contract.
+            return
+        self.save_index()
+
+    def clear(self):
+        """Clears the data in memory."""
+        self.memories.clear()
+        self.save_index()
+
+    def save_index(self):
+        """Serialize all memories to the index file; returns the number of bytes written."""
+        logger.debug(f"Saving memory index to file {self.file_path}")
+        with self.file_path.open("wb") as f:
+            return f.write(orjson.dumps(self.memories, option=self.SAVE_OPTIONS))
--- a/autogpt/memory/vector/providers/no_memory.py
+++ b/autogpt/memory/vector/providers/no_memory.py
@@ -0,0 +1,36 @@
+"""A class that does not store any data. This is the default memory provider."""
+from __future__ import annotations
+
+from typing import Iterator, Optional
+
+from autogpt.config.config import Config
+
+from .. import MemoryItem
+from .base import VectorMemoryProvider
+
+
+class NoMemory(VectorMemoryProvider):
+    """
+    Memory provider that never retains anything: every operation is a no-op.
+    This is the default memory provider.
+    """
+
+    def __init__(self, config: Optional[Config] = None):
+        """Accept an optional config; nothing is read from it or set up."""
+        return None
+
+    def __iter__(self) -> Iterator[MemoryItem]:
+        """Iterate over the stored memories, of which there are never any."""
+        return iter(())
+
+    def __contains__(self, x: MemoryItem) -> bool:
+        """Nothing is ever stored, so no item is ever contained."""
+        return False
+
+    def __len__(self) -> int:
+        """The number of stored memories is always zero."""
+        return 0
+
+    def add(self, item: MemoryItem):
+        """Ignore `item`; it is not stored."""
+        return None
+
+    def discard(self, item: MemoryItem):
+        """Nothing to remove."""
+        return None
+
+    def clear(self):
+        """Nothing to clear."""
+        return None
--- a/autogpt/memory/vector/utils.py
+++ b/autogpt/memory/vector/utils.py
@@ -0,0 +1,71 @@
+from typing import Any, overload
+
+import numpy as np
+import numpy.typing as npt
+import openai
+
+from autogpt.config import Config
+from autogpt.llm.utils import metered, retry_openai_api
+from autogpt.logs import logger
+
+Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]]
+"""Embedding vector"""
+TText = list[int]
+"""Token array representing text"""
+
+
+@overload
+def get_embedding(input: str | TText) -> Embedding:
+    ...
+
+
+@overload
+def get_embedding(input: list[str] | list[TText]) -> list[Embedding]:
+    ...
+
+
+@metered
+@retry_openai_api()
+def get_embedding(
+    input: str | TText | list[str] | list[TText],
+) -> Embedding | list[Embedding]:
+    """Get an embedding from the configured embedding model.
+
+    Args:
+        input: Input text to get embeddings for, encoded as a string or array of tokens.
+            Multiple inputs may be given as a list of strings or token arrays.
+
+    Returns:
+        A single embedding for a single input (string or token array), or a list
+        of embeddings in input order when multiple inputs are given.
+
+    Raises:
+        ValueError: If `input` is an empty list.
+    """
+    # An empty list is neither a usable token array nor a usable batch; fail
+    # with a clear message instead of an IndexError further down.
+    if isinstance(input, list) and not input:
+        raise ValueError("Cannot get an embedding for an empty input list")
+
+    cfg = Config()
+    # A list of ints is one token array; a list of anything else is a batch
+    multiple = isinstance(input, list) and all(not isinstance(i, int) for i in input)
+
+    # Newlines are replaced by spaces in text inputs; token arrays are sent as-is
+    if isinstance(input, str):
+        input = input.replace("\n", " ")
+    elif multiple and isinstance(input[0], str):
+        input = [text.replace("\n", " ") for text in input]
+
+    model = cfg.embedding_model
+    if cfg.use_azure:
+        kwargs = {"engine": cfg.get_azure_deployment_id_for_model(model)}
+    else:
+        kwargs = {"model": model}
+
+    logger.debug(
+        f"Getting embedding{f's for {len(input)} inputs' if multiple else ''}"
+        f" with model '{model}'"
+        + (f" via Azure deployment '{kwargs['engine']}'" if cfg.use_azure else "")
+    )
+
+    embeddings = openai.Embedding.create(
+        input=input,
+        api_key=cfg.openai_api_key,
+        **kwargs,
+    ).data
+
+    if not multiple:
+        return embeddings[0]["embedding"]
+
+    # Restore input order using the index attached to each returned item
+    embeddings = sorted(embeddings, key=lambda x: x["index"])
+    return [d["embedding"] for d in embeddings]