Mirror of https://github.com/Significant-Gravitas/AutoGPT.git
Synced 2026-01-10 23:58:06 -05:00
Implement loading MemoryItems from file in JSONFileMemory (#4703)
Further changes:
* remove `init` param from `get_memory()`, replace usages by `memory.clear()`
* make token length calculation optional in `MemoryItem.dump()`
Committed by: GitHub
Parent: 6e6e7fcc9a
Commit: f0a5250da5
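
The two API changes above alter call sites throughout the codebase. A minimal
migration sketch (module paths and the `Config` setup are assumptions, not part
of this commit):

    # Hypothetical call-site migration; module paths assumed from this diff.
    from autogpt.config import Config
    from autogpt.memory.vector import get_memory

    cfg = Config()

    # Before: clearing was coupled to construction via the `init` flag.
    #   memory = get_memory(cfg, init=True)
    # After: construction and clearing are separate, explicit steps.
    memory = get_memory(cfg)
    memory.clear()

    # dump() now skips token counting unless asked, avoiding a tokenizer pass:
    #   item.dump()      -> chunk count only
    #   item.dump(True)  -> token length + chunk count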
@@ -160,7 +160,7 @@ def ingest_file(

         # TODO: differentiate between different types of files
         file_memory = MemoryItem.from_text_file(content, filename)
-        logger.debug(f"Created memory: {file_memory.dump()}")
+        logger.debug(f"Created memory: {file_memory.dump(True)}")
         memory.add(file_memory)

         logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
@@ -175,7 +175,8 @@ def run_auto_gpt(

    # Initialize memory and make sure it is empty.
    # this is particularly important for indexing and referencing pinecone memory
-    memory = get_memory(cfg, init=True)
+    memory = get_memory(cfg)
+    memory.clear()
     logger.typewriter_log(
         "Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}"
     )
@@ -39,7 +39,7 @@ supported_memory = ["json_file", "no_memory"]
 # MilvusMemory = None


-def get_memory(cfg: Config, init=False) -> VectorMemory:
+def get_memory(cfg: Config) -> VectorMemory:
     memory = None

     match cfg.memory_backend:
@@ -60,7 +60,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
            #     )
            # else:
            #     memory = PineconeMemory(cfg)
-           #     if init:
+           #     if clear:
            #         memory.clear()

        case "redis":
@@ -109,21 +109,21 @@ class MemoryItem:
         # The result_message contains either user feedback
         # or the result of the command specified in ai_message

-        if ai_message["role"] != "assistant":
-            raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}")
+        if ai_message.role != "assistant":
+            raise ValueError(f"Invalid role on 'ai_message': {ai_message.role}")

         result = (
-            result_message["content"]
-            if result_message["content"].startswith("Command")
+            result_message.content
+            if result_message.content.startswith("Command")
             else "None"
         )
         user_input = (
-            result_message["content"]
-            if result_message["content"].startswith("Human feedback")
+            result_message.content
+            if result_message.content.startswith("Human feedback")
             else "None"
         )
         memory_content = (
-            f"Assistant Reply: {ai_message['content']}"
+            f"Assistant Reply: {ai_message.content}"
             "\n\n"
             f"Result: {result}"
             "\n\n"
@@ -145,11 +145,14 @@ class MemoryItem:
             question_for_summary=question,
         )

-    def dump(self) -> str:
-        token_length = count_string_tokens(self.raw_content, Config().embedding_model)
+    def dump(self, calculate_length=False) -> str:
+        if calculate_length:
+            token_length = count_string_tokens(
+                self.raw_content, Config().embedding_model
+            )
         return f"""
 =============== MemoryItem ===============
-Length: {token_length} tokens in {len(self.e_chunks)} chunks
+Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks
 Metadata: {json.dumps(self.metadata, indent=2)}
 ---------------- SUMMARY -----------------
 {self.summary}
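
Note that Python evaluates conditional expressions lazily, so when
calculate_length is False the inner f-string referencing the otherwise-unbound
token_length is never evaluated and no NameError occurs. A standalone sketch of
the two renderings (helper name and values are illustrative, not from the
commit):

    def size_line(n_chunks: int, token_length: int | None = None) -> str:
        # Mirrors the conditional f-string in MemoryItem.dump()
        return (
            f"Size: "
            f"{f'{token_length} tokens in ' if token_length is not None else ''}"
            f"{n_chunks} chunks"
        )

    size_line(3)       # -> 'Size: 3 chunks'
    size_line(3, 120)  # -> 'Size: 120 tokens in 3 chunks'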
@@ -158,6 +161,31 @@ Metadata: {json.dumps(self.metadata, indent=2)}
 ==========================================
 """

+    def __eq__(self, other: MemoryItem):
+        return (
+            self.raw_content == other.raw_content
+            and self.chunks == other.chunks
+            and self.chunk_summaries == other.chunk_summaries
+            # Embeddings can either be list[float] or np.ndarray[float32],
+            # and for comparison they must be of the same type
+            and np.array_equal(
+                self.e_summary
+                if isinstance(self.e_summary, np.ndarray)
+                else np.array(self.e_summary, dtype=np.float32),
+                other.e_summary
+                if isinstance(other.e_summary, np.ndarray)
+                else np.array(other.e_summary, dtype=np.float32),
+            )
+            and np.array_equal(
+                self.e_chunks
+                if isinstance(self.e_chunks[0], np.ndarray)
+                else [np.array(c, dtype=np.float32) for c in self.e_chunks],
+                other.e_chunks
+                if isinstance(other.e_chunks[0], np.ndarray)
+                else [np.array(c, dtype=np.float32) for c in other.e_chunks],
+            )
+        )
+

 @dataclasses.dataclass
 class MemoryItemRelevance:
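
The dtype normalization in __eq__ is load-bearing: embeddings deserialized from
the JSON index arrive as list[float] (float64 values), while freshly computed
ones may be float32 arrays, and equal-looking values then differ once broadcast
to float64. A standalone illustration (not from the commit):

    import numpy as np

    stored = [0.1, 0.2]                             # list[float] from the JSON index
    fresh = np.array([0.1, 0.2], dtype=np.float32)  # ndarray from the embedding backend

    np.array_equal(fresh, stored)                   # False: fresh upcasts to float64
    np.array_equal(fresh, np.array(stored, dtype=np.float32))  # True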
@@ -32,10 +32,17 @@ class JSONFileMemory(VectorMemoryProvider):
         workspace_path = Path(cfg.workspace_path)
         self.file_path = workspace_path / f"{cfg.memory_index}.json"
         self.file_path.touch()
-        logger.debug(f"Initialized {__name__} with index path {self.file_path}")
+        logger.debug(
+            f"Initialized {__class__.__name__} with index path {self.file_path}"
+        )

         self.memories = []
-        self.save_index()
+        try:
+            self.load_index()
+            logger.debug(f"Loaded {len(self.memories)} MemoryItems from file")
+        except Exception as e:
+            logger.warn(f"Could not load MemoryItems from file: {e}")
+            self.save_index()

     def __iter__(self) -> Iterator[MemoryItem]:
         return iter(self.memories)
@@ -48,6 +55,7 @@ class JSONFileMemory(VectorMemoryProvider):

     def add(self, item: MemoryItem):
         self.memories.append(item)
+        logger.debug(f"Adding item to memory: {item.dump()}")
         self.save_index()
         return len(self.memories)

@@ -62,6 +70,17 @@ class JSONFileMemory(VectorMemoryProvider):
         self.memories.clear()
         self.save_index()

+    def load_index(self):
+        """Loads all memories from the index file"""
+        if not self.file_path.is_file():
+            logger.debug(f"Index file '{self.file_path}' does not exist")
+            return
+        with self.file_path.open("r") as f:
+            logger.debug(f"Loading memories from index file '{self.file_path}'")
+            json_index = orjson.loads(f.read())
+            for memory_item_dict in json_index:
+                self.memories.append(MemoryItem(**memory_item_dict))
+
     def save_index(self):
         logger.debug(f"Saving memory index to file {self.file_path}")
         with self.file_path.open("wb") as f:
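
Taken together, load_index() and save_index() give JSONFileMemory a simple
persistence round trip. A usage sketch (the cfg/workspace setup and the
memory_item value are assumptions; JSONFileMemory, add() and __eq__ are from
this diff):

    # Hypothetical round trip; assumes a configured workspace and memory_index.
    index = JSONFileMemory(cfg)   # constructor now loads the index if it parses
    index.add(memory_item)        # add() appends and immediately saves

    reloaded = JSONFileMemory(cfg)               # second instance, same file
    assert reloaded.memories[0] == memory_item   # relies on the new __eq__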
@@ -70,7 +70,9 @@ def main() -> None:
     args = parser.parse_args()

     # Initialize memory
-    memory = get_memory(cfg, init=args.init)
+    memory = get_memory(cfg)
+    if args.init:
+        memory.clear()
     logger.debug("Using memory of type: " + memory.__class__.__name__)

     if args.file:
@@ -94,7 +94,8 @@ def agent(config: Config, workspace: Workspace) -> Agent:
     ai_config.command_registry = command_registry

     config.set_memory_backend("json_file")
-    memory_json_file = get_memory(config, init=True)
+    memory_json_file = get_memory(config)
+    memory_json_file.clear()

     system_prompt = ai_config.construct_full_prompt()

@@ -28,7 +28,9 @@ def memory_json_file(agent_test_config: Config):
     was_memory_backend = agent_test_config.memory_backend

     agent_test_config.set_memory_backend("json_file")
-    yield get_memory(agent_test_config, init=True)
+    memory = get_memory(agent_test_config)
+    memory.clear()
+    yield memory

     agent_test_config.set_memory_backend(was_memory_backend)

@@ -34,7 +34,9 @@ def test_json_memory_init_with_backing_empty_file(config: Config, workspace: Wor
     assert index_file.read_text() == "[]"


-def test_json_memory_init_with_backing_file(config: Config, workspace: Workspace):
+def test_json_memory_init_with_backing_invalid_file(
+    config: Config, workspace: Workspace
+):
     index_file = workspace.root / f"{config.memory_index}.json"
     index_file.touch()

@@ -78,6 +80,24 @@ def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embed
     assert retrieved.memory_item == memory_item


+def test_json_memory_load_index(config: Config, memory_item: MemoryItem):
+    index = JSONFileMemory(config)
+    index.add(memory_item)
+
+    try:
+        assert index.file_path.exists(), "index was not saved to file"
+        assert len(index) == 1, f"index contains {len(index)} items instead of 1"
+        assert index.memories[0] == memory_item, "item in index != added mock item"
+    except AssertionError as e:
+        raise ValueError(f"Setting up for load_index test failed: {e}")
+
+    index.memories = []
+    index.load_index()
+
+    assert len(index) == 1
+    assert index.memories[0] == memory_item
+
+
 @pytest.mark.vcr
 @requires_api_key("OPENAI_API_KEY")
 def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None: