diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md
new file mode 100644
index 0000000000..7563cce593
--- /dev/null
+++ b/docs/challenges/memory/challenge_d.md
@@ -0,0 +1,75 @@
+# Memory Challenge D
+
+**Status**: Current level to beat: level 1
+
+**Command to try**:
+```
+pytest -s tests/challenges/memory/test_memory_challenge_d.py --level=1
+```
+
+## Description
+
+The provided code is a unit test designed to validate an AI's ability to track events and beliefs of characters in a story involving moving objects, specifically marbles. This scenario is an advanced form of the classic "Sally-Anne test", a psychological test used to measure a child's social cognitive ability to understand that others' perspectives and beliefs may differ from their own.
+
+Here is an explanation of the challenge:
+
+The AI is given a series of events involving characters Sally, Anne, Bob, and Charlie, and the movements of different marbles. These events are designed as tests at increasing levels of complexity.
+
+For each level, the AI is expected to keep track of the events and the resulting beliefs of each character about the locations of each marble. These beliefs are affected by whether the character was inside or outside the room when events occurred, as characters inside the room are aware of the actions, while characters outside the room aren't.
+
+After the AI processes the events and generates the beliefs of each character, it writes these beliefs to an output file in JSON format.
+
+The `check_beliefs` function then checks the AI's beliefs against the expected beliefs for that level. The expected beliefs are predefined and represent the correct interpretation of the events for each level.
+
+If the AI's beliefs match the expected beliefs, it means the AI has correctly interpreted the events and the perspectives of each character. This would indicate that the AI has passed the test for that level.
+
+The test runs for levels up to the maximum level that the AI has successfully beaten, or up to a user-selected level.
+
+
+## Files
+
+- `instructions_1.txt`
+
+"Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).",
+
+
+- `instructions_2.txt`
+
+"Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble B. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.",
+
+
+...and so on.
+
+- `instructions_n.txt`
+
+The expected beliefs of every character are given in a dictionary keyed by level:
+
+expected_beliefs = {
+    1: {
+        'Sally': {
+            'marble A': 'basket S',
+        },
+        'Anne': {
+            'marble A': 'basket A',
+        }
+    },
+    2: {
+        'Sally': {
+            'marble A': 'sofa',  # Because Charlie told her
+        },
+        'Anne': {
+            'marble A': 'green box',  # Because she moved it there
+            'marble B': 'basket A',  # Because Bob put it there and she was in the room
+        },
+        'Bob': {
+            'marble B': 'basket A',  # Last place he put it
+        },
+        'Charlie': {
+            'marble A': 'sofa',  # Because Anne told him to tell Sally so
+        }
+    },...
+
+
+## Objective
+
+This test essentially checks if an AI can accurately model and track the beliefs of different characters based on their knowledge of events, which is a critical aspect of understanding and generating human-like narratives. This ability would be beneficial for tasks such as writing stories, dialogue systems, and more.
diff --git a/mkdocs.yml b/mkdocs.yml
index 48fa0cb51d..50e062571c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -25,6 +25,7 @@ nav:
         - Memory Challenge A: challenges/memory/challenge_a.md
         - Memory Challenge B: challenges/memory/challenge_b.md
         - Memory Challenge C: challenges/memory/challenge_c.md
+        - Memory Challenge D: challenges/memory/challenge_d.md
       - Information retrieval:
         - Introduction: challenges/information_retrieval/introduction.md
         - Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md
diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json
index 49f3b8586e..b03a6808c2 100644
--- a/tests/challenges/current_score.json
+++ b/tests/challenges/current_score.json
@@ -43,6 +43,10 @@
         "memory_challenge_c": {
             "max_level": 5,
             "max_level_beaten": 1
+        },
+        "memory_challenge_d": {
+            "max_level": 5,
+            "max_level_beaten": null
         }
     }
 }
diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py
new file mode 100644
index 0000000000..9e662e0854
--- /dev/null
+++ b/tests/challenges/memory/test_memory_challenge_d.py
@@ -0,0 +1,268 @@
+import json
+from typing import Dict
+
+import pytest
+from pytest_mock import MockerFixture
+
+from autogpt.agent import Agent
+from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.config import Config
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import get_workspace_path, run_interaction_loop
+from tests.utils import requires_api_key
+
+LEVEL_CURRENTLY_BEATEN = 1
+MAX_LEVEL = 5
+OUTPUT_LOCATION = "output.txt"
+
+
+# @pytest.mark.vcr
+@requires_api_key("OPENAI_API_KEY")
+@challenge
+def test_memory_challenge_d(
+    memory_management_agent: Agent,
+    patched_api_requestor: MockerFixture,
+    monkeypatch: pytest.MonkeyPatch,
+    config: Config,
+    level_to_run: int,
+) -> None:
+    """
+    The agent is given a series of events and must remember the respective beliefs of the characters.
+
+    One instructions file is written per level up to ``level_to_run``; the agent
+    is asked to write the characters' beliefs as JSON to ``output.txt``, which
+    is then checked against the expected beliefs for that level.
+
+    Args:
+        memory_management_agent (Agent): agent under test
+        patched_api_requestor (MockerFixture): requested fixture, not referenced in the test body
+        monkeypatch (pytest.MonkeyPatch): passed to run_interaction_loop
+        config (Config): passed to the file read/write commands
+        level_to_run (int): number of event phrases (levels) given to the agent
+    """
+    sally_anne_test_phrases = [
+        "Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).",
+        "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble B. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.",
+        "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob",
+        "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa. Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.",
+        "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
+    ]
+    level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
+    create_instructions_files(
+        memory_management_agent, level_to_run, level_sally_anne_test_phrases, config
+    )
+    run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2)
+    file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION)
+
+    content = read_file(file_path, config)
+    check_beliefs(content, level_to_run)
+
+
+def check_beliefs(content: str, level: int) -> None:
+    """
+    Assert that the beliefs reported by the agent match the expected beliefs of a level.
+
+    Args:
+        content (str): JSON text written by the agent
+        level (int): level whose expected beliefs are checked (levels 1 to 5 are defined)
+
+    Raises:
+        AssertionError: if an expected location is not contained in the location
+            the agent reported for that character and marble
+    """
+    # Define the expected beliefs for each level
+    expected_beliefs = {
+        1: {
+            "Sally": {
+                "marble A": "basket S",
+            },
+            "Anne": {
+                "marble A": "basket A",
+            },
+        },
+        2: {
+            "Sally": {
+                "marble A": "sofa",  # Because Charlie told her
+                "marble B": "lost",  # Because Bob told her
+            },
+            "Anne": {
+                "marble A": "green box",  # Because she moved it there
+                "marble B": "basket A",  # Because Bob put it there and she was in the room
+            },
+            "Bob": {
+                "marble B": "basket A",  # Last place he put it
+            },
+            "Charlie": {
+                "marble A": "sofa",  # Because Anne told him to tell Sally so
+            },
+        },
+        3: {
+            "Sally": {
+                "marble A": "green box",  # Because Anne told her
+                "marble C": "red box",  # Because Charlie told her
+            },
+            "Anne": {
+                "marble A": "sofa",  # Because Bob moved it there and told her
+                "marble B": "basket A",  # Because Charlie exchanged marble C with marble B in her basket
+                "marble C": "basket A",  # Because Charlie exchanged marble C with marble B in her basket
+            },
+            "Bob": {
+                "marble A": "sofa",  # Because he moved it there
+                "marble B": "basket A",
+                # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room
+                "marble C": "basket A",
+                # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room
+            },
+            "Charlie": {
+                "marble A": "sofa",  # Last place he knew it was
+                "marble B": "basket A",  # Because he exchanged marble C with marble B in Anne's basket
+                "marble C": "red box",  # Because Anne told him to tell Sally so
+            },
+        },
+        4: {
+            "Sally": {
+                "marble A": "green box",  # Because Anne told her in the last conversation
+                "marble C": "red box",  # Because Charlie told her
+                "marble D": "sofa",  # Because Charlie told her
+            },
+            # NOTE(review): the comments on marble A and marble D below disagree on whether
+            # Anne was in the room during this level - confirm these expected values.
+            "Anne": {
+                "marble A": "blue box",  # Because Bob moved it there, and she was not in the room to see
+                "marble B": "basket A",  # Last place she knew it was
+                "marble C": "basket A",  # Last place she knew it was
+                "marble D": "sofa",  # Because Bob moved it there, and she was in the room to see
+            },
+            "Bob": {
+                "marble A": "blue box",  # Because he moved it there
+                "marble B": "basket A",  # Last place he knew it was
+                "marble C": "basket A",  # Last place he knew it was
+                "marble D": "sofa",  # Because he moved it there
+            },
+            "Charlie": {
+                "marble A": "sofa",  # Last place he knew it was
+                "marble B": "basket A",  # Last place he knew it was
+                "marble C": "red box",  # Last place he knew it was
+                "marble D": "sofa",  # Because Bob told him to tell Sally so
+            },
+        },
+        5: {
+            "Sally": {
+                "marble A": "green box",  # Because Anne told her in the last level
+                "marble C": "red box",  # Because Charlie told her
+                "marble D": "sofa",  # Because Charlie told her
+                "marble E": "green box",  # Because Anne told her
+            },
+            "Anne": {
+                "marble A": "blue box",  # Last place she knew it was
+                "marble B": "basket A",  # Last place she knew it was
+                "marble C": "basket A",  # Last place she knew it was
+                "marble D": "basket C",  # Last place she knew it was
+                "marble E": "sofa",  # Because she moved it there
+            },
+            "Charlie": {
+                "marble A": "blue box",  # Last place he knew it was
+                "marble B": "basket A",  # Last place he knew it was
+                "marble C": "basket A",  # Last place he knew it was
+                "marble D": "basket C",  # Because he moved it there
+                "marble E": "red box",  # Last place he knew it was
+            },
+            "Bob": {
+                "marble A": "blue box",  # Last place he knew it was
+                "marble C": "red box",  # Last place he knew it was
+                "marble D": "sofa",  # Last place he knew it was
+            },
+        },
+    }
+
+    # Extract the beliefs from the AI's response
+    ai_beliefs = extract_beliefs(content)
+    # Check the AI's beliefs against the expected beliefs
+    for character, belief in expected_beliefs[level].items():
+        for marble, location in belief.items():
+            ai_belief = ai_beliefs.get(character, {}).get(marble, "")
+            assert (
+                location in ai_belief
+            ), f"For {character}'s {marble}, expected '{location}' to be in '{ai_belief}'"
+
+
+def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
+    """Extract the beliefs of each character from the AI's output."""
+    # Parse the JSON content
+    content_dict = json.loads(content)
+    beliefs = content_dict.get("beliefs", {})
+    return beliefs
+
+
+def create_instructions_files(
+    memory_management_agent: Agent,
+    level: int,
+    test_phrases: list,
+    config: Config,
+    base_filename: str = "instructions_",
+) -> None:
+    """
+    Creates a series of instructions files for the memory challenge.
+    Args:
+        memory_management_agent (Agent): agent whose workspace path is used for the files
+        level (int): number of files to create, numbered from 1 to level
+        test_phrases (list): event phrases, one per level
+        config (Config): passed to write_to_file
+        base_filename (str, optional): prefix of the generated file names
+    """
+    for i in range(1, level + 1):
+        content = generate_content(i, test_phrases, base_filename, level)
+        file_name = f"{base_filename}{i}.txt"
+        file_path = get_workspace_path(memory_management_agent, file_name)
+        write_to_file(file_path, content, config)
+
+
+def generate_content(
+    index: int, test_phrases: list, base_filename: str, level: int
+) -> str:
+    """
+    Builds the text of one instructions file.
+
+    Every file except the last tells the agent to read the next file; the last
+    one carries the rules and the expected JSON output format.
+
+    Args:
+        index (int): 1-based number of the file being generated
+        test_phrases (list): event phrases; the phrase at position index - 1 is used
+        base_filename (str): prefix of the instructions file names
+        level (int): number of the last instructions file
+    Returns:
+        str: content of the instructions file
+    """
+    phrase = test_phrases[index - 1]
+
+    if index != level:
+        return f"The current event is '{phrase}'.\nRead the file {base_filename}{index + 1}.txt using the read_file command."
+    else:
+        return f"""
+        This event is '{phrase}'
+        rules:
+        1. Everyone inside the room see and know what are the actions of the people inside the room.
+        2. Everyone outside the room do not see and do not know the actions of the people inside the room.
+        3. Only write about the marbles that are present in the level
+        4. Marbles names are marble A, marble B, marble C, marble D, ...
+        5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ...
+        6. Do not use expression like <Anne's basket> use <basket A> instead.
+        7. All characters believe what they're being told by other characters.
+        Instructions:
+        I) Write the following information in the file output.txt in JSON format:
+        1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is)
+        The format should be as follows:
+        {{
+            "beliefs": {{
+                "<character_name>": {{
+                    "<marble_name>": "<believed_location_of_the_specific_marble>",
+                    ...
+                }},
+                ...
+            }}
+        }}
+        Example of output (only use this to understand and learn how to use the format. Nothing else):
+        {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}}}}
+        II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command.
+        """
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
index bad835dc70..4be96481d0 100644
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -107,10 +107,7 @@ def file_system_agents(
 
 @pytest.fixture
 def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
-    command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.task_statuses")
+    command_registry = get_command_registry(agent_test_config)
 
     ai_config = AIConfig(
         ai_name="Follow-Instructions-GPT",
@@ -125,7 +122,7 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work
     system_prompt = ai_config.construct_full_prompt()
 
     agent = Agent(
-        ai_name="",
+        ai_name="Follow-Instructions-GPT",
         memory=memory_json_file,
         command_registry=command_registry,
         config=ai_config,