Add 'remember ids with noise' challenge (#61)

This commit is contained in:
merwanehamadi
2023-07-05 22:34:51 -07:00
committed by GitHub
parent c76062b092
commit 82d8f67f6a
8 changed files with 93 additions and 10 deletions

View File

@@ -0,0 +1,5 @@
xBd9cZTfaExYalwtUVR7m0pe3Nfaf5uBS4IxGFZPZcQjDf8Tfk2vdpqWI0ESBWCdVzsdlxMmUxq43INSz1iftsv6PTOMGQ88Cojwj5mQXp8XKZ6VJC893BDLVLLW00iQy8VerWjQay9rAJz8rYVZHa6dByYNWZNOgtjC7ejnKt0VYZtUFfRBZNWG2HNX3tgX8H2h4xeu7GIfm4wETGvzlZDANeqiY2hMCXEAsliVXXjgmvVeB05tjkS5uvq5uV2DnNyStimIVVdEMFI5Ft0qM82aMvJlUtVj6TJEmE0qPTqBXeHud72iRTcBa9okCzjYiAd6oSoJ8k9o6lmFTeq323ILYCGzsICjqoysuVonfHUDh1Ll2LTo4I2AygfPqCqvgQWq9wa8YfWKBlwPPVy2lymJRTd1mS7RUaiagoNn76ApJviCYh2fWEZcxULCKAbbn0E6vz1CBADSOEIVB14ZyyRfiDcXbgYYcnOShwMsg0vYcKDKfAHk
The id to remember is 3145. Read the file instructions_2.txt.
OueiZyOoM5eGI5VkTt6Ue1XboZ4jztAa5IGWqSbhIhLiI4X2nOmJw6tLBd3smZNwKQpq8NHxZSk76Xd82yGI3l9KhLARXRT37MRkY7WG8YQVJEurki86cIGuXthSWjq9dDKh6ILSq4DdWPIHfDeV12psErCcgEUxWtoU6bnSMnaoYteOkKWTAkXdC1t4j5p3rXbMv1j92nLmCmoslT2A9noQIODWLdudtCecTMmrbq85RLBt5WFLGMfWVsuSrSMGo5tiN7vC1siLfhlhco0q5QaMv0px6kVg44Wceg3UXIUoMxTNoh9G8uEABJhvsF2qzxkbHuhk6VRuydIWoGgfN01upk6BDfvooyAkdcTJG5jFlHOJixTe4ramT5uP54oZ0anJTB6w7hybN3o9vb4xLbAFQxCZIXZ9HXgeBnl1L8qIvQg9VsklntCMsu2cm5CgIryRBGPqnTAbrhmAmFOkNyLSVFfYmu2wtlMov2NIkYilT4Oa1Rkt

View File

@@ -0,0 +1,5 @@
2yXfw2vPZCZq4jGOTHF4NEUYLbAUBIcmkgLxG7qXnYLNsvvZDqAvBPw4OcOfleIWvS6S5GThSPcrSfX94yB1TT6SVHGqPkulJUk4W1wfIFRIiOSps6V8ulLyrmeZsEJ6l9B9Vrm4h6SZTQVP750TUfECOH4d5j5AtupugjqThyw3t6ZFYHr2eUYRiOiTlng2uvsoZiioBQlUitrjQ4mw8FRL3VaR2aAhHGwaNV0Q7XelFU50YQgcUYqfxHxmqCLqb7dtZ7WWwxrLcqwVbj4y1YteFzPZyU4TJKopMVizgWaam8tKa1hYAQHqEaiAAHigqvYhutPHarpzc4PP2RLE4AZCxRblSY40iXpxQ9waXsrUEZ51ZRFmvm5G17wuKghMcKea2jN2MIgvSxNBy7cszFyBTe6V6u6IMk1wVWa0YulPslLc0bXUVKqZ54b61lyBAKSoFbJVRFYB5XZBL5tp2efvTsEQ3JvFmSREEOhmawIriifCApy1
The id to remember is 3791. Read the file instructions_3.txt.
BDLfeJBcfS4iqE9sNAm4ndZT2F1fsFYdXGRpRQ6xSXl014c9H7NeMbQCtFb7kRtVvzx9AItPj1uqtjA0R35N2Pj8FgxfSPDb8dlizLA6dbKY4JfCWmibzrBYoFzoxiPX57m3n8yLKHA0aejG38aMJ6XjR75kAjBW0Cw9d3Ny0MphakfW8KDZoMO3qwsPLLASYrz42K7JjThVGZvEXczRBY8la4NJPZpj91GmfsQuJezCvcI87gjfjtRDp1GECU9SmLSWBufjQWWlc4p6z5XtPPu0vqxRjoiFDFZvafU35KkEDcWAHv3KhR0Z20JD2qIrJ4CHntwGBAk61nMBpKhNx0t3ONK5X0WD7gNCdG64obji2ifsI8ZydLkROJkAJCpe4zRd04mkydCwKGJzmCGv0lu1KRn4QobFq7mEeuzD0xvvGtyiuiVXJSVqphf5ySmfjD4EvDCMRDNZx7c4pECUnLBPDlB8JwMyugfyD5mslte9YCG9kK6n

View File

@@ -0,0 +1,5 @@
ccgfgLxgQxZ1F3USqH9STHNVYAV3AnxU97C77kgsSkx51Czq0cLp4ZmedPY3dVNUtm2kR9HDahe7Cp6zc3zvpK9RlRKQ1rugGDkNbgc4H57MYOQpDKzbikvHWXtxFixXqNiuRJW8h8uAAKv7qsStgJ2loyM9zKVicCdHerR2Q3kT0No2DOxHUv8T1d7rN3Kf0Sh6nz2LOSqvLjtzTAxpkM7ehUzFuOoXjQnuVUHqXTzL26ASeMd74N5DgO81LofDb6mcvrmVMIY8tfDBqHV83a8zn9HySeg5ZapnAVI1KOXRXyZEgbsVe1rEy8slCXqEuYCqn8IudJBnRDNKnZRdhwQ8tCpXQ1D3quhLEJxjhpRFiLl7lEZvtkunL2UlDkErcFjfKfVJLNy2loQsB9aKbR3NXBGbQNXR9QQ2xyGJPl8Wnw03sCRjubn83LpMoC2jirhJld5VFTffPY3jw0MyX3vUF8kfHRGowsrzzcdNuESiRf3iMcmx
The id to remember is 9317. Read the file instructions_4.txt.
8YwXVp6zAoxCfth85iCMA2BrfDW8JjjzieFnuj7GOhW8BlbTsymfiA59qTBCuThFk0wKlT4MDskG5dU7dMHuEAKslHwcfZywvUZYFacVgSn3VfQeWBUgXE7rrrYRclqpjI3nqYuqfVzNHxn0iG13j6q5SAauFSaP2x0CWdDnEGqi9Dh7DcvjVIzlXn3GLa1BTd0qceGYCJRT7UM9YTHJYHo7Ipr2Hgreawb09Xx0KovgzBSgJYpqRNwVRFTfpgvZZIEA0QDPZE28n8KgtDPHHAIroZP6KijKCjyNLjYyM0D4AtiuLDon8bcaLRhzhC6plMvI7pkpEpsct3u9zg8C8V0pONWpWNNCompsuwl4TECiVtVygrYNcroROjNs1CGfliHCvGBbADSQORrFbhRyYEhIVNgXQsh4ilpcOYLAZoxsYlFtJw5Mf7y8ebhay1uJfNaolFWD63QID5UHBqzXZUDupjtUhD0N4XEtNeVyTqXD4S5SffdQ

View File

@@ -0,0 +1,5 @@
RpsPog8gFZhpbMDHImq5OBbzE9DwrzKaqldc7UVs8FXohjoAaLb7sYW291NVDdUUk4AYwHum0VotgPPdQp3wxGza9Se6pex9G6IDd1jxPW5UTnik0Ygns0U5m5uCvuF140ozvPBj5blJ5wWIiQOXceyl7RsXIP82w1u0yTMCmAdefJISC9HAsJKg2QBbtCNoGIHEIuCZFSdskBe9XaPxPoA40AD4UXpwKIE3qEui39QyePL1czeqSGGmmRgziDiMhFiSwNarA0rwehZPW3QKpACjRu9wcVoTg7wxT4R8bziUzwVDeI5ZeqNqNrTUYiLpfUEbckcRQD3FUALmEtzIdSMFdD9z8CiSXVEAOpUbiqm9itNBnMVaZQ2eyKxet4EvZrfUbkhGntIxf8VOsgXlkzb0iXcyHu1PogCjOkSWodlPfN4mYm0P4EvUGogI3suX3tCXp5EJiZHSIoyApxme6Nd6NkdbIjcQOep42z6z23cO5uPv41cA
The id to remember is 9471. Read the file instructions_5.txt.
1d2q36DxTTRcg8ijubOcPntReHzsM94G2O0xmclXuqh8kqVaYcNdl7mmQWNSSh4OEF6BUuhVg0s58UFeJnLNOzYeC8T0fYhhrvkw8enra8JS2VeQCnTxdn4GUhJXu4V0UOhaeNnArmO03M8n6mQbV77VnH5RSVR4qVWnYmHGaswdHxYOZVnxFSyzzmhdX1dK5q9EgIwadglyBkpKu15ZaMIH2SOPjil8YjD4euRZc2EGZDcvCkJAJeLsrbTQLM82k2fKqRFyuNELCQp1DRz6gExByQ11SbHRqednp6ndj8msLC116PVMqp8pSDVZ1RrTFV9UOReeO9wnveJbLUEMh8c3wlg6yXn0XrbO7qabvdKXAjIIF8JN9HdLoFGdtDPLJbHDyJPqAYt9OIfyr8knQFLJZjKCBn8QrYy1jdFIHnENf1vZTI1bA4SUOG81xeRHGDVln9mcjdlBHdWRHX9TPbaokSjl9q7nDk6734KpPullyvm10l1M

View File

@@ -0,0 +1,5 @@
sUIic8IsnuubPzkKH7Q7eZ9d6ityYWhs5x9PBYpxt4q0BqVVJ63T0Ir0YXYbhTkxrUWnPYIGlAl8Qf96LPrqqdzJEgE42XDazV3MBdZKW8V1MGUb7PThgshpo7oPwj8CzmJII76VGILp8uOtDTiS9jGXg1b8bquMQUglx7i8R58m5b9uMtUhMyd45ETxlKBwY0Mmw9hm9EtiIZYfXuKlgcE7iTYip2FZVaTTo4WJyMD0Tqpf53ffrDWzPJUABk9b3PNkRtH1HQ4CQlKkwuNA51aWL3H0rcdHggTcOUDyQzUiWxebnPjLoDbXp4SDLeJHofCSzrUoCKTj1cz82Ey7nB3zt1jbqD3MClCQWunifkt2NHDQfMQMsDyhkzG9NCSHMKbGOBgIcxqx9HEDaYuOUCYDnj2iqXiRl0DnrzRvCm9wZOX4hmkFSZicx3g8N1CFVqeSXYAMlXnZfVsr1zB30A7LbjUgaF4XEUO1AVaW8XL8snvfg232
Write the ids previously mentioned to a file called file_to_check.txt.
gPBX3I9EGxmOKs2c4SfFdTiGjqSDNGtTFfLqQIFZiB8m0ooZ2gX8BMyilVHcGjOXx7TjJiknrK0aCbrXQvyTgYQkzfYEFm0MPi285Gu41T4Ue3oBc6evhvl3erooIVC85ZViFi9fEXMl5uxEuTrSbXWaauRQqxoZnA8OCqdg04HqJcd7pdVd6uZk45JBRJq8TSGlY1QnNE5IfN6bcHYoPP41c2li8TM18GBzALIaGHM2AbWGdUWMnIMQs9Dtriv0Kzrh5lywwnd8blFiUqzNNJ8BGlOQZOoj2Qp73LjDUPcTYEkJGtRTVt6gbpRHECcS6aauG03Ntjm4Fr0CCdHUdvuiMoOG0VUg6e7i3jJHA2CQ8J7PNcVIfjB9B4gOjVIHXWoOmXM9zSrIpiNc6hwaC9QfenFh1txI2lqYyDz7geulivQRIC03EiSwpjvjPqXVySFVJONM7t85Y27NJzRHyfRlEKwlAZOtk0782gyjrPOjkrLgObI0

View File

@@ -0,0 +1,21 @@
{
"name": "remember_multiple_ids_with_noise_mock",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": [],
"ground": {
"answer": "3145\n3791\n9317\n9471",
"should_contain": ["3145", "3791", "9317", "9471"],
"should_not_contain": [],
"files": ["file_to_check.txt"]
},
"mock": {
"mock_func": "remember_multiple_ids_mock",
"mock_task": "Follow the instructions in the instructions_1.txt file"
},
"info": {
"difficulty": "medium",
"description": "Tests ability for the agent to remember information between each action. Multiple ids are presented in different files and the last file instructs the agent to write these ids into another file. Some noise is also added to the files to test the agent's ability to filter out noise.",
"side_effects": ["tests if there is in fact an LLM attached"]
}
}

View File

@@ -0,0 +1,32 @@
import os
from typing import Any, Dict
import pytest
from agbenchmark.challenges.memory.memory import MemoryChallenge
class TestRememberMultipleIdsWithNoise(MemoryChallenge):
"""The first memory challenge"""
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(
os.path.dirname(__file__), "remember_multiple_ids_with_noise_data.json"
)
@pytest.mark.depends(
name="test_remember_multiple_ids_with_noise",
depends=["test_remember_multiple_ids"],
)
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:
score = self.scoring(file_content, self.data.ground)
print("Your score is:", score)
scores.append(score)
assert 1 in scores

View File

@@ -4,11 +4,6 @@
"dependencies": [],
"test": "agbenchmark/challenges/memory/m1/m1_test.py"
},
"TestRetrieval": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r1/r1_test.py"
},
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
@@ -19,6 +14,16 @@
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
},
"TestRetrieval3": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
},
"TestRetrieval": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r1/r1_test.py"
},
"TestReadFile": {
"difficulty": "basic",
"dependencies": [
@@ -26,14 +31,14 @@
],
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
},
"TestRetrieval3": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
},
"TestRememberMultipleIds": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py"
},
"TestRememberMultipleIdsWithNoise": {
"difficulty": "medium",
"dependencies": [],
"test": "agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py"
}
}