mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-08 22:58:01 -05:00
Make agbenchmark a Proxy of the evaluated agent Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
33 lines
858 B
JSON
33 lines
858 B
JSON
{
|
|
"name": "DebugSimpleTypoWithGuidance",
|
|
"category": [
|
|
"code",
|
|
"iterate"
|
|
],
|
|
"task": "1- Run test.py.\n2- Read sample_code.py.\n3- Modify sample_code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
|
"dependencies": [
|
|
"ReadFile"
|
|
],
|
|
"cutoff": 75,
|
|
"ground": {
|
|
"answer": "[0, 1] [2, 5] [0, 3]",
|
|
"should_contain": [
|
|
"[0, 1]",
|
|
"[2, 5]",
|
|
"[0, 3]"
|
|
],
|
|
"should_not_contain": [],
|
|
"files": [
|
|
"test.py"
|
|
],
|
|
"eval": {
|
|
"type": "python"
|
|
}
|
|
},
|
|
"info": {
|
|
"difficulty": "novice",
|
|
"description": "s ability for the agent to debug python code with a simple typo in it.",
|
|
"side_effects": []
|
|
},
|
|
"eval_id": "1ce0ccdd-cbe3-4000-a2a4-86d9c147fcfe"
|
|
} |