mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
Agenteval integration (#2672)
* first pass at offline agent eval integration
* Integrating AgentEval for offline scenarios
* removing old changes
* fixing notebook, updating docs
* fixing subcriteria bug
* updating class comment
* cleaning up agent constructors
* moving AgentEval agents to separate folder and adding a brief README
* fixing build breaks
* fixing formatting break
* fixing comments
* consolidating files in the agenteval folder under contrib and cleaning up imports
* fixing import ordering
* adding basic agenteval tests and fixing criteria parsing bug
* first try at adding openai agenteval tests to build process
* adding non-openai agenteval tests to build process
* updating test settings
* updating openai test
* Update test/agentchat/contrib/agent_eval/test_agent_eval.py
  Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
* Update .github/workflows/contrib-openai.yml
  Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
* test commit
* updating typing and converting to pydantic objects
* fixing test file
---------
Co-authored-by: Beibin Li <BeibinLi@users.noreply.github.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
This commit is contained in:
committed by
GitHub
parent
4b747d731a
commit
dad9c66104
@@ -1,26 +1,26 @@
|
||||
{
|
||||
"Problem Interpretation": {
|
||||
"description": "Ability to correctly interpret the problem.",
|
||||
"accepted_values": ["completely off", "slightly relevant", "relevant", "mostly accurate", "completely accurate"]
|
||||
},
|
||||
"Mathematical Methodology": {
|
||||
"description": "Adequacy of the chosen mathematical or algorithmic methodology for the question",
|
||||
"accepted_values": ["inappropriate", "barely adequate", "adequate", "mostly effective", "completely effective"]
|
||||
},
|
||||
"Calculation Correctness": {
|
||||
"description": "Accuracy of calculations made and solutions given",
|
||||
"accepted_values": ["completely incorrect", "mostly incorrect", "neither", "mostly correct", "completely correct"]
|
||||
},
|
||||
"Explanation Clarity": {
|
||||
"description": "Clarity and comprehensibility of explanations, including language use and structure",
|
||||
"accepted_values": ["not at all clear", "slightly clear", "moderately clear", "very clear", "completely clear"]
|
||||
},
|
||||
"Code Efficiency": {
|
||||
"description": "Quality of code in terms of efficiency and elegance",
|
||||
"accepted_values": ["not at all efficient", "slightly efficient", "moderately efficient", "very efficient", "extremely efficient"]
|
||||
},
|
||||
"Code Correctness": {
|
||||
"description": "Correctness of the provided code",
|
||||
"accepted_values": ["completely incorrect", "mostly incorrect", "partly correct", "mostly correct", "completely correct"]
|
||||
}
|
||||
[
|
||||
{ "name": "Problem Interpretation",
|
||||
"description": "Ability to correctly interpret the problem.",
|
||||
"accepted_values": ["completely off", "slightly relevant", "relevant", "mostly accurate", "completely accurate"]
|
||||
},
|
||||
{ "name": "Mathematical Methodology",
|
||||
"description": "Adequacy of the chosen mathematical or algorithmic methodology for the question",
|
||||
"accepted_values": ["inappropriate", "barely adequate", "adequate", "mostly effective", "completely effective"]
|
||||
},
|
||||
{ "name": "Calculation Correctness",
|
||||
"description": "Accuracy of calculations made and solutions given",
|
||||
"accepted_values": ["completely incorrect", "mostly incorrect", "neither", "mostly correct", "completely correct"]
|
||||
},
|
||||
{ "name": "Explanation Clarity",
|
||||
"description": "Clarity and comprehensibility of explanations, including language use and structure",
|
||||
"accepted_values": ["not at all clear", "slightly clear", "moderately clear", "very clear", "completely clear"]
|
||||
},
|
||||
{ "name": "Code Efficiency",
|
||||
"description": "Quality of code in terms of efficiency and elegance",
|
||||
"accepted_values": ["not at all efficient", "slightly efficient", "moderately efficient", "very efficient", "extremely efficient"]
|
||||
},
|
||||
{ "name": "Code Correctness",
|
||||
"description": "Correctness of the provided code",
|
||||
"accepted_values": ["completely incorrect", "mostly incorrect", "partly correct", "mostly correct", "completely correct"]
|
||||
}
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user