mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
* adding draft evaluation code for EDA, using chatgpt as the temporal agent for now * Update README.md * Delete frontend/package.json * reverse the irrelevant changes * reverse package.json * use chatgpt as the codeactagent * integrate with opendevin * Update evaluation/EDA/README.md * Update evaluation/EDA/README.md * Use poetry to manage packages * integrate with opendevin * minor update * minor update * update poetry * update README * clean-up infer scripts * add run_infer script and improve readme * log final success and final message & ground truth --------- Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Xingyao Wang <xingyao6@illinois.edu> Co-authored-by: yufansong <yufan@risingwave-labs.com> Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
50 lines
1.3 KiB
Bash
Executable File
50 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
|
|
MODEL_CONFIG=$1
|
|
AGENT=$2
|
|
DATASET=$3
|
|
EVAL_LIMIT=$4
|
|
|
|
if [ -z "$AGENT" ]; then
|
|
echo "Agent not specified, use default CodeActAgent"
|
|
AGENT="CodeActAgent"
|
|
fi
|
|
|
|
if [ -z "$DATASET" ]; then
|
|
echo "Dataset not specified, use default 'things'"
|
|
DATASET="things"
|
|
fi
|
|
|
|
# check if OPENAI_API_KEY is set
|
|
if [ -z "$OPENAI_API_KEY" ]; then
|
|
echo "OPENAI_API_KEY is not set, please set it to run the script"
|
|
exit 1
|
|
fi
|
|
|
|
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenDevin
|
|
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
|
AGENT_VERSION=v$(poetry run python -c "import agenthub; from opendevin.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
|
|
|
|
echo "AGENT: $AGENT"
|
|
echo "AGENT_VERSION: $AGENT_VERSION"
|
|
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
|
echo "DATASET: $DATASET"
|
|
|
|
COMMAND="poetry run python evaluation/EDA/run_infer.py \
|
|
--agent-cls $AGENT \
|
|
--llm-config $MODEL_CONFIG \
|
|
--dataset $DATASET \
|
|
--data-split test \
|
|
--max-iterations 20 \
|
|
--OPENAI_API_KEY $OPENAI_API_KEY \
|
|
--max-chars 10000000 \
|
|
--eval-num-workers 1 \
|
|
--eval-note ${AGENT_VERSION}_${DATASET}"
|
|
|
|
if [ -n "$EVAL_LIMIT" ]; then
|
|
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
|
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
|
fi
|
|
|
|
# Run the command
|
|
eval $COMMAND
|