mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-08 22:38:05 -05:00
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
6
.github/workflows/eval-runner.yml
vendored
6
.github/workflows/eval-runner.yml
vendored
@@ -84,12 +84,12 @@ jobs:
|
||||
EVAL_DOCKER_IMAGE_PREFIX: us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images
|
||||
|
||||
run: |
|
||||
poetry run ./evaluation/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 $N_PROCESSES "princeton-nlp/SWE-bench_Lite" test
|
||||
poetry run ./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 $N_PROCESSES "princeton-nlp/SWE-bench_Lite" test
|
||||
OUTPUT_FOLDER=$(find evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite-test/CodeActAgent -name "deepseek-chat_maxiter_50_N_*-no-hint-run_1" -type d | head -n 1)
|
||||
echo "OUTPUT_FOLDER for SWE-bench evaluation: $OUTPUT_FOLDER"
|
||||
poetry run ./evaluation/swe_bench/scripts/eval_infer_remote.sh $OUTPUT_FOLDER/output.jsonl $N_PROCESSES "princeton-nlp/SWE-bench_Lite" test
|
||||
poetry run ./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh $OUTPUT_FOLDER/output.jsonl $N_PROCESSES "princeton-nlp/SWE-bench_Lite" test
|
||||
|
||||
poetry run ./evaluation/swe_bench/scripts/eval/summarize_outputs.py $OUTPUT_FOLDER/output.jsonl > summarize_outputs.log 2>&1
|
||||
poetry run ./evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py $OUTPUT_FOLDER/output.jsonl > summarize_outputs.log 2>&1
|
||||
echo "SWEBENCH_REPORT<<EOF" >> $GITHUB_ENV
|
||||
cat summarize_outputs.log >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
Reference in New Issue
Block a user