From 11a2d1682d4e16afa73f71632ab04dd37ee66834 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Tue, 11 Jun 2024 04:14:22 +0800 Subject: [PATCH] Minor SWE-Bench inference config tweak (#2381) * save infer logs to infer_logs * set max budget for swebench eval --- evaluation/swe_bench/README.md | 1 + evaluation/swe_bench/run_infer.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md index 958d6b7649..44efd1cb1f 100644 --- a/evaluation/swe_bench/README.md +++ b/evaluation/swe_bench/README.md @@ -51,6 +51,7 @@ sandbox_timeout = 120 use_host_network = false run_as_devin = false enable_auto_lint = true +max_budget_per_task = 4 # 4 USD # TODO: Change these to the model you want to evaluate [eval_gpt4_1106_preview] diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index d89835559d..fdd073e933 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -209,7 +209,7 @@ def process_instance( if reset_logger: # Set up logger log_file = os.path.join( - eval_output_dir, 'logs', f'instance_{instance.instance_id}.log' + eval_output_dir, 'infer_logs', f'instance_{instance.instance_id}.log' ) # Remove all existing handlers from logger for handler in logger.handlers[:]: