From 43c4a7fff45f8d4afa46a934b69bd7a679042afc Mon Sep 17 00:00:00 2001
From: Jiayi Pan
Date: Fri, 6 Sep 2024 06:05:00 -0700
Subject: [PATCH] Allow Generalized SWE-Bench format for evaluation (#3752)

* allow generalized swe-bench format

* Update run_infer.py

* fix linter

---------

Co-authored-by: Xingyao Wang
---
 evaluation/swe_bench/run_infer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index 9f4eb7cc99..ba2181c643 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -456,6 +456,12 @@ if __name__ == '__main__':
     output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
     instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)
 
+    if not isinstance(
+        instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
+    ):
+        for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
+            instances[col] = instances[col].apply(lambda x: str(list(x)))
+
     run_evaluation(
         instances, metadata, output_file, args.eval_num_workers, process_instance
     )