From 7af35ab8276dd05148a8d8d814bedd0ecfa14bcb Mon Sep 17 00:00:00 2001 From: Boxuan Li Date: Mon, 21 Jul 2025 18:45:52 -0700 Subject: [PATCH] Evaluation: disable browser when NOT run_with_browsing (#9837) --- evaluation/benchmarks/commit0/run_infer.py | 1 + evaluation/benchmarks/multi_swe_bench/run_infer.py | 1 + evaluation/benchmarks/swe_bench/run_infer.py | 1 + evaluation/benchmarks/swe_bench/run_localize.py | 1 + evaluation/benchmarks/visual_swe_bench/run_infer.py | 1 + 5 files changed, 5 insertions(+) diff --git a/evaluation/benchmarks/commit0/run_infer.py b/evaluation/benchmarks/commit0/run_infer.py index 1f6160d0ea..99c5b4a43d 100644 --- a/evaluation/benchmarks/commit0/run_infer.py +++ b/evaluation/benchmarks/commit0/run_infer.py @@ -117,6 +117,7 @@ def get_config( default_agent=metadata.agent_class, run_as_openhands=False, max_iterations=metadata.max_iterations, + enable_browser=RUN_WITH_BROWSING, runtime=os.environ.get('RUNTIME', 'docker'), sandbox=sandbox_config, # do not mount workspace diff --git a/evaluation/benchmarks/multi_swe_bench/run_infer.py b/evaluation/benchmarks/multi_swe_bench/run_infer.py index 1881b1e422..4f12677dc2 100644 --- a/evaluation/benchmarks/multi_swe_bench/run_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/run_infer.py @@ -345,6 +345,7 @@ def get_config( default_agent=metadata.agent_class, run_as_openhands=False, max_iterations=metadata.max_iterations, + enable_browser=RUN_WITH_BROWSING, runtime=os.environ.get('RUNTIME', 'docker'), sandbox=sandbox_config, # do not mount workspace diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 876645b910..52ca9a9b81 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -226,6 +226,7 @@ def get_config( default_agent=metadata.agent_class, run_as_openhands=False, max_iterations=metadata.max_iterations, + enable_browser=RUN_WITH_BROWSING, runtime=os.environ.get('RUNTIME', 'docker'), sandbox=sandbox_config, # do not mount workspace diff --git a/evaluation/benchmarks/swe_bench/run_localize.py b/evaluation/benchmarks/swe_bench/run_localize.py index d81fd6c3b5..f17d40b87c 100644 --- a/evaluation/benchmarks/swe_bench/run_localize.py +++ b/evaluation/benchmarks/swe_bench/run_localize.py @@ -203,6 +203,7 @@ def get_config( default_agent=metadata.agent_class, run_as_openhands=False, max_iterations=metadata.max_iterations, + enable_browser=RUN_WITH_BROWSING, runtime=os.environ.get('RUNTIME', 'docker'), sandbox=sandbox_config, # do not mount workspace diff --git a/evaluation/benchmarks/visual_swe_bench/run_infer.py b/evaluation/benchmarks/visual_swe_bench/run_infer.py index 1bdd5349f1..d07e885b1d 100644 --- a/evaluation/benchmarks/visual_swe_bench/run_infer.py +++ b/evaluation/benchmarks/visual_swe_bench/run_infer.py @@ -164,6 +164,7 @@ def get_config( default_agent=metadata.agent_class, run_as_openhands=False, max_iterations=metadata.max_iterations, + enable_browser=RUN_WITH_BROWSING, runtime=os.environ.get('RUNTIME', 'docker'), sandbox=sandbox_config, # do not mount workspace