Evaluation: redirect sessions to repo-local .eval_sessions via helper; apply across entrypoints; add tests (#10540)

Co-authored-by: openhands <openhands@all-hands.dev>
2026-01-08 22:38:05 -05:00 · 2025-08-22 09:34:02 -04:00
parent d9cf5b7302
commit 4507a25b85
36 changed files with 274 additions and 293 deletions
--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
    compatibility_for_eval_history_pairs,
    get_default_sandbox_config_for_eval,
    get_metrics,
+    get_openhands_config_for_eval,
    make_metadata,
    prepare_dataset,
    reset_logger_for_multiprocessing,
@@ -110,15 +111,10 @@ def get_config(
        f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
    )

-    config = OpenHandsConfig(
-        default_agent=metadata.agent_class,
-        run_as_openhands=False,
+    config = get_openhands_config_for_eval(
+        metadata=metadata,
        runtime='docker',
-        max_iterations=metadata.max_iterations,
-        sandbox=sandbox_config,
-        # do not mount workspace
-        workspace_base=None,
-        workspace_mount_path=None,
+        sandbox_config=sandbox_config,
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)