mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
Evaluation: redirect sessions to repo-local .eval_sessions via helper; apply across entrypoints; add tests (#10540)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -83,13 +84,9 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -227,16 +228,11 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -199,16 +200,11 @@ def get_config(
|
||||
'REPO_PATH': f'/workspace/{workspace_dir_name}/',
|
||||
}
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
Reference in New Issue
Block a user