Compare commits

...

1 Commits

Author SHA1 Message Date
openhands
68ca0b6a9b Add evaluation changes without disabling repository memory 2025-04-15 15:06:58 +00:00
21 changed files with 81 additions and 1 deletions

View File

@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -74,6 +75,7 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -55,6 +56,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -61,6 +62,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
# copy 'draft_editor' config if exists

View File

@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -72,6 +73,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -86,6 +87,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -50,6 +51,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
@@ -135,6 +136,7 @@ def get_config(
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=False,
)
agent_config = update_agent_config_for_eval(agent_config)
config.set_agent_config(agent_config)
return config

View File

@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -76,6 +77,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
agent_config = AgentConfig(
function_calling=False,

View File

@@ -18,6 +18,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -66,6 +67,8 @@ def get_config(
else:
logger.info('Agent config not provided, using default settings')
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -54,6 +55,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -34,6 +34,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -75,6 +76,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -27,6 +27,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -96,6 +97,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -63,6 +64,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -121,6 +122,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -91,6 +92,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
@@ -231,6 +232,7 @@ def get_config(
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
agent_config = update_agent_config_for_eval(agent_config)
config.set_agent_config(agent_config)
return config

View File

@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
@@ -164,6 +165,7 @@ def get_config(
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
agent_config = update_agent_config_for_eval(agent_config)
config.set_agent_config(agent_config)
return config

View File

@@ -13,7 +13,10 @@ from typing import List
import yaml
from browsing import pre_login
from evaluation.utils.shared import get_default_sandbox_config_for_eval
from evaluation.utils.shared import (
get_default_sandbox_config_for_eval,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
@@ -58,12 +61,14 @@ def get_config(
)
config.set_llm_config(llm_config)
if agent_config:
agent_config = update_agent_config_for_eval(agent_config)
config.set_agent_config(agent_config)
else:
logger.info('Agent config not provided, using default settings')
agent_config = AgentConfig(
enable_prompt_extensions=False,
)
agent_config = update_agent_config_for_eval(agent_config)
config.set_agent_config(agent_config)
return config

View File

@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -55,6 +56,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_agent_config_for_eval,
)
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -76,6 +77,8 @@ def get_config(
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config = update_agent_config_for_eval(agent_config)
agent_config.enable_prompt_extensions = False
return config

View File

@@ -160,6 +160,26 @@ def cleanup():
process.join()
def update_agent_config_for_eval(
    agent_config: AgentConfig | None = None,
) -> AgentConfig:
    """Return the agent config that an evaluation run should use.

    Args:
        agent_config: The config to start from. When it is None, a default
            ``AgentConfig()`` is constructed instead.

    Returns:
        The config object that was passed in, or a newly created default
        config when none was given.
    """
    # Repository memory is deliberately NOT disabled here. This function
    # currently applies no overrides; it is the place to add other
    # evaluation-specific settings.
    return AgentConfig() if agent_config is None else agent_config
def make_metadata(
llm_config: LLMConfig,
dataset_name: str,
@@ -172,6 +192,8 @@ def make_metadata(
agent_config: AgentConfig | None = None,
condenser_config: CondenserConfig | None = None,
) -> EvalMetadata:
# Update agent config with evaluation-specific settings
agent_config = update_agent_config_for_eval(agent_config)
model_name = llm_config.model.split('/')[-1]
model_path = model_name.replace(':', '_').replace('@', '-')
eval_note = f'_N_{eval_note}' if eval_note else ''