mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
1 Commits
debug-logg
...
xw/evaluat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68ca0b6a9b |
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -74,6 +75,7 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -55,6 +56,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -61,6 +62,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
|
||||
# copy 'draft_editor' config if exists
|
||||
|
||||
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -72,6 +73,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -86,6 +87,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -50,6 +51,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
@@ -135,6 +136,7 @@ def get_config(
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=False,
|
||||
)
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -76,6 +77,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
agent_config = AgentConfig(
|
||||
function_calling=False,
|
||||
|
||||
@@ -18,6 +18,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -66,6 +67,8 @@ def get_config(
|
||||
else:
|
||||
logger.info('Agent config not provided, using default settings')
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -54,6 +55,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -75,6 +76,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -96,6 +97,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -63,6 +64,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -121,6 +122,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -91,6 +92,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
@@ -231,6 +232,7 @@ def get_config(
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
@@ -164,6 +165,7 @@ def get_config(
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
@@ -13,7 +13,10 @@ from typing import List
|
||||
import yaml
|
||||
from browsing import pre_login
|
||||
|
||||
from evaluation.utils.shared import get_default_sandbox_config_for_eval
|
||||
from evaluation.utils.shared import (
|
||||
get_default_sandbox_config_for_eval,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
@@ -58,12 +61,14 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(llm_config)
|
||||
if agent_config:
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
config.set_agent_config(agent_config)
|
||||
else:
|
||||
logger.info('Agent config not provided, using default settings')
|
||||
agent_config = AgentConfig(
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -55,6 +56,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_agent_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
@@ -76,6 +77,8 @@ def get_config(
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -160,6 +160,26 @@ def cleanup():
|
||||
process.join()
|
||||
|
||||
|
||||
def update_agent_config_for_eval(
|
||||
agent_config: AgentConfig | None = None,
|
||||
) -> AgentConfig:
|
||||
"""Update agent config with evaluation-specific settings.
|
||||
|
||||
Args:
|
||||
agent_config: The agent config to update. If None, a new AgentConfig will be created.
|
||||
|
||||
Returns:
|
||||
The updated agent config.
|
||||
"""
|
||||
if agent_config is None:
|
||||
agent_config = AgentConfig()
|
||||
|
||||
# Note: We're not disabling repository memory here as requested
|
||||
# This function can be used for other evaluation-specific settings
|
||||
|
||||
return agent_config
|
||||
|
||||
|
||||
def make_metadata(
|
||||
llm_config: LLMConfig,
|
||||
dataset_name: str,
|
||||
@@ -172,6 +192,8 @@ def make_metadata(
|
||||
agent_config: AgentConfig | None = None,
|
||||
condenser_config: CondenserConfig | None = None,
|
||||
) -> EvalMetadata:
|
||||
# Update agent config with evaluation-specific settings
|
||||
agent_config = update_agent_config_for_eval(agent_config)
|
||||
model_name = llm_config.model.split('/')[-1]
|
||||
model_path = model_name.replace(':', '_').replace('@', '-')
|
||||
eval_note = f'_N_{eval_note}' if eval_note else ''
|
||||
|
||||
Reference in New Issue
Block a user