Add sysbox support to remote runtime for eval; Add memory monitor, stress tests to help debug memory issue (#6684)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
Xingyao Wang
2025-02-18 15:02:28 -05:00
committed by GitHub
parent 8d097efb4f
commit 1a7003a705
35 changed files with 687 additions and 419 deletions

View File

@@ -10,6 +10,7 @@ from evaluation.utils.shared import (
EvalOutput,
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -18,7 +19,6 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
SandboxConfig,
get_llm_config_arg,
get_parser,
)
@@ -41,17 +41,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
def get_config(
metadata: EvalMetadata,
) -> AppConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,