Add sysbox support to remote runtime for eval; Add memory monitor, stress tests to help debug memory issue (#6684)

Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Graham Neubig <neubig@gmail.com>
2026-01-10 07:18:10 -05:00 · 2025-02-18 15:02:28 -05:00
parent 8d097efb4f
commit 1a7003a705
35 changed files with 687 additions and 419 deletions
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel
 from tqdm import tqdm

 from openhands.controller.state.state import State
-from openhands.core.config import LLMConfig
+from openhands.core.config import LLMConfig, SandboxConfig
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.condenser_config import (
    CondenserConfig,
@@ -555,3 +555,18 @@ def get_metrics(state: State) -> dict[str, Any]:
    metrics = state.metrics.get() if state.metrics else {}
    metrics['condenser'] = get_condensation_metadata(state)
    return metrics
+
+
+def get_default_sandbox_config_for_eval() -> SandboxConfig:  # build the default SandboxConfig used for evaluation runs
+    return SandboxConfig(
+        use_host_network=False,
+        # Generous timeout: some test cases take very long to run (unit presumably seconds -- TODO confirm).
+        timeout=300,
+        api_key=os.environ.get('ALLHANDS_API_KEY', None),  # None when the variable is unset
+        remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),  # None when the variable is unset
+        keep_runtime_alive=False,
+        remote_runtime_init_timeout=3600,  # NOTE(review): unit presumably matches `timeout` above -- confirm
+        remote_runtime_api_timeout=120,
+        remote_runtime_enable_retries=True,
+        remote_runtime_class='sysbox',  # selects the sysbox runtime class this change adds for eval
+    )