mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f5d86e8132 | |||
| d615fe26c0 | |||
| 01f28f6269 |
@@ -257,5 +257,3 @@ containers/runtime/code
|
||||
|
||||
# test results
|
||||
test-results
|
||||
|
||||
.eval_sessions
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
exclude: ^(docs/|modules/|python/|openhands-ui/|third_party/)
|
||||
- id: end-of-file-fixer
|
||||
exclude: ^(docs/|modules/|python/|openhands-ui/|third_party/)
|
||||
- id: check-yaml
|
||||
args: ["--allow-multiple-documents"]
|
||||
- id: debug-statements
|
||||
|
||||
- repo: https://github.com/tox-dev/pyproject-fmt
|
||||
rev: v2.5.1
|
||||
hooks:
|
||||
- id: pyproject-fmt
|
||||
- repo: https://github.com/abravalheri/validate-pyproject
|
||||
rev: v0.24.1
|
||||
hooks:
|
||||
- id: validate-pyproject
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.11.8
|
||||
hooks:
|
||||
# Run the linter.
|
||||
- id: ruff
|
||||
entry: ruff check --config dev_config/python/ruff.toml
|
||||
types_or: [python, pyi, jupyter]
|
||||
args: [--fix, --unsafe-fixes]
|
||||
exclude: third_party/
|
||||
# Run the formatter.
|
||||
- id: ruff-format
|
||||
entry: ruff format --config dev_config/python/ruff.toml
|
||||
types_or: [python, pyi, jupyter]
|
||||
exclude: third_party/
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.15.0
|
||||
hooks:
|
||||
- id: mypy
|
||||
additional_dependencies:
|
||||
[types-requests, types-setuptools, types-pyyaml, types-toml, types-docker, types-Markdown, pydantic, lxml]
|
||||
# To see gaps add `--html-report mypy-report/`
|
||||
entry: mypy --config-file dev_config/python/mypy.ini openhands/
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
@@ -9,8 +9,8 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -61,15 +61,18 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
# Create config with EDA-specific container image
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
# Override the container image for EDA
|
||||
config.sandbox.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
|
||||
@@ -17,8 +17,8 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -41,12 +41,19 @@ from openhands.utils.async_utils import call_async_from_sync
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
# Create config with agent_bench-specific container image
|
||||
config = get_openhands_config_for_eval(metadata=metadata)
|
||||
|
||||
# Override the container image for agent_bench
|
||||
config.sandbox.base_container_image = 'python:3.12-slim'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-slim'
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
|
||||
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -51,10 +50,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.11-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -62,10 +61,15 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -19,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -76,10 +75,15 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -41,8 +40,14 @@ def get_config(
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata, runtime='docker', sandbox_config=sandbox_config
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -115,11 +114,16 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -66,10 +65,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -61,10 +60,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'nikolaik/python-nodejs:python3.12-nodejs22'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
if metadata.agent_config:
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -44,10 +43,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -31,7 +31,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -65,10 +64,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -24,7 +24,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -86,10 +85,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -16,7 +16,6 @@ import ruamel.yaml
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
@@ -38,10 +37,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -49,10 +48,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -53,10 +52,15 @@ def get_config(
|
||||
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -14,7 +14,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -59,10 +58,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -111,10 +110,15 @@ def get_config(
|
||||
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -27,7 +27,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -81,10 +80,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -88,9 +87,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -342,11 +341,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -31,7 +31,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -175,10 +174,15 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -65,10 +64,16 @@ def get_config(
|
||||
sandbox_config.base_container_image = (
|
||||
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_budget_per_task=4,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -19,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -84,9 +83,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -32,7 +32,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -228,11 +227,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -200,11 +199,16 @@ def get_config(
|
||||
'REPO_PATH': f'/workspace/{workspace_dir_name}/',
|
||||
}
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -37,7 +37,6 @@ from evaluation.benchmarks.testgeneval.utils import load_testgeneval_dataset
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -59,21 +58,20 @@ def get_config(instance: pd.Series) -> OpenHandsConfig:
|
||||
f'Invalid container image for instance {instance["instance_id_swebench"]}.'
|
||||
)
|
||||
logger.info(f'Using instance container image: {base_container_image}.')
|
||||
|
||||
# Create custom sandbox config for testgeneval with specific requirements
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800, # Longer timeout than default (300)
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
return OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
return get_openhands_config_for_eval(
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'), # Different default runtime
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -127,26 +126,29 @@ def get_config(
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -12,10 +12,7 @@ import tempfile
|
||||
import yaml
|
||||
from browsing import pre_login
|
||||
|
||||
from evaluation.utils.shared import (
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
)
|
||||
from evaluation.utils.shared import get_default_sandbox_config_for_eval
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
LLMConfig,
|
||||
@@ -45,17 +42,19 @@ def get_config(
|
||||
sandbox_config.enable_auto_lint = True
|
||||
# If the web services are running on the host machine, this must be set to True
|
||||
sandbox_config.use_host_network = True
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
max_budget_per_task=4,
|
||||
max_iterations=100,
|
||||
save_trajectory_path=os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# we mount trajectories path so that trajectories, generated by OpenHands
|
||||
# controller, can be accessible to the evaluator file in the runtime container
|
||||
sandbox_config=sandbox_config,
|
||||
workspace_mount_path=mount_path_on_host,
|
||||
workspace_mount_path_in_sandbox='/outputs',
|
||||
)
|
||||
config.save_trajectory_path = os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
)
|
||||
config.max_budget_per_task = 4
|
||||
config.set_llm_config(llm_config)
|
||||
if agent_config:
|
||||
config.set_agent_config(agent_config)
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -45,10 +44,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -161,11 +160,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -74,10 +73,16 @@ def get_config(
|
||||
'VWA_WIKIPEDIA': f'{base_url}:8888',
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
attach_to_existing=True,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -66,10 +65,15 @@ def get_config(
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -46,12 +45,18 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
# debug
|
||||
debug=True,
|
||||
)
|
||||
config.debug = True
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
metadata.llm_config, metadata.eval_output_dir, instance_id
|
||||
|
||||
@@ -703,79 +703,3 @@ def get_default_sandbox_config_for_eval() -> SandboxConfig:
|
||||
remote_runtime_enable_retries=True,
|
||||
remote_runtime_class='sysbox',
|
||||
)
|
||||
|
||||
|
||||
def get_openhands_config_for_eval(
    metadata: EvalMetadata | None = None,
    sandbox_config: SandboxConfig | None = None,
    runtime: str | None = None,
    max_iterations: int | None = None,
    default_agent: str | None = None,
    enable_browser: bool = False,
    workspace_base: str | None = None,
    workspace_mount_path: str | None = None,
):
    """Build the OpenHandsConfig shared by the evaluation entry points.

    Explicit arguments always win. Values left as ``None`` are filled in from
    ``metadata`` (when given) and then from fixed fallbacks, so callers only
    need to pass what differs from the common evaluation setup.

    Args:
        metadata: Evaluation metadata; supplies ``max_iterations`` and
            ``agent_class`` when the matching argument is ``None``.
        sandbox_config: Sandbox settings. When ``None``, the result of
            ``get_default_sandbox_config_for_eval()`` is used.
        runtime: Runtime name. When ``None``, the ``RUNTIME`` environment
            variable is used, falling back to ``'docker'``.
        max_iterations: Iteration cap. Falls back to
            ``metadata.max_iterations`` and finally to ``50``.
        default_agent: Agent class name. Falls back to
            ``metadata.agent_class`` and finally to ``'CodeActAgent'``.
        enable_browser: Forwarded unchanged to the config.
        workspace_base: Forwarded unchanged to the config.
        workspace_mount_path: Forwarded unchanged to the config.

    Returns:
        An OpenHandsConfig with ``run_as_openhands=False`` and a local file
        store rooted at ``.eval_sessions`` under the current working directory.
    """
    # Imported lazily: a module-level import would be circular.
    from openhands.core.config.openhands_config import (
        OpenHandsConfig as _OHConfig,  # type: ignore
    )

    sandbox = (
        get_default_sandbox_config_for_eval()
        if sandbox_config is None
        else sandbox_config
    )

    # Metadata only fills the gaps the caller left open.
    iteration_cap = max_iterations
    agent_name = default_agent
    if metadata is not None:
        if iteration_cap is None:
            iteration_cap = metadata.max_iterations
        if agent_name is None:
            agent_name = metadata.agent_class

    runtime_name = os.environ.get('RUNTIME', 'docker') if runtime is None else runtime

    # Last-resort values when neither the caller nor the metadata supplied one.
    if agent_name is None:
        agent_name = 'CodeActAgent'
    if iteration_cap is None:
        iteration_cap = 50

    # Absolute path, resolved against the directory the process was started in.
    session_store = os.path.abspath(os.path.join(os.getcwd(), '.eval_sessions'))

    return _OHConfig(
        default_agent=agent_name,
        run_as_openhands=False,
        runtime=runtime_name,
        max_iterations=iteration_cap,
        enable_browser=enable_browser,
        sandbox=sandbox,
        workspace_base=workspace_base,
        workspace_mount_path=workspace_mount_path,
        file_store='local',
        file_store_path=session_store,
    )
|
||||
|
||||
@@ -275,7 +275,7 @@ async def run_session(
|
||||
|
||||
if event.agent_state == AgentState.RUNNING:
|
||||
display_agent_running_message()
|
||||
start_pause_listener(loop, is_paused, event_stream)
|
||||
start_pause_listener(loop, is_paused, event_stream, config)
|
||||
|
||||
def on_event(event: Event) -> None:
|
||||
loop.create_task(on_event_async(event))
|
||||
|
||||
+181
-8
@@ -87,6 +87,9 @@ COMMANDS = {
|
||||
|
||||
print_lock = threading.Lock()
|
||||
|
||||
# Lock to debounce sending Ctrl+C interrupts to the running command
|
||||
_interrupt_lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
pause_task: asyncio.Task | None = None # No more than one pause task
|
||||
|
||||
|
||||
@@ -659,6 +662,15 @@ def display_help() -> None:
|
||||
commands_html += f'<gold><b>{command}</b></gold> - <grey>{description}</grey>\n'
|
||||
print_formatted_text(HTML(commands_html))
|
||||
|
||||
# Keyboard shortcuts section
|
||||
print_formatted_text(HTML('\nKeyboard shortcuts:'))
|
||||
shortcuts_html = (
|
||||
'<gold><b>Ctrl+P</b></gold> - <grey>Pause the agent</grey>\n'
|
||||
'<gold><b>Ctrl+C</b></gold> - <grey>Pause the agent; press twice quickly to interrupt a running command</grey>\n'
|
||||
'<gold><b>Ctrl+D</b></gold> - <grey>Pause the agent</grey>\n'
|
||||
)
|
||||
print_formatted_text(HTML(shortcuts_html))
|
||||
|
||||
# Footer
|
||||
print_formatted_text(
|
||||
HTML(
|
||||
@@ -864,12 +876,13 @@ async def read_confirmation_input(config: OpenHandsConfig) -> str:
|
||||
def start_pause_listener(
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
done_event: asyncio.Event,
|
||||
event_stream,
|
||||
event_stream: EventStream,
|
||||
config: OpenHandsConfig,
|
||||
) -> None:
|
||||
global pause_task
|
||||
if pause_task is None or pause_task.done():
|
||||
pause_task = loop.create_task(
|
||||
process_agent_pause(done_event, event_stream)
|
||||
process_agent_pause(done_event, event_stream, config)
|
||||
) # Create a task to track agent pause requests from the user
|
||||
|
||||
|
||||
@@ -883,16 +896,135 @@ async def stop_pause_listener() -> None:
|
||||
pause_task = None
|
||||
|
||||
|
||||
async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) -> None:
|
||||
def is_command_running(event_stream: EventStream) -> bool:
    """Report whether the most recent shell command appears to still be executing.

    The event stream is searched newest-first, restricted to CmdRunAction and
    CmdOutputObservation events and with ``limit=50`` passed to the search.
    The first decisive event settles the answer:

    - a CmdOutputObservation means "running" only when its ``exit_code`` is -1
    - a CmdRunAction that is not terminal input means a command was started
    - a CmdRunAction flagged ``is_input`` is skipped and the search continues

    Returns:
        True when a command looks like it is still running; False otherwise,
        including when detection itself fails.
    """
    try:
        from openhands.events.event_filter import EventFilter

        recent_events = event_stream.search_events(
            reverse=True,
            filter=EventFilter(include_types=(CmdRunAction, CmdOutputObservation)),
            limit=50,
        )
        for event in recent_events:
            if isinstance(event, CmdOutputObservation):
                return event.metadata.exit_code == -1
            if isinstance(event, CmdRunAction) and not event.is_input:
                return True
    except Exception:
        # Detection is best-effort: on any failure report "nothing running".
        return False
    return False
|
||||
|
||||
|
||||
async def _handle_command_interrupt(
    event_stream: EventStream, config: OpenHandsConfig
) -> bool:
    """Ask the user what to do about a running command and act on the answer.

    The menu offers three choices: pause the agent (default), keep waiting,
    or send Ctrl+C to the running command.

    Args:
        event_stream: Stream that receives the ``C-c`` input action when the
            user chooses to interrupt the command.
        config: Passed to ``cli_confirm`` so the prompt honors CLI settings.

    Returns:
        bool: True if the request was fully handled here (interrupt sent,
        interrupt debounced, or user chose to keep waiting); False if the
        user wants the agent paused, which the caller must do.
    """
    print_formatted_text('')
    print_formatted_text(HTML('<gold>Command is currently running.</gold>'))
    print_formatted_text('')

    # Keep legacy behavior: single Ctrl+C pauses by default. Offer kill as opt-in.
    choices = [
        'Pause the agent (default)',
        'Continue waiting for command to complete',
        'Send interrupt to running command (Ctrl+C)',
    ]

    # cli_confirm blocks, so run it in a worker thread instead of on the loop.
    selection = await asyncio.to_thread(
        cli_confirm, config, 'What would you like to do?', choices, 0
    )

    if selection == 2:  # Send interrupt to the running command
        print_formatted_text('')
        # Debounce before announcing anything, so the user never sees
        # "Sending interrupt..." for a request that is then dropped.
        if _interrupt_lock.locked():
            print_formatted_text(HTML('<grey>Interrupt already sent; waiting…</grey>'))
            return True
        async with _interrupt_lock:
            print_formatted_text(
                HTML('<gold>Sending interrupt signal to running command...</gold>')
            )
            event_stream.add_event(
                CmdRunAction(command='C-c', is_input=True),
                EventSource.USER,
            )
        return True

    if selection == 1:  # Continue waiting
        print_formatted_text('')
        print_formatted_text(
            HTML('<gold>Continuing to wait for command completion...</gold>')
        )
        return True

    # selection == 0: the caller pauses the agent.
    return False
|
||||
|
||||
|
||||
async def _handle_interrupt_async(
    event_stream: EventStream, done: asyncio.Event, config: OpenHandsConfig
) -> None:
    """Handle the interrupt asynchronously to avoid blocking the input handler.

    Runs the interactive interrupt menu. If the user chooses to pause, or if
    anything in the menu flow raises, the agent is paused and ``done`` is set.

    Args:
        event_stream: Stream that receives the PAUSED state change.
        done: Event set once a pause has been requested.
        config: Forwarded to the interrupt menu.
    """
    from html import escape

    def pause_agent() -> None:
        # Single definition of the pause sequence used by both paths below.
        print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
        event_stream.add_event(
            ChangeAgentStateAction(AgentState.PAUSED),
            EventSource.USER,
        )
        done.set()

    try:
        handled = await _handle_command_interrupt(event_stream, config)
        if not handled:
            # User chose to pause the agent
            print_formatted_text('')
            pause_agent()
    except Exception as e:
        # If something goes wrong, fall back to pausing the agent
        print_formatted_text('')
        # The message is embedded in markup, so escape it: a raw '<' or '&'
        # in the exception text would make the HTML invalid and could raise
        # here, before the fallback pause is reached.
        print_formatted_text(
            HTML(f'<ansired>Error handling interrupt: {escape(str(e))}</ansired>')
        )
        pause_agent()
|
||||
|
||||
|
||||
async def process_agent_pause(
|
||||
done: asyncio.Event, event_stream: EventStream, config: OpenHandsConfig
|
||||
) -> None:
|
||||
input = create_input()
|
||||
|
||||
# Double-press detection window for Ctrl+C to send interrupt to running command
|
||||
CTRL_C_WINDOW_SECONDS = 0.4
|
||||
ctrl_c_timer: asyncio.Task | None = None
|
||||
|
||||
async def pause_after_delay(delay: float) -> None:
|
||||
try:
|
||||
await asyncio.sleep(delay)
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
except asyncio.CancelledError:
|
||||
# Timer canceled because a second Ctrl+C was detected; do nothing
|
||||
pass
|
||||
|
||||
def keys_ready() -> None:
|
||||
nonlocal ctrl_c_timer
|
||||
for key_press in input.read_keys():
|
||||
if (
|
||||
key_press.key == Keys.ControlP
|
||||
or key_press.key == Keys.ControlC
|
||||
or key_press.key == Keys.ControlD
|
||||
):
|
||||
if key_press.key == Keys.ControlP or key_press.key == Keys.ControlD:
|
||||
# Immediate pause
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
@@ -900,6 +1032,47 @@ async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) ->
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
elif key_press.key == Keys.ControlC:
|
||||
if is_command_running(event_stream):
|
||||
# If a timer is already running, this is a double-press: send interrupt
|
||||
if ctrl_c_timer and not ctrl_c_timer.done():
|
||||
ctrl_c_timer.cancel()
|
||||
ctrl_c_timer = None
|
||||
if _interrupt_lock.locked():
|
||||
print_formatted_text(
|
||||
HTML('<grey>Interrupt already sent; waiting…</grey>')
|
||||
)
|
||||
continue
|
||||
|
||||
# Send Ctrl+C to the running command
|
||||
async def send_interrupt() -> None:
|
||||
async with _interrupt_lock:
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML(
|
||||
'<gold>Sending interrupt signal to running command...</gold>'
|
||||
)
|
||||
)
|
||||
event_stream.add_event(
|
||||
CmdRunAction(command='C-c', is_input=True),
|
||||
EventSource.USER,
|
||||
)
|
||||
|
||||
asyncio.create_task(send_interrupt())
|
||||
else:
|
||||
# Start a short window; if no second press, pause
|
||||
ctrl_c_timer = asyncio.create_task(
|
||||
pause_after_delay(CTRL_C_WINDOW_SECONDS)
|
||||
)
|
||||
else:
|
||||
# No command running: default immediate pause
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
|
||||
try:
|
||||
with input.raw_mode():
|
||||
|
||||
@@ -159,9 +159,6 @@ class CLIRuntime(Runtime):
|
||||
self._is_windows = sys.platform == 'win32'
|
||||
self._powershell_session: WindowsPowershellSession | None = None
|
||||
|
||||
# Track git wrapper bin dir for use in subprocess env
|
||||
self._git_wrapper_bin_dir = os.path.expanduser('~/.openhands/bin')
|
||||
|
||||
logger.warning(
|
||||
'Initializing CLIRuntime. WARNING: NO SANDBOX IS USED. '
|
||||
'This runtime executes commands directly on the local system. '
|
||||
@@ -220,106 +217,6 @@ class CLIRuntime(Runtime):
|
||||
# We don't use self.run() here because this method is called
|
||||
# during initialization before self._runtime_initialized is True.
|
||||
|
||||
def setup_initial_env(self) -> None:
|
||||
"""Override to add git wrapper setup for CLIRuntime."""
|
||||
super().setup_initial_env()
|
||||
|
||||
# Always enable git co-authorship in CLI runtime
|
||||
self._setup_git_wrapper()
|
||||
# As a fallback for commit invocations that don't use -m/--message
|
||||
# ensure a global prepare-commit-msg hook is configured so co-authorship
|
||||
# is still added (parity with Docker runtime behavior in tests).
|
||||
try:
|
||||
hooks_root = os.path.expanduser('~/.openhands/git-hooks')
|
||||
hooks_dir = os.path.join(hooks_root, 'hooks')
|
||||
os.makedirs(hooks_dir, exist_ok=True)
|
||||
|
||||
hook_src = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
|
||||
'utils',
|
||||
'git_hooks',
|
||||
'prepare-commit-msg',
|
||||
)
|
||||
hook_dest = os.path.join(hooks_dir, 'prepare-commit-msg')
|
||||
if os.path.exists(hook_src):
|
||||
shutil.copyfile(hook_src, hook_dest)
|
||||
os.chmod(hook_dest, 0o755)
|
||||
# Configure global hooks path and template dir so newly inited repos pick it up
|
||||
subprocess.run(
|
||||
['git', 'config', '--global', 'core.hooksPath', hooks_dir],
|
||||
check=False,
|
||||
)
|
||||
subprocess.run(
|
||||
['git', 'config', '--global', 'init.templateDir', hooks_root],
|
||||
check=False,
|
||||
)
|
||||
logger.info(
|
||||
f'[CLIRuntime] Configured global git hooks at {hooks_dir} for co-authorship'
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f'[CLIRuntime] Failed to configure global git hook: {e}')
|
||||
|
||||
def _setup_git_wrapper(self) -> None:
    """Set up git wrapper to automatically add co-authorship.

    Copies ``utils/git_wrapper.sh`` to ``~/.openhands/bin/git``, rewriting
    every ``command git`` in it to the absolute path of the real git
    executable, and puts that directory first on this process's ``PATH``.

    Failures are logged as warnings and never raised: the wrapper is an
    optional convenience.
    """
    try:
        import shutil

        # Path to our git wrapper script
        git_wrapper_source = os.path.join(
            os.path.dirname(
                os.path.dirname(os.path.dirname(__file__))
            ),  # openhands/runtime/
            'utils',
            'git_wrapper.sh',
        )

        if not os.path.exists(git_wrapper_source):
            logger.warning(
                f'[CLIRuntime] Git wrapper not found at {git_wrapper_source}'
            )
            return

        # Directory in the user's home that holds our git wrapper
        bin_dir = os.path.expanduser('~/.openhands/bin')

        # Look for the real git on PATH *without* the wrapper directory.
        # Otherwise a repeated setup would resolve "git" to the wrapper
        # itself and the wrapper would call itself forever.
        current_path = os.environ.get('PATH', '')
        other_dirs = (
            [d for d in current_path.split(os.pathsep) if d != bin_dir]
            if current_path
            else []
        )
        real_git_path = shutil.which('git', path=os.pathsep.join(other_dirs))
        if not real_git_path:
            logger.warning('[CLIRuntime] Could not find git executable')
            return

        os.makedirs(bin_dir, exist_ok=True)

        # Create a modified wrapper that calls the real git with full path
        git_wrapper_dest = os.path.join(bin_dir, 'git')
        with open(git_wrapper_source, 'r', encoding='utf-8') as src:
            wrapper_content = src.read()

        # Replace 'command git' with the full path to avoid recursion
        wrapper_content = wrapper_content.replace(
            'command git', f'"{real_git_path}"'
        )

        with open(git_wrapper_dest, 'w', encoding='utf-8') as dest:
            dest.write(wrapper_content)

        os.chmod(git_wrapper_dest, 0o755)

        # Put the wrapper directory first on PATH exactly once, so chained
        # commands like "cd dir && git commit" also find the wrapper.
        os.environ['PATH'] = os.pathsep.join([bin_dir, *other_dirs])

        logger.info(
            f'[CLIRuntime] Set up OpenHands git wrapper at {git_wrapper_dest} for co-authorship'
        )

    except Exception as e:
        logger.warning(f'[CLIRuntime] Failed to set up git wrapper: {e}')
|
||||
|
||||
def _safe_terminate_process(self, process_obj, signal_to_send=signal.SIGTERM):
|
||||
"""Safely attempts to terminate/kill a process group or a single process.
|
||||
|
||||
@@ -440,13 +337,6 @@ class CLIRuntime(Runtime):
|
||||
timed_out = False
|
||||
start_time = time.monotonic()
|
||||
|
||||
# Ensure our git wrapper bin dir is first in PATH for the subprocess
|
||||
env = os.environ.copy()
|
||||
bin_dir = getattr(
|
||||
self, '_git_wrapper_bin_dir', os.path.expanduser('~/.openhands/bin')
|
||||
)
|
||||
env['PATH'] = f'{bin_dir}:{env.get("PATH", "")}'
|
||||
|
||||
# Use shell=True to run complex bash commands
|
||||
process = subprocess.Popen(
|
||||
['bash', '-c', command],
|
||||
@@ -456,10 +346,9 @@ class CLIRuntime(Runtime):
|
||||
bufsize=1, # Explicitly line-buffered for text mode
|
||||
universal_newlines=True,
|
||||
start_new_session=True,
|
||||
env=env,
|
||||
)
|
||||
logger.debug(
|
||||
f'[_execute_shell_command] PID of bash -c: {process.pid} for command: "{command}" with PATH={env.get("PATH")}'
|
||||
f'[_execute_shell_command] PID of bash -c: {process.pid} for command: "{command}"'
|
||||
)
|
||||
|
||||
exit_code = None
|
||||
@@ -569,20 +458,15 @@ class CLIRuntime(Runtime):
|
||||
f'Running command in CLIRuntime: "{action.command}" with effective timeout: {effective_timeout}s'
|
||||
)
|
||||
|
||||
# Use the command as-is since git alias is set up
|
||||
command_to_execute = action.command
|
||||
|
||||
# Use PowerShell on Windows if available, otherwise use subprocess
|
||||
if self._is_windows and self._powershell_session is not None:
|
||||
result = self._execute_powershell_command(
|
||||
command_to_execute, timeout=effective_timeout
|
||||
return self._execute_powershell_command(
|
||||
action.command, timeout=effective_timeout
|
||||
)
|
||||
else:
|
||||
result = self._execute_shell_command(
|
||||
command_to_execute, timeout=effective_timeout
|
||||
return self._execute_shell_command(
|
||||
action.command, timeout=effective_timeout
|
||||
)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f'Error in CLIRuntime.run for command "{action.command}": {str(e)}'
|
||||
|
||||
@@ -194,9 +194,8 @@ class BashSession:
|
||||
self.server = libtmux.Server()
|
||||
_shell_command = '/bin/bash'
|
||||
if self.username in ['root', 'openhands']:
|
||||
# Start a login shell for the given user without running an interactive login prompt
|
||||
# Use 'su -c' to run bash and ensure we start inside the project's working dir (self.work_dir).
|
||||
_shell_command = f"su {self.username} -c 'cd {self.work_dir} && /bin/bash'"
|
||||
# This starts a non-login (new) shell for the given user
|
||||
_shell_command = f'su {self.username} -'
|
||||
|
||||
# FIXME: we will introduce memory limit using sysbox-runc in coming PR
|
||||
# # otherwise, we are running as the CURRENT USER (e.g., when running LocalRuntime)
|
||||
@@ -417,7 +416,7 @@ class BashSession:
|
||||
)
|
||||
metadata = CmdOutputMetadata() # No metadata available
|
||||
metadata.suffix = (
|
||||
f'\n[The command timed out after {float(timeout):.1f} seconds. '
|
||||
f'\n[The command timed out after {timeout} seconds. '
|
||||
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
|
||||
@@ -39,15 +39,9 @@ def get_action_execution_server_startup_command(
|
||||
username = override_username or (
|
||||
'openhands' if app_config.run_as_openhands else 'root'
|
||||
)
|
||||
if app_config.run_as_openhands:
|
||||
resolved_uid = (
|
||||
override_user_id if override_user_id is not None else sandbox_config.user_id
|
||||
)
|
||||
# Avoid passing UID 0 for the non-root 'openhands' user inside containers
|
||||
# Fall back to 1000 when resolved UID is 0 or None
|
||||
user_id = resolved_uid if resolved_uid not in (None, 0) else 1000
|
||||
else:
|
||||
user_id = 0
|
||||
user_id = override_user_id or (
|
||||
sandbox_config.user_id if app_config.run_as_openhands else 0
|
||||
)
|
||||
|
||||
base_cmd = [
|
||||
*python_prefix,
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# OpenHands Git Hooks
|
||||
|
||||
This directory contains git hooks that are automatically installed in the OpenHands runtime environment.
|
||||
|
||||
## prepare-commit-msg
|
||||
|
||||
This hook serves as a fallback mechanism to ensure that OpenHands contributions are properly attributed. It automatically adds `Co-authored-by: openhands <openhands@all-hands.dev>` to commit messages when the co-authorship line is not already present (case-insensitive check).
|
||||
|
||||
### Behavior
|
||||
|
||||
- **Primary workflow**: The OpenHands agent should manually add co-authorship lines to commit messages as instructed in the system prompt
|
||||
- **Fallback**: If the agent forgets to add the co-authorship line, this hook will automatically add it
|
||||
- **No-op**: If the co-authorship line is already present (in any case variation), the hook does nothing
|
||||
|
||||
### Installation
|
||||
|
||||
#### Docker Runtime
|
||||
|
||||
The hook is automatically installed during Docker runtime build via the `Dockerfile.j2` template:
|
||||
|
||||
1. Copied from `/openhands/code/openhands/runtime/utils/git_hooks/` to `/openhands/git-hooks/hooks/`
|
||||
2. Made executable with `chmod +x`
|
||||
3. Configured globally via `git config --global core.hooksPath /openhands/git-hooks/hooks`
|
||||
4. Set as template for new repositories via `git config --global init.templateDir /openhands/git-hooks`
|
||||
|
||||
This ensures the hook works for both existing repositories and newly created ones.
|
||||
|
||||
#### CLI Runtime
|
||||
|
||||
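For the CLI runtime, git co-authorship is always enabled automatically. A git wrapper script is installed as `~/.openhands/bin/git` and placed first on `PATH`; it intercepts `git commit` commands that use `-m`/`--message` and appends the co-authorship line when it is missing. The wrapper itself does not install hooks in the user's repositories. As a fallback for commits that do not use `-m`/`--message`, the CLI runtime also attempts to copy this `prepare-commit-msg` hook to `~/.openhands/git-hooks/hooks/` and to point the global `core.hooksPath` and `init.templateDir` git settings at it, which does modify the user's global git configuration.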
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# OpenHands Git Hook: prepare-commit-msg
|
||||
# This hook automatically adds "Co-authored-by: openhands <openhands@all-hands.dev>"
|
||||
# to commit messages if it's not already present. This serves as a fallback when
|
||||
# the agent doesn't manually add the co-authorship line.
|
||||
|
||||
COMMIT_MSG_FILE=$1
|
||||
|
||||
# Check if co-authorship line already exists (case-insensitive)
|
||||
if ! grep -qi "co-authored-by.*openhands.*<openhands@all-hands.dev>" "$COMMIT_MSG_FILE"; then
|
||||
# Add two empty lines and co-authorship line
|
||||
echo "" >> "$COMMIT_MSG_FILE"
|
||||
echo "" >> "$COMMIT_MSG_FILE"
|
||||
echo "Co-authored-by: openhands <openhands@all-hands.dev>" >> "$COMMIT_MSG_FILE"
|
||||
fi
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Git wrapper script that automatically adds co-authorship to commit messages
|
||||
# This script intercepts git commit commands and adds "Co-authored-by: openhands <openhands@all-hands.dev>"
|
||||
# if it's not already present in the commit message.
|
||||
|
||||
# Function to add co-authorship to a commit message
|
||||
add_coauthorship() {
|
||||
local commit_msg_file="$1"
|
||||
local coauthor_line="Co-authored-by: openhands <openhands@all-hands.dev>"
|
||||
|
||||
# Check if co-authorship line already exists (case-insensitive)
|
||||
if ! grep -qi "co-authored-by.*openhands" "$commit_msg_file" 2>/dev/null; then
|
||||
# Add two empty lines and the co-authorship line
|
||||
echo "" >> "$commit_msg_file"
|
||||
echo "" >> "$commit_msg_file"
|
||||
echo "$coauthor_line" >> "$commit_msg_file"
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to handle git commit with message.
#
# Collects every message given via `-m MSG`, `--message MSG` or
# `--message=MSG`, joins them as separate paragraphs (as git itself does for
# repeated -m flags), appends the co-authorship trailer, and commits using a
# single `-F <tempfile>` placed where the first message flag was.
#
# NOTE: the git invocations below must keep the literal text `command git`;
# the CLI runtime installs this script by replacing that text with the path
# of the real git executable.
handle_commit_with_message() {
    local temp_msg_file
    temp_msg_file=$(mktemp) || return 1

    local commit_msg=""
    local found_msg=false
    local expect_msg=false
    local new_args=()
    local arg value

    for arg in "$@"; do
        if [ "$expect_msg" = true ]; then
            # This argument is the value of the preceding -m/--message
            value="$arg"
            expect_msg=false
        else
            case "$arg" in
                -m|--message)
                    expect_msg=true
                    continue
                    ;;
                --message=*)
                    value="${arg#--message=}"
                    ;;
                *)
                    new_args+=("$arg")
                    continue
                    ;;
            esac
        fi

        if [ "$found_msg" = true ]; then
            # Repeated message flags become additional paragraphs
            commit_msg+=$'\n\n'"$value"
        else
            commit_msg="$value"
            found_msg=true
            # Replace the first message flag with -F (file); later ones are folded in
            new_args+=("-F" "$temp_msg_file")
        fi
    done

    local exit_code

    if [ "$found_msg" != true ] || [ "$expect_msg" = true ]; then
        # No message could be extracted, or a trailing -m has no value:
        # hand the original arguments to git untouched so it reports any error.
        command git "$@"
        exit_code=$?
        rm -f "$temp_msg_file"
        return $exit_code
    fi

    # Write the commit message to temp file and add co-authorship.
    # printf, not echo: a message such as "-n" must be written verbatim.
    printf '%s\n' "$commit_msg" > "$temp_msg_file"
    add_coauthorship "$temp_msg_file"

    # Execute git with modified arguments
    command git "${new_args[@]}"
    exit_code=$?

    # Clean up temp file
    rm -f "$temp_msg_file"

    return $exit_code
}
|
||||
|
||||
# Main logic
|
||||
if [ "$1" = "commit" ]; then
|
||||
# Check if this is a commit with -m/--message flag
|
||||
if [[ "$*" =~ -m[[:space:]] ]] || [[ "$*" =~ --message[[:space:]] ]] || [[ "$*" =~ -m= ]] || [[ "$*" =~ --message= ]]; then
|
||||
handle_commit_with_message "$@"
|
||||
else
|
||||
# For other commit types (interactive, -F file, etc.), just pass through
|
||||
# The prepare-commit-msg hook would handle these in Docker runtime
|
||||
command git "$@"
|
||||
fi
|
||||
else
|
||||
# For non-commit commands, just pass through to real git
|
||||
command git "$@"
|
||||
fi
|
||||
@@ -1,39 +0,0 @@
|
||||
# Git Wrapper for Co-authorship
|
||||
|
||||
This git wrapper script (`git_wrapper.sh`) provides a non-invasive way to automatically add co-authorship to git commits without modifying the user's git configuration or installing hooks in their repositories.
|
||||
|
||||
## How it works
|
||||
|
||||
The wrapper script intercepts git commit commands and:
|
||||
|
||||
1. **For `git commit -m "message"` commands**: Extracts the commit message, adds co-authorship, and uses a temporary file to commit with the enhanced message.
|
||||
|
||||
2. **For other commit types** (interactive commits, `-F` file-based commits, etc.): Passes the command through to the regular git unchanged. In the Docker runtime these commits are handled by the `prepare-commit-msg` git hook instead.
|
||||
|
||||
## Usage
|
||||
|
||||
The wrapper is automatically set up in CLI runtime.
|
||||
|
||||
When active:
|
||||
- The wrapper script is copied to the workspace as `.openhands_git_wrapper.sh`
|
||||
- Git commands are transparently intercepted and processed
|
||||
- Co-authorship is automatically added: `Co-authored-by: openhands <openhands@all-hands.dev>`
|
||||
|
||||
## Benefits
|
||||
|
||||
- **Non-invasive**: Doesn't modify user's git configuration or repository hooks
|
||||
- **Transparent**: Agent thinks it's running regular git commands
|
||||
- **Automatic**: No manual intervention required
|
||||
- **Safe**: Only affects the current workspace session
|
||||
|
||||
## Example
|
||||
|
||||
```bash
|
||||
# Without wrapper
|
||||
git commit -m "Fix bug"
|
||||
# Results in: "Fix bug"
|
||||
|
||||
# With wrapper enabled
|
||||
git commit -m "Fix bug"
|
||||
# Results in: "Fix bug\n\nCo-authored-by: openhands <openhands@all-hands.dev>"
|
||||
```
|
||||
@@ -1,74 +1,10 @@
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def _configure_git_for_user(username: str, initial_cwd: str) -> None:
    """Configure git for the target user: safe.directory and global hooks/template.

    Installs a ``prepare-commit-msg`` hook under ``/openhands/git-hooks/hooks``
    (copied from the bundled hook when that file exists, otherwise a minimal
    generated fallback) and then writes three global git settings for the
    user: ``safe.directory``, ``core.hooksPath`` and ``init.templateDir``.

    The whole routine is best-effort: problems are logged as warnings and
    never raised, so a git configuration failure cannot abort the caller.

    Args:
        username: Account whose global git config is written. ``root`` maps to
            ``HOME=/root``; any other name maps to ``HOME=/home/<username>``.
        initial_cwd: Directory registered as a git ``safe.directory``.
    """
    hooks_root = '/openhands/git-hooks'
    hooks_dir = os.path.join(hooks_root, 'hooks')
    hook_src = '/openhands/code/openhands/runtime/utils/git_hooks/prepare-commit-msg'
    hook_dest = os.path.join(hooks_dir, 'prepare-commit-msg')

    # Fallback: a minimal prepare-commit-msg hook that adds co-authorship
    # unless the message already contains the trailer.
    fallback_hook = (
        '#!/bin/sh\n'
        'FILE="$1"\n'
        'if ! grep -qi "co-authored-by.*openhands.*<openhands@all-hands.dev>" "$FILE" 2>/dev/null; then\n'
        ' echo "" >> "$FILE"\n'
        ' echo "" >> "$FILE"\n'
        ' echo "Co-authored-by: openhands <openhands@all-hands.dev>" >> "$FILE"\n'
        'fi\n'
    )

    try:
        # Ensure hooks directory exists and has our prepare-commit-msg
        os.makedirs(hooks_dir, exist_ok=True)
        if os.path.exists(hook_src):
            shutil.copyfile(hook_src, hook_dest)
        else:
            with open(hook_dest, 'w', encoding='utf-8') as f:
                f.write(fallback_hook)
        os.chmod(hook_dest, 0o755)

        # `git config --global` writes to $HOME/.gitconfig, so HOME must point
        # at the target user's home rather than the current process's.
        env = dict(os.environ)
        env['HOME'] = '/root' if username == 'root' else f'/home/{username}'

        git_settings = (
            # Avoid dubious ownership errors
            ['--add', 'safe.directory', initial_cwd],
            # Ensure co-authorship hook is enabled for all repos/actions
            ['core.hooksPath', hooks_dir],
            ['init.templateDir', hooks_root],
        )
        for setting in git_settings:
            result = subprocess.run(
                ['git', 'config', '--global', *setting],
                check=False,
                capture_output=True,
                text=True,
                env=env,
            )
            if result.returncode != 0:
                logger.warning(
                    f'git config {" ".join(setting)} failed for user `{username}` '
                    f'(exit code {result.returncode}): {result.stderr.strip()}'
                )
    except Exception as e:
        # Deliberately best-effort, but leave a trace instead of failing silently.
        logger.warning(f'Failed to configure git for user `{username}`: {e}')
|
||||
|
||||
|
||||
def init_user_and_working_directory(
|
||||
username: str, user_id: int, initial_cwd: str
|
||||
) -> int | None:
|
||||
@@ -108,85 +44,77 @@ def init_user_and_working_directory(
|
||||
|
||||
return None
|
||||
|
||||
# Defensive guard: never attempt to create a non-root user with UID 0
|
||||
try:
|
||||
user_id = int(user_id)
|
||||
except Exception:
|
||||
user_id = 1000
|
||||
if username != 'root' and user_id == 0:
|
||||
logger.warning(
|
||||
'Received UID 0 for non-root user; overriding to 1000 to avoid conflict with root'
|
||||
)
|
||||
user_id = 1000
|
||||
|
||||
# if username is CURRENT_USER, then we don't need to do anything
|
||||
# This is specific to the local runtime
|
||||
if username == os.getenv('USER') and username not in ['root', 'openhands']:
|
||||
return None
|
||||
|
||||
# First create the working directory
|
||||
# First create the working directory, independent of the user
|
||||
logger.debug(f'Client working directory: {initial_cwd}')
|
||||
output = subprocess.run(
|
||||
f'umask 002; mkdir -p {initial_cwd}', shell=True, capture_output=True
|
||||
)
|
||||
command = f'umask 002; mkdir -p {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str = output.stdout.decode()
|
||||
logger.debug(f'Ensured working directory exists. Output: [{out_str}]')
|
||||
|
||||
# If running as root user, no need to create another user
|
||||
command = f'chown -R {username}:root {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
|
||||
command = f'chmod g+rw {initial_cwd}'
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
out_str += output.stdout.decode()
|
||||
logger.debug(f'Created working directory. Output: [{out_str}]')
|
||||
|
||||
# Skip root since it is already created
|
||||
if username == 'root':
|
||||
# Make sure directory is group-writable
|
||||
subprocess.run(f'chmod g+rw {initial_cwd}', shell=True, capture_output=True)
|
||||
# Still need to configure git for root user
|
||||
_configure_git_for_user(username, initial_cwd)
|
||||
return None
|
||||
|
||||
# Ensure the user exists before attempting chown
|
||||
# Check if the username already exists
|
||||
existing_user_id = -1
|
||||
try:
|
||||
result = subprocess.run(
|
||||
f'id -u {username}', shell=True, check=True, capture_output=True
|
||||
)
|
||||
existing_user_id = int(result.stdout.decode().strip())
|
||||
if existing_user_id != user_id:
|
||||
|
||||
# The user ID already exists, skip setup
|
||||
if existing_user_id == user_id:
|
||||
logger.debug(
|
||||
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
|
||||
)
|
||||
user_id = existing_user_id
|
||||
return existing_user_id
|
||||
return None
|
||||
except subprocess.CalledProcessError as e:
|
||||
# Returncode 1 indicates, that the user does not exist yet
|
||||
if e.returncode == 1:
|
||||
logger.debug(
|
||||
f'User `{username}` does not exist. Proceeding with user creation.'
|
||||
)
|
||||
# Add sudoer (passwordless)
|
||||
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
|
||||
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
|
||||
if output.returncode != 0:
|
||||
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
|
||||
# Create the user with the provided UID
|
||||
cmd_useradd = (
|
||||
f'useradd -rm -d /home/{username} -s /bin/bash '
|
||||
f'-g root -G sudo -u {user_id} {username}'
|
||||
)
|
||||
output = subprocess.run(cmd_useradd, shell=True, capture_output=True)
|
||||
if output.returncode == 0:
|
||||
logger.debug(
|
||||
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
|
||||
)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
|
||||
)
|
||||
else:
|
||||
logger.error(f'Error checking user `{username}`, skipping setup:\n{e}\n')
|
||||
raise
|
||||
|
||||
# Now that the user exists, set ownership and permissions on the workspace
|
||||
subprocess.run(
|
||||
f'chown -R {username}:root {initial_cwd}', shell=True, capture_output=True
|
||||
# Add sudoer
|
||||
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
|
||||
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
|
||||
if output.returncode != 0:
|
||||
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
|
||||
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
|
||||
|
||||
command = (
|
||||
f'useradd -rm -d /home/{username} -s /bin/bash '
|
||||
f'-g root -G sudo -u {user_id} {username}'
|
||||
)
|
||||
subprocess.run(f'chmod g+rw {initial_cwd}', shell=True, capture_output=True)
|
||||
|
||||
# Configure git for the target user: safe.directory and global hooks/template
|
||||
_configure_git_for_user(username, initial_cwd)
|
||||
|
||||
output = subprocess.run(command, shell=True, capture_output=True)
|
||||
if output.returncode == 0:
|
||||
logger.debug(
|
||||
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
|
||||
)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -239,17 +239,6 @@ COPY ./code/microagents /openhands/code/microagents
|
||||
COPY ./code/openhands /openhands/code/openhands
|
||||
RUN chmod a+rwx /openhands/code/openhands/__init__.py
|
||||
|
||||
# Set up global git hooks for automatic co-authorship
# Two global settings are written: init.templateDir and core.hooksPath,
# both pointing into /openhands/git-hooks.
RUN \
    # Set up global git hook template directory for automatic co-authorship fallback
    mkdir -p /openhands/git-hooks/hooks && \
    git config --global init.templateDir /openhands/git-hooks && \
    # Copy git hooks from source code
    cp /openhands/code/openhands/runtime/utils/git_hooks/prepare-commit-msg /openhands/git-hooks/hooks/ && \
    chmod +x /openhands/git-hooks/hooks/prepare-commit-msg && \
    # Set up global git hooks path for existing repositories
    git config --global core.hooksPath /openhands/git-hooks/hooks
|
||||
|
||||
|
||||
|
||||
# ================================================================
|
||||
|
||||
+11
-116
@@ -16,12 +16,15 @@ from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
|
||||
from openhands.runtime.impl.local.local_runtime import LocalRuntime
|
||||
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
|
||||
|
||||
|
||||
def get_timeout_suffix(timeout_seconds):
|
||||
"""Helper to match the timeout suffix across runtime versions."""
|
||||
# Only assert on the stable prefix to avoid mismatches between server and test code
|
||||
return f'[The command timed out after {float(timeout_seconds):.1f} seconds.'
|
||||
"""Helper function to generate the expected timeout suffix."""
|
||||
return (
|
||||
f'[The command timed out after {timeout_seconds} seconds. '
|
||||
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================================================================
|
||||
@@ -874,111 +877,6 @@ def test_git_operation(temp_dir, runtime_cls):
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific git hooks and file operations'
)
def test_git_co_authorship_runtime_setup(temp_dir, runtime_cls):
    """Check that a plain `git commit -m` ends up carrying the openhands co-author trailer."""
    runtime, _config = _load_runtime(
        temp_dir=temp_dir,
        use_workspace=False,
        runtime_cls=runtime_cls,
        run_as_openhands=True,
    )

    # In order: create the repo, use a non-openhands identity, stage one file,
    # and commit with a message that contains no co-authorship line.
    preparation_steps = (
        'git init',
        'git config user.name "testuser" && git config user.email "testuser@example.com"',
        'echo "test content" > test_file.txt',
        'git add test_file.txt',
        'git commit -m "Test commit without manual co-authorship"',
    )

    try:
        for step in preparation_steps:
            step_obs = _run_cmd_action(runtime, step)
            assert step_obs.exit_code == 0

        # The stored commit message must now include the trailer added by the runtime
        log_obs = _run_cmd_action(runtime, 'git log --format="%B" -n 1')
        assert log_obs.exit_code == 0
        assert 'Co-authored-by: openhands <openhands@all-hands.dev>' in log_obs.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific git wrapper and file operations'
)
def test_git_co_authorship_wrapper_always_enabled(temp_dir, runtime_cls):
    """Check that CLIRuntime installs the git wrapper and that it adds the co-author trailer."""
    # Other runtimes handle git co-authorship differently, so only CLIRuntime applies
    if runtime_cls.__name__ != 'CLIRuntime':
        pytest.skip('This test is specific to CLIRuntime')

    runtime, _config = _load_runtime(
        temp_dir=temp_dir,
        use_workspace=False,
        runtime_cls=runtime_cls,
        run_as_openhands=True,
    )

    def run_ok(command):
        """Run one shell command in the runtime and require a zero exit code."""
        observation = _run_cmd_action(runtime, command)
        assert observation.exit_code == 0
        return observation

    try:
        # Fresh repository with a non-openhands identity
        run_ok('git init')
        run_ok(
            'git config user.name "testuser" && git config user.email "testuser@example.com"'
        )

        # The wrapper is expected to be an executable in the user's bin directory
        wrapper_obs = run_ok('test -x ~/.openhands/bin/git && echo "wrapper exists"')
        assert 'wrapper exists' in wrapper_obs.content

        # Commit a file without writing any co-authorship line by hand
        run_ok('echo "test content" > test_file.txt')
        run_ok('git add test_file.txt')
        run_ok('git commit -m "Test commit with wrapper"')

        # The recorded message must contain the trailer added by the wrapper
        log_obs = run_ok('git log --format="%B" -n 1')
        assert 'Co-authored-by: openhands <openhands@all-hands.dev>' in log_obs.content
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_python_version(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
@@ -1551,19 +1449,16 @@ def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
|
||||
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
||||
try:
|
||||
# create a git repo - same for both platforms
|
||||
obs = runtime.run_action(CmdRunAction('git init'))
|
||||
assert obs.metadata.exit_code == 0
|
||||
|
||||
# add or update origin remote robustly (handles case where it already exists)
|
||||
add_remote_cmd = (
|
||||
'git remote add origin https://github.com/All-Hands-AI/OpenHands || '
|
||||
'git remote set-url origin https://github.com/All-Hands-AI/OpenHands'
|
||||
action = CmdRunAction(
|
||||
'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
|
||||
)
|
||||
obs = runtime.run_action(CmdRunAction(add_remote_cmd))
|
||||
obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 0
|
||||
|
||||
# Check git remote - same for both platforms
|
||||
obs = runtime.run_action(CmdRunAction('git remote -v'))
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.metadata.exit_code == 0
|
||||
assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
|
||||
assert 'git remote -v' not in obs.content
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from evaluation.utils.shared import get_openhands_config_for_eval
|
||||
|
||||
|
||||
def test_eval_file_store_defaults_to_repo_local(tmp_path, monkeypatch):
    """The eval config stores sessions in `.eval_sessions` under the current directory."""
    # monkeypatch.chdir restores the previous working directory at teardown,
    # so no manual save/restore of the cwd is needed.
    monkeypatch.chdir(tmp_path)

    cfg = get_openhands_config_for_eval()

    assert Path(cfg.file_store_path) == (tmp_path / '.eval_sessions').resolve()
    assert cfg.file_store == 'local'
|
||||
|
||||
|
||||
def test_eval_file_store_is_hard_coded_repo_local(tmp_path):
    """From any working directory, the eval file store is the local `.eval_sessions` folder."""
    starting_dir = Path.cwd()
    os.chdir(tmp_path)
    try:
        eval_config = get_openhands_config_for_eval()
        expected_store_path = (tmp_path / '.eval_sessions').resolve()
        actual_store_path = Path(eval_config.file_store_path)
        assert actual_store_path == expected_store_path
        assert eval_config.file_store == 'local'
    finally:
        # Always return to the directory the test started in
        os.chdir(starting_dir)
|
||||
Reference in New Issue
Block a user