Compare commits

..

3 Commits

Author SHA1 Message Date
Xingyao Wang f5d86e8132 Merge branch 'main' into fix-cli-command-interruption 2025-08-22 09:26:13 -04:00
openhands d615fe26c0 cli: refine Ctrl+C behavior and async safety
- Double-press Ctrl+C within 400ms to send interrupt to running command
- Single Ctrl+C pauses the agent (legacy behavior), whether or not a command is running
- Honor CLI config in dialogs and use to_thread to avoid blocking the event loop
- Debounce interrupts with asyncio.Lock to prevent races
- Use bounded reverse search on EventStream with EventFilter; rely on exit_code
- Pass config through start_pause_listener; remove ad-hoc OpenHandsConfig()
- Update help text for clarity

Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-21 19:54:57 +00:00
openhands 01f28f6269 Fix issue #10434: Add command interruption support to CLI
- Enhanced Ctrl+C behavior to detect running commands and provide user options
- Added is_command_running() function to analyze event stream for active commands
- Modified process_agent_pause() to handle command interruption vs agent pause
- Added _handle_command_interrupt() with user confirmation dialog offering:
  * Kill running command (send Ctrl+C to command)
  * Continue waiting for command completion
  * Pause the entire agent
- Updated help documentation with new keyboard shortcuts section
- Maintains backward compatibility: Ctrl+C still pauses agent when no command running
- All existing CLI tests pass (237 tests)

Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-16 23:29:04 +00:00
49 changed files with 540 additions and 876 deletions
-2
View File
@@ -257,5 +257,3 @@ containers/runtime/code
# test results
test-results
.eval_sessions
-47
View File
@@ -1,47 +0,0 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: ^(docs/|modules/|python/|openhands-ui/|third_party/)
- id: end-of-file-fixer
exclude: ^(docs/|modules/|python/|openhands-ui/|third_party/)
- id: check-yaml
args: ["--allow-multiple-documents"]
- id: debug-statements
- repo: https://github.com/tox-dev/pyproject-fmt
rev: v2.5.1
hooks:
- id: pyproject-fmt
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.24.1
hooks:
- id: validate-pyproject
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.11.8
hooks:
# Run the linter.
- id: ruff
entry: ruff check --config dev_config/python/ruff.toml
types_or: [python, pyi, jupyter]
args: [--fix, --unsafe-fixes]
exclude: third_party/
# Run the formatter.
- id: ruff-format
entry: ruff format --config dev_config/python/ruff.toml
types_or: [python, pyi, jupyter]
exclude: third_party/
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
hooks:
- id: mypy
additional_dependencies:
[types-requests, types-setuptools, types-pyyaml, types-toml, types-docker, types-Markdown, pydantic, lxml]
# To see gaps add `--html-report mypy-report/`
entry: mypy --config-file dev_config/python/mypy.ini openhands/
always_run: true
pass_filenames: false
+11 -8
View File
@@ -9,8 +9,8 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -61,15 +61,18 @@ AGENT_CLS_TO_INST_SUFFIX = {
def get_config(
metadata: EvalMetadata,
) -> OpenHandsConfig:
# Create config with EDA-specific container image
config = get_openhands_config_for_eval(
metadata=metadata,
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
# Override the container image for EDA
config.sandbox.base_container_image = 'python:3.12-bookworm'
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False
+13 -6
View File
@@ -17,8 +17,8 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -41,12 +41,19 @@ from openhands.utils.async_utils import call_async_from_sync
def get_config(
metadata: EvalMetadata,
) -> OpenHandsConfig:
# Create config with agent_bench-specific container image
config = get_openhands_config_for_eval(metadata=metadata)
# Override the container image for agent_bench
config.sandbox.base_container_image = 'python:3.12-slim'
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-slim'
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -51,10 +50,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.11-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
sandbox_config=sandbox_config,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -62,10 +61,15 @@ def get_config(
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -19,7 +19,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -76,10 +75,15 @@ def get_config(
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -41,8 +40,14 @@ def get_config(
)
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata, runtime='docker', sandbox_config=sandbox_config
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+9 -5
View File
@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -115,11 +114,16 @@ def get_config(
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
config = get_openhands_config_for_eval(
metadata=metadata,
sandbox_config=sandbox_config,
runtime=os.environ.get('RUNTIME', 'docker'),
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -66,10 +65,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -61,10 +60,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'nikolaik/python-nodejs:python3.12-nodejs22'
config = get_openhands_config_for_eval(
metadata=metadata,
sandbox_config=sandbox_config,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
if metadata.agent_config:
+8 -4
View File
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -44,10 +43,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -31,7 +31,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -65,10 +64,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -24,7 +24,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -86,10 +85,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -16,7 +16,6 @@ import ruamel.yaml
from evaluation.utils.shared import (
EvalMetadata,
get_default_sandbox_config_for_eval,
get_openhands_config_for_eval,
make_metadata,
)
from openhands.core.config import (
@@ -38,10 +37,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -49,10 +48,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -53,10 +52,15 @@ def get_config(
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -14,7 +14,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -59,10 +58,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
+8 -4
View File
@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -111,10 +110,15 @@ def get_config(
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+8 -4
View File
@@ -27,7 +27,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -81,10 +80,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_default_sandbox_config_for_eval,
get_openhands_config_for_eval,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
@@ -88,9 +87,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
config = OpenHandsConfig(
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
return config
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -342,11 +341,16 @@ def get_config(
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
@@ -31,7 +31,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -175,10 +174,15 @@ def get_config(
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -65,10 +64,16 @@ def get_config(
sandbox_config.base_container_image = (
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
max_budget_per_task=4,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
@@ -19,7 +19,6 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_default_sandbox_config_for_eval,
get_openhands_config_for_eval,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
@@ -84,9 +83,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
config = OpenHandsConfig(
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
return config
+8 -4
View File
@@ -32,7 +32,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -228,11 +227,16 @@ def get_config(
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -200,11 +199,16 @@ def get_config(
'REPO_PATH': f'/workspace/{workspace_dir_name}/',
}
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
+13 -15
View File
@@ -37,7 +37,6 @@ from evaluation.benchmarks.testgeneval.utils import load_testgeneval_dataset
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_openhands_config_for_eval,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
@@ -59,21 +58,20 @@ def get_config(instance: pd.Series) -> OpenHandsConfig:
f'Invalid container image for instance {instance["instance_id_swebench"]}.'
)
logger.info(f'Using instance container image: {base_container_image}.')
# Create custom sandbox config for testgeneval with specific requirements
sandbox_config = SandboxConfig(
base_container_image=base_container_image,
use_host_network=False,
timeout=1800, # Longer timeout than default (300)
api_key=os.environ.get('ALLHANDS_API_KEY'),
remote_runtime_api_url=os.environ.get(
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
return OpenHandsConfig(
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
use_host_network=False,
timeout=1800,
api_key=os.environ.get('ALLHANDS_API_KEY'),
remote_runtime_api_url=os.environ.get(
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
),
),
)
return get_openhands_config_for_eval(
sandbox_config=sandbox_config,
runtime=os.environ.get('RUNTIME', 'docker'), # Different default runtime
workspace_base=None,
workspace_mount_path=None,
)
+22 -20
View File
@@ -25,7 +25,6 @@ from evaluation.utils.shared import (
assert_and_raise,
codeact_user_response,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -127,26 +126,29 @@ def get_config(
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
sandbox_config = SandboxConfig(
base_container_image=base_container_image,
enable_auto_lint=True,
use_host_network=False,
# large enough timeout, since some testcases take very long to run
timeout=300,
# Add platform to the sandbox config to solve issue 4401
platform='linux/amd64',
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get(
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'eventstream'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
enable_auto_lint=True,
use_host_network=False,
# large enough timeout, since some testcases take very long to run
timeout=300,
# Add platform to the sandbox config to solve issue 4401
platform='linux/amd64',
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get(
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
),
keep_runtime_alive=False,
remote_runtime_init_timeout=3600,
),
keep_runtime_alive=False,
remote_runtime_init_timeout=3600,
)
config = get_openhands_config_for_eval(
metadata=metadata,
sandbox_config=sandbox_config,
runtime=os.environ.get('RUNTIME', 'docker'),
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
@@ -12,10 +12,7 @@ import tempfile
import yaml
from browsing import pre_login
from evaluation.utils.shared import (
get_default_sandbox_config_for_eval,
get_openhands_config_for_eval,
)
from evaluation.utils.shared import get_default_sandbox_config_for_eval
from openhands.controller.state.state import State
from openhands.core.config import (
LLMConfig,
@@ -45,17 +42,19 @@ def get_config(
sandbox_config.enable_auto_lint = True
# If the web services are running on the host machine, this must be set to True
sandbox_config.use_host_network = True
config = get_openhands_config_for_eval(
config = OpenHandsConfig(
run_as_openhands=False,
max_budget_per_task=4,
max_iterations=100,
save_trajectory_path=os.path.join(
mount_path_on_host, f'traj_{task_short_name}.json'
),
sandbox=sandbox_config,
# we mount trajectories path so that trajectories, generated by OpenHands
# controller, can be accessible to the evaluator file in the runtime container
sandbox_config=sandbox_config,
workspace_mount_path=mount_path_on_host,
workspace_mount_path_in_sandbox='/outputs',
)
config.save_trajectory_path = os.path.join(
mount_path_on_host, f'traj_{task_short_name}.json'
)
config.max_budget_per_task = 4
config.set_llm_config(llm_config)
if agent_config:
config.set_agent_config(agent_config)
+8 -4
View File
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -45,10 +44,15 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
@@ -161,11 +160,16 @@ def get_config(
instance_id=instance['instance_id'],
)
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -74,10 +73,16 @@ def get_config(
'VWA_WIKIPEDIA': f'{base_url}:8888',
'VWA_HOMEPAGE': f'{base_url}:4399',
}
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
attach_to_existing=True,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
+8 -4
View File
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -66,10 +65,15 @@ def get_config(
'MAP': f'{base_url}:3000',
'HOMEPAGE': f'{base_url}:4399',
}
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
+10 -5
View File
@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
EvalOutput,
get_default_sandbox_config_for_eval,
get_metrics,
get_openhands_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -46,12 +45,18 @@ def get_config(
) -> OpenHandsConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.platform = 'linux/amd64'
config = get_openhands_config_for_eval(
metadata=metadata,
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox_config=sandbox_config,
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
# debug
debug=True,
)
config.debug = True
config.set_llm_config(
update_llm_config_for_completions_logging(
metadata.llm_config, metadata.eval_output_dir, instance_id
-76
View File
@@ -703,79 +703,3 @@ def get_default_sandbox_config_for_eval() -> SandboxConfig:
remote_runtime_enable_retries=True,
remote_runtime_class='sysbox',
)
def get_openhands_config_for_eval(
    metadata: EvalMetadata | None = None,
    sandbox_config: SandboxConfig | None = None,
    runtime: str | None = None,
    max_iterations: int | None = None,
    default_agent: str | None = None,
    enable_browser: bool = False,
    workspace_base: str | None = None,
    workspace_mount_path: str | None = None,
):
    """Build the OpenHandsConfig shared by the evaluation ``run_infer.py`` scripts.

    Explicit arguments always win. Values left as ``None`` are filled in from
    ``metadata`` (when given) and finally from hard-coded fallbacks.

    Args:
        metadata: Source of ``agent_class`` and ``max_iterations`` when those
            are not passed explicitly.
        sandbox_config: Sandbox settings. Falls back to
            ``get_default_sandbox_config_for_eval()``.
        runtime: Runtime name. Falls back to the ``RUNTIME`` environment
            variable, then ``'docker'``.
        max_iterations: Iteration cap. Falls back to ``metadata.max_iterations``,
            then ``50``.
        default_agent: Agent class name. Falls back to ``metadata.agent_class``,
            then ``'CodeActAgent'``.
        enable_browser: Forwarded unchanged to the config.
        workspace_base: Forwarded unchanged to the config.
        workspace_mount_path: Forwarded unchanged to the config.

    Returns:
        An ``OpenHandsConfig`` with ``run_as_openhands=False`` and a local file
        store rooted at ``<cwd>/.eval_sessions``.
    """
    # Imported lazily so that loading this module does not trigger a circular import.
    from openhands.core.config.openhands_config import (
        OpenHandsConfig as _OHConfig,  # type: ignore
    )

    # Metadata only fills the gaps the caller left open.
    if metadata is not None:
        if max_iterations is None:
            max_iterations = metadata.max_iterations
        if default_agent is None:
            default_agent = metadata.agent_class

    resolved_sandbox = (
        sandbox_config
        if sandbox_config is not None
        else get_default_sandbox_config_for_eval()
    )
    resolved_runtime = (
        runtime if runtime is not None else os.environ.get('RUNTIME', 'docker')
    )
    resolved_agent = default_agent if default_agent is not None else 'CodeActAgent'
    resolved_iterations = max_iterations if max_iterations is not None else 50

    # Session data always lives in an absolute .eval_sessions directory under the cwd.
    session_store = os.path.abspath(os.path.join(os.getcwd(), '.eval_sessions'))

    return _OHConfig(
        default_agent=resolved_agent,
        run_as_openhands=False,
        runtime=resolved_runtime,
        max_iterations=resolved_iterations,
        enable_browser=enable_browser,
        sandbox=resolved_sandbox,
        workspace_base=workspace_base,
        workspace_mount_path=workspace_mount_path,
        file_store='local',
        file_store_path=session_store,
    )
+1 -1
View File
@@ -275,7 +275,7 @@ async def run_session(
if event.agent_state == AgentState.RUNNING:
display_agent_running_message()
start_pause_listener(loop, is_paused, event_stream)
start_pause_listener(loop, is_paused, event_stream, config)
def on_event(event: Event) -> None:
loop.create_task(on_event_async(event))
+181 -8
View File
@@ -87,6 +87,9 @@ COMMANDS = {
print_lock = threading.Lock()
# Lock to debounce sending Ctrl+C interrupts to the running command
_interrupt_lock: asyncio.Lock = asyncio.Lock()
pause_task: asyncio.Task | None = None # No more than one pause task
@@ -659,6 +662,15 @@ def display_help() -> None:
commands_html += f'<gold><b>{command}</b></gold> - <grey>{description}</grey>\n'
print_formatted_text(HTML(commands_html))
# Keyboard shortcuts section
print_formatted_text(HTML('\nKeyboard shortcuts:'))
shortcuts_html = (
'<gold><b>Ctrl+P</b></gold> - <grey>Pause the agent</grey>\n'
'<gold><b>Ctrl+C</b></gold> - <grey>Pause the agent; press twice quickly to interrupt a running command</grey>\n'
'<gold><b>Ctrl+D</b></gold> - <grey>Pause the agent</grey>\n'
)
print_formatted_text(HTML(shortcuts_html))
# Footer
print_formatted_text(
HTML(
@@ -864,12 +876,13 @@ async def read_confirmation_input(config: OpenHandsConfig) -> str:
def start_pause_listener(
loop: asyncio.AbstractEventLoop,
done_event: asyncio.Event,
event_stream,
event_stream: EventStream,
config: OpenHandsConfig,
) -> None:
global pause_task
if pause_task is None or pause_task.done():
pause_task = loop.create_task(
process_agent_pause(done_event, event_stream)
process_agent_pause(done_event, event_stream, config)
) # Create a task to track agent pause requests from the user
@@ -883,16 +896,135 @@ async def stop_pause_listener() -> None:
pause_task = None
async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) -> None:
def is_command_running(event_stream: EventStream) -> bool:
    """Report whether a shell command appears to be in flight.

    Walks the most recent command-related events (newest first, at most 50)
    and decides from the first conclusive one:

    - a ``CmdOutputObservation`` settles the question: the command is still
      running only when its ``metadata.exit_code`` is ``-1``;
    - a ``CmdRunAction`` that is not an input action means a command was
      started and no output has been observed after it, so it is running;
    - ``CmdRunAction`` events with ``is_input`` set are ignored.

    Any failure while inspecting the stream is treated as "nothing running".
    """
    try:
        from openhands.events.event_filter import EventFilter

        recent_command_events = event_stream.search_events(
            reverse=True,
            filter=EventFilter(include_types=(CmdRunAction, CmdOutputObservation)),
            limit=50,
        )
        for event in recent_command_events:
            if isinstance(event, CmdOutputObservation):
                return event.metadata.exit_code == -1
            if isinstance(event, CmdRunAction) and not event.is_input:
                return True
    except Exception:
        # Detection is best-effort: on any error assume no command is running.
        return False
    return False
async def _handle_command_interrupt(
    event_stream: EventStream, config: OpenHandsConfig
) -> bool:
    """Ask the user what to do about a running command and act on the answer.

    The dialog offers three choices: pause the agent (default), keep waiting,
    or send Ctrl+C to the running command.

    Args:
        event_stream: Stream that receives the ``C-c`` input action when the
            user chooses to interrupt the command.
        config: CLI configuration forwarded to ``cli_confirm``.

    Returns:
        bool: True if the interrupt was handled here (interrupt sent, already
        in flight, or the user chose to keep waiting); False if the user wants
        to pause the agent, which the caller must then do.
    """
    print_formatted_text('')
    print_formatted_text(HTML('<gold>Command is currently running.</gold>'))
    print_formatted_text('')

    # Keep legacy behavior: single Ctrl+C pauses by default. Offer kill as opt-in.
    choices = [
        'Pause the agent (default)',
        'Continue waiting for command to complete',
        'Send interrupt to running command (Ctrl+C)',
    ]

    # cli_confirm is a blocking prompt, so it runs in a worker thread to keep
    # the event loop responsive.
    selection = await asyncio.to_thread(
        cli_confirm, config, 'What would you like to do?', choices, 0
    )

    if selection == 2:  # Send interrupt to the running command
        print_formatted_text('')
        # Debounce first: announcing "Sending interrupt" and then reporting
        # that one was already sent would contradict itself.
        if _interrupt_lock.locked():
            print_formatted_text(HTML('<grey>Interrupt already sent; waiting…</grey>'))
            return True

        print_formatted_text(
            HTML('<gold>Sending interrupt signal to running command...</gold>')
        )
        async with _interrupt_lock:
            event_stream.add_event(
                CmdRunAction(command='C-c', is_input=True),
                EventSource.USER,
            )
        return True

    if selection == 1:  # Continue waiting
        print_formatted_text('')
        print_formatted_text(
            HTML('<gold>Continuing to wait for command completion...</gold>')
        )
        return True

    # Pause the agent (selection == 0); the caller performs the pause.
    return False
async def _handle_interrupt_async(
    event_stream: EventStream, done: asyncio.Event, config: OpenHandsConfig
) -> None:
    """Run the interrupt dialog and pause the agent when asked to, or on error.

    If ``_handle_command_interrupt`` reports the interrupt as handled, nothing
    else happens. Otherwise, or when anything raises along the way, a PAUSED
    state change is pushed onto ``event_stream`` and ``done`` is set.
    """

    def announce_and_pause() -> None:
        # Shared tail of the normal and the error path.
        print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
        event_stream.add_event(
            ChangeAgentStateAction(AgentState.PAUSED),
            EventSource.USER,
        )
        done.set()

    try:
        if await _handle_command_interrupt(event_stream, config):
            return
        # The user picked "pause the agent" in the dialog.
        print_formatted_text('')
        announce_and_pause()
    except Exception as exc:
        # Anything unexpected falls back to pausing the agent.
        print_formatted_text('')
        print_formatted_text(
            HTML(f'<ansired>Error handling interrupt: {exc}</ansired>')
        )
        announce_and_pause()
async def process_agent_pause(
done: asyncio.Event, event_stream: EventStream, config: OpenHandsConfig
) -> None:
input = create_input()
# Double-press detection window for Ctrl+C to send interrupt to running command
CTRL_C_WINDOW_SECONDS = 0.4
ctrl_c_timer: asyncio.Task | None = None
async def pause_after_delay(delay: float) -> None:
try:
await asyncio.sleep(delay)
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
except asyncio.CancelledError:
# Timer canceled because a second Ctrl+C was detected; do nothing
pass
def keys_ready() -> None:
nonlocal ctrl_c_timer
for key_press in input.read_keys():
if (
key_press.key == Keys.ControlP
or key_press.key == Keys.ControlC
or key_press.key == Keys.ControlD
):
if key_press.key == Keys.ControlP or key_press.key == Keys.ControlD:
# Immediate pause
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
@@ -900,6 +1032,47 @@ async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) ->
EventSource.USER,
)
done.set()
elif key_press.key == Keys.ControlC:
if is_command_running(event_stream):
# If a timer is already running, this is a double-press: send interrupt
if ctrl_c_timer and not ctrl_c_timer.done():
ctrl_c_timer.cancel()
ctrl_c_timer = None
if _interrupt_lock.locked():
print_formatted_text(
HTML('<grey>Interrupt already sent; waiting…</grey>')
)
continue
# Send Ctrl+C to the running command
async def send_interrupt() -> None:
async with _interrupt_lock:
print_formatted_text('')
print_formatted_text(
HTML(
'<gold>Sending interrupt signal to running command...</gold>'
)
)
event_stream.add_event(
CmdRunAction(command='C-c', is_input=True),
EventSource.USER,
)
asyncio.create_task(send_interrupt())
else:
# Start a short window; if no second press, pause
ctrl_c_timer = asyncio.create_task(
pause_after_delay(CTRL_C_WINDOW_SECONDS)
)
else:
# No command running: default immediate pause
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
try:
with input.raw_mode():
+5 -121
View File
@@ -159,9 +159,6 @@ class CLIRuntime(Runtime):
self._is_windows = sys.platform == 'win32'
self._powershell_session: WindowsPowershellSession | None = None
# Track git wrapper bin dir for use in subprocess env
self._git_wrapper_bin_dir = os.path.expanduser('~/.openhands/bin')
logger.warning(
'Initializing CLIRuntime. WARNING: NO SANDBOX IS USED. '
'This runtime executes commands directly on the local system. '
@@ -220,106 +217,6 @@ class CLIRuntime(Runtime):
# We don't use self.run() here because this method is called
# during initialization before self._runtime_initialized is True.
def setup_initial_env(self) -> None:
    """Run the base environment setup, then enable git co-authorship.

    Two mechanisms are installed: the PATH-based git wrapper, and a global
    ``prepare-commit-msg`` hook that covers commits made without
    ``-m``/``--message``. Failures while installing the hook are logged as
    warnings and do not abort initialization.
    """
    super().setup_initial_env()

    # The wrapper handles `git commit -m ...`.
    self._setup_git_wrapper()

    # The hook is the fallback for every other kind of commit.
    try:
        hooks_root = os.path.expanduser('~/.openhands/git-hooks')
        hooks_dir = os.path.join(hooks_root, 'hooks')
        os.makedirs(hooks_dir, exist_ok=True)

        # Three levels up from this file, then utils/git_hooks/prepare-commit-msg.
        package_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        bundled_hook = os.path.join(
            package_dir, 'utils', 'git_hooks', 'prepare-commit-msg'
        )
        installed_hook = os.path.join(hooks_dir, 'prepare-commit-msg')

        if os.path.exists(bundled_hook):
            shutil.copyfile(bundled_hook, installed_hook)
            os.chmod(installed_hook, 0o755)

        # core.hooksPath covers existing repositories; init.templateDir is set
        # so that newly initialised repositories pick the hook up as well.
        global_settings = (
            ('core.hooksPath', hooks_dir),
            ('init.templateDir', hooks_root),
        )
        for setting, value in global_settings:
            subprocess.run(
                ['git', 'config', '--global', setting, value],
                check=False,
            )

        logger.info(
            f'[CLIRuntime] Configured global git hooks at {hooks_dir} for co-authorship'
        )
    except Exception as e:
        logger.warning(f'[CLIRuntime] Failed to configure global git hook: {e}')
def _setup_git_wrapper(self) -> None:
    """Install the co-authorship git wrapper and put it first on PATH.

    The bundled ``git_wrapper.sh`` is copied to ``~/.openhands/bin/git`` with
    every ``command git`` replaced by the absolute path of the real git
    binary, and ``~/.openhands/bin`` is moved to the front of ``PATH``.

    The real git binary is looked up on a PATH that excludes the wrapper
    directory. Otherwise a second initialization in the same process (or an
    inherited PATH that already contains the directory) would resolve the
    wrapper itself and produce a script that calls itself forever.

    All failures are logged as warnings; this method never raises.
    """
    try:
        # Three levels up from this file, then utils/git_wrapper.sh.
        git_wrapper_source = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            'utils',
            'git_wrapper.sh',
        )
        if not os.path.exists(git_wrapper_source):
            logger.warning(
                f'[CLIRuntime] Git wrapper not found at {git_wrapper_source}'
            )
            return

        bin_dir = os.path.expanduser('~/.openhands/bin')
        normalized_bin_dir = os.path.normpath(bin_dir)

        # PATH without our own wrapper directory: used both to locate the real
        # git and to rebuild PATH without duplicate entries.
        current_path = os.environ.get('PATH', '')
        other_entries = [
            entry
            for entry in (current_path.split(os.pathsep) if current_path else [])
            if os.path.normpath(entry) != normalized_bin_dir
        ]

        real_git_path = shutil.which('git', path=os.pathsep.join(other_entries))
        if real_git_path is None:
            logger.warning('[CLIRuntime] Could not find git executable')
            return

        os.makedirs(bin_dir, exist_ok=True)
        git_wrapper_dest = os.path.join(bin_dir, 'git')

        with open(git_wrapper_source, 'r', encoding='utf-8') as src:
            wrapper_content = src.read()

        # Point the wrapper at the real binary by absolute path so it cannot
        # recurse into itself through PATH.
        wrapper_content = wrapper_content.replace(
            'command git', f'"{real_git_path}"'
        )

        with open(git_wrapper_dest, 'w', encoding='utf-8') as dest:
            dest.write(wrapper_content)
        os.chmod(git_wrapper_dest, 0o755)

        # Wrapper directory first, so chained commands such as
        # "cd dir && git commit" also go through it.
        os.environ['PATH'] = os.pathsep.join([bin_dir, *other_entries])

        logger.info(
            f'[CLIRuntime] Set up OpenHands git wrapper at {git_wrapper_dest} for co-authorship'
        )
    except Exception as e:
        logger.warning(f'[CLIRuntime] Failed to set up git wrapper: {e}')
def _safe_terminate_process(self, process_obj, signal_to_send=signal.SIGTERM):
"""Safely attempts to terminate/kill a process group or a single process.
@@ -440,13 +337,6 @@ class CLIRuntime(Runtime):
timed_out = False
start_time = time.monotonic()
# Ensure our git wrapper bin dir is first in PATH for the subprocess
env = os.environ.copy()
bin_dir = getattr(
self, '_git_wrapper_bin_dir', os.path.expanduser('~/.openhands/bin')
)
env['PATH'] = f'{bin_dir}:{env.get("PATH", "")}'
# Use shell=True to run complex bash commands
process = subprocess.Popen(
['bash', '-c', command],
@@ -456,10 +346,9 @@ class CLIRuntime(Runtime):
bufsize=1, # Explicitly line-buffered for text mode
universal_newlines=True,
start_new_session=True,
env=env,
)
logger.debug(
f'[_execute_shell_command] PID of bash -c: {process.pid} for command: "{command}" with PATH={env.get("PATH")}'
f'[_execute_shell_command] PID of bash -c: {process.pid} for command: "{command}"'
)
exit_code = None
@@ -569,20 +458,15 @@ class CLIRuntime(Runtime):
f'Running command in CLIRuntime: "{action.command}" with effective timeout: {effective_timeout}s'
)
# Use the command as-is since git alias is set up
command_to_execute = action.command
# Use PowerShell on Windows if available, otherwise use subprocess
if self._is_windows and self._powershell_session is not None:
result = self._execute_powershell_command(
command_to_execute, timeout=effective_timeout
return self._execute_powershell_command(
action.command, timeout=effective_timeout
)
else:
result = self._execute_shell_command(
command_to_execute, timeout=effective_timeout
return self._execute_shell_command(
action.command, timeout=effective_timeout
)
return result
except Exception as e:
logger.error(
f'Error in CLIRuntime.run for command "{action.command}": {str(e)}'
+3 -4
View File
@@ -194,9 +194,8 @@ class BashSession:
self.server = libtmux.Server()
_shell_command = '/bin/bash'
if self.username in ['root', 'openhands']:
# Start a login shell for the given user without running an interactive login prompt
# Use 'su -c' to run bash and ensure we start inside the project's working dir (self.work_dir).
_shell_command = f"su {self.username} -c 'cd {self.work_dir} && /bin/bash'"
# This starts a non-login (new) shell for the given user
_shell_command = f'su {self.username} -'
# FIXME: we will introduce memory limit using sysbox-runc in coming PR
# # otherwise, we are running as the CURRENT USER (e.g., when running LocalRuntime)
@@ -417,7 +416,7 @@ class BashSession:
)
metadata = CmdOutputMetadata() # No metadata available
metadata.suffix = (
f'\n[The command timed out after {float(timeout):.1f} seconds. '
f'\n[The command timed out after {timeout} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
command_output = self._get_command_output(
+3 -9
View File
@@ -39,15 +39,9 @@ def get_action_execution_server_startup_command(
username = override_username or (
'openhands' if app_config.run_as_openhands else 'root'
)
if app_config.run_as_openhands:
resolved_uid = (
override_user_id if override_user_id is not None else sandbox_config.user_id
)
# Avoid passing UID 0 for the non-root 'openhands' user inside containers
# Fall back to 1000 when resolved UID is 0 or None
user_id = resolved_uid if resolved_uid not in (None, 0) else 1000
else:
user_id = 0
user_id = override_user_id or (
sandbox_config.user_id if app_config.run_as_openhands else 0
)
base_cmd = [
*python_prefix,
@@ -1,30 +0,0 @@
# OpenHands Git Hooks
This directory contains git hooks that are automatically installed in the OpenHands runtime environment.
## prepare-commit-msg
This hook serves as a fallback mechanism to ensure that OpenHands contributions are properly attributed. It automatically adds `Co-authored-by: openhands <openhands@all-hands.dev>` to commit messages when the co-authorship line is not already present (case-insensitive check).
### Behavior
- **Primary workflow**: The OpenHands agent should manually add co-authorship lines to commit messages as instructed in the system prompt
- **Fallback**: If the agent forgets to add the co-authorship line, this hook will automatically add it
- **No-op**: If the co-authorship line is already present (in any case variation), the hook does nothing
### Installation
#### Docker Runtime
The hook is automatically installed during Docker runtime build via the `Dockerfile.j2` template:
1. Copied from `/openhands/code/openhands/runtime/utils/git_hooks/` to `/openhands/git-hooks/hooks/`
2. Made executable with `chmod +x`
3. Configured globally via `git config --global core.hooksPath /openhands/git-hooks/hooks`
4. Set as template for new repositories via `git config --global init.templateDir /openhands/git-hooks`
This ensures the hook works for both existing repositories and newly created ones.
#### CLI Runtime
For CLI runtime, git co-authorship is always enabled automatically. A git wrapper script is installed as `~/.openhands/bin/git` and placed first on `PATH`, so it intercepts `git commit -m` commands and adds the co-authorship line when it is missing. As a fallback for commits that do not use `-m`/`--message`, the CLI runtime also copies this `prepare-commit-msg` hook to `~/.openhands/git-hooks/hooks` and points the global `core.hooksPath` and `init.templateDir` settings at it. Note that this fallback modifies the user's global git configuration.
@@ -1,16 +0,0 @@
#!/bin/bash
# OpenHands Git Hook: prepare-commit-msg
#
# Fallback for commits where the agent did not add the co-authorship line
# itself: appends "Co-authored-by: openhands <openhands@all-hands.dev>" to the
# commit message file passed as the first argument, unless a matching line is
# already there.

commit_msg_file=$1
coauthor_line="Co-authored-by: openhands <openhands@all-hands.dev>"

# Nothing to do when the line is already present (case-insensitive match).
if grep -qi "co-authored-by.*openhands.*<openhands@all-hands.dev>" "$commit_msg_file"; then
    exit 0
fi

# Two blank lines, then the co-authorship line.
printf '\n\n%s\n' "$coauthor_line" >> "$commit_msg_file"
-85
View File
@@ -1,85 +0,0 @@
#!/bin/bash
# Git wrapper script that automatically adds co-authorship to commit messages.
#
# It intercepts `git commit` invocations that pass the message on the command
# line (-m MSG, --message MSG, --message=MSG) and appends
# "Co-authored-by: openhands <openhands@all-hands.dev>" when that line is not
# already present. Every other invocation is forwarded to git unchanged.

# Append the co-authorship line to the message file given as $1, unless a
# matching line (case-insensitive) is already in it.
add_coauthorship() {
    local commit_msg_file="$1"
    local coauthor_line="Co-authored-by: openhands <openhands@all-hands.dev>"

    if ! grep -qi "co-authored-by.*openhands" "$commit_msg_file" 2>/dev/null; then
        # Two empty lines separate the message from the co-authorship line.
        echo "" >> "$commit_msg_file"
        echo "" >> "$commit_msg_file"
        echo "$coauthor_line" >> "$commit_msg_file"
    fi
}

# Run `git commit` with every command-line message moved into one temporary
# message file (plus the co-authorship line), passed to git through -F.
# Arguments: the full git argument list, starting with "commit".
handle_commit_with_message() {
    local temp_msg_file
    if ! temp_msg_file=$(mktemp); then
        # Without a scratch file the message cannot be rewritten; run git as-is.
        command git "$@"
        return $?
    fi

    local -a messages=()
    local -a new_args=()
    local expect_message=false
    local file_arg_added=false
    local arg

    for arg in "$@"; do
        if [ "$expect_message" = true ]; then
            messages+=("$arg")
            expect_message=false
        elif [ "$arg" = "-m" ] || [ "$arg" = "--message" ]; then
            expect_message=true
        elif [[ "$arg" == --message=* ]]; then
            messages+=("${arg#--message=}")
        else
            new_args+=("$arg")
            continue
        fi

        # The first message option is replaced in place by a single -F FILE, so
        # the option stays ahead of any "--" pathspec separator. Later message
        # options are dropped because their text goes into the same file.
        if [ "$file_arg_added" = false ]; then
            new_args+=("-F" "$temp_msg_file")
            file_arg_added=true
        fi
    done

    # No usable message (the caller's detection matched something else), or a
    # trailing -m without a value: hand the original arguments to git so its
    # own behavior and error reporting apply.
    if [ "${#messages[@]}" -eq 0 ] || [ "$expect_message" = true ]; then
        rm -f "$temp_msg_file"
        command git "$@"
        return $?
    fi

    # Like git itself, join multiple messages as separate paragraphs.
    local i
    for i in "${!messages[@]}"; do
        if [ "$i" -gt 0 ]; then
            printf '\n'
        fi
        printf '%s\n' "${messages[$i]}"
    done > "$temp_msg_file"
    add_coauthorship "$temp_msg_file"

    # Execute git with the rewritten arguments.
    command git "${new_args[@]}"
    local exit_code=$?

    # Clean up temp file
    rm -f "$temp_msg_file"

    return $exit_code
}

# Main logic
if [ "$1" = "commit" ]; then
    # Check if this is a commit with -m/--message flag
    if [[ "$*" =~ -m[[:space:]] ]] || [[ "$*" =~ --message[[:space:]] ]] || [[ "$*" =~ -m= ]] || [[ "$*" =~ --message= ]]; then
        handle_commit_with_message "$@"
    else
        # For other commit types (interactive, -F file, etc.), just pass through
        # The prepare-commit-msg hook would handle these in Docker runtime
        command git "$@"
    fi
else
    # For non-commit commands, just pass through to real git
    command git "$@"
fi
@@ -1,39 +0,0 @@
# Git Wrapper for Co-authorship
This git wrapper script (`git_wrapper.sh`) provides a non-invasive way to automatically add co-authorship to git commits without modifying the user's git configuration or installing hooks in their repositories.
## How it works
The wrapper script intercepts git commit commands and:
1. **For `git commit -m "message"` commands**: Extracts the commit message, adds co-authorship, and uses a temporary file to commit with the enhanced message.
2. **For other commit types**: Passes through to the regular git command (interactive commits, file-based commits, etc. would be handled by git hooks in Docker runtime).
## Usage
The wrapper is automatically set up in CLI runtime.
When active:
- The wrapper script is installed as `git` in `~/.openhands/bin`, and that directory is prepended to `PATH`
- Git commands are transparently intercepted and processed
- Co-authorship is automatically added: `Co-authored-by: openhands <openhands@all-hands.dev>`
## Benefits
- **Non-invasive**: Doesn't modify user's git configuration or repository hooks
- **Transparent**: Agent thinks it's running regular git commands
- **Automatic**: No manual intervention required
- **Safe**: Only affects the current workspace session
## Example
```bash
# Without wrapper
git commit -m "Fix bug"
# Results in: "Fix bug"
# With wrapper enabled
git commit -m "Fix bug"
# Results in: "Fix bug\n\nCo-authored-by: openhands <openhands@all-hands.dev>"
```
+43 -115
View File
@@ -1,74 +1,10 @@
import os
import shutil
import subprocess
import sys
from openhands.core.logger import openhands_logger as logger
def _configure_git_for_user(username: str, initial_cwd: str) -> None:
    """Configure git for the target user: safe.directory and global hooks/template.

    Installs the ``prepare-commit-msg`` co-authorship hook under
    ``/openhands/git-hooks/hooks`` (copying the bundled hook, or writing a
    minimal equivalent when the bundled file is missing) and then sets three
    global git options with ``HOME`` pointed at the user's home directory.

    This is best-effort: any failure is logged as a warning and never raised,
    so a git configuration problem cannot abort user initialization.

    Args:
        username: User whose global git config is written. ``root`` maps to
            ``/root``, every other name to ``/home/<username>``.
        initial_cwd: Working directory registered as a git ``safe.directory``.
    """
    try:
        # Ensure hooks directory exists and has our prepare-commit-msg
        hooks_root = '/openhands/git-hooks'
        hooks_dir = os.path.join(hooks_root, 'hooks')
        os.makedirs(hooks_dir, exist_ok=True)
        hook_src = (
            '/openhands/code/openhands/runtime/utils/git_hooks/prepare-commit-msg'
        )
        hook_dest = os.path.join(hooks_dir, 'prepare-commit-msg')
        if os.path.exists(hook_src):
            shutil.copyfile(hook_src, hook_dest)
        else:
            # Fallback: write a minimal prepare-commit-msg hook that adds co-authorship
            fallback_hook = (
                '#!/bin/sh\n'
                'FILE="$1"\n'
                'if ! grep -qi "co-authored-by.*openhands.*<openhands@all-hands.dev>" "$FILE" 2>/dev/null; then\n'
                ' echo "" >> "$FILE"\n'
                ' echo "" >> "$FILE"\n'
                ' echo "Co-authored-by: openhands <openhands@all-hands.dev>" >> "$FILE"\n'
                'fi\n'
            )
            with open(hook_dest, 'w', encoding='utf-8') as f:
                f.write(fallback_hook)
        os.chmod(hook_dest, 0o755)

        # `git config --global` writes to $HOME/.gitconfig, so HOME selects
        # whose configuration is modified.
        env = dict(os.environ)
        env['HOME'] = '/root' if username == 'root' else f'/home/{username}'

        git_config_commands = (
            # Avoid dubious ownership errors
            ['git', 'config', '--global', '--add', 'safe.directory', initial_cwd],
            # Ensure co-authorship hook is enabled for all repos/actions
            ['git', 'config', '--global', 'core.hooksPath', hooks_dir],
            ['git', 'config', '--global', 'init.templateDir', hooks_root],
        )
        for git_command in git_config_commands:
            result = subprocess.run(
                git_command,
                check=False,
                capture_output=True,
                text=True,
                env=env,
            )
            if result.returncode != 0:
                logger.debug(
                    f'`{" ".join(git_command)}` exited with {result.returncode}: {result.stderr}'
                )
    except Exception as e:
        # Best-effort: report the problem instead of silently swallowing it.
        logger.warning(f'Failed to configure git for user `{username}`: {e}')
def init_user_and_working_directory(
username: str, user_id: int, initial_cwd: str
) -> int | None:
@@ -108,85 +44,77 @@ def init_user_and_working_directory(
return None
# Defensive guard: never attempt to create a non-root user with UID 0
try:
user_id = int(user_id)
except Exception:
user_id = 1000
if username != 'root' and user_id == 0:
logger.warning(
'Received UID 0 for non-root user; overriding to 1000 to avoid conflict with root'
)
user_id = 1000
# if username is CURRENT_USER, then we don't need to do anything
# This is specific to the local runtime
if username == os.getenv('USER') and username not in ['root', 'openhands']:
return None
# First create the working directory
# First create the working directory, independent of the user
logger.debug(f'Client working directory: {initial_cwd}')
output = subprocess.run(
f'umask 002; mkdir -p {initial_cwd}', shell=True, capture_output=True
)
command = f'umask 002; mkdir -p {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()
logger.debug(f'Ensured working directory exists. Output: [{out_str}]')
# If running as root user, no need to create another user
command = f'chown -R {username}:root {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
command = f'chmod g+rw {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'Created working directory. Output: [{out_str}]')
# Skip root since it is already created
if username == 'root':
# Make sure directory is group-writable
subprocess.run(f'chmod g+rw {initial_cwd}', shell=True, capture_output=True)
# Still need to configure git for root user
_configure_git_for_user(username, initial_cwd)
return None
# Ensure the user exists before attempting chown
# Check if the username already exists
existing_user_id = -1
try:
result = subprocess.run(
f'id -u {username}', shell=True, check=True, capture_output=True
)
existing_user_id = int(result.stdout.decode().strip())
if existing_user_id != user_id:
# The user ID already exists, skip setup
if existing_user_id == user_id:
logger.debug(
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
)
else:
logger.warning(
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
)
user_id = existing_user_id
return existing_user_id
return None
except subprocess.CalledProcessError as e:
# Returncode 1 indicates, that the user does not exist yet
if e.returncode == 1:
logger.debug(
f'User `{username}` does not exist. Proceeding with user creation.'
)
# Add sudoer (passwordless)
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
# Create the user with the provided UID
cmd_useradd = (
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)
output = subprocess.run(cmd_useradd, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
else:
raise RuntimeError(
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
)
else:
logger.error(f'Error checking user `{username}`, skipping setup:\n{e}\n')
raise
# Now that the user exists, set ownership and permissions on the workspace
subprocess.run(
f'chown -R {username}:root {initial_cwd}', shell=True, capture_output=True
# Add sudoer
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
command = (
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)
subprocess.run(f'chmod g+rw {initial_cwd}', shell=True, capture_output=True)
# Configure git for the target user: safe.directory and global hooks/template
_configure_git_for_user(username, initial_cwd)
output = subprocess.run(command, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
else:
raise RuntimeError(
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
)
return None
@@ -239,17 +239,6 @@ COPY ./code/microagents /openhands/code/microagents
COPY ./code/openhands /openhands/code/openhands
RUN chmod a+rwx /openhands/code/openhands/__init__.py
# Set up global git hooks for automatic co-authorship
RUN \
# Set up global git hook template directory for automatic co-authorship fallback
mkdir -p /openhands/git-hooks/hooks && \
git config --global init.templateDir /openhands/git-hooks && \
# Copy git hooks from source code
cp /openhands/code/openhands/runtime/utils/git_hooks/prepare-commit-msg /openhands/git-hooks/hooks/ && \
chmod +x /openhands/git-hooks/hooks/prepare-commit-msg && \
# Set up global git hooks path for existing repositories
git config --global core.hooksPath /openhands/git-hooks/hooks
# ================================================================
+11 -116
View File
@@ -16,12 +16,15 @@ from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
from openhands.runtime.impl.local.local_runtime import LocalRuntime
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_timeout_suffix(timeout_seconds):
"""Helper to match the timeout suffix across runtime versions."""
# Only assert on the stable prefix to avoid mismatches between server and test code
return f'[The command timed out after {float(timeout_seconds):.1f} seconds.'
"""Helper function to generate the expected timeout suffix."""
return (
f'[The command timed out after {timeout_seconds} seconds. '
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
)
# ============================================================================================================================
@@ -874,111 +877,6 @@ def test_git_operation(temp_dir, runtime_cls):
_close_test_runtime(runtime)
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific git hooks and file operations'
)
def test_git_co_authorship_runtime_setup(temp_dir, runtime_cls):
    """Verify that a plain commit made in the runtime gains the openhands co-author trailer.

    The commit is created under a non-openhands git identity and without any
    co-authorship text in the message, so the trailer can only come from the
    runtime's own git setup (intended: the hooks installed by Dockerfile.j2).
    """
    runtime, _config = _load_runtime(
        temp_dir=temp_dir,
        use_workspace=False,
        runtime_cls=runtime_cls,
        run_as_openhands=True,
    )
    # Executed in order; every step must exit with status 0.
    preparation_steps = (
        # Fresh repository.
        'git init',
        # Use an identity other than openhands so the trailer is meaningful.
        'git config user.name "testuser" && git config user.email "testuser@example.com"',
        # Create and stage a file.
        'echo "test content" > test_file.txt',
        'git add test_file.txt',
        # Commit without manually adding co-authorship - the runtime should add it.
        'git commit -m "Test commit without manual co-authorship"',
    )
    try:
        for step in preparation_steps:
            step_obs = _run_cmd_action(runtime, step)
            assert step_obs.exit_code == 0

        # Read back the full message of the latest commit.
        log_obs = _run_cmd_action(runtime, 'git log --format="%B" -n 1')
        assert log_obs.exit_code == 0

        # All runtimes are expected to append the trailer via hooks; the CLI
        # runtime additionally uses a PATH-based wrapper.
        assert 'Co-authored-by: openhands <openhands@all-hands.dev>' in log_obs.content
    finally:
        _close_test_runtime(runtime)
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific git wrapper and file operations'
)
def test_git_co_authorship_wrapper_always_enabled(temp_dir, runtime_cls):
    """Verify the CLI runtime installs its git wrapper and that commits gain the co-author trailer.

    Skipped for every runtime class other than ``CLIRuntime``, since the other
    runtimes handle git co-authorship differently.
    """
    if runtime_cls.__name__ != 'CLIRuntime':
        pytest.skip('This test is specific to CLIRuntime')

    runtime, _config = _load_runtime(
        temp_dir=temp_dir,
        use_workspace=False,
        runtime_cls=runtime_cls,
        run_as_openhands=True,
    )

    def run_ok(command):
        """Run ``command`` in the runtime, require exit status 0, return the observation."""
        observation = _run_cmd_action(runtime, command)
        assert observation.exit_code == 0
        return observation

    try:
        # Initialise a repository in the workspace under a non-openhands identity.
        run_ok('git init')
        run_ok(
            'git config user.name "testuser" && git config user.email "testuser@example.com"'
        )

        # The wrapper should have been installed during runtime initialisation;
        # check that it exists and is executable in the user's bin directory.
        wrapper_obs = run_ok('test -x ~/.openhands/bin/git && echo "wrapper exists"')
        assert 'wrapper exists' in wrapper_obs.content

        # Create, stage and commit a file without any manual co-authorship text.
        run_ok('echo "test content" > test_file.txt')
        run_ok('git add test_file.txt')
        run_ok('git commit -m "Test commit with wrapper"')

        # The wrapper is expected to have appended the trailer to the message.
        log_obs = run_ok('git log --format="%B" -n 1')
        assert 'Co-authored-by: openhands <openhands@all-hands.dev>' in log_obs.content
    finally:
        _close_test_runtime(runtime)
def test_python_version(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
@@ -1551,19 +1449,16 @@ def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
try:
# create a git repo - same for both platforms
obs = runtime.run_action(CmdRunAction('git init'))
assert obs.metadata.exit_code == 0
# add or update origin remote robustly (handles case where it already exists)
add_remote_cmd = (
'git remote add origin https://github.com/All-Hands-AI/OpenHands || '
'git remote set-url origin https://github.com/All-Hands-AI/OpenHands'
action = CmdRunAction(
'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
)
obs = runtime.run_action(CmdRunAction(add_remote_cmd))
obs = runtime.run_action(action)
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 0
# Check git remote - same for both platforms
obs = runtime.run_action(CmdRunAction('git remote -v'))
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.metadata.exit_code == 0
assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
assert 'git remote -v' not in obs.content
@@ -1,26 +0,0 @@
import os
from pathlib import Path
from evaluation.utils.shared import get_openhands_config_for_eval
def test_eval_file_store_defaults_to_repo_local(tmp_path, monkeypatch):
    """Check that the eval config stores sessions in ``.eval_sessions`` under the cwd.

    The working directory is switched to ``tmp_path`` for the duration of the
    test; the config returned by ``get_openhands_config_for_eval()`` is then
    expected to use the ``local`` file store rooted at
    ``<tmp_path>/.eval_sessions``.
    """
    # Use the requested fixture instead of a hand-written os.chdir/try/finally:
    # monkeypatch restores the previous working directory at teardown, even if
    # an assertion below fails.
    monkeypatch.chdir(tmp_path)

    cfg = get_openhands_config_for_eval()

    assert Path(cfg.file_store_path) == (tmp_path / '.eval_sessions').resolve()
    assert cfg.file_store == 'local'
def test_eval_file_store_is_hard_coded_repo_local(tmp_path):
    """Check that the eval config always points at ``.eval_sessions`` under the cwd.

    Runs with ``tmp_path`` as the working directory and restores the original
    directory afterwards, whether or not the assertions pass.
    """
    original_dir = Path.cwd()
    expected_store = (tmp_path / '.eval_sessions').resolve()
    try:
        os.chdir(tmp_path)
        eval_config = get_openhands_config_for_eval()
        actual_store = Path(eval_config.file_store_path)
        assert actual_store == expected_store
        assert eval_config.file_store == 'local'
    finally:
        # Always return to the directory the test started in.
        os.chdir(original_dir)