mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e4d870f9ed | |||
| 1183dd2e0c | |||
| fe1052c35f | |||
| 9dbd1bf901 | |||
| 471aba2b57 | |||
| 0d6ea9501e | |||
| d62adb5c2a | |||
| f5c44af7f0 | |||
| 38bfd14e0a | |||
| bbaa1d9d1e | |||
| b7a5c48de5 | |||
| fadba6d779 | |||
| d819fa5750 | |||
| 66a60eafdb | |||
| dc37ad1433 | |||
| dbabaf2591 | |||
| f232ad8e3b | |||
| 5ebdd4ee93 | |||
| 76fcfba538 | |||
| 7f6882a3bd | |||
| 261e618a4b | |||
| 8d471aa2c2 |
@@ -233,3 +233,6 @@ containers/runtime/Dockerfile
|
||||
containers/runtime/project.tar.gz
|
||||
containers/runtime/code
|
||||
**/node_modules/
|
||||
|
||||
# regression test workspaces
|
||||
tests/regression/cases/*/workspace/
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
timeout: 120 # 2 minutes
|
||||
required: true
|
||||
@@ -0,0 +1 @@
|
||||
Create a bash script called hello.sh that prints "hello world"
|
||||
Executable
+28
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# Verification script for the "hello world" regression case.
# Runs inside the case workspace after the agent has finished and checks that
# hello.sh exists, is executable, and prints exactly "hello world".
set -e
set +x

# Print the given failure reason and abort with exit status 1.
fail() {
    echo "$1"
    exit 1
}

echo "checking hello world"
# Show where we are and what the workspace contains, to ease debugging.
pwd
ls -lah

# The script must exist ...
[ -f hello.sh ] || fail "hello.sh does not exist"

# ... and carry the executable bit.
[ -x hello.sh ] || fail "hello.sh is not executable"

# Run it and compare the captured stdout against the expected text.
output=$(./hello.sh)
[ "$output" = "hello world" ] || fail "Expected 'hello world' but got: $output"

exit 0
|
||||
@@ -0,0 +1,158 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
import openhands.agenthub # noqa: F401 - import to register agents
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.main import auto_continue_response, run_controller
|
||||
from openhands.events.action import MessageAction
|
||||
|
||||
|
||||
def run_test_case(case_dir: Path) -> bool:
    """Run a single regression test case.

    The case directory must contain ``prompt.txt`` (the task handed to the
    agent) and ``test.sh`` (the check executed after the agent finishes).
    An optional ``case.yaml`` may define:

    * ``timeout``: seconds allowed for the agent run (default 120)
    * ``required``: whether a failure of this case counts as a failure of the
      whole run (default True)
    * ``git``: repository to clone into the workspace before the run
    * ``commit-ish``: revision to check out after cloning (default ``main``)

    A fresh ``workspace`` directory is created inside the case directory and
    removed afterwards unless the ``NO_CLEANUP`` environment variable is set.

    Args:
        case_dir: Path to the test case directory

    Returns:
        bool: False if a required case failed, errored or timed out;
        True if the case passed or the case is not required
    """
    # Function-scope import: only the optional git setup needs it.
    import subprocess

    case_name = case_dir.name
    print(f'Running test case: {case_name}')

    # Read the case configuration once; it is reused for the git setup below.
    case_config: Dict[str, Any] = {}
    case_yaml = case_dir / 'case.yaml'
    if case_yaml.exists():
        with open(case_yaml) as f:
            loaded: Optional[Dict[str, Any]] = yaml.safe_load(f)
        if loaded:
            case_config = loaded
    timeout = case_config.get('timeout', 120)  # Default timeout 2 minutes
    required = case_config.get('required', True)

    # Start from an empty workspace, discarding leftovers of earlier runs.
    workspace_dir = case_dir / 'workspace'
    if workspace_dir.exists():
        shutil.rmtree(workspace_dir)
    workspace_dir.mkdir(exist_ok=True)
    keep_workspace = bool(os.getenv('NO_CLEANUP'))

    try:
        # Optionally seed the workspace from a git repository. Arguments are
        # passed as a list (no shell), so unusual characters in the repo URL,
        # revision or path cannot be misinterpreted, and a failing git
        # command is reported instead of being silently ignored.
        if 'git' in case_config:
            repo = str(case_config['git'])
            commit = str(case_config.get('commit-ish', 'main'))
            try:
                subprocess.run(
                    ['git', 'clone', repo, str(workspace_dir)], check=True
                )
                subprocess.run(
                    ['git', 'checkout', commit], cwd=workspace_dir, check=True
                )
            except (OSError, subprocess.CalledProcessError) as e:
                print(f'Error preparing workspace for test case {case_name}: {e}')
                return not required

        # Copy prompt and test script
        shutil.copy2(case_dir / 'prompt.txt', workspace_dir / 'prompt.txt')
        shutil.copy2(case_dir / 'test.sh', workspace_dir / 'test.sh')
        os.chmod(workspace_dir / 'test.sh', 0o755)  # Make test.sh executable

        # Read the prompt
        with open(case_dir / 'prompt.txt') as f:
            task_str = f.read()

        # Set up OpenHands configuration
        app_config = AppConfig()
        app_config.name = case_name
        app_config.agent_cls = 'CodeActAgent'
        app_config.max_budget_per_task = 100
        app_config.max_iterations = 100
        app_config.cli_multiline_input = False
        app_config.config_file = str(
            Path(__file__).parent.parent.parent / 'config.toml'
        )
        app_config.workspace_base = str(workspace_dir)
        app_config.workspace_mount_path = str(workspace_dir)
        app_config.workspace_mount_path_in_sandbox = '/workspace'
        app_config.sandbox.keep_runtime_alive = False
        app_config.save_trajectory_path = str(workspace_dir / 'trajectory.json')
        initial_user_action = MessageAction(content=task_str)

        # test.sh is invoked by relative path, so run from the workspace and
        # always restore the previous working directory afterwards.
        original_cwd = os.getcwd()
        os.chdir(workspace_dir)
        try:
            # Run OpenHands, bounded by the configured timeout
            # (a timeout of None means "no limit").
            asyncio.run(
                asyncio.wait_for(
                    run_controller(
                        config=app_config,
                        initial_user_action=initial_user_action,
                        fake_user_response_fn=auto_continue_response,
                        headless_mode=True,
                    ),
                    timeout=timeout,
                )
            )

            # Run the test script
            if os.system('./test.sh') != 0:
                print(f'Test case {case_name} failed')
                return not required
            print(f'Test case {case_name} passed')
            return True
        except asyncio.TimeoutError:
            # NOTE(review): on Python 3.11+ this is the builtin TimeoutError,
            # so a timeout raised inside the agent run is reported the same way.
            print(
                f'Error running test case {case_name}: '
                f'timed out after {timeout} seconds'
            )
            return not required
        except Exception as e:
            # Any agent error counts as a failure of this case only; the
            # remaining cases must still get a chance to run.
            print(f'Error running test case {case_name}: {e}')
            return not required
        finally:
            os.chdir(original_cwd)
    finally:
        if not keep_workspace and workspace_dir.exists():
            shutil.rmtree(workspace_dir)
|
||||
|
||||
|
||||
def main() -> None:
    """Run all regression tests and exit with a matching status code.

    Every sub-directory of the ``cases`` directory next to this file is
    treated as one test case. Cases are run in sorted order so that the
    sequence is the same on every machine. All cases are run even when an
    earlier one fails.

    Exits with status 0 when every case reported success, and with status 1
    when at least one case failed or the ``cases`` directory is missing.
    """
    cases_dir = Path(__file__).parent / 'cases'
    if not cases_dir.is_dir():
        print(f'No test cases directory found at {cases_dir}')
        sys.exit(1)

    all_passed = True
    # iterdir() yields entries in arbitrary filesystem order; sort for
    # reproducible runs.
    for case_dir in sorted(cases_dir.iterdir()):
        if case_dir.is_dir() and not run_test_case(case_dir):
            all_passed = False

    if all_passed:
        print('All tests completed successfully')
        sys.exit(0)
    else:
        print('Some tests failed')
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user