Compare commits

...

7 Commits

Author SHA1 Message Date
Robert Brennan
4b743e90e2 dont close runtimes 2024-08-30 12:14:02 -04:00
Robert Brennan
505b47380f try reuse_ids 2024-08-30 11:40:56 -04:00
Robert Brennan
e9f2959ad2 Merge branch 'main' into rb/parallel-tests 2024-08-30 11:37:20 -04:00
Robert Brennan
848fd38e7f revert workflow 2024-08-30 11:30:42 -04:00
Robert Brennan
8b97144ae8 try reusing runtimes 2024-08-30 11:30:05 -04:00
Robert Brennan
cde8bddf62 give containers different names 2024-08-29 16:44:42 -04:00
Robert Brennan
06449ba69e try running tests with 5 workers 2024-08-29 15:51:22 -04:00
2 changed files with 29 additions and 38 deletions

View File

@@ -97,6 +97,9 @@ def runtime(temp_dir, box_class, run_as_openhands):
time.sleep(1)
runtimes = {}
def _load_runtime(
temp_dir,
box_class,
@@ -104,7 +107,11 @@ def _load_runtime(
enable_auto_lint: bool = False,
base_container_image: str | None = None,
browsergym_eval_env: str | None = None,
reuse_id: str | None = None,
) -> Runtime:
if reuse_id is not None and reuse_id in runtimes:
return runtimes[reuse_id]
sid = 'test'
cli_session = 'main_test'
@@ -135,6 +142,10 @@ def _load_runtime(
plugins=plugins,
)
time.sleep(1)
if reuse_id is not None:
runtimes[reuse_id] = runtime
return runtime

View File

@@ -17,7 +17,7 @@ from openhands.events.observation import CmdOutputObservation
def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands, reuse_id='bash')
# We set env var PS1="\u@\h:\w $"
# and construct the PEXCEPT prompt based on it.
@@ -42,12 +42,11 @@ def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
), 'The observation should be a CmdOutputObservation.'
assert obs.exit_code == 0, 'The exit code should be 0.'
runtime.close()
time.sleep(1)
def test_single_multiline_command(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='echo \\\n -e "foo"')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -56,12 +55,11 @@ def test_single_multiline_command(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'foo' in obs.content
runtime.close()
time.sleep(1)
def test_multiline_echo(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='echo -e "hello\nworld"')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -70,12 +68,11 @@ def test_multiline_echo(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
runtime.close()
time.sleep(1)
def test_runtime_whitespace(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='echo -e "\\n\\n\\n"')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -85,7 +82,6 @@ def test_runtime_whitespace(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '\r\n\r\n\r\n' in obs.content
runtime.close()
time.sleep(1)
@@ -119,7 +115,7 @@ world "
]
joined_cmds = '\n'.join(cmds)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands, reuse_id='bash')
action = CmdRunAction(command=joined_cmds)
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -137,13 +133,12 @@ world "
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
assert 'hello\r\nworld "\r\n' in obs.content
runtime.close()
time.sleep(1)
def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
"""Test that the PS2 sign is not added to the output of a multiline command."""
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands, reuse_id='bash')
action = CmdRunAction(command='echo -e "hello\nworld"')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -153,14 +148,13 @@ def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
assert 'hello\r\nworld' in obs.content
assert '>' not in obs.content
runtime.close()
time.sleep(1)
def test_multiline_command_loop(temp_dir, box_class):
# https://github.com/All-Hands-AI/OpenHands/issues/3143
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
init_cmd = """
mkdir -p _modules && \
@@ -196,12 +190,11 @@ echo "success"
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'success' in obs.content
runtime.close()
time.sleep(1)
def test_cmd_run(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands, reuse_id='bash')
action = CmdRunAction(command='ls -l')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -255,12 +248,11 @@ def test_cmd_run(temp_dir, box_class, run_as_openhands):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close()
time.sleep(1)
def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands, reuse_id='bash')
action = CmdRunAction(command='cd ~ && pwd')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -273,12 +265,11 @@ def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
else:
assert '/root' in obs.content
runtime.close()
time.sleep(1)
def test_multi_cmd_run_in_single_line(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='pwd && ls -l')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -289,12 +280,11 @@ def test_multi_cmd_run_in_single_line(temp_dir, box_class):
assert '/workspace' in obs.content
assert 'total 0' in obs.content
runtime.close()
time.sleep(1)
def test_stateful_cmd(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='mkdir test')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -318,12 +308,11 @@ def test_stateful_cmd(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '/workspace/test' in obs.content
runtime.close()
time.sleep(1)
def test_failed_cmd(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
action = CmdRunAction(command='non_existing_command')
logger.info(action, extra={'msg_type': 'ACTION'})
@@ -332,7 +321,6 @@ def test_failed_cmd(temp_dir, box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
runtime.close()
time.sleep(1)
@@ -343,7 +331,7 @@ def _create_test_file(host_temp_dir):
def test_copy_single_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
with tempfile.TemporaryDirectory() as host_temp_dir:
_create_test_file(host_temp_dir)
@@ -365,7 +353,6 @@ def test_copy_single_file(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
time.sleep(1)
@@ -378,7 +365,7 @@ def _create_test_dir_with_files(host_temp_dir):
def test_copy_directory_recursively(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
with tempfile.TemporaryDirectory() as host_temp_dir:
# We need a separate directory, since temp_dir is mounted to /workspace
@@ -414,12 +401,11 @@ def test_copy_directory_recursively(temp_dir, box_class):
assert obs.exit_code == 0
assert 'File 1 content' in obs.content
runtime.close()
time.sleep(1)
def test_copy_to_non_existent_directory(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
with tempfile.TemporaryDirectory() as host_temp_dir:
_create_test_file(host_temp_dir)
@@ -435,12 +421,11 @@ def test_copy_to_non_existent_directory(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
time.sleep(1)
def test_overwrite_existing_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
# touch a file in /workspace
action = CmdRunAction(command='touch /workspace/test_file.txt')
@@ -470,12 +455,11 @@ def test_overwrite_existing_file(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
time.sleep(1)
def test_copy_non_existent_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
runtime = _load_runtime(temp_dir, box_class, reuse_id='bash')
with pytest.raises(FileNotFoundError):
runtime.copy_to(
@@ -490,7 +474,6 @@ def test_copy_non_existent_file(temp_dir, box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0 # File should not exist
runtime.close()
time.sleep(1)
@@ -517,7 +500,6 @@ def test_keep_prompt(box_class, temp_dir):
assert obs.exit_code == 0
assert 'root@' not in obs.content
runtime.close()
time.sleep(1)
@@ -530,6 +512,7 @@ def test_git_operation(box_class):
box_class=box_class,
# Need to use non-root user to expose issues
run_as_openhands=True,
reuse_id='bash',
)
# this will happen if permission of runtime is not properly configured
@@ -595,7 +578,4 @@ def test_git_operation(box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close()
runtime.close()
time.sleep(1)