[Arch] Add tests for EventStreamRuntime and fix bash parsing (#2933)

* deprecating recall action

* fix integration tests

* fix integration tests

* refactor runtime to use async

* remove search memory

* rename .initialize to .ainit

* draft of runtime image building (separate from img agnostic)

* refactor runtime build into separate file and add unit tests for it

* fix image agnostic tests

* move `split_bash_commands` into a separate util file

* fix bash pexpect parsing for env

* refactor add_env_var from sandbox to runtime;
add test runtime for env var, remove it from sandbox;

* remove unclear comment

* capture broader error

* make `add_env_var` handle multiple exports at the same time

* add multi env var test

* fix tests with new config

* make runtime tests a separate ci to avoid full disk

* Update Runtime README with architecture diagram and detailed explanations

* update test

* remove dependency of global config in sandbox test

* fix sandbox typo

* runtime tests do not need ghcr build now

* remove download runtime img

* remove dependency of global config in sandbox test

* fix sandbox typo

* try to free disk before running the tests

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* try to reduce code duplication

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* Update opendevin/runtime/client/README.md

Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>

* cleanup before setup

* temporarily remove this enable lint test since env var are now handled by runtime

* linter

---------

Co-authored-by: OpenDevin <opendevin@all-hands.dev>
Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>
This commit is contained in:
Xingyao Wang
2024-07-18 06:10:45 +08:00
committed by GitHub
parent cf3d2298da
commit f80ecec772
18 changed files with 573 additions and 182 deletions

View File

@@ -4,7 +4,7 @@ from unittest.mock import MagicMock, call, patch
import pytest
from opendevin.core.config import config
from opendevin.core.config import SandboxConfig, config
from opendevin.events.action import IPythonRunCellAction
from opendevin.events.observation import IPythonRunCellObservation
from opendevin.runtime.docker.ssh_box import DockerSSHBox
@@ -43,7 +43,10 @@ async def test_run_python_backticks():
new=mock_sandbox_execute,
):
# Initialize the runtime with the mock event_stream
runtime = ServerRuntime(event_stream=mock_event_stream)
runtime = ServerRuntime(
sandbox_config=SandboxConfig(box_type='ssh', persist_sandbox=False),
event_stream=mock_event_stream,
)
# Define the test action with a simple IPython command
action = IPythonRunCellAction(code=test_code)

184
tests/unit/test_runtime.py Normal file
View File

@@ -0,0 +1,184 @@
"""Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import os
import pathlib
import tempfile
from unittest.mock import patch
import pytest
from opendevin.core.config import SandboxConfig
from opendevin.events import EventStream
from opendevin.events.action import (
CmdRunAction,
)
from opendevin.events.observation import (
CmdOutputObservation,
)
from opendevin.runtime.client.runtime import EventStreamRuntime
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
from opendevin.runtime.server.runtime import ServerRuntime
@pytest.fixture
def temp_dir(monkeypatch):
    """Yield the path of a fresh temporary directory for one test.

    The directory is created by ``tempfile.TemporaryDirectory`` and removed,
    together with its contents, once the test that requested the fixture
    finishes. ``monkeypatch`` is requested but not used by the fixture body.

    Yields:
        str: Path of the temporary directory.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Target the temporary directory explicitly: a bare ``pathlib.Path()``
        # resolves to the current working directory, not to ``temp_dir``.
        pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
        yield temp_dir
async def _load_runtime(box_class, event_stream, plugins, sid):
    """Build and initialise a runtime of the requested class.

    Args:
        box_class: Either ``EventStreamRuntime`` or ``ServerRuntime``.
        event_stream: Event stream handed to the runtime constructor.
        plugins: Plugin requirements to install into the runtime.
        sid: Session id handed to the runtime constructor.

    Returns:
        The runtime instance after ``ainit()`` has completed.

    Raises:
        ValueError: If ``box_class`` is neither of the two supported classes.
    """
    sandbox_config = SandboxConfig()

    if box_class == EventStreamRuntime:
        event_stream_runtime = EventStreamRuntime(
            sandbox_config=sandbox_config,
            event_stream=event_stream,
            sid=sid,
            # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
            # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
            container_image='ubuntu:22.04',
            plugins=plugins,
        )
        await event_stream_runtime.ainit()
        return event_stream_runtime

    if box_class == ServerRuntime:
        server_runtime = ServerRuntime(
            sandbox_config=sandbox_config,
            event_stream=event_stream,
            sid=sid,
        )
        await server_runtime.ainit()
        # Unlike EventStreamRuntime, plugins are passed after construction here.
        server_runtime.init_sandbox_plugins(plugins)
        server_runtime.init_runtime_tools(
            [],
            is_async=False,
            runtime_tools_config={},
        )
        return server_runtime

    raise ValueError(f'Invalid box class: {box_class}')
# Runtime classes that the env-var tests below iterate over.
RUNTIME_TO_TEST = [EventStreamRuntime, ServerRuntime]
@pytest.mark.asyncio
async def test_env_vars_os_environ():
    """Check that host ``SANDBOX_ENV_FOOBAR`` is visible as ``FOOBAR`` in each runtime.

    The host variable is set with ``patch.dict`` so it is restored when the
    test ends. Each runtime in ``RUNTIME_TO_TEST`` is then asked to echo
    ``$FOOBAR`` and must print ``BAZ`` with exit code 0.
    """
    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
        plugins = [JupyterRequirement(), AgentSkillsRequirement()]
        sid = 'test'
        cli_session = 'main_test'

        for box_class in RUNTIME_TO_TEST:
            event_stream = EventStream(cli_session)
            runtime = await _load_runtime(box_class, event_stream, plugins, sid)

            # Dump the full environment first; it is only printed to aid debugging.
            env_obs: CmdOutputObservation = await runtime.run_action(
                CmdRunAction(command='env')
            )
            print(env_obs)

            obs: CmdOutputObservation = await runtime.run_action(
                CmdRunAction(command='echo $FOOBAR')
            )
            print(obs)
            assert obs.exit_code == 0, 'The exit code should be 0.'
            # Split on '\r\n' (not the reversed '\n\r') so the first output line
            # is isolated the same way as in the other env-var tests of this file.
            assert (
                obs.content.strip().split('\r\n')[0].strip() == 'BAZ'
            ), f'Output: [{obs.content}] for {box_class}'
@pytest.mark.asyncio
async def test_env_vars_runtime_add_env_var():
    """Check that a variable added via ``add_env_var`` can be echoed in each runtime.

    The value contains a double quote to exercise quoting of the exported value.
    """
    requirements = [JupyterRequirement(), AgentSkillsRequirement()]

    for runtime_cls in RUNTIME_TO_TEST:
        stream = EventStream('main_test')
        runtime = await _load_runtime(runtime_cls, stream, requirements, 'test')
        await runtime.add_env_var({'QUUX': 'abc"def'})

        echo_action = CmdRunAction(command='echo $QUUX')
        obs: CmdOutputObservation = await runtime.run_action(echo_action)
        print(obs)

        assert obs.exit_code == 0, 'The exit code should be 0.'
        # Only the first line of the command output is compared.
        first_line = obs.content.strip().split('\r\n')[0].strip()
        assert first_line == 'abc"def', f'Output: [{obs.content}] for {runtime_cls}'
@pytest.mark.asyncio
async def test_env_vars_runtime_add_multiple_env_vars():
    """Check that one ``add_env_var`` call can export several variables at once."""
    requirements = [JupyterRequirement(), AgentSkillsRequirement()]

    for runtime_cls in RUNTIME_TO_TEST:
        stream = EventStream('main_test')
        runtime = await _load_runtime(runtime_cls, stream, requirements, 'test')
        await runtime.add_env_var({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})

        echo_action = CmdRunAction(command='echo $QUUX $FOOBAR')
        obs: CmdOutputObservation = await runtime.run_action(echo_action)
        print(obs)

        assert obs.exit_code == 0, 'The exit code should be 0.'
        # Only the first line of the command output is compared.
        first_line = obs.content.strip().split('\r\n')[0].strip()
        assert (
            first_line == 'abc"def xyz'
        ), f'Output: [{obs.content}] for {runtime_cls}'
@pytest.mark.asyncio
async def test_env_vars_runtime_add_env_var_overwrite():
    """Check that ``add_env_var`` overrides a value that came from ``SANDBOX_ENV_*``.

    ``SANDBOX_ENV_FOOBAR`` is set to ``BAZ`` on the host while the runtime is
    created; ``FOOBAR`` is then re-exported as ``xyz`` and must echo as ``xyz``.
    """
    requirements = [JupyterRequirement(), AgentSkillsRequirement()]
    host_env = {'SANDBOX_ENV_FOOBAR': 'BAZ'}

    for runtime_cls in RUNTIME_TO_TEST:
        # The host variable is patched per iteration and restored afterwards.
        with patch.dict(os.environ, host_env):
            stream = EventStream('main_test')
            runtime = await _load_runtime(runtime_cls, stream, requirements, 'test')
            await runtime.add_env_var({'FOOBAR': 'xyz'})

            echo_action = CmdRunAction(command='echo $FOOBAR')
            obs: CmdOutputObservation = await runtime.run_action(echo_action)
            print(obs)

            assert obs.exit_code == 0, 'The exit code should be 0.'
            first_line = obs.content.strip().split('\r\n')[0].strip()
            assert first_line == 'xyz', f'Output: [{obs.content}] for {runtime_cls}'
@pytest.mark.asyncio
async def test_bash_command_pexcept(temp_dir):
    """Run ``env`` in the EventStreamRuntime and expect exit code 0.

    ``temp_dir`` is requested as a fixture but is not used in the test body.
    """
    requirements = [JupyterRequirement(), AgentSkillsRequirement()]
    runtime = await _load_runtime(
        EventStreamRuntime, EventStream('main_test'), requirements, 'test'
    )

    # The env var PS1="\u@\h:\w $" is set, and the prompt pattern that pexpect
    # waits for is constructed based on it.
    # The output of `env` itself contains the PS1 value, so a bad implementation
    # of CmdRunAction matches the prompt inside that output, stops reading at the
    # wrong place, and then fails to obtain the exit code.
    obs = await runtime.run_action(CmdRunAction(command='env'))

    # For example:
    # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
    # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
    # CONDA_EXE=/opendevin/miniforge3/bin/conda
    # [...]
    # LC_CTYPE=C.UTF-8
    # PS1=\u@\h:\w $
    # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
    # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
    # CONDA_DEFAULT_ENV=base

    # The test passes as long as the exit code is 0.
    is_cmd_output = isinstance(obs, CmdOutputObservation)
    assert is_cmd_output, 'The observation should be a CmdOutputObservation.'
    assert obs.exit_code == 0, 'The exit code should be 0.'

View File

@@ -5,9 +5,9 @@ import tempfile
import pytest
from opendevin.core.config import AppConfig, SandboxConfig
from opendevin.runtime.docker.local_box import LocalBox
from opendevin.runtime.docker.ssh_box import DockerSSHBox, split_bash_commands
from opendevin.runtime.docker.ssh_box import DockerSSHBox
from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
from opendevin.runtime.utils import split_bash_commands
def create_docker_box_from_app_config(
@@ -42,30 +42,6 @@ def temp_dir(monkeypatch):
yield temp_dir
def test_env_vars(temp_dir):
    """Check env-var handling of the SSH and local sandboxes.

    ``SANDBOX_ENV_FOOBAR`` is set on the host while the boxes are created and
    used; ``QUUX`` is added through ``add_to_env``. Both must be recorded in
    ``box._env`` and be visible to a command executed inside the box.
    """
    # Local import keeps the block self-contained.
    from unittest.mock import patch

    # Scope the host variable to this test instead of assigning to os.environ
    # directly, which would leak into every test that runs afterwards.
    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
        ssh_box = create_docker_box_from_app_config(temp_dir)

        local_box_config = AppConfig(
            sandbox=SandboxConfig(
                box_type='local',
            )
        )
        local_box = LocalBox(local_box_config.sandbox, temp_dir)

        for box in [
            ssh_box,
            local_box,
        ]:
            box.add_to_env(key='QUUX', value='abc"def')
            assert box._env['FOOBAR'] == 'BAZ'
            assert box._env['QUUX'] == 'abc"def'
            exit_code, output = box.execute('echo $FOOBAR $QUUX')
            assert exit_code == 0, 'The exit code should be 0.'
            assert (
                output.strip() == 'BAZ abc"def'
            ), f'Output: {output} for {box.__class__.__name__}'
def test_split_commands():
cmds = [
'ls -l',
@@ -339,20 +315,6 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
_test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)
def test_sandbox_jupyter_agentskills_fileop_pwd_with_lint(temp_dir):
    """Run the shared file-op/pwd scenario on an SSH box with auto-lint enabled."""
    lint_sandbox = SandboxConfig(
        box_type='ssh',
        persist_sandbox=False,
        enable_auto_lint=True,
    )
    config = AppConfig(sandbox=lint_sandbox)
    # Guard against the flag being dropped while building the config.
    assert config.sandbox.enable_auto_lint

    box = create_docker_box_from_app_config(temp_dir, config)
    _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)
@pytest.mark.skipif(
os.getenv('TEST_IN_CI') != 'true',
reason='The unittest need to download image, so only run on CI',