[Arch] Add tests for EventStreamRuntime and fix bash parsing (#2933)

* deprecating recall action * fix integration tests * fix integration tests * refractor runtime to use async * remove search memory * rename .initialize to .ainit * draft of runtime image building (separate from img agnostic) * refractor runtime build into separate file and add unit tests for it * fix image agnostic tests * move `split_bash_commands` into a separate util file * fix bash pexcept parsing for env * refractor add_env_var from sandbox to runtime; add test runtime for env var, remove it from sandbox; * remove unclear comment * capture broader error * make `add_env_var` handle multiple export at the same time * add multi env var test * fix tests with new config * make runtime tests a separate ci to avoid full disk * Update Runtime README with architecture diagram and detailed explanations * update test * remove dependency of global config in sandbox test * fix sandbox typo * runtime tests does not need ghcr build now * remove download runtime img * remove dependency of global config in sandbox test * fix sandbox typo * try to free disk before running the tests * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * try to reduce code duplication * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * cleanup before setup * temporarily remove this enable lint test since env var are now handled by runtime * linter --------- Co-authored-by: OpenDevin <opendevin@all-hands.dev> Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com>
2026-01-09 14:57:59 -05:00 · 2024-07-18 06:10:45 +08:00
parent cf3d2298da
commit f80ecec772
18 changed files with 573 additions and 182 deletions
--- a/tests/unit/test_ipython.py
+++ b/tests/unit/test_ipython.py
@@ -4,7 +4,7 @@ from unittest.mock import MagicMock, call, patch

 import pytest

-from opendevin.core.config import config
+from opendevin.core.config import SandboxConfig, config
 from opendevin.events.action import IPythonRunCellAction
 from opendevin.events.observation import IPythonRunCellObservation
 from opendevin.runtime.docker.ssh_box import DockerSSHBox
@@ -43,7 +43,10 @@ async def test_run_python_backticks():
        new=mock_sandbox_execute,
    ):
        # Initialize the runtime with the mock event_stream
-        runtime = ServerRuntime(event_stream=mock_event_stream)
+        runtime = ServerRuntime(
+            sandbox_config=SandboxConfig(box_type='ssh', persist_sandbox=False),
+            event_stream=mock_event_stream,
+        )

        # Define the test action with a simple IPython command
        action = IPythonRunCellAction(code=test_code)
--- a/tests/unit/test_runtime.py
+++ b/tests/unit/test_runtime.py
@@ -0,0 +1,184 @@
+"""Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
+
+import os
+import pathlib
+import tempfile
+from unittest.mock import patch
+
+import pytest
+
+from opendevin.core.config import SandboxConfig
+from opendevin.events import EventStream
+from opendevin.events.action import (
+    CmdRunAction,
+)
+from opendevin.events.observation import (
+    CmdOutputObservation,
+)
+from opendevin.runtime.client.runtime import EventStreamRuntime
+from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
+from opendevin.runtime.server.runtime import ServerRuntime
+
+
+@pytest.fixture
+def temp_dir(monkeypatch):
+    # get a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        pathlib.Path().mkdir(parents=True, exist_ok=True)
+        yield temp_dir
+
+
+async def _load_runtime(box_class, event_stream, plugins, sid):
+    sandbox_config = SandboxConfig()
+    if box_class == EventStreamRuntime:
+        runtime = EventStreamRuntime(
+            sandbox_config=sandbox_config,
+            event_stream=event_stream,
+            sid=sid,
+            # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
+            # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
+            container_image='ubuntu:22.04',
+            plugins=plugins,
+        )
+        await runtime.ainit()
+    elif box_class == ServerRuntime:
+        runtime = ServerRuntime(
+            sandbox_config=sandbox_config, event_stream=event_stream, sid=sid
+        )
+        await runtime.ainit()
+        runtime.init_sandbox_plugins(plugins)
+        runtime.init_runtime_tools(
+            [],
+            is_async=False,
+            runtime_tools_config={},
+        )
+    else:
+        raise ValueError(f'Invalid box class: {box_class}')
+    return runtime
+
+
+RUNTIME_TO_TEST = [EventStreamRuntime, ServerRuntime]
+
+
+@pytest.mark.asyncio
+async def test_env_vars_os_environ():
+    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
+        plugins = [JupyterRequirement(), AgentSkillsRequirement()]
+        sid = 'test'
+        cli_session = 'main_test'
+
+        for box_class in RUNTIME_TO_TEST:
+            event_stream = EventStream(cli_session)
+            runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+
+            obs: CmdOutputObservation = await runtime.run_action(
+                CmdRunAction(command='env')
+            )
+            print(obs)
+
+            obs: CmdOutputObservation = await runtime.run_action(
+                CmdRunAction(command='echo $FOOBAR')
+            )
+            print(obs)
+            assert obs.exit_code == 0, 'The exit code should be 0.'
+            assert (
+                obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
+            ), f'Output: [{obs.content}] for {box_class}'
+
+
+@pytest.mark.asyncio
+async def test_env_vars_runtime_add_env_var():
+    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
+    sid = 'test'
+    cli_session = 'main_test'
+
+    for box_class in RUNTIME_TO_TEST:
+        event_stream = EventStream(cli_session)
+        runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+        await runtime.add_env_var({'QUUX': 'abc"def'})
+
+        obs: CmdOutputObservation = await runtime.run_action(
+            CmdRunAction(command='echo $QUUX')
+        )
+        print(obs)
+        assert obs.exit_code == 0, 'The exit code should be 0.'
+        assert (
+            obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
+        ), f'Output: [{obs.content}] for {box_class}'
+
+
+@pytest.mark.asyncio
+async def test_env_vars_runtime_add_multiple_env_vars():
+    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
+    sid = 'test'
+    cli_session = 'main_test'
+
+    for box_class in RUNTIME_TO_TEST:
+        event_stream = EventStream(cli_session)
+        runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+        await runtime.add_env_var({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
+
+        obs: CmdOutputObservation = await runtime.run_action(
+            CmdRunAction(command='echo $QUUX $FOOBAR')
+        )
+        print(obs)
+        assert obs.exit_code == 0, 'The exit code should be 0.'
+        assert (
+            obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
+        ), f'Output: [{obs.content}] for {box_class}'
+
+
+@pytest.mark.asyncio
+async def test_env_vars_runtime_add_env_var_overwrite():
+    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
+    sid = 'test'
+    cli_session = 'main_test'
+
+    for box_class in RUNTIME_TO_TEST:
+        with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
+            event_stream = EventStream(cli_session)
+            runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+            await runtime.add_env_var({'FOOBAR': 'xyz'})
+
+            obs: CmdOutputObservation = await runtime.run_action(
+                CmdRunAction(command='echo $FOOBAR')
+            )
+            print(obs)
+            assert obs.exit_code == 0, 'The exit code should be 0.'
+            assert (
+                obs.content.strip().split('\r\n')[0].strip() == 'xyz'
+            ), f'Output: [{obs.content}] for {box_class}'
+
+
+@pytest.mark.asyncio
+async def test_bash_command_pexcept(temp_dir):
+    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
+    sid = 'test'
+    cli_session = 'main_test'
+
+    box_class = EventStreamRuntime
+    event_stream = EventStream(cli_session)
+    runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+
+    # We set env var PS1="\u@\h:\w $"
+    # and construct the PEXCEPT prompt base on it.
+    # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
+    # and failed to pexcept the right content, causing it fail to get error code.
+    obs = await runtime.run_action(CmdRunAction(command='env'))
+
+    # For example:
+    # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
+    # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
+    # CONDA_EXE=/opendevin/miniforge3/bin/conda
+    # [...]
+    # LC_CTYPE=C.UTF-8
+    # PS1=\u@\h:\w $
+    # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
+    # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
+    # CONDA_DEFAULT_ENV=base
+
+    # As long as the exit code is 0, the test will pass.
+    assert isinstance(
+        obs, CmdOutputObservation
+    ), 'The observation should be a CmdOutputObservation.'
+    assert obs.exit_code == 0, 'The exit code should be 0.'
--- a/tests/unit/test_sandbox.py
+++ b/tests/unit/test_sandbox.py
@@ -5,9 +5,9 @@ import tempfile
 import pytest

 from opendevin.core.config import AppConfig, SandboxConfig
-from opendevin.runtime.docker.local_box import LocalBox
-from opendevin.runtime.docker.ssh_box import DockerSSHBox, split_bash_commands
+from opendevin.runtime.docker.ssh_box import DockerSSHBox
 from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
+from opendevin.runtime.utils import split_bash_commands


 def create_docker_box_from_app_config(
@@ -42,30 +42,6 @@ def temp_dir(monkeypatch):
        yield temp_dir


-def test_env_vars(temp_dir):
-    os.environ['SANDBOX_ENV_FOOBAR'] = 'BAZ'
-    ssh_box = create_docker_box_from_app_config(temp_dir)
-
-    local_box_config = AppConfig(
-        sandbox=SandboxConfig(
-            box_type='local',
-        )
-    )
-    local_box = LocalBox(local_box_config.sandbox, temp_dir)
-    for box in [
-        ssh_box,
-        local_box,
-    ]:
-        box.add_to_env(key='QUUX', value='abc"def')
-        assert box._env['FOOBAR'] == 'BAZ'
-        assert box._env['QUUX'] == 'abc"def'
-        exit_code, output = box.execute('echo $FOOBAR $QUUX')
-        assert exit_code == 0, 'The exit code should be 0.'
-        assert (
-            output.strip() == 'BAZ abc"def'
-        ), f'Output: {output} for {box.__class__.__name__}'
-
-
 def test_split_commands():
    cmds = [
        'ls -l',
@@ -339,20 +315,6 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
    _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)


-def test_sandbox_jupyter_agentskills_fileop_pwd_with_lint(temp_dir):
-    # get a temporary directory
-    config = AppConfig(
-        sandbox=SandboxConfig(
-            box_type='ssh',
-            persist_sandbox=False,
-            enable_auto_lint=True,
-        )
-    )
-    assert config.sandbox.enable_auto_lint
-    box = create_docker_box_from_app_config(temp_dir, config)
-    _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)
-
-
@pytest.mark.skipif(
    os.getenv('TEST_IN_CI') != 'true',
    reason='The unittest need to download image, so only run on CI',