From f80ecec772a84ba1ded425e2bc50c8359acf7ee6 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 18 Jul 2024 06:10:45 +0800 Subject: [PATCH] [Arch] Add tests for `EventStreamRuntime` and fix bash parsing (#2933) * deprecating recall action * fix integration tests * fix integration tests * refractor runtime to use async * remove search memory * rename .initialize to .ainit * draft of runtime image building (separate from img agnostic) * refractor runtime build into separate file and add unit tests for it * fix image agnostic tests * move `split_bash_commands` into a separate util file * fix bash pexcept parsing for env * refractor add_env_var from sandbox to runtime; add test runtime for env var, remove it from sandbox; * remove unclear comment * capture broader error * make `add_env_var` handle multiple export at the same time * add multi env var test * fix tests with new config * make runtime tests a separate ci to avoid full disk * Update Runtime README with architecture diagram and detailed explanations * update test * remove dependency of global config in sandbox test * fix sandbox typo * runtime tests does not need ghcr build now * remove download runtime img * remove dependency of global config in sandbox test * fix sandbox typo * try to free disk before running the tests * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * try to reduce code duplication * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * Update opendevin/runtime/client/README.md Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> * cleanup before setup * temporarily remove this enable lint test since env var are now handled by runtime * linter --------- Co-authored-by: OpenDevin Co-authored-by: Yufan Song <33971064+yufansong@users.noreply.github.com> --- .github/workflows/run-runtime-tests.yml | 64 ++++++++ .github/workflows/run-unit-tests.yml | 2 +- opendevin/core/main.py | 4 +- opendevin/runtime/client/README.md | 110 ++++++++++++++ opendevin/runtime/client/client.py | 54 +++++-- opendevin/runtime/client/runtime.py | 23 ++- opendevin/runtime/docker/ssh_box.py | 91 +---------- opendevin/runtime/e2b/runtime.py | 4 +- opendevin/runtime/runtime.py | 50 +++++- opendevin/runtime/sandbox.py | 13 -- opendevin/runtime/server/runtime.py | 8 +- opendevin/runtime/utils/__init__.py | 3 +- opendevin/runtime/utils/bash.py | 87 +++++++++++ opendevin/runtime/utils/runtime_build.py | 5 +- opendevin/server/session/agent.py | 4 +- tests/unit/test_ipython.py | 7 +- tests/unit/test_runtime.py | 184 +++++++++++++++++++++++ tests/unit/test_sandbox.py | 42 +----- 18 files changed, 573 insertions(+), 182 deletions(-) create mode 100644 .github/workflows/run-runtime-tests.yml create mode 100644 opendevin/runtime/client/README.md create mode 100644 opendevin/runtime/utils/bash.py create mode 100644 tests/unit/test_runtime.py diff --git a/.github/workflows/run-runtime-tests.yml b/.github/workflows/run-runtime-tests.yml new file mode 100644 index 0000000000..80f6e3924e --- /dev/null +++ b/.github/workflows/run-runtime-tests.yml @@ -0,0 +1,64 @@ +name: Run Runtime Tests + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +on: + push: + branches: + - main + paths-ignore: + - '**/*.md' + - 'frontend/**' + - 'docs/**' + - 'evaluation/**' + pull_request: + +env: + PERSIST_SANDBOX : "false" + +jobs: + test-for-runtime: + name: Test for Runtime + runs-on: ubuntu-latest + env: + PERSIST_SANDBOX: "false" + steps: + - uses: actions/checkout@v4 + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # when set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: Install poetry via pipx + run: pipx install poetry + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "poetry" + + - name: Install Python dependencies using Poetry + run: make install-python-dependencies + + - name: Run runtime tests + run: | + TEST_IN_CI=true poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml -s ./tests/unit/test_runtime.py + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 8f17702f2c..7e6b53aed4 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -149,7 +149,7 @@ jobs: run: make build - name: Run Tests - run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" + run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox and not test_runtime" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/opendevin/core/main.py b/opendevin/core/main.py index bbbc2b0d58..db7d23f197 100644 --- a/opendevin/core/main.py +++ b/opendevin/core/main.py @@ -79,7 +79,9 @@ async def run_agent_controller( # runtime and tools runtime_cls = get_runtime_cls(config.runtime) - runtime = runtime_cls(event_stream=event_stream, sandbox=sandbox) + runtime = runtime_cls( + sandbox_config=config.sandbox, event_stream=event_stream, sandbox=sandbox + ) await runtime.ainit() runtime.init_sandbox_plugins(controller.agent.sandbox_plugins) runtime.init_runtime_tools( diff --git a/opendevin/runtime/client/README.md b/opendevin/runtime/client/README.md new file mode 100644 index 0000000000..2040ab6659 --- /dev/null +++ b/opendevin/runtime/client/README.md @@ -0,0 +1,110 @@ +# OpenDevin Runtime + +This README provides an overview of the OpenDevin Runtime, a crucial component of the OpenDevin system. It covers two main aspects: + +1. How the Runtime Image is Built: Explains the layered approach to creating Docker images for both production and development environments. +2. How the Runtime Client Works: Details the functionality and architecture of the Runtime Client, which executes actions within the Docker sandbox. + +The following sections dive deeper into these topics, providing a comprehensive understanding of the OpenDevin Runtime system. + +## How the Runtime Image is Built + +The OpenDevin runtime uses a layered approach for building Docker images: + +1. **Original Image**: `ubuntu:22.04` + - This is the base image used for all subsequent layers. + +2. **Runtime Image**: `od_runtime:ubuntu__22.04` + - Built from the stable release of OpenDevin. + - This is the primary runtime image that users will interact with. + - Created by copying all OpenDevin code into the original image and installing dependencies using Poetry. + +3. **Dev Runtime Image**: `od_runtime_dev:ubuntu__22.04` + - Built from local source code for development purposes. + +### Build Process + +#### Production Build (DEBUG=false) +By default, when DEBUG is set to false, the build process only needs to run once: +- The Runtime Image (`od_runtime:ubuntu__22.04`) is created by copying OpenDevin code into the original Ubuntu image and installing all dependencies. +- This pre-built image is then used for running the OpenDevin environment. + +#### Development Build (DEBUG=true) +When developing or modifying code that runs inside the container, you can set DEBUG=true to enable a more dynamic build process: +- Every time you run the code, the existing image will be updated with the latest changes. +- The Dev Runtime Image (`od_runtime_dev:ubuntu__22.04`) is rebuilt from the Runtime Image (`od_runtime:ubuntu__22.04`). +- Most dependencies are already installed in the Runtime Image, so this process mainly updates the code and any new dependencies. +- The rebuild process typically takes around 10 seconds, allowing for quick iterations during development. + +This approach allows developers to easily test changes to the OpenDevin codebase, including modifications to files like client.py, without needing to rebuild the entire image from scratch each time. + +## How the Runtime Client Works + +The Runtime Client is a crucial component of the OpenDevin system, responsible for executing actions within the Docker sandbox environment and producing observations. Here's an overview of its functionality: + +1. **Initialization**: + - The `EventStreamRuntime` class in `runtime.py` initializes the Docker container and sets up the runtime environment. + +2. **Communication**: + - The Runtime Client uses FastAPI to create a web server inside the Docker container. + - It listens for incoming action requests from the OpenDevin backend. + +3. **Action Execution**: + - When an action is received, the Runtime Client processes it based on its type: + - `CmdRunAction`: Executes shell commands using a pexpect-spawned bash shell. + - `FileReadAction` and `FileWriteAction`: Perform file operations within the sandbox. + - `IPythonRunCellAction`: Executes Python code in an IPython environment. + - `BrowseURLAction` and `BrowseInteractiveAction`: Handle web browsing tasks using a browser environment. + +4. **Plugin System**: + - The Runtime Client supports a plugin system for extending functionality. + - Plugins like JupyterPlugin can be loaded to provide additional features. + +5. **Observation Generation**: + - After executing an action, the Runtime Client generates an appropriate observation. + - Observations include command outputs, file contents, error messages, etc. + +6. **Asynchronous Operation**: + - The Runtime Client uses asyncio for avoid concurrent requests. + - It ensures that only one action is executed at a time using a semaphore. + +7. **Security**: + - All actions are executed within the confined Docker environment, providing a sandbox for safe execution. + +8. **Flexibility**: + - The system supports both production (DEBUG=false) and development (DEBUG=true) modes. + - In development mode, the runtime image can be updated with the latest code changes for testing and debugging. + + + +## Architecture Diagram + +``` ++-------------------+ +-------------------+ +| OpenDevin | | Docker Host | +| Backend | | | +| | | +-------------+ | +| +-------------+ | | | Runtime | | +| | EventStream | | | | Container | | +| | Runtime |<-|-----|->| | | +| +-------------+ | | | +-------+ | | +| | | | |Runtime| | | +| | | | |Client | | | +| | | | +-------+ | | +| | | | | | | +| | | | +-------+ | | +| | | | |Plugins| | | +| | | | +-------+ | | +| | | +-------------+ | ++-------------------+ +-------------------+ +``` + +This diagram illustrates the high-level architecture of the OpenDevin Runtime system: + +1. The OpenDevin Backend communicates with the Docker Host through the EventStreamRuntime. +2. The Docker Host runs a Runtime Container, which includes: + - The Runtime Client: Handles incoming actions and generates observations. + - Plugins: Extend the functionality of the Runtime Client. +3. The Runtime Client executes actions within the sandboxed environment of the Docker container. + +This architecture ensures a secure and flexible environment for executing AI-driven development tasks, allowing OpenDevin to execute a wide range of actions safely and efficiently. diff --git a/opendevin/runtime/client/client.py b/opendevin/runtime/client/client.py index 5733361757..a33e977ce3 100644 --- a/opendevin/runtime/client/client.py +++ b/opendevin/runtime/client/client.py @@ -1,6 +1,7 @@ import argparse import asyncio import os +import re from pathlib import Path import pexpect @@ -60,26 +61,51 @@ class RuntimeClient: def _init_bash_shell(self, work_dir: str) -> None: self.shell = pexpect.spawn('/bin/bash', encoding='utf-8', echo=False) - self.__bash_expect = r'\[PEXPECT\][\$\#] ' - self.__bash_PS1 = r'\u@\h:\w [PEXPECT]\$ ' + self.__bash_PS1 = r'[PEXPECT_BEGIN] \u@\h:\w [PEXPECT_END]' + + # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed + self.__bash_expect_regex = r'\[PEXPECT_BEGIN\] ([a-z_][a-z0-9_-]*)@([a-zA-Z][a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]' + self.shell.sendline(f'export PS1="{self.__bash_PS1}"') - self.shell.expect(self.__bash_expect) + self.shell.expect(self.__bash_expect_regex) + self.shell.sendline(f'cd {work_dir}') - self.shell.expect(self.__bash_expect) + self.shell.expect(self.__bash_expect_regex) - def _execute_bash(self, command, keep_prompt: bool = True): - logger.info(f'Received command: {command}') + def _get_bash_prompt(self): + ps1 = self.shell.after + # parse the ps1 to get username, hostname, and working directory + matched = re.match(self.__bash_expect_regex, ps1) + assert ( + matched is not None + ), f'Failed to parse bash prompt: {ps1}. This should not happen.' + username, hostname, working_dir = matched.groups() + + # re-assemble the prompt + prompt = f'{username}@{hostname}:{working_dir} ' + if username == 'root': + prompt += '#' + else: + prompt += '$' + return prompt + ' ' + + def _execute_bash(self, command, keep_prompt: bool = True) -> tuple[str, int]: + logger.debug(f'Executing command: {command}') self.shell.sendline(command) - self.shell.expect(self.__bash_expect) - output = self.shell.before + '$ ' - if not keep_prompt: - # remove the last line of the output (the prompt) - # e.g., user@host:~$ - output = '\r\n'.join(output.split('\r\n')[:-1]) + self.shell.expect(self.__bash_expect_regex) + output = self.shell.before + if keep_prompt: + output += '\r\n' + self._get_bash_prompt() + logger.debug(f'Command output: {output}') + + # Get exit code self.shell.sendline('echo $?') - self.shell.expect(r'[$#] ') - exit_code = int(self.shell.before.split('\r\n')[0].strip()) + logger.debug(f'Executing command for exit code: {command}') + self.shell.expect(self.__bash_expect_regex) + _exit_code_output = self.shell.before + logger.debug(f'Exit code Output: {_exit_code_output}') + exit_code = int(_exit_code_output.strip()) return output, exit_code async def run_action(self, action) -> Observation: diff --git a/opendevin/runtime/client/runtime.py b/opendevin/runtime/client/runtime.py index 2aa90066f2..40c5214205 100644 --- a/opendevin/runtime/client/runtime.py +++ b/opendevin/runtime/client/runtime.py @@ -6,7 +6,7 @@ import aiohttp import docker import tenacity -from opendevin.core.config import config +from opendevin.core.config import SandboxConfig, config from opendevin.core.logger import opendevin_logger as logger from opendevin.events import EventSource, EventStream from opendevin.events.action import ( @@ -45,12 +45,15 @@ class EventStreamRuntime(Runtime): def __init__( self, + sandbox_config: SandboxConfig, event_stream: EventStream, sid: str = 'default', container_image: str | None = None, plugins: list[PluginRequirement] | None = None, ): - super().__init__(event_stream, sid) # will initialize the event stream + super().__init__( + sandbox_config, event_stream, sid + ) # will initialize the event stream self._port = find_available_tcp_port() self.api_url = f'http://localhost:{self._port}' self.session: Optional[aiohttp.ClientSession] = None @@ -71,7 +74,7 @@ class EventStreamRuntime(Runtime): self.container = None self.action_semaphore = asyncio.Semaphore(1) # Ensure one action at a time - async def ainit(self): + async def ainit(self, env_vars: dict[str, str] | None = None): self.container_image = build_runtime_image( self.container_image, self.docker_client, @@ -85,6 +88,8 @@ class EventStreamRuntime(Runtime): mount_dir=config.workspace_mount_path, plugins=self.plugins, ) + # Initialize the env vars + await super().ainit(env_vars) @staticmethod def _init_docker_client() -> docker.DockerClient: @@ -126,6 +131,7 @@ class EventStreamRuntime(Runtime): working_dir='/opendevin/code/', name=self.container_name, detach=True, + environment={'DEBUG': 'true'} if config.debug else None, volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}, ) logger.info(f'Container started. Server url: {self.api_url}') @@ -273,7 +279,9 @@ async def test_run_command(): sid = 'test' cli_session = 'main' + ('_' + sid if sid else '') event_stream = EventStream(cli_session) - runtime = EventStreamRuntime(event_stream) + runtime = EventStreamRuntime( + sandbox_config=config.sandbox, event_stream=event_stream, sid=sid + ) await runtime.ainit() await runtime.run_action(CmdRunAction('ls -l')) @@ -283,9 +291,10 @@ async def test_event_stream(): cli_session = 'main' + ('_' + sid if sid else '') event_stream = EventStream(cli_session) runtime = EventStreamRuntime( - event_stream, - sid, - 'ubuntu:22.04', + sandbox_config=config.sandbox, + event_stream=event_stream, + sid=sid, + container_image='ubuntu:22.04', plugins=[JupyterRequirement(), AgentSkillsRequirement()], ) await runtime.ainit() diff --git a/opendevin/runtime/docker/ssh_box.py b/opendevin/runtime/docker/ssh_box.py index 3320ede4c8..59919266e4 100644 --- a/opendevin/runtime/docker/ssh_box.py +++ b/opendevin/runtime/docker/ssh_box.py @@ -19,7 +19,7 @@ from opendevin.core.schema import CancellableStream from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement from opendevin.runtime.plugins.requirement import PluginRequirement from opendevin.runtime.sandbox import Sandbox -from opendevin.runtime.utils import find_available_tcp_port +from opendevin.runtime.utils import find_available_tcp_port, split_bash_commands from opendevin.runtime.utils.image_agnostic import get_od_sandbox_image @@ -101,95 +101,6 @@ class SSHExecCancellableStream(CancellableStream): break -def split_bash_commands(commands): - # States - NORMAL = 0 - IN_SINGLE_QUOTE = 1 - IN_DOUBLE_QUOTE = 2 - IN_HEREDOC = 3 - - state = NORMAL - heredoc_trigger = None - result = [] - current_command: list[str] = [] - - i = 0 - while i < len(commands): - char = commands[i] - - if state == NORMAL: - if char == "'": - state = IN_SINGLE_QUOTE - elif char == '"': - state = IN_DOUBLE_QUOTE - elif char == '\\': - # Check if this is escaping a newline - if i + 1 < len(commands) and commands[i + 1] == '\n': - i += 1 # Skip the newline - # Continue with the next line as part of the same command - i += 1 # Move to the first character of the next line - continue - elif char == '\n': - if not heredoc_trigger and current_command: - result.append(''.join(current_command).strip()) - current_command = [] - elif char == '<' and commands[i : i + 2] == '<<': - # Detect heredoc - state = IN_HEREDOC - i += 2 # Skip '<<' - while commands[i] == ' ': - i += 1 - start = i - while commands[i] not in [' ', '\n']: - i += 1 - heredoc_trigger = commands[start:i] - current_command.append(commands[start - 2 : i]) # Include '<<' - continue # Skip incrementing i at the end of the loop - current_command.append(char) - - elif state == IN_SINGLE_QUOTE: - current_command.append(char) - if char == "'" and commands[i - 1] != '\\': - state = NORMAL - - elif state == IN_DOUBLE_QUOTE: - current_command.append(char) - if char == '"' and commands[i - 1] != '\\': - state = NORMAL - - elif state == IN_HEREDOC: - current_command.append(char) - if ( - char == '\n' - and heredoc_trigger - and commands[i + 1 : i + 1 + len(heredoc_trigger) + 1] - == heredoc_trigger + '\n' - ): - # Check if the next line starts with the heredoc trigger followed by a newline - i += ( - len(heredoc_trigger) + 1 - ) # Move past the heredoc trigger and newline - current_command.append( - heredoc_trigger + '\n' - ) # Include the heredoc trigger and newline - result.append(''.join(current_command).strip()) - current_command = [] - heredoc_trigger = None - state = NORMAL - continue - - i += 1 - - # Add the last command if any - if current_command: - result.append(''.join(current_command).strip()) - - # Remove any empty strings from the result - result = [cmd for cmd in result if cmd] - - return result - - class DockerSSHBox(Sandbox): instance_id: str container_image: str diff --git a/opendevin/runtime/e2b/runtime.py b/opendevin/runtime/e2b/runtime.py index 1a0710bf91..e21df3cb12 100644 --- a/opendevin/runtime/e2b/runtime.py +++ b/opendevin/runtime/e2b/runtime.py @@ -1,3 +1,4 @@ +from opendevin.core.config import SandboxConfig from opendevin.events.action import ( FileReadAction, FileWriteAction, @@ -20,11 +21,12 @@ from .sandbox import E2BSandbox class E2BRuntime(ServerRuntime): def __init__( self, + sandbox_config: SandboxConfig, event_stream: EventStream, sid: str = 'default', sandbox: Sandbox | None = None, ): - super().__init__(event_stream, sid, sandbox) + super().__init__(sandbox_config, event_stream, sid, sandbox) if not isinstance(self.sandbox, E2BSandbox): raise ValueError('E2BRuntime requires an E2BSandbox') self.file_store = E2BFileStore(self.sandbox.filesystem) diff --git a/opendevin/runtime/runtime.py b/opendevin/runtime/runtime.py index d722b32366..e3bd0d41a7 100644 --- a/opendevin/runtime/runtime.py +++ b/opendevin/runtime/runtime.py @@ -1,8 +1,13 @@ import asyncio import atexit +import copy +import json +import os from abc import abstractmethod from typing import Any, Optional +from opendevin.core.config import SandboxConfig +from opendevin.core.logger import opendevin_logger as logger from opendevin.events import EventStream, EventStreamSubscriber from opendevin.events.action import ( Action, @@ -16,6 +21,7 @@ from opendevin.events.action import ( ) from opendevin.events.event import Event from opendevin.events.observation import ( + CmdOutputObservation, ErrorObservation, NullObservation, Observation, @@ -27,6 +33,17 @@ from opendevin.runtime.tools import RuntimeTool from opendevin.storage import FileStore +def _default_env_vars(config: SandboxConfig) -> dict[str, str]: + ret = {} + for key in os.environ: + if key.startswith('SANDBOX_ENV_'): + sandbox_key = key.removeprefix('SANDBOX_ENV_') + ret[sandbox_key] = os.environ[key] + if config.enable_auto_lint: + ret['ENABLE_AUTO_LINT'] = 'true' + return ret + + class Runtime: """The runtime is how the agent interacts with the external environment. This includes a bash sandbox, a browser, and filesystem interactions. @@ -36,18 +53,32 @@ class Runtime: sid: str file_store: FileStore + DEFAULT_ENV_VARS: dict[str, str] - def __init__(self, event_stream: EventStream, sid: str = 'default'): + def __init__( + self, + sandbox_config: SandboxConfig, + event_stream: EventStream, + sid: str = 'default', + ): self.sid = sid self.event_stream = event_stream self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event) + self.sandbox_config = copy.deepcopy(sandbox_config) + self.DEFAULT_ENV_VARS = _default_env_vars(self.sandbox_config) atexit.register(self.close_sync) - async def ainit(self) -> None: - """Initialize the runtime (asynchronously). + async def ainit(self, env_vars: dict[str, str] | None = None) -> None: + """ + Initialize the runtime (asynchronously). + This method should be called after the runtime's constructor. """ - pass + logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}') + await self.add_env_var(self.DEFAULT_ENV_VARS) + if env_vars is not None: + logger.debug(f'Adding provided env vars: {env_vars}') + await self.add_env_var(env_vars) async def close(self) -> None: pass @@ -84,6 +115,17 @@ class Runtime: # ==================================================================== + async def add_env_var(self, vars): + cmd = '' + for key, value in vars.items(): + # Note: json.dumps gives us nice escaping for free + cmd += f'export {key}={json.dumps(value)}; ' + cmd = cmd.strip() + logger.debug(f'Adding env var: {cmd}') + obs: Observation = await self.run(CmdRunAction(cmd)) + if not isinstance(obs, CmdOutputObservation) or obs.exit_code != 0: + raise RuntimeError(f'Failed to add {key} to environment: {obs}') + async def on_event(self, event: Event) -> None: if isinstance(event, Action): observation = await self.run_action(event) diff --git a/opendevin/runtime/sandbox.py b/opendevin/runtime/sandbox.py index 116090b4d4..5009308a92 100644 --- a/opendevin/runtime/sandbox.py +++ b/opendevin/runtime/sandbox.py @@ -1,6 +1,4 @@ import copy -import json -import os from abc import ABC, abstractmethod from opendevin.core.config import SandboxConfig @@ -14,19 +12,8 @@ class Sandbox(ABC, PluginMixin): def __init__(self, config: SandboxConfig): self.config = copy.deepcopy(config) - for key in os.environ: - if key.startswith('SANDBOX_ENV_'): - sandbox_key = key.removeprefix('SANDBOX_ENV_') - self.add_to_env(sandbox_key, os.environ[key]) - if config.enable_auto_lint: - self.add_to_env('ENABLE_AUTO_LINT', 'true') self.initialize_plugins: bool = config.initialize_plugins - def add_to_env(self, key: str, value: str): - self._env[key] = value - # Note: json.dumps gives us nice escaping for free - self.execute(f'export {key}={json.dumps(value)}') - @abstractmethod def execute( self, cmd: str, stream: bool = False, timeout: int | None = None diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index c6fec5faf9..c5e8e71db3 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -1,6 +1,6 @@ from typing import Any, Optional -from opendevin.core.config import config +from opendevin.core.config import SandboxConfig, config from opendevin.core.exceptions import BrowserInitException from opendevin.core.logger import opendevin_logger as logger from opendevin.events.action import ( @@ -63,11 +63,12 @@ def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox: class ServerRuntime(Runtime): def __init__( self, + sandbox_config: SandboxConfig, event_stream: EventStream, sid: str = 'default', sandbox: Sandbox | None = None, ): - super().__init__(event_stream, sid) + super().__init__(sandbox_config, event_stream, sid) self.file_store = LocalFileStore(config.workspace_base) if sandbox is None: self.sandbox = create_sandbox(sid, config.sandbox.box_type) @@ -77,9 +78,6 @@ class ServerRuntime(Runtime): self._is_external_sandbox = True self.browser: BrowserEnv | None = None - async def ainit(self) -> None: - pass - async def close(self): if not self._is_external_sandbox: self.sandbox.close() diff --git a/opendevin/runtime/utils/__init__.py b/opendevin/runtime/utils/__init__.py index f30d609486..e1512a7e87 100644 --- a/opendevin/runtime/utils/__init__.py +++ b/opendevin/runtime/utils/__init__.py @@ -1,3 +1,4 @@ +from .bash import split_bash_commands from .system import find_available_tcp_port -__all__ = ['find_available_tcp_port'] +__all__ = ['find_available_tcp_port', 'split_bash_commands'] diff --git a/opendevin/runtime/utils/bash.py b/opendevin/runtime/utils/bash.py new file mode 100644 index 0000000000..9dd7a60422 --- /dev/null +++ b/opendevin/runtime/utils/bash.py @@ -0,0 +1,87 @@ +def split_bash_commands(commands): + # States + NORMAL = 0 + IN_SINGLE_QUOTE = 1 + IN_DOUBLE_QUOTE = 2 + IN_HEREDOC = 3 + + state = NORMAL + heredoc_trigger = None + result = [] + current_command: list[str] = [] + + i = 0 + while i < len(commands): + char = commands[i] + + if state == NORMAL: + if char == "'": + state = IN_SINGLE_QUOTE + elif char == '"': + state = IN_DOUBLE_QUOTE + elif char == '\\': + # Check if this is escaping a newline + if i + 1 < len(commands) and commands[i + 1] == '\n': + i += 1 # Skip the newline + # Continue with the next line as part of the same command + i += 1 # Move to the first character of the next line + continue + elif char == '\n': + if not heredoc_trigger and current_command: + result.append(''.join(current_command).strip()) + current_command = [] + elif char == '<' and commands[i : i + 2] == '<<': + # Detect heredoc + state = IN_HEREDOC + i += 2 # Skip '<<' + while commands[i] == ' ': + i += 1 + start = i + while commands[i] not in [' ', '\n']: + i += 1 + heredoc_trigger = commands[start:i] + current_command.append(commands[start - 2 : i]) # Include '<<' + continue # Skip incrementing i at the end of the loop + current_command.append(char) + + elif state == IN_SINGLE_QUOTE: + current_command.append(char) + if char == "'" and commands[i - 1] != '\\': + state = NORMAL + + elif state == IN_DOUBLE_QUOTE: + current_command.append(char) + if char == '"' and commands[i - 1] != '\\': + state = NORMAL + + elif state == IN_HEREDOC: + current_command.append(char) + if ( + char == '\n' + and heredoc_trigger + and commands[i + 1 : i + 1 + len(heredoc_trigger) + 1] + == heredoc_trigger + '\n' + ): + # Check if the next line starts with the heredoc trigger followed by a newline + i += ( + len(heredoc_trigger) + 1 + ) # Move past the heredoc trigger and newline + current_command.append( + heredoc_trigger + '\n' + ) # Include the heredoc trigger and newline + result.append(''.join(current_command).strip()) + current_command = [] + heredoc_trigger = None + state = NORMAL + continue + + i += 1 + + # Add the last command if any + if current_command: + result.append(''.join(current_command).strip()) + + # Remove any empty strings from the result + result = [cmd for cmd in result if cmd] + + return result diff --git a/opendevin/runtime/utils/runtime_build.py b/opendevin/runtime/utils/runtime_build.py index 73a3a9b2bf..e31cdb3689 100644 --- a/opendevin/runtime/utils/runtime_build.py +++ b/opendevin/runtime/utils/runtime_build.py @@ -198,8 +198,9 @@ def build_runtime_image( # Try to pull the new image from the registry try: docker_client.images.pull(new_image_name) - except docker.errors.ImageNotFound: - logger.info(f'Image {new_image_name} not found, building it from scratch') + except Exception as e: + logger.info(f'Error pulling image {new_image_name}, building it from scratch') + logger.error(f'Error: {e}') # Detect if the sandbox image is built image_exists = _check_image_exists(new_image_name, docker_client) diff --git a/opendevin/server/session/agent.py b/opendevin/server/session/agent.py index b216ce3ed3..9454dba005 100644 --- a/opendevin/server/session/agent.py +++ b/opendevin/server/session/agent.py @@ -62,7 +62,9 @@ class AgentSession: logger.info(f'Using runtime: {config.runtime}') runtime_cls = get_runtime_cls(config.runtime) - self.runtime = runtime_cls(self.event_stream, self.sid) + self.runtime = runtime_cls( + sandbox_config=config.sandbox, event_stream=self.event_stream, sid=self.sid + ) await self.runtime.ainit() async def _create_controller(self, start_event: dict): diff --git a/tests/unit/test_ipython.py b/tests/unit/test_ipython.py index bfa8a98ff0..c789940330 100644 --- a/tests/unit/test_ipython.py +++ b/tests/unit/test_ipython.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, call, patch import pytest -from opendevin.core.config import config +from opendevin.core.config import SandboxConfig, config from opendevin.events.action import IPythonRunCellAction from opendevin.events.observation import IPythonRunCellObservation from opendevin.runtime.docker.ssh_box import DockerSSHBox @@ -43,7 +43,10 @@ async def test_run_python_backticks(): new=mock_sandbox_execute, ): # Initialize the runtime with the mock event_stream - runtime = ServerRuntime(event_stream=mock_event_stream) + runtime = ServerRuntime( + sandbox_config=SandboxConfig(box_type='ssh', persist_sandbox=False), + event_stream=mock_event_stream, + ) # Define the test action with a simple IPython command action = IPythonRunCellAction(code=test_code) diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py new file mode 100644 index 0000000000..7fbb8dd2ea --- /dev/null +++ b/tests/unit/test_runtime.py @@ -0,0 +1,184 @@ +"""Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox.""" + +import os +import pathlib +import tempfile +from unittest.mock import patch + +import pytest + +from opendevin.core.config import SandboxConfig +from opendevin.events import EventStream +from opendevin.events.action import ( + CmdRunAction, +) +from opendevin.events.observation import ( + CmdOutputObservation, +) +from opendevin.runtime.client.runtime import EventStreamRuntime +from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement +from opendevin.runtime.server.runtime import ServerRuntime + + +@pytest.fixture +def temp_dir(monkeypatch): + # get a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + pathlib.Path().mkdir(parents=True, exist_ok=True) + yield temp_dir + + +async def _load_runtime(box_class, event_stream, plugins, sid): + sandbox_config = SandboxConfig() + if box_class == EventStreamRuntime: + runtime = EventStreamRuntime( + sandbox_config=sandbox_config, + event_stream=event_stream, + sid=sid, + # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime + # Instead, we will pre-build a suite of container images with OD-runtime-cli installed. + container_image='ubuntu:22.04', + plugins=plugins, + ) + await runtime.ainit() + elif box_class == ServerRuntime: + runtime = ServerRuntime( + sandbox_config=sandbox_config, event_stream=event_stream, sid=sid + ) + await runtime.ainit() + runtime.init_sandbox_plugins(plugins) + runtime.init_runtime_tools( + [], + is_async=False, + runtime_tools_config={}, + ) + else: + raise ValueError(f'Invalid box class: {box_class}') + return runtime + + +RUNTIME_TO_TEST = [EventStreamRuntime, ServerRuntime] + + +@pytest.mark.asyncio +async def test_env_vars_os_environ(): + with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}): + plugins = [JupyterRequirement(), AgentSkillsRequirement()] + sid = 'test' + cli_session = 'main_test' + + for box_class in RUNTIME_TO_TEST: + event_stream = EventStream(cli_session) + runtime = await _load_runtime(box_class, event_stream, plugins, sid) + + obs: CmdOutputObservation = await runtime.run_action( + CmdRunAction(command='env') + ) + print(obs) + + obs: CmdOutputObservation = await runtime.run_action( + CmdRunAction(command='echo $FOOBAR') + ) + print(obs) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert ( + obs.content.strip().split('\n\r')[0].strip() == 'BAZ' + ), f'Output: [{obs.content}] for {box_class}' + + +@pytest.mark.asyncio +async def test_env_vars_runtime_add_env_var(): + plugins = [JupyterRequirement(), AgentSkillsRequirement()] + sid = 'test' + cli_session = 'main_test' + + for box_class in RUNTIME_TO_TEST: + event_stream = EventStream(cli_session) + runtime = await _load_runtime(box_class, event_stream, plugins, sid) + await runtime.add_env_var({'QUUX': 'abc"def'}) + + obs: CmdOutputObservation = await runtime.run_action( + CmdRunAction(command='echo $QUUX') + ) + print(obs) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert ( + obs.content.strip().split('\r\n')[0].strip() == 'abc"def' + ), f'Output: [{obs.content}] for {box_class}' + + +@pytest.mark.asyncio +async def test_env_vars_runtime_add_multiple_env_vars(): + plugins = [JupyterRequirement(), AgentSkillsRequirement()] + sid = 'test' + cli_session = 'main_test' + + for box_class in RUNTIME_TO_TEST: + event_stream = EventStream(cli_session) + runtime = await _load_runtime(box_class, event_stream, plugins, sid) + await runtime.add_env_var({'QUUX': 'abc"def', 'FOOBAR': 'xyz'}) + + obs: CmdOutputObservation = await runtime.run_action( + CmdRunAction(command='echo $QUUX $FOOBAR') + ) + print(obs) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert ( + obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz' + ), f'Output: [{obs.content}] for {box_class}' + + +@pytest.mark.asyncio +async def test_env_vars_runtime_add_env_var_overwrite(): + plugins = [JupyterRequirement(), AgentSkillsRequirement()] + sid = 'test' + cli_session = 'main_test' + + for box_class in RUNTIME_TO_TEST: + with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}): + event_stream = EventStream(cli_session) + runtime = await _load_runtime(box_class, event_stream, plugins, sid) + await runtime.add_env_var({'FOOBAR': 'xyz'}) + + obs: CmdOutputObservation = await runtime.run_action( + CmdRunAction(command='echo $FOOBAR') + ) + print(obs) + assert obs.exit_code == 0, 'The exit code should be 0.' + assert ( + obs.content.strip().split('\r\n')[0].strip() == 'xyz' + ), f'Output: [{obs.content}] for {box_class}' + + +@pytest.mark.asyncio +async def test_bash_command_pexcept(temp_dir): + plugins = [JupyterRequirement(), AgentSkillsRequirement()] + sid = 'test' + cli_session = 'main_test' + + box_class = EventStreamRuntime + event_stream = EventStream(cli_session) + runtime = await _load_runtime(box_class, event_stream, plugins, sid) + + # We set env var PS1="\u@\h:\w $" + # and construct the PEXCEPT prompt base on it. + # When run `env`, bad implementation of CmdRunAction will be pexcepted by this + # and failed to pexcept the right content, causing it fail to get error code. + obs = await runtime.run_action(CmdRunAction(command='env')) + + # For example: + # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env + # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1 + # CONDA_EXE=/opendevin/miniforge3/bin/conda + # [...] + # LC_CTYPE=C.UTF-8 + # PS1=\u@\h:\w $ + # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env + # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output: + # CONDA_DEFAULT_ENV=base + + # As long as the exit code is 0, the test will pass. + assert isinstance( + obs, CmdOutputObservation + ), 'The observation should be a CmdOutputObservation.' + assert obs.exit_code == 0, 'The exit code should be 0.' diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py index a18ff75967..3e8d25b6b4 100644 --- a/tests/unit/test_sandbox.py +++ b/tests/unit/test_sandbox.py @@ -5,9 +5,9 @@ import tempfile import pytest from opendevin.core.config import AppConfig, SandboxConfig -from opendevin.runtime.docker.local_box import LocalBox -from opendevin.runtime.docker.ssh_box import DockerSSHBox, split_bash_commands +from opendevin.runtime.docker.ssh_box import DockerSSHBox from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement +from opendevin.runtime.utils import split_bash_commands def create_docker_box_from_app_config( @@ -42,30 +42,6 @@ def temp_dir(monkeypatch): yield temp_dir -def test_env_vars(temp_dir): - os.environ['SANDBOX_ENV_FOOBAR'] = 'BAZ' - ssh_box = create_docker_box_from_app_config(temp_dir) - - local_box_config = AppConfig( - sandbox=SandboxConfig( - box_type='local', - ) - ) - local_box = LocalBox(local_box_config.sandbox, temp_dir) - for box in [ - ssh_box, - local_box, - ]: - box.add_to_env(key='QUUX', value='abc"def') - assert box._env['FOOBAR'] == 'BAZ' - assert box._env['QUUX'] == 'abc"def' - exit_code, output = box.execute('echo $FOOBAR $QUUX') - assert exit_code == 0, 'The exit code should be 0.' - assert ( - output.strip() == 'BAZ abc"def' - ), f'Output: {output} for {box.__class__.__name__}' - - def test_split_commands(): cmds = [ 'ls -l', @@ -339,20 +315,6 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir): _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config) -def test_sandbox_jupyter_agentskills_fileop_pwd_with_lint(temp_dir): - # get a temporary directory - config = AppConfig( - sandbox=SandboxConfig( - box_type='ssh', - persist_sandbox=False, - enable_auto_lint=True, - ) - ) - assert config.sandbox.enable_auto_lint - box = create_docker_box_from_app_config(temp_dir, config) - _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config) - - @pytest.mark.skipif( os.getenv('TEST_IN_CI') != 'true', reason='The unittest need to download image, so only run on CI',