Integrate E2B sandbox as an alternative to a Docker container (#727)

* add e2b sandbox [wip] * Install e2b package * Add basic E2B sandbox integration * Update dependencies and fix command execution in E2BSandbox * Update e2b * Add comment * Lint * Remove unnecessary type conversion * Lint * Fix linting * Resolve comments * Update opendevin/action/fileop.py * Update opendevin/action/fileop.py * Fix log * Update E2B readme * poetry lock --------- Co-authored-by: Robert Brennan <accounts@rbren.io>
2026-01-09 14:57:59 -05:00 · 2024-04-19 11:21:58 -07:00
parent e6d91affc6
commit 76b81ca0ed
21 changed files with 514 additions and 204 deletions
--- a/containers/e2b-sandbox/Dockerfile
+++ b/containers/e2b-sandbox/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:22.04
+
+# install basic packages
+RUN apt-get update && apt-get install -y \
+    curl \
+    wget \
+    git \
+    vim \
+    nano \
+    unzip \
+    zip \
+    python3 \
+    python3-pip \
+    python3-venv \
+    python3-dev \
+    build-essential \
+    openssh-server \
+    sudo \
+    && rm -rf /var/lib/apt/lists/*
--- a/containers/e2b-sandbox/README.md
+++ b/containers/e2b-sandbox/README.md
@@ -0,0 +1,15 @@
+# How to build custom E2B sandbox for OpenDevin
+
+[E2B](https://e2b.dev) is an [open-source](https://github.com/e2b-dev/e2b) secure cloud environment (sandbox) made for running AI-generated code and agents. E2B offers [Python](https://pypi.org/project/e2b/) and [JS/TS](https://www.npmjs.com/package/e2b) SDKs to spawn and control these sandboxes.
+
+
+1. Install the CLI with NPM.
+    ```sh
+    npm install -g @e2b/cli@latest
+    ```
+    Full CLI API is [here](https://e2b.dev/docs/cli/installation).
+
+1. Build the sandbox.
+    ```sh
+    e2b template build --dockerfile ./Dockerfile --name "open-devin"
+    ```
--- a/containers/e2b-sandbox/e2b.toml
+++ b/containers/e2b-sandbox/e2b.toml
@@ -0,0 +1,14 @@
+# This is a config for E2B sandbox template.
+# You can use 'template_id' (785n69crgahmz0lkdw9h) or 'template_name' (open-devin) from this config to spawn a sandbox:
+
+# Python SDK
+# from e2b import Sandbox
+# sandbox = Sandbox(template='open-devin')
+
+# JS SDK
+# import { Sandbox } from 'e2b'
+# const sandbox = await Sandbox.create({ template: 'open-devin' })
+
+dockerfile = "Dockerfile"
+template_name = "open-devin"
+template_id = "785n69crgahmz0lkdw9h"
--- a/opendevin/README.md
+++ b/opendevin/README.md
@@ -6,7 +6,7 @@ See the [main README](../README.md) for instructions on how to run OpenDevin fro

 ## Sandbox Image
 ```bash
-docker build -f opendevin/sandbox/Dockerfile -t opendevin/sandbox:v0.1 .
+docker build -f opendevin/sandbox/docker/Dockerfile -t opendevin/sandbox:v0.1 .
 ```

 ## Sandbox Runner
@@ -15,7 +15,7 @@ Run the docker-based interactive sandbox:

 ```bash
 mkdir workspace
-python3 opendevin/sandbox/sandbox.py -d workspace
+python3 opendevin/sandbox/docker/sandbox.py -d workspace
 ```

 It will map `./workspace` into the docker container with the folder permission correctly adjusted for current user.
--- a/opendevin/action/fileop.py
+++ b/opendevin/action/fileop.py
@@ -8,6 +8,7 @@ from opendevin.observation import (
 )

 from opendevin.schema import ActionType
+from opendevin.sandbox import E2BBox
 from opendevin import config

 from .base import ExecutableAction
@@ -35,27 +36,33 @@ class FileReadAction(ExecutableAction):
    thoughts: str = ''
    action: str = ActionType.READ

+    def _read_lines(self, all_lines: list[str]):
+        if self.end == -1:
+            if self.start == 0:
+                return all_lines
+            else:
+                return all_lines[self.start:]
+        else:
+            num_lines = len(all_lines)
+            begin = max(0, min(self.start, num_lines - 2))
+            end = -1 if self.end > num_lines else max(begin + 1, self.end)
+            return all_lines[begin:end]
+
    async def run(self, controller) -> FileReadObservation:
-        whole_path = resolve_path(self.path)
-        self.start = max(self.start, 0)
-        try:
-            with open(whole_path, 'r', encoding='utf-8') as file:
-                if self.end == -1:
-                    if self.start == 0:
-                        code_view = file.read()
-                    else:
-                        all_lines = file.readlines()
-                        code_slice = all_lines[self.start:]
-                        code_view = ''.join(code_slice)
-                else:
-                    all_lines = file.readlines()
-                    num_lines = len(all_lines)
-                    begin = max(0, min(self.start, num_lines - 2))
-                    end = -1 if self.end > num_lines else max(begin + 1, self.end)
-                    code_slice = all_lines[begin:end]
-                    code_view = ''.join(code_slice)
-        except FileNotFoundError:
-            raise FileNotFoundError(f'File not found: {self.path}')
+        if isinstance(controller.command_manager.sandbox, E2BBox):
+            content = controller.command_manager.sandbox.filesystem.read(
+                self.path)
+            read_lines = self._read_lines(content.split('\n'))
+            code_view = ''.join(read_lines)
+        else:
+            whole_path = resolve_path(self.path)
+            self.start = max(self.start, 0)
+            try:
+                with open(whole_path, 'r', encoding='utf-8') as file:
+                    read_lines = self._read_lines(file.readlines())
+                    code_view = ''.join(read_lines)
+            except FileNotFoundError:
+                raise FileNotFoundError(f'File not found: {self.path}')
        return FileReadObservation(path=self.path, content=code_view)

    @property
@@ -72,25 +79,42 @@ class FileWriteAction(ExecutableAction):
    thoughts: str = ''
    action: str = ActionType.WRITE

-    async def run(self, controller) -> FileWriteObservation:
-        whole_path = resolve_path(self.path)
-        mode = 'w' if not os.path.exists(whole_path) else 'r+'
-        insert = self.content.split('\n')
-        try:
-            with open(whole_path, mode, encoding='utf-8') as file:
-                if mode != 'w':
-                    all_lines = file.readlines()
-                    new_file = [''] if self.start == 0 else all_lines[:self.start]
-                    new_file += [i + '\n' for i in insert]
-                    new_file += [''] if self.end == -1 else all_lines[self.end:]
-                else:
-                    new_file = [i + '\n' for i in insert]
+    def _insert_lines(self, to_insert: list[str], original: list[str]):
+        """
+        Insert the new content into the original content based on self.start and self.end
+        """
+        new_lines = [''] if self.start == 0 else original[:self.start]
+        new_lines += [i + '\n' for i in to_insert]
+        new_lines += [''] if self.end == -1 else original[self.end:]
+        return new_lines

-                file.seek(0)
-                file.writelines(new_file)
-                file.truncate()
-        except FileNotFoundError:
-            raise FileNotFoundError(f'File not found: {self.path}')
+    async def run(self, controller) -> FileWriteObservation:
+        insert = self.content.split('\n')
+
+        if isinstance(controller.command_manager.sandbox, E2BBox):
+            files = controller.command_manager.sandbox.filesystem.list(self.path)
+            if self.path in files:
+                all_lines = controller.command_manager.sandbox.filesystem.read(self.path)
+                new_file = self._insert_lines(self.content.split('\n'), all_lines)
+                controller.command_manager.sandbox.filesystem.write(self.path, ''.join(new_file))
+            else:
+                raise FileNotFoundError(f'File not found: {self.path}')
+        else:
+            whole_path = resolve_path(self.path)
+            mode = 'w' if not os.path.exists(whole_path) else 'r+'
+            try:
+                with open(whole_path, mode, encoding='utf-8') as file:
+                    if mode != 'w':
+                        all_lines = file.readlines()
+                        new_file = self._insert_lines(insert, all_lines)
+                    else:
+                        new_file = [i + '\n' for i in insert]
+
+                    file.seek(0)
+                    file.writelines(new_file)
+                    file.truncate()
+            except FileNotFoundError:
+                raise FileNotFoundError(f'File not found: {self.path}')
        return FileWriteObservation(content='', path=self.path)

    @property
--- a/opendevin/config.py
+++ b/opendevin/config.py
@@ -28,7 +28,8 @@ DEFAULT_CONFIG: dict = {
    # Assuming 5 characters per token, 5 million is a reasonable default limit.
    ConfigType.MAX_CHARS: 5_000_000,
    ConfigType.AGENT: 'MonologueAgent',
-    ConfigType.SANDBOX_TYPE: 'ssh',
+    ConfigType.E2B_API_KEY: '',
+    ConfigType.SANDBOX_TYPE: 'ssh',  # Can be 'ssh', 'exec', 'local', or 'e2b'
    ConfigType.USE_HOST_NETWORK: 'false',
    ConfigType.SSH_HOSTNAME: 'localhost',
    ConfigType.DISABLE_COLOR: 'false',
--- a/opendevin/controller/action_manager.py
+++ b/opendevin/controller/action_manager.py
@@ -3,7 +3,7 @@ import traceback

 from opendevin import config
 from opendevin.observation import CmdOutputObservation
-from opendevin.sandbox import DockerExecBox, DockerSSHBox, Sandbox, LocalBox
+from opendevin.sandbox import DockerExecBox, DockerSSHBox, Sandbox, LocalBox, E2BBox
 from opendevin.schema import ConfigType
 from opendevin.logger import opendevin_logger as logger
 from opendevin.action import (
@@ -18,7 +18,7 @@ from opendevin.observation import (

 class ActionManager:
    id: str
-    shell: Sandbox
+    sandbox: Sandbox

    def __init__(
            self,
@@ -27,15 +27,17 @@ class ActionManager:
    ):
        sandbox_type = config.get(ConfigType.SANDBOX_TYPE).lower()
        if sandbox_type == 'exec':
-            self.shell = DockerExecBox(
+            self.sandbox = DockerExecBox(
                sid=(sid or 'default'), container_image=container_image
            )
        elif sandbox_type == 'local':
-            self.shell = LocalBox()
+            self.sandbox = LocalBox()
        elif sandbox_type == 'ssh':
-            self.shell = DockerSSHBox(
+            self.sandbox = DockerSSHBox(
                sid=(sid or 'default'), container_image=container_image
            )
+        elif sandbox_type == 'e2b':
+            self.sandbox = E2BBox()
        else:
            raise ValueError(f'Invalid sandbox type: {sandbox_type}')

@@ -58,23 +60,23 @@ class ActionManager:
            return self._run_immediately(command)

    def _run_immediately(self, command: str) -> CmdOutputObservation:
-        exit_code, output = self.shell.execute(command)
+        exit_code, output = self.sandbox.execute(command)
        return CmdOutputObservation(
            command_id=-1, content=output, command=command, exit_code=exit_code
        )

    def _run_background(self, command: str) -> CmdOutputObservation:
-        bg_cmd = self.shell.execute_in_background(command)
-        content = f'Background command started. To stop it, send a `kill` action with id {bg_cmd.id}'
+        bg_cmd = self.sandbox.execute_in_background(command)
+        content = f'Background command started. To stop it, send a `kill` action with id {bg_cmd.pid}'
        return CmdOutputObservation(
            content=content,
-            command_id=bg_cmd.id,
+            command_id=bg_cmd.pid,
            command=command,
            exit_code=0,
        )

    def kill_command(self, id: int) -> CmdOutputObservation:
-        cmd = self.shell.kill_background(id)
+        cmd = self.sandbox.kill_background(id)
        return CmdOutputObservation(
            content=f'Background command with id {id} has been killed.',
            command_id=id,
@@ -84,7 +86,7 @@ class ActionManager:

    def get_background_obs(self) -> List[CmdOutputObservation]:
        obs = []
-        for _id, cmd in self.shell.background_commands.items():
+        for _id, cmd in self.sandbox.background_commands.items():
            output = cmd.read_logs()
            if output is not None and output != '':
                obs.append(
--- a/opendevin/sandbox/init.py
+++ b/opendevin/sandbox/init.py
@@ -1,10 +1,13 @@
 from .sandbox import Sandbox
-from .ssh_box import DockerSSHBox
-from .exec_box import DockerExecBox
-from .local_box import LocalBox
+from .docker.ssh_box import DockerSSHBox
+from .docker.exec_box import DockerExecBox
+from .docker.local_box import LocalBox
+from .e2b.sandbox import E2BBox
+
 __all__ = [
    'Sandbox',
    'DockerSSHBox',
    'DockerExecBox',
+    'E2BBox',
    'LocalBox'
 ]
--- a/opendevin/sandbox/docker/exec_box.py
+++ b/opendevin/sandbox/docker/exec_box.py
@@ -11,7 +11,9 @@ import docker

 from opendevin import config
 from opendevin.logger import opendevin_logger as logger
-from opendevin.sandbox.sandbox import Sandbox, BackgroundCommand
+from opendevin.sandbox.sandbox import Sandbox
+from opendevin.sandbox.process import Process
+from opendevin.sandbox.docker.process import DockerProcess
 from opendevin.schema import ConfigType
 from opendevin.exceptions import SandboxInvalidBackgroundCommandError

@@ -40,7 +42,7 @@ class DockerExecBox(Sandbox):
    docker_client: docker.DockerClient

    cur_background_id = 0
-    background_commands: Dict[int, BackgroundCommand] = {}
+    background_commands: Dict[int, Process] = {}

    def __init__(
            self,
@@ -120,14 +122,14 @@ class DockerExecBox(Sandbox):
                return -1, f'Command: "{cmd}" timed out'
        return exit_code, logs.decode('utf-8')

-    def execute_in_background(self, cmd: str) -> BackgroundCommand:
+    def execute_in_background(self, cmd: str) -> Process:
        result = self.container.exec_run(
            self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
        )
        result.output._sock.setblocking(0)
        pid = self.get_pid(cmd)
-        bg_cmd = BackgroundCommand(self.cur_background_id, cmd, result, pid)
-        self.background_commands[bg_cmd.id] = bg_cmd
+        bg_cmd = DockerProcess(self.cur_background_id, cmd, result, pid)
+        self.background_commands[bg_cmd.pid] = bg_cmd
        self.cur_background_id += 1
        return bg_cmd

@@ -142,13 +144,14 @@ class DockerExecBox(Sandbox):
                return pid
        return None

-    def kill_background(self, id: int) -> BackgroundCommand:
+    def kill_background(self, id: int) -> Process:
        if id not in self.background_commands:
            raise SandboxInvalidBackgroundCommandError()
        bg_cmd = self.background_commands[id]
        if bg_cmd.pid is not None:
            self.container.exec_run(
                f'kill -9 {bg_cmd.pid}', workdir=SANDBOX_WORKSPACE_DIR)
+        assert isinstance(bg_cmd, DockerProcess)
        bg_cmd.result.output.close()
        self.background_commands.pop(id)
        return bg_cmd
@@ -259,14 +262,14 @@ if __name__ == '__main__':
                logger.info('Exiting...')
                break
            if user_input.lower() == 'kill':
-                exec_box.kill_background(bg_cmd.id)
+                exec_box.kill_background(bg_cmd.pid)
                logger.info('Background process killed')
                continue
            exit_code, output = exec_box.execute(user_input)
            logger.info('exit code: %d', exit_code)
            logger.info(output)
-            if bg_cmd.id in exec_box.background_commands:
-                logs = exec_box.read_logs(bg_cmd.id)
+            if bg_cmd.pid in exec_box.background_commands:
+                logs = exec_box.read_logs(bg_cmd.pid)
                logger.info('background logs: %s', logs)
            sys.stdout.flush()
    except KeyboardInterrupt:
--- a/opendevin/sandbox/docker/local_box.py
+++ b/opendevin/sandbox/docker/local_box.py
@@ -1,7 +1,9 @@
 import subprocess
 import atexit
 from typing import Tuple, Dict
-from opendevin.sandbox.sandbox import Sandbox, BackgroundCommand
+from opendevin.sandbox.sandbox import Sandbox
+from opendevin.sandbox.process import Process
+from opendevin.sandbox.docker.process import DockerProcess
 from opendevin import config

 # ===============================================================================
@@ -23,7 +25,7 @@ from opendevin import config
 class LocalBox(Sandbox):
    def __init__(self, timeout: int = 120):
        self.timeout = timeout
-        self.background_commands: Dict[int, BackgroundCommand] = {}
+        self.background_commands: Dict[int, Process] = {}
        self.cur_background_id = 0
        atexit.register(self.cleanup)

@@ -37,12 +39,12 @@ class LocalBox(Sandbox):
        except subprocess.TimeoutExpired:
            return -1, 'Command timed out'

-    def execute_in_background(self, cmd: str) -> BackgroundCommand:
+    def execute_in_background(self, cmd: str) -> Process:
        process = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            text=True, cwd=config.get('WORKSPACE_BASE')
        )
-        bg_cmd = BackgroundCommand(
+        bg_cmd = DockerProcess(
            id=self.cur_background_id, command=cmd, result=process, pid=process.pid
        )
        self.background_commands[self.cur_background_id] = bg_cmd
@@ -53,6 +55,7 @@ class LocalBox(Sandbox):
        if id not in self.background_commands:
            raise ValueError('Invalid background command id')
        bg_cmd = self.background_commands[id]
+        assert isinstance(bg_cmd, DockerProcess)
        bg_cmd.result.terminate()  # terminate the process
        bg_cmd.result.wait()  # wait for process to terminate
        self.background_commands.pop(id)
@@ -61,6 +64,7 @@ class LocalBox(Sandbox):
        if id not in self.background_commands:
            raise ValueError('Invalid background command id')
        bg_cmd = self.background_commands[id]
+        assert isinstance(bg_cmd, DockerProcess)
        output = bg_cmd.result.stdout.read()
        return output.decode('utf-8')

--- a/opendevin/sandbox/docker/process.py
+++ b/opendevin/sandbox/docker/process.py
@@ -0,0 +1,132 @@
+import select
+import sys
+from typing import Tuple
+
+from opendevin.sandbox.process import Process
+
+
+class DockerProcess(Process):
+    """
+    Represents a background command execution
+    """
+
+    def __init__(self, id: int, command: str, result, pid: int):
+        """
+        Initialize a DockerProcess instance.
+
+        Args:
+            id (int): The identifier of the command.
+            command (str): The command to be executed.
+            result: The result of the command execution.
+            pid (int): The process ID (PID) of the command.
+        """
+        self.id = id
+        self._command = command
+        self.result = result
+        self._pid = pid
+
+    @property
+    def pid(self) -> int:
+        return self._pid
+
+    @property
+    def command(self) -> str:
+        return self._command
+
+    def parse_docker_exec_output(self, logs: bytes) -> Tuple[bytes, bytes]:
+        """
+            When you execute a command using `exec` in a docker container, the output produced will be in bytes. This function parses the output of a Docker exec command.
+
+        Example:
+            Considering you have a docker container named `my_container` up and running
+            $ docker exec my_container echo "Hello OpenDevin!"
+            >> b'\x00\x00\x00\x00\x00\x00\x00\x13Hello OpenDevin!'
+
+            Such binary logs will be processed by this function.
+
+            The function handles message types, padding, and byte order to create a usable result. The primary goal is to convert raw container logs into a more structured format for further analysis or display.
+
+            The function also returns a tail of bytes to ensure that no information is lost. It is a way to handle edge cases and maintain data integrity.
+
+            >> output_bytes = b'\x00\x00\x00\x00\x00\x00\x00\x13Hello OpenDevin!'
+            >> parsed_output, remaining_bytes = parse_docker_exec_output(output_bytes)
+
+            >> print(parsed_output)
+            b'Hello OpenDevin!'
+
+            >> print(remaining_bytes)
+            b''
+
+        Args:
+            logs (bytes): The raw output logs of the command.
+
+        Returns:
+            Tuple[bytes, bytes]: A tuple containing the parsed output and any remaining data.
+        """
+        res = b''
+        tail = b''
+        i = 0
+        byte_order = sys.byteorder
+        while i < len(logs):
+            prefix = logs[i: i + 8]
+            if len(prefix) < 8:
+                msg_type = prefix[0:1]
+                if msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']:
+                    tail = prefix
+                break
+
+            msg_type = prefix[0:1]
+            padding = prefix[1:4]
+            if (
+                msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']
+                and padding == b'\x00\x00\x00'
+            ):
+                msg_length = int.from_bytes(prefix[4:8], byteorder=byte_order)
+                res += logs[i + 8: i + 8 + msg_length]
+                i += 8 + msg_length
+            else:
+                res += logs[i: i + 1]
+                i += 1
+        return res, tail
+
+    def read_logs(self) -> str:
+        """
+        Read and decode the logs of the command.
+
+        This function continuously reads the standard output of a subprocess and
+        processes the output using the parse_docker_exec_output function to handle
+        binary log messages. It concatenates and decodes the output bytes into a
+        string, ensuring that no partial messages are lost during reading.
+
+        Dummy Example:
+
+        >> cmd = 'echo "Hello OpenDevin!"'
+        >> result = subprocess.Popen(
+            cmd, shell=True, stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT, text=True, cwd='.'
+            )
+        >> bg_cmd = DockerProcess(id, command=cmd, result=result, pid=pid)
+
+        >> logs = bg_cmd.read_logs()
+        >> print(logs)
+        Hello OpenDevin!
+
+        Returns:
+            str: The decoded logs(string) of the command.
+        """
+        # TODO: get an exit code if process is exited
+        logs = b''
+        last_remains = b''
+        while True:
+            ready_to_read, _, _ = select.select(
+                [self.result.output], [], [], 0.1)  # type: ignore[has-type]
+            if ready_to_read:
+                data = self.result.output.read(4096)  # type: ignore[has-type]
+                if not data:
+                    break
+                chunk, last_remains = self.parse_docker_exec_output(
+                    last_remains + data)
+                logs += chunk
+            else:
+                break
+        return (logs + last_remains).decode('utf-8', errors='replace')
--- a/opendevin/sandbox/docker/ssh_box.py
+++ b/opendevin/sandbox/docker/ssh_box.py
@@ -12,7 +12,9 @@ from pexpect import pxssh

 from opendevin import config
 from opendevin.logger import opendevin_logger as logger
-from opendevin.sandbox.sandbox import Sandbox, BackgroundCommand
+from opendevin.sandbox.sandbox import Sandbox
+from opendevin.sandbox.process import Process
+from opendevin.sandbox.docker.process import DockerProcess
 from opendevin.schema import ConfigType
 from opendevin.utils import find_available_tcp_port
 from opendevin.exceptions import SandboxInvalidBackgroundCommandError
@@ -53,7 +55,7 @@ class DockerSSHBox(Sandbox):
    _ssh_port: int

    cur_background_id = 0
-    background_commands: Dict[int, BackgroundCommand] = {}
+    background_commands: Dict[int, Process] = {}

    def __init__(
            self,
@@ -206,14 +208,14 @@ class DockerSSHBox(Sandbox):
        exit_code = int(exit_code.lstrip('echo $?').strip())
        return exit_code, command_output

-    def execute_in_background(self, cmd: str) -> BackgroundCommand:
+    def execute_in_background(self, cmd: str) -> Process:
        result = self.container.exec_run(
            self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
        )
        result.output._sock.setblocking(0)
        pid = self.get_pid(cmd)
-        bg_cmd = BackgroundCommand(self.cur_background_id, cmd, result, pid)
-        self.background_commands[bg_cmd.id] = bg_cmd
+        bg_cmd = DockerProcess(self.cur_background_id, cmd, result, pid)
+        self.background_commands[bg_cmd.pid] = bg_cmd
        self.cur_background_id += 1
        return bg_cmd

@@ -228,13 +230,14 @@ class DockerSSHBox(Sandbox):
                return pid
        return None

-    def kill_background(self, id: int) -> BackgroundCommand:
+    def kill_background(self, id: int) -> Process:
        if id not in self.background_commands:
            raise SandboxInvalidBackgroundCommandError()
        bg_cmd = self.background_commands[id]
        if bg_cmd.pid is not None:
            self.container.exec_run(
                f'kill -9 {bg_cmd.pid}', workdir=SANDBOX_WORKSPACE_DIR)
+        assert isinstance(bg_cmd, DockerProcess)
        bg_cmd.result.output.close()
        self.background_commands.pop(id)
        return bg_cmd
@@ -368,14 +371,14 @@ if __name__ == '__main__':
                logger.info('Exiting...')
                break
            if user_input.lower() == 'kill':
-                ssh_box.kill_background(bg_cmd.id)
+                ssh_box.kill_background(bg_cmd.pid)
                logger.info('Background process killed')
                continue
            exit_code, output = ssh_box.execute(user_input)
            logger.info('exit code: %d', exit_code)
            logger.info(output)
-            if bg_cmd.id in ssh_box.background_commands:
-                logs = ssh_box.read_logs(bg_cmd.id)
+            if bg_cmd.pid in ssh_box.background_commands:
+                logs = ssh_box.read_logs(bg_cmd.pid)
                logger.info('background logs: %s', logs)
            sys.stdout.flush()
    except KeyboardInterrupt:
--- a/opendevin/sandbox/e2b/README.md
+++ b/opendevin/sandbox/e2b/README.md
@@ -0,0 +1,35 @@
+# How to use E2B
+
+[E2B](https://e2b.dev) is an [open-source](https://github.com/e2b-dev/e2b) secure cloud environment (sandbox) made for running AI-generated code and agents. E2B offers [Python](https://pypi.org/project/e2b/) and [JS/TS](https://www.npmjs.com/package/e2b) SDKs to spawn and control these sandboxes.
+
+## Getting started
+
+1. [Get your API key](https://e2b.dev/docs/getting-started/api-key)
+
+1. Set your E2B API key to the `E2B_API_KEY` env var when starting the Docker container
+
+1. **Optional** - Install the CLI with NPM.
+    ```sh
+    npm install -g @e2b/cli@latest
+    ```
+    Full CLI API is [here](https://e2b.dev/docs/cli/installation).
+
+## OpenDevin sandbox
+You can use the E2B CLI to create a custom sandbox with a Dockerfile. Read the full guide [here](https://e2b.dev/docs/guide/custom-sandbox). The premade OpenDevin sandbox for E2B is set up in the [`containers` directory](/containers/e2b-sandbox), and it's called `open-devin`.
+
+## Debugging
+You can connect to a running E2B sandbox with E2B CLI in your terminal.
+
+- List all running sandboxes (based on your API key)
+    ```sh
+    e2b sandbox list
+    ```
+
+- Connect to a running sandbox
+    ```sh
+    e2b sandbox connect <sandbox-id>
+    ```
+
+## Links
+- [E2B Docs](https://e2b.dev/docs)
+- [E2B GitHub](https://github.com/e2b-dev/e2b)
--- a/opendevin/sandbox/e2b/process.py
+++ b/opendevin/sandbox/e2b/process.py
@@ -0,0 +1,27 @@
+from e2b import Process as E2BSandboxProcess
+
+from opendevin.sandbox.process import Process
+
+
+class E2BProcess(Process):
+    def __init__(self, process: E2BSandboxProcess, cmd: str):
+        self._process = process
+        self._command = cmd
+
+    def kill(self):
+        self._process.kill()
+
+    def read_logs(self):
+        return '\n'.join([m.line for m in self._process.output_messages])
+
+    @property
+    def pid(self) -> int:
+        return int(self._process.process_id)
+
+    @property
+    def command(self) -> str:
+        return self._command
+
+    @property
+    def output_messages(self):
+        return self._process.output_messages
--- a/opendevin/sandbox/e2b/sandbox.py
+++ b/opendevin/sandbox/e2b/sandbox.py
@@ -0,0 +1,80 @@
+from typing import Dict, Tuple
+from e2b import Sandbox as E2BSandbox
+from e2b.sandbox.exception import (
+    TimeoutException,
+)
+
+from opendevin import config
+from opendevin.logger import opendevin_logger as logger
+from opendevin.sandbox.sandbox import Sandbox
+from opendevin.sandbox.e2b.process import E2BProcess
+from opendevin.sandbox.process import Process
+
+
+class E2BBox(Sandbox):
+    closed = False
+    cur_background_id = 0
+    background_commands: Dict[int, Process] = {}
+
+    def __init__(
+        self,
+        template: str = 'open-devin',
+        timeout: int = 120,
+    ):
+        self.sandbox = E2BSandbox(
+            api_key=config.get('E2B_API_KEY'),
+            template=template,
+            # It's possible to stream stdout and stderr from sandbox and from each process
+            on_stderr=lambda x: logger.info(f'E2B sandbox stderr: {x}'),
+            on_stdout=lambda x: logger.info(f'E2B sandbox stdout: {x}'),
+            cwd='/home/user',  # Default workdir inside sandbox
+        )
+        self.timeout = timeout
+        logger.info(f'Started E2B sandbox with ID "{self.sandbox.id}"')
+
+    @property
+    def filesystem(self):
+        return self.sandbox.filesystem
+
+    # TODO: This won't work if we didn't wait for the background process to finish
+    def read_logs(self, process_id: int) -> str:
+        proc = self.background_commands.get(process_id)
+        if proc is None:
+            raise ValueError(f'Process {process_id} not found')
+        assert isinstance(proc, E2BProcess)
+        return '\n'.join([m.line for m in proc.output_messages])
+
+    def execute(self, cmd: str) -> Tuple[int, str]:
+        process = self.sandbox.process.start(cmd)
+        try:
+            process_output = process.wait(timeout=self.timeout)
+        except TimeoutException:
+            logger.info('Command timed out, killing process...')
+            process.kill()
+            return -1, f'Command: "{cmd}" timed out'
+
+        logs = [m.line for m in process_output.messages]
+        logs_str = '\n'.join(logs)
+        if process.exit_code is None:
+            return -1, logs_str
+
+        assert process_output.exit_code is not None
+        return process_output.exit_code, logs_str
+
+    def execute_in_background(self, cmd: str) -> Process:
+        process = self.sandbox.process.start(cmd)
+        e2b_process = E2BProcess(process, cmd)
+        self.cur_background_id += 1
+        self.background_commands[self.cur_background_id] = e2b_process
+        return e2b_process
+
+    def kill_background(self, process_id: int):
+        process = self.background_commands.get(process_id)
+        if process is None:
+            raise ValueError(f'Process {process_id} not found')
+        assert isinstance(process, E2BProcess)
+        process.kill()
+        return process
+
+    def close(self):
+        self.sandbox.close()
--- a/opendevin/sandbox/process.py
+++ b/opendevin/sandbox/process.py
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+
+
+class Process(ABC):
+    @property
+    @abstractmethod
+    def pid(self) -> int:
+        pass
+
+    @property
+    @abstractmethod
+    def command(self) -> str:
+        pass
+
+    @abstractmethod
+    def read_logs(self) -> str:
+        pass
--- a/opendevin/sandbox/sandbox.py
+++ b/opendevin/sandbox/sandbox.py
@@ -1,142 +1,23 @@
-import select
-import sys
 from abc import ABC, abstractmethod
 from typing import Dict
 from typing import Tuple

-
-class BackgroundCommand:
-    """
-    Represents a background command execution
-    """
-
-    def __init__(self, id: int, command: str, result, pid: int):
-        """
-        Initialize a BackgroundCommand instance.
-
-        Args:
-            id (int): The identifier of the command.
-            command (str): The command to be executed.
-            result: The result of the command execution.
-            pid (int): The process ID (PID) of the command.
-        """
-        self.id = id
-        self.command = command
-        self.result = result
-        self.pid = pid
-
-    def parse_docker_exec_output(self, logs: bytes) -> Tuple[bytes, bytes]:
-        """
-            When you execute a command using `exec` in a docker container, the output produced will be in bytes. this function parses the output of a Docker exec command.
-
-        Example:
-            Considering you have a docker container named `my_container` up and running
-            $ docker exec my_container echo "Hello OpenDevin!"
-            >> b'\x00\x00\x00\x00\x00\x00\x00\x13Hello OpenDevin!'
-
-            Such binary logs will be processed by this function.
-
-            The function handles message types, padding, and byte order to create a usable result. The primary goal is to convert raw container logs into a more structured format for further analysis or display.
-
-            The function also returns a tail of bytes to ensure that no information is lost. It is a way to handle edge cases and maintain data integrity.
-
-            >> output_bytes = b'\x00\x00\x00\x00\x00\x00\x00\x13Hello OpenDevin!'
-            >> parsed_output, remaining_bytes = parse_docker_exec_output(output_bytes)
-
-            >> print(parsed_output)
-            b'Hello OpenDevin!'
-
-            >> print(remaining_bytes)
-            b''
-
-        Args:
-            logs (bytes): The raw output logs of the command.
-
-        Returns:
-            Tuple[bytes, bytes]: A tuple containing the parsed output and any remaining data.
-        """
-        res = b''
-        tail = b''
-        i = 0
-        byte_order = sys.byteorder
-        while i < len(logs):
-            prefix = logs[i: i + 8]
-            if len(prefix) < 8:
-                msg_type = prefix[0:1]
-                if msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']:
-                    tail = prefix
-                break
-
-            msg_type = prefix[0:1]
-            padding = prefix[1:4]
-            if (
-                    msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']
-                    and padding == b'\x00\x00\x00'
-            ):
-                msg_length = int.from_bytes(prefix[4:8], byteorder=byte_order)
-                res += logs[i + 8: i + 8 + msg_length]
-                i += 8 + msg_length
-            else:
-                res += logs[i: i + 1]
-                i += 1
-        return res, tail
-
-    def read_logs(self) -> str:
-        """
-        Read and decode the logs of the command.
-
-        This function continuously reads the standard output of a subprocess and
-        processes the output using the parse_docker_exec_output function to handle
-        binary log messages. It concatenates and decodes the output bytes into a
-        string, ensuring that no partial messages are lost during reading.
-
-        Dummy Example:
-
-        >> cmd = 'echo "Hello OpenDevin!"'
-        >> result = subprocess.Popen(
-            cmd, shell=True, stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT, text=True, cwd='.'
-            )
-        >> bg_cmd = BackgroundCommand(id, cmd = cmd, result = result, pid)
-
-        >> logs = bg_cmd.read_logs()
-        >> print(logs)
-        Hello OpenDevin!
-
-        Returns:
-            str: The decoded logs(string) of the command.
-        """
-        # TODO: get an exit code if process is exited
-        logs = b''
-        last_remains = b''
-        while True:
-            ready_to_read, _, _ = select.select(
-                [self.result.output], [], [], 0.1)  # type: ignore[has-type]
-            if ready_to_read:
-                data = self.result.output.read(4096)  # type: ignore[has-type]
-                if not data:
-                    break
-                chunk, last_remains = self.parse_docker_exec_output(
-                    last_remains + data)
-                logs += chunk
-            else:
-                break
-        return (logs + last_remains).decode('utf-8', errors='replace')
+from opendevin.sandbox.process import Process


 class Sandbox(ABC):
-    background_commands: Dict[int, BackgroundCommand] = {}
+    background_commands: Dict[int, Process] = {}

    @abstractmethod
    def execute(self, cmd: str) -> Tuple[int, str]:
        pass

    @abstractmethod
-    def execute_in_background(self, cmd: str):
+    def execute_in_background(self, cmd: str) -> Process:
        pass

    @abstractmethod
-    def kill_background(self, id: int):
+    def kill_background(self, id: int) -> Process:
        pass

    @abstractmethod
--- a/opendevin/schema/config.py
+++ b/opendevin/schema/config.py
@@ -18,6 +18,7 @@ class ConfigType(str, Enum):
    MAX_ITERATIONS = 'MAX_ITERATIONS'
    MAX_CHARS = 'MAX_CHARS'
    AGENT = 'AGENT'
+    E2B_API_KEY = 'E2B_API_KEY'
    SANDBOX_TYPE = 'SANDBOX_TYPE'
    USE_HOST_NETWORK = 'USE_HOST_NETWORK'
    SSH_HOSTNAME = 'SSH_HOSTNAME'
--- a/opendevin/server/agent/agent.py
+++ b/opendevin/server/agent/agent.py
@@ -240,4 +240,4 @@ class AgentUnit:
        if self.agent_task:
            self.agent_task.cancel()
        if self.controller is not None:
-            self.controller.action_manager.shell.close()
+            self.controller.action_manager.sandbox.close()
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,16 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+
+[[package]]
+name = "aenum"
+version = "3.1.15"
+description = "Advanced Enumerations (compatible with Python's stdlib Enum), NamedTuples, and NamedConstants"
+optional = false
+python-versions = "*"
+files = [
+    {file = "aenum-3.1.15-py2-none-any.whl", hash = "sha256:27b1710b9d084de6e2e695dab78fe9f269de924b51ae2850170ee7e1ca6288a5"},
+    {file = "aenum-3.1.15-py3-none-any.whl", hash = "sha256:e0dfaeea4c2bd362144b87377e2c61d91958c5ed0b4daf89cb6f45ae23af6288"},
+    {file = "aenum-3.1.15.tar.gz", hash = "sha256:8cbd76cd18c4f870ff39b24284d3ea028fbe8731a58df3aa581e434c575b9559"},
+]

 [[package]]
 name = "aiohttp"
@@ -911,6 +923,28 @@ urllib3 = ">=1.26.0"
 ssh = ["paramiko (>=2.4.3)"]
 websockets = ["websocket-client (>=1.3.0)"]

+[[package]]
+name = "e2b"
+version = "0.14.13"
+description = "E2B SDK that give agents cloud environments"
+optional = false
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "e2b-0.14.13-py3-none-any.whl", hash = "sha256:321bef60d427b967e41d550b98575b6488dabf495bb952de16c580211fe8d888"},
+    {file = "e2b-0.14.13.tar.gz", hash = "sha256:2e09e24a390725b4bb21e79f33e98a9ad77ce1ea8c694ef158cd78ba6ff001a3"},
+]
+
+[package.dependencies]
+aenum = ">=3.1.11"
+aiohttp = ">=3.8.4"
+jsonrpcclient = ">=4.0.3"
+pydantic = "*"
+python-dateutil = ">=2.8.2"
+requests = ">=2.31.0"
+typing-extensions = ">=4.8.0"
+urllib3 = ">=1.25.3"
+websockets = ">=11.0.3"
+
 [[package]]
 name = "fastapi"
 version = "0.110.1"
@@ -1730,6 +1764,19 @@ files = [
    {file = "json_repair-0.13.0.tar.gz", hash = "sha256:37b95e47a9ded4e9c20a749c4296f1ddada3bea1589133f1af2d9a19ba0952e6"},
 ]

+[[package]]
+name = "jsonrpcclient"
+version = "4.0.3"
+description = "Send JSON-RPC requests"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "jsonrpcclient-4.0.3-py3-none-any.whl", hash = "sha256:3cbb9e27e1be29821becf135ea183144a836215422727e1ffe5056a49a670f0d"},
+]
+
+[package.extras]
+qa = ["pytest", "pytest-cov", "tox"]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.5"
@@ -3391,6 +3438,7 @@ optional = false
 python-versions = ">=3.9"
 files = [
    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
+    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
@@ -3411,6 +3459,7 @@ files = [
    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
+    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
@@ -4208,7 +4257,6 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -5970,4 +6018,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "3a5ca3c8b47e0e43994032d1620d85a8d602c52a93790d192b9fdb3a8ac36d97"
+content-hash = "277121a08f7a6748e8dee0f6d14a5eb93804351fa908b0b642b20a1d51a39c02"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ types-toml = "*"
 numpy = "*"
 json-repair = "*"
 playwright = "*"
+e2b = "^0.14.13"
 pexpect = "*"

 [tool.poetry.group.llama-index.dependencies]