diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py
index 825e4ee43a..66fb268366 100644
--- a/agenthub/langchains_agent/__init__.py
+++ b/agenthub/langchains_agent/__init__.py
@@ -9,6 +9,7 @@ from agenthub.langchains_agent.utils.monologue import Monologue
 from agenthub.langchains_agent.utils.memory import LongTermMemory
 
 from opendevin.action import (
+    NullAction,
     CmdRunAction,
     CmdKillAction,
     BrowseURLAction,
@@ -128,16 +129,18 @@ class LangchainsAgent(Agent):
 
         # Translate state to action_dict
         for prev_action, obs in state.updated_info:
+            d = None
             if isinstance(obs, CmdOutputObservation):
                 if obs.error:
                     d = {"action": "error", "args": {"output": obs.content}}
                 else:
                     d = {"action": "output", "args": {"output": obs.content}}
             else:
-                d = {"action": "output", "args": {"output": obs.content}}
-            self._add_event(d)
-
+                raise ValueError(f"Unknown observation type: {obs}")
+            if d is not None:
+                self._add_event(d)
 
+            d = None
             if isinstance(prev_action, CmdRunAction):
                 d = {"action": "run", "args": {"command": prev_action.command}}
             elif isinstance(prev_action, CmdKillAction):
@@ -154,9 +157,12 @@ class LangchainsAgent(Agent):
                 d = {"action": "think", "args": {"thought": prev_action.thought}}
             elif isinstance(prev_action, AgentFinishAction):
                 d = {"action": "finish"}
+            elif isinstance(prev_action, NullAction):
+                d = None
             else:
-                raise NotImplementedError(f"Unknown action type: {prev_action}")
-            self._add_event(d)
+                raise ValueError(f"Unknown action type: {prev_action}")
+            if d is not None:
+                self._add_event(d)
 
         state.updated_info = []
 
diff --git a/opendevin/action/browse.py b/opendevin/action/browse.py
index 6c9bfd8bf9..57fd6f8246 100644
--- a/opendevin/action/browse.py
+++ b/opendevin/action/browse.py
@@ -10,11 +10,19 @@ class BrowseURLAction(ExecutableAction):
     url: str
 
     def run(self, *args, **kwargs) -> BrowserOutputObservation:
-        response = requests.get(self.url)
-        return BrowserOutputObservation(
-            content=response.text,
-            url=self.url
-        )
+        try:
+            response = requests.get(self.url)
+            return BrowserOutputObservation(
+                content=response.text,
+                status_code=response.status_code,
+                url=self.url
+            )
+        except requests.exceptions.RequestException as e:
+            return BrowserOutputObservation(
+                content=str(e),
+                error=True,
+                url=self.url
+            )
 
     @property
     def message(self) -> str:
diff --git a/opendevin/controller/__init__.py b/opendevin/controller/__init__.py
index f8a06c9b9a..787c31cceb 100644
--- a/opendevin/controller/__init__.py
+++ b/opendevin/controller/__init__.py
@@ -18,6 +18,8 @@ from opendevin.observation import (
 
 from .command_manager import CommandManager
 
+def print_with_indent(text: str):
+    print("\t"+text.replace("\n","\n\t"), flush=True)
 
 class AgentController:
     def __init__(
@@ -47,45 +49,60 @@ class AgentController:
         self.state_updated_info.append((NullAction(), observation))
 
     async def start_loop(self, task_instruction: str):
-        try:
-            self.agent.instruction = task_instruction
-            for i in range(self.max_iterations):
-                print("STEP", i, flush=True)
+        finished = False
+        self.agent.instruction = task_instruction
+        for i in range(self.max_iterations):
+            try:
+                finished = await self.step(i)
+            except Exception as e:
+                print("Error in loop", e, flush=True)
+                break
+            if finished:
+                break
+        if not finished:
+            print("Exited before finishing", flush=True)
 
-                state: State = self.get_current_state()
-                action: Action = self.agent.step(state)
-                
-                print("ACTION", action, flush=True)
-                for _callback_fn in self.callbacks:
-                    _callback_fn(action)
-                
-                if isinstance(action, AgentFinishAction):
-                    print("FINISHED", flush=True)
-                    break
-                if isinstance(action, (FileReadAction, FileWriteAction)):
-                    action_cls = action.__class__
-                    _kwargs = action.__dict__
-                    _kwargs["base_path"] = self.workdir
-                    action = action_cls(**_kwargs)
-                    print(action, flush=True)
-                print("---", flush=True)
+    async def step(self, i: int):
+        print("\n\n==============", flush=True)
+        print("STEP", i, flush=True)
+        log_obs = self.command_manager.get_background_obs()
+        for obs in log_obs:
+            self.add_observation(obs)
+            await self._run_callbacks(obs)
+            print_with_indent("\nBACKGROUND LOG:\n%s" % obs)
+
+        state: State = self.get_current_state()
+        action: Action = self.agent.step(state)
+
+        print_with_indent("\nACTION:\n%s" % action)
+        await self._run_callbacks(action)
+
+        if isinstance(action, AgentFinishAction):
+            print_with_indent("\nFINISHED")
+            return True
+        if isinstance(action, (FileReadAction, FileWriteAction)):
+            action_cls = action.__class__
+            _kwargs = action.__dict__
+            _kwargs["base_path"] = self.workdir
+            action = action_cls(**_kwargs)
+            print(action, flush=True)
+        if action.executable:
+            observation: Observation = action.run(self)
+        else:
+            observation = NullObservation("")
+        print_with_indent("\nOBSERVATION:\n%s" % observation)
+        self.state_updated_info.append((action, observation))
+        await self._run_callbacks(observation)
 
 
-                if action.executable:
-                    observation: Observation = action.run(self)
-                else:
-                    print("ACTION NOT EXECUTABLE", flush=True)
-                    observation = NullObservation("")
-                print("OBSERVATION", observation, flush=True)
-                self.state_updated_info.append((action, observation))
-                
-                print(observation, flush=True)
-                for _callback_fn in self.callbacks:
-                    _callback_fn(observation)
-
-                print("==============", flush=True)
-
-                await asyncio.sleep(0.001)
-        except Exception as e:
-            print("Error in loop", e, flush=True)
-            pass
+    async def _run_callbacks(self, event):
+        if event is None:
+            return
+        for callback in self.callbacks:
+            idx = self.callbacks.index(callback)
+            try:
+                callback(event)
+            except Exception as e:
+                print("Callback error:" + str(idx), e, flush=True)
+                pass
+        await asyncio.sleep(0.001) # Give back control for a tick, so we can await in callbacks
diff --git a/opendevin/controller/command_manager.py b/opendevin/controller/command_manager.py
index 51a8cd0de9..2f5528b35f 100644
--- a/opendevin/controller/command_manager.py
+++ b/opendevin/controller/command_manager.py
@@ -3,24 +3,9 @@ from typing import List
 from opendevin.observation import CmdOutputObservation
 from opendevin.sandbox.sandbox import DockerInteractive
 
-
-class BackgroundCommand:
-    def __init__(self, id: int, command: str, dir: str):
-        self.command = command
-        self.id = id
-        self.shell = DockerInteractive(id=str(id), workspace_dir=dir)
-        self.shell.execute_in_background(command)
-
-    def get_logs(self) -> str:
-        # TODO: get an exit code if process is exited
-        return self.shell.read_logs()
-
-
 class CommandManager:
     def __init__(self, dir):
-        self.cur_id = 0
         self.directory = dir
-        self.background_commands = {}
         self.shell = DockerInteractive(id="default", workspace_dir=dir)
 
     def run_command(self, command: str, background=False) -> CmdOutputObservation:
@@ -32,35 +17,38 @@ class CommandManager:
     def _run_immediately(self, command: str) -> CmdOutputObservation:
         exit_code, output = self.shell.execute(command)
         return CmdOutputObservation(
+            command_id=-1,
             content=output,
-            command_id=self.cur_id,
             command=command,
             exit_code=exit_code
         )
 
     def _run_background(self, command: str) -> CmdOutputObservation:
-        bg_cmd = BackgroundCommand(self.cur_id, command, self.directory)
-        self.cur_id += 1
-        self.background_commands[bg_cmd.id] = bg_cmd
+        bg_cmd = self.shell.execute_in_background(command)
         return CmdOutputObservation(
-            content=f"Background command started.  To stop it, send a `kill` action with id {bg_cmd.id}",
+            content=f"Background command started. To stop it, send a `kill` action with id {bg_cmd.id}",
             command_id=bg_cmd.id,
             command=command,
             exit_code=0
         )
 
-    def kill_command(self, id: int):
-        # TODO: get log events before killing
-        self.background_commands[id].shell.close()
-        del self.background_commands[id]
+    def kill_command(self, id: int) -> CmdOutputObservation:
+        cmd = self.shell.kill_background(id)
+        return CmdOutputObservation(
+            content=f"Background command with id {id} has been killed.",
+            command_id=id,
+            command=cmd.command,
+            exit_code=0
+        )
 
     def get_background_obs(self) -> List[CmdOutputObservation]:
         obs = []
-        for _id, cmd in self.background_commands.items():
-            output = cmd.get_logs()
-            obs.append(
-                CmdOutputObservation(
-                    content=output, command_id=_id, command=cmd.command
+        for _id, cmd in self.shell.background_commands.items():
+            output = cmd.read_logs()
+            if output is not None and output != "":
+                obs.append(
+                    CmdOutputObservation(
+                        content=output, command_id=_id, command=cmd.command
+                    )
                 )
-            )
         return obs
diff --git a/opendevin/observation.py b/opendevin/observation.py
index 0390907081..408099453b 100644
--- a/opendevin/observation.py
+++ b/opendevin/observation.py
@@ -57,6 +57,8 @@ class BrowserOutputObservation(Observation):
     """
 
     url: str
+    status_code: int = 200
+    error: bool = False
 
     @property
     def message(self) -> str:
diff --git a/opendevin/sandbox/sandbox.py b/opendevin/sandbox/sandbox.py
index b23b66d0b0..a904534e87 100644
--- a/opendevin/sandbox/sandbox.py
+++ b/opendevin/sandbox/sandbox.py
@@ -4,7 +4,7 @@ import uuid
 import time
 import select
 import docker
-from typing import Tuple
+from typing import Tuple, Dict, List
 from collections import namedtuple
 import atexit
 
@@ -12,6 +12,35 @@ InputType = namedtuple("InputType", ["content"])
 OutputType = namedtuple("OutputType", ["content"])
 
 CONTAINER_IMAGE = os.getenv("SANDBOX_CONTAINER_IMAGE", "opendevin/sandbox:v0.1")
+# FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
+# How do we make this more flexible?
+RUN_AS_DEVIN = os.getenv("RUN_AS_DEVIN", "true").lower() != "false"
+
+class BackgroundCommand:
+    def __init__(self, id: int, command: str, result):
+        self.id = id
+        self.command = command
+        self.result = result
+
+    def read_logs(self) -> str:
+        # TODO: get an exit code if process is exited
+        logs = ""
+        while True:
+            ready_to_read, _, _ = select.select([self.result.output], [], [], .1) # type: ignore[has-type]
+            if ready_to_read:
+                data = self.result.output.read(4096) # type: ignore[has-type]
+                if not data:
+                    break
+                # FIXME: we're occasionally seeing some escape characters like `\x02` and `\x00` in the logs...
+                chunk = data.decode('utf-8')
+                logs += chunk
+            else:
+                break
+        return logs
+
+    def kill(self):
+        # FIXME: this doesn't actually kill the process!
+        self.result.output.close()
 
 
 USER_ID = 1000
@@ -21,6 +50,9 @@ elif hasattr(os, "getuid"):
     USER_ID = os.getuid()
 
 class DockerInteractive:
+    closed = False
+    cur_background_id = 0
+    background_commands : Dict[int, BackgroundCommand] = {}
 
     def __init__(
         self,
@@ -55,45 +87,55 @@ class DockerInteractive:
         self.container_name = f"sandbox-{self.instance_id}"
 
         self.restart_docker_container()
+        if RUN_AS_DEVIN:
+            self.setup_devin_user()
+        atexit.register(self.cleanup)
+
+    def setup_devin_user(self):
+        uid = os.getuid()
         exit_code, logs = self.container.exec_run([
             '/bin/bash', '-c',
-            f'useradd --shell /bin/bash -u {USER_ID} -o -c \"\" -m devin'
+            f'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin'
             ],
             workdir="/workspace"
         )
-        # regester container cleanup function
-        atexit.register(self.cleanup)
 
-    def read_logs(self) -> str:
-        if not hasattr(self, "log_generator"):
-            return ""
-        logs = ""
-        while True:
-            ready_to_read, _, _ = select.select([self.log_generator], [], [], .1) # type: ignore[has-type]
-            if ready_to_read:
-                data = self.log_generator.read(4096) # type: ignore[has-type]
-                if not data:
-                    break
-                # FIXME: we're occasionally seeing some escape characters like `\x02` and `\x00` in the logs...
-                chunk = data.decode('utf-8')
-                logs += chunk
-            else:
-                break
-        return logs
+    def get_exec_cmd(self, cmd: str) -> List[str]:
+        if RUN_AS_DEVIN:
+            return ['su', 'devin', '-c', cmd]
+        else:
+            return ['/bin/bash', '-c', cmd]
+
+    def read_logs(self, id) -> str:
+        if id not in self.background_commands:
+            raise ValueError("Invalid background command id")
+        bg_cmd = self.background_commands[id]
+        return bg_cmd.read_logs()
 
     def execute(self, cmd: str) -> Tuple[int, str]:
         # TODO: each execute is not stateful! We need to keep track of the current working directory
-        exit_code, logs = self.container.exec_run(['su', 'devin', '-c', cmd], workdir="/workspace")
+        exit_code, logs = self.container.exec_run(self.get_exec_cmd(cmd), workdir="/workspace")
         return exit_code, logs.decode('utf-8')
 
-    def execute_in_background(self, cmd: str) -> None:
-        self.log_time = time.time()
-        result = self.container.exec_run(['su', 'devin', '-c', cmd], socket=True, workdir="/workspace")
-        self.log_generator = result.output # socket.SocketIO
-        self.log_generator._sock.setblocking(0)
+    def execute_in_background(self, cmd: str) -> BackgroundCommand:
+        result = self.container.exec_run(self.get_exec_cmd(cmd), socket=True, workdir="/workspace")
+        result.output._sock.setblocking(0)
+        bg_cmd = BackgroundCommand(self.cur_background_id, cmd, result)
+        self.background_commands[bg_cmd.id] = bg_cmd
+        self.cur_background_id += 1
+        return bg_cmd
+
+    def kill_background(self, id: int) -> BackgroundCommand:
+        if id not in self.background_commands:
+            raise ValueError("Invalid background command id")
+        bg_cmd = self.background_commands[id]
+        bg_cmd.kill()
+        self.background_commands.pop(id)
+        return bg_cmd
 
     def close(self):
         self.stop_docker_container()
+        self.closed = True
 
     def stop_docker_container(self):
         docker_client = docker.from_env()
@@ -145,8 +187,9 @@ class DockerInteractive:
 
     # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
     def cleanup(self):
+        if self.closed:
+            return
         self.container.remove(force=True)
-        print("Finish cleaning up Docker container")
 
 if __name__ == "__main__":
     import argparse
@@ -165,10 +208,7 @@ if __name__ == "__main__":
     )
     print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
 
-    bg = DockerInteractive(
-        workspace_dir=args.directory,
-    )
-    bg.execute_in_background("while true; do echo 'dot ' && sleep 1; done")
+    bg_cmd = docker_interactive.execute_in_background("while true; do echo 'dot ' && sleep 1; done")
 
     sys.stdout.flush()
     try:
@@ -181,11 +221,16 @@ if __name__ == "__main__":
             if user_input.lower() == "exit":
                 print("Exiting...")
                 break
+            if user_input.lower() == "kill":
+                docker_interactive.kill_background(bg_cmd.id)
+                print("Background process killed")
+                continue
             exit_code, output = docker_interactive.execute(user_input)
             print("exit code:", exit_code)
             print(output + "\n", end="")
-            logs = bg.read_logs()
-            print("background logs:", logs, "\n")
+            if bg_cmd.id in docker_interactive.background_commands:
+                logs = docker_interactive.read_logs(bg_cmd.id)
+                print("background logs:", logs, "\n")
             sys.stdout.flush()
     except KeyboardInterrupt:
         print("\nExiting...")
diff --git a/requirements.txt b/requirements.txt
index a012a665b9..3a3f2cda81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,12 +6,13 @@ seaborn
 docker
 fastapi
 uvicorn[standard]
+ruff
+mypy
 
 # for agenthub/lanchangs_agent
 langchain
 langchain-core
-langchain-openai
 langchain-community
 llama-index
 llama-index-vector-stores-chroma
-chromadb
\ No newline at end of file
+chromadb