diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 3cef3b6782..6440aa367b 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -32,9 +32,12 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: 3.11
+      - name: Create mypy cache directory
+        run: mkdir -p .mypy_cache
       - name: Install dependencies
-        run: pip install ruff mypy
+        run: pip install ruff mypy types-PyYAML types-toml
+      - name: Run mypy
+        run: python -m mypy --install-types --non-interactive --config-file dev_config/python/mypy.ini opendevin/ agenthub/
       - name: Run ruff
         run: ruff check --config dev_config/python/ruff.toml opendevin/ agenthub/
-      - name: Run mypy
-        run: mypy --install-types --non-interactive --config-file dev_config/python/mypy.ini opendevin/ agenthub/
+      
diff --git a/opendevin/config.py b/opendevin/config.py
index 3e61096bb6..b21f57cb18 100644
--- a/opendevin/config.py
+++ b/opendevin/config.py
@@ -16,7 +16,6 @@ DEFAULT_CONFIG = {
     'LLM_NUM_RETRIES': 6,
     'LLM_COOLDOWN_TIME': 1,
     'DIRECTORY_REWRITE': '',
-    'PROMPT_DEBUG_DIR': '',
     'MAX_ITERATIONS': 100,
 }
 
diff --git a/opendevin/controller/agent_controller.py b/opendevin/controller/agent_controller.py
index 72296a5131..e3d3d32798 100644
--- a/opendevin/controller/agent_controller.py
+++ b/opendevin/controller/agent_controller.py
@@ -17,50 +17,51 @@ from opendevin.action import (
 )
 from opendevin.observation import Observation, AgentErrorObservation, NullObservation
 from opendevin import config
+from opendevin.logging import opendevin_logger as logger
 
 from .command_manager import CommandManager
 
 
 ColorType = Literal[
-    "red",
-    "green",
-    "yellow",
-    "blue",
-    "magenta",
-    "cyan",
-    "light_grey",
-    "dark_grey",
-    "light_red",
-    "light_green",
-    "light_yellow",
-    "light_blue",
-    "light_magenta",
-    "light_cyan",
-    "white",
+    'red',
+    'green',
+    'yellow',
+    'blue',
+    'magenta',
+    'cyan',
+    'light_grey',
+    'dark_grey',
+    'light_red',
+    'light_green',
+    'light_yellow',
+    'light_blue',
+    'light_magenta',
+    'light_cyan',
+    'white',
 ]
 
 
 DISABLE_COLOR_PRINTING = (
-    config.get_or_default("DISABLE_COLOR", "false").lower() == "true"
+    config.get_or_default('DISABLE_COLOR', 'false').lower() == 'true'
 )
-MAX_ITERATIONS = config.get("MAX_ITERATIONS")
+MAX_ITERATIONS = config.get('MAX_ITERATIONS')
 
 
-def print_with_color(text: Any, print_type: str = "INFO"):
+def print_with_color(text: Any, print_type: str = 'INFO'):
     TYPE_TO_COLOR: Mapping[str, ColorType] = {
-        "BACKGROUND LOG": "blue",
-        "ACTION": "green",
-        "OBSERVATION": "yellow",
-        "INFO": "cyan",
-        "ERROR": "red",
-        "PLAN": "light_magenta",
+        'BACKGROUND LOG': 'blue',
+        'ACTION': 'green',
+        'OBSERVATION': 'yellow',
+        'INFO': 'cyan',
+        'ERROR': 'red',
+        'PLAN': 'light_magenta',
     }
-    color = TYPE_TO_COLOR.get(print_type.upper(), TYPE_TO_COLOR["INFO"])
+    color = TYPE_TO_COLOR.get(print_type.upper(), TYPE_TO_COLOR['INFO'])
     if DISABLE_COLOR_PRINTING:
         print(f"\n{print_type.upper()}:\n{str(text)}", flush=True)
     else:
         print(
-            colored(f"\n{print_type.upper()}:\n", color, attrs=["bold"])
+            colored(f"\n{print_type.upper()}:\n", color, attrs=['bold'])
             + colored(str(text), color),
             flush=True,
         )
@@ -73,7 +74,7 @@ class AgentController:
         self,
         agent: Agent,
         workdir: str,
-        id: str = "",
+        id: str = '',
         max_iterations: int = MAX_ITERATIONS,
         container_image: str | None = None,
         callbacks: List[Callable] = [],
@@ -82,7 +83,8 @@ class AgentController:
         self.agent = agent
         self.max_iterations = max_iterations
         self.workdir = workdir
-        self.command_manager = CommandManager(self.id, workdir, container_image)
+        self.command_manager = CommandManager(
+            self.id, workdir, container_image)
         self.callbacks = callbacks
 
     def update_state_for_step(self, i):
@@ -94,9 +96,9 @@ class AgentController:
 
     def add_history(self, action: Action, observation: Observation):
         if not isinstance(action, Action):
-            raise ValueError("action must be an instance of Action")
+            raise ValueError('action must be an instance of Action')
         if not isinstance(observation, Observation):
-            raise ValueError("observation must be an instance of Observation")
+            raise ValueError('observation must be an instance of Observation')
         self.state.history.append((action, observation))
         self.state.updated_info.append((action, observation))
 
@@ -108,38 +110,38 @@ class AgentController:
             try:
                 finished = await self.step(i)
             except Exception as e:
-                print("Error in loop", e, flush=True)
+                logger.error('Error in loop', exc_info=True)
                 raise e
             if finished:
                 break
         if not finished:
-            print("Exited before finishing", flush=True)
+            logger.info('Exited before finishing the task.')
 
     async def step(self, i: int):
-        print("\n\n==============", flush=True)
-        print("STEP", i, flush=True)
-        print_with_color(self.state.plan.main_goal, "PLAN")
+        print('\n\n==============', flush=True)
+        print('STEP', i, flush=True)
+        print_with_color(self.state.plan.main_goal, 'PLAN')
 
         log_obs = self.command_manager.get_background_obs()
         for obs in log_obs:
             self.add_history(NullAction(), obs)
             await self._run_callbacks(obs)
-            print_with_color(obs, "BACKGROUND LOG")
+            print_with_color(obs, 'BACKGROUND LOG')
 
         self.update_state_for_step(i)
         action: Action = NullAction()
-        observation: Observation = NullObservation("")
+        observation: Observation = NullObservation('')
         try:
             action = self.agent.step(self.state)
             if action is None:
-                raise ValueError("Agent must return an action")
-            print_with_color(action, "ACTION")
+                raise ValueError('Agent must return an action')
+            print_with_color(action, 'ACTION')
         except Exception as e:
             observation = AgentErrorObservation(str(e))
-            print_with_color(observation, "ERROR")
+            print_with_color(observation, 'ERROR')
             traceback.print_exc()
             # TODO Change to more robust error handling
-            if "The api_key client option must be set" in observation.content:
+            if 'The api_key client option must be set' in observation.content:
                 raise
         self.update_state_after_step()
 
@@ -147,22 +149,23 @@ class AgentController:
 
         finished = isinstance(action, AgentFinishAction)
         if finished:
-            print_with_color(action, "INFO")
+            print_with_color(action, 'INFO')
             return True
 
         if isinstance(action, AddTaskAction):
             try:
-                self.state.plan.add_subtask(action.parent, action.goal, action.subtasks)
+                self.state.plan.add_subtask(
+                    action.parent, action.goal, action.subtasks)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
         elif isinstance(action, ModifyTaskAction):
             try:
                 self.state.plan.set_subtask_state(action.id, action.state)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
 
         if action.executable:
@@ -173,11 +176,11 @@ class AgentController:
                     observation = action.run(self)
             except Exception as e:
                 observation = AgentErrorObservation(str(e))
-                print_with_color(observation, "ERROR")
+                print_with_color(observation, 'ERROR')
                 traceback.print_exc()
 
         if not isinstance(observation, NullObservation):
-            print_with_color(observation, "OBSERVATION")
+            print_with_color(observation, 'OBSERVATION')
 
         self.add_history(action, observation)
         await self._run_callbacks(observation)
@@ -189,8 +192,8 @@ class AgentController:
             idx = self.callbacks.index(callback)
             try:
                 callback(event)
-            except Exception as e:
-                print("Callback error:" + str(idx), e, flush=True)
+            except Exception:
+                logger.exception('Callback error: %s', idx)
                 pass
         await asyncio.sleep(
             0.001
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
index 986cedfd98..1eae4fc28e 100644
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -1,70 +1,65 @@
-import os
-import uuid
+from datetime import datetime
 
 from litellm.router import Router
 from functools import partial
 
 from opendevin import config
+from opendevin.logging import llm_prompt_logger, llm_response_logger
+
+DEFAULT_API_KEY = config.get('LLM_API_KEY')
+DEFAULT_BASE_URL = config.get('LLM_BASE_URL')
+DEFAULT_MODEL_NAME = config.get('LLM_MODEL')
+DEFAULT_LLM_NUM_RETRIES = config.get('LLM_NUM_RETRIES')
+DEFAULT_LLM_COOLDOWN_TIME = config.get('LLM_COOLDOWN_TIME')
 
-DEFAULT_API_KEY = config.get("LLM_API_KEY")
-DEFAULT_BASE_URL = config.get("LLM_BASE_URL")
-DEFAULT_MODEL_NAME = config.get("LLM_MODEL")
-DEFAULT_LLM_NUM_RETRIES = config.get("LLM_NUM_RETRIES")
-DEFAULT_LLM_COOLDOWN_TIME = config.get("LLM_COOLDOWN_TIME")
-PROMPT_DEBUG_DIR = config.get("PROMPT_DEBUG_DIR")
 
 class LLM:
     def __init__(self,
-            model=DEFAULT_MODEL_NAME,
-            api_key=DEFAULT_API_KEY,
-            base_url=DEFAULT_BASE_URL,
-            num_retries=DEFAULT_LLM_NUM_RETRIES,
-            cooldown_time=DEFAULT_LLM_COOLDOWN_TIME,
-            debug_dir=PROMPT_DEBUG_DIR
-    ):
+                 model=DEFAULT_MODEL_NAME,
+                 api_key=DEFAULT_API_KEY,
+                 base_url=DEFAULT_BASE_URL,
+                 num_retries=DEFAULT_LLM_NUM_RETRIES,
+                 cooldown_time=DEFAULT_LLM_COOLDOWN_TIME,
+                 ):
         self.model_name = model if model else DEFAULT_MODEL_NAME
         self.api_key = api_key if api_key else DEFAULT_API_KEY
         self.base_url = base_url if base_url else DEFAULT_BASE_URL
         self.num_retries = num_retries if num_retries else DEFAULT_LLM_NUM_RETRIES
         self.cooldown_time = cooldown_time if cooldown_time else DEFAULT_LLM_COOLDOWN_TIME
-        self._debug_dir = debug_dir if debug_dir else PROMPT_DEBUG_DIR
-        self._debug_idx = 0
-        self._debug_id = uuid.uuid4().hex
+        self._debug_id = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
 
-        # We use litellm's Router in order to support retries (especially rate limit backoff retries). 
+        # We use litellm's Router in order to support retries (especially rate limit backoff retries).
         # Typically you would use a whole model list, but it's unnecessary with our implementation's structure
         self._router = Router(
             model_list=[{
-                "model_name": self.model_name,
-                "litellm_params": {
-                    "model": self.model_name,
-                    "api_key": self.api_key,
-                    "api_base": self.base_url
+                'model_name': self.model_name,
+                'litellm_params': {
+                    'model': self.model_name,
+                    'api_key': self.api_key,
+                    'api_base': self.base_url
                 }
             }],
             num_retries=self.num_retries,
-            allowed_fails=self.num_retries, # We allow all retries to fail, so they can retry instead of going into "cooldown"
+            # We allow all retries to fail, so they can retry instead of going into "cooldown"
+            allowed_fails=self.num_retries,
             cooldown_time=self.cooldown_time
         )
-        self._completion = partial(self._router.completion, model=self.model_name)
+        self._completion = partial(
+            self._router.completion, model=self.model_name)
 
-        if self._debug_dir:
-            print(f"Logging prompts to {self._debug_dir}/{self._debug_id}")
-            completion_unwrapped = self._completion
-            def wrapper(*args, **kwargs):
-                dir = self._debug_dir + "/" + self._debug_id + "/" + str(self._debug_idx)
-                os.makedirs(dir, exist_ok=True)
-                if "messages" in kwargs:
-                    messages = kwargs["messages"]
-                else:
-                    messages = args[1]
-                self.write_debug_prompt(dir, messages)
-                resp = completion_unwrapped(*args, **kwargs)
-                message_back = resp['choices'][0]['message']['content']
-                self.write_debug_response(dir, message_back)
-                self._debug_idx += 1
-                return resp
-            self._completion = wrapper # type: ignore
+        completion_unwrapped = self._completion
+
+        def wrapper(*args, **kwargs):
+            if 'messages' in kwargs:
+                messages = kwargs['messages']
+            else:
+                messages = args[1]
+            llm_prompt_logger.debug(messages)
+            resp = completion_unwrapped(*args, **kwargs)
+            message_back = resp['choices'][0]['message']['content']
+            llm_response_logger.debug(message_back)
+            return resp
+        self._completion = wrapper  # type: ignore
 
     @property
     def completion(self):
@@ -73,14 +68,3 @@ class LLM:
         """
         return self._completion
 
-    def write_debug_prompt(self, dir, messages):
-        prompt_out = ""
-        for message in messages:
-            prompt_out += "<" + message["role"] + ">\n"
-            prompt_out += message["content"] + "\n\n"
-        with open(f"{dir}/prompt.md", "w") as f:
-            f.write(prompt_out)
-
-    def write_debug_response(self, dir, response):
-        with open(f"{dir}/response.md", "w") as f:
-            f.write(response)
diff --git a/opendevin/logging.py b/opendevin/logging.py
new file mode 100644
index 0000000000..a1a90d5150
--- /dev/null
+++ b/opendevin/logging.py
@@ -0,0 +1,147 @@
+import logging
+import os
+import sys
+import traceback
+from datetime import datetime
+
+console_formatter = logging.Formatter(
+    '\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s',
+    datefmt='%H:%M:%S',
+)
+file_formatter = logging.Formatter(
+    '%(asctime)s - %(name)s:%(levelname)s: %(filename)s:%(lineno)s - %(message)s',
+    datefmt='%H:%M:%S',
+)
+llm_formatter = logging.Formatter(
+    '%(message)s'
+)
+
+
+def get_console_handler():
+    """
+    Returns a console handler for logging.
+    """
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(console_formatter)
+    return console_handler
+
+
+def get_file_handler():
+    """
+    Returns a file handler for logging.
+    """
+    log_dir = os.path.join(os.getcwd(), 'logs')
+    os.makedirs(log_dir, exist_ok=True)
+    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+    file_name = f"opendevin_{timestamp}.log"
+    file_handler = logging.FileHandler(os.path.join(log_dir, file_name))
+    file_handler.setLevel(logging.DEBUG)
+    file_handler.setFormatter(file_formatter)
+    return file_handler
+
+
+# Set up logging
+logging.basicConfig(level=logging.ERROR)
+
+
+def log_uncaught_exceptions(ex_cls, ex, tb):
+    """
+    Logs uncaught exceptions along with the traceback.
+
+    Args:
+        ex_cls (type): The type of the exception.
+        ex (Exception): The exception instance.
+        tb (traceback): The traceback object.
+
+    Returns:
+        None
+    """
+    logging.error(''.join(traceback.format_tb(tb)))
+    logging.error('{0}: {1}'.format(ex_cls, ex))
+
+
+sys.excepthook = log_uncaught_exceptions
+
+opendevin_logger = logging.getLogger()
+opendevin_logger.setLevel(logging.INFO)
+opendevin_logger.propagate = False
+opendevin_logger.addHandler(get_console_handler())
+opendevin_logger.addHandler(get_file_handler())
+opendevin_logger.debug('Logging initialized')
+opendevin_logger.debug('Logging to %s', os.path.join(
+    os.getcwd(), 'logs', 'opendevin.log'))
+opendevin_logger.name = 'opendevin'
+
+# Exclude "litellm" from logging output
+logging.getLogger('LiteLLM').disabled = True
+logging.getLogger('LiteLLM Router').disabled = True
+logging.getLogger('LiteLLM Proxy').disabled = True
+
+# LLM prompt and response logging
+class LlmFileHandler(logging.FileHandler):
+
+    def __init__(self, filename, mode='a', encoding=None, delay=False):
+        """
+        Initializes an instance of LlmFileHandler.
+
+        Args:
+            filename (str): The name of the log file.
+            mode (str, optional): The file mode. Defaults to 'a'.
+            encoding (str, optional): The file encoding. Defaults to None.
+            delay (bool, optional): Whether to delay file opening. Defaults to False.
+        """
+        self.filename = filename
+        self.message_counter = 1
+        self.session = datetime.now().strftime('%y-%m-%d_%H-%M-%S')
+        self.log_directory = os.path.join(
+            os.getcwd(), 'logs', 'llm', self.session)
+        os.makedirs(self.log_directory, exist_ok=True)
+        self.baseFilename = os.path.join(self.log_directory, f"{self.filename}_{self.message_counter:03}.log")
+        super().__init__(self.baseFilename, mode, encoding, delay)
+
+    def emit(self, record):
+        """
+        Emits a log record.
+
+        Args:
+            record (logging.LogRecord): The log record to emit.
+        """
+        self.baseFilename = os.path.join(self.log_directory, f"{self.filename}_{self.message_counter:03}.log")
+        self.stream = self._open()
+        super().emit(record)
+        self.stream.close
+        opendevin_logger.debug('Logging to %s', self.baseFilename)
+        self.message_counter += 1
+
+
+
+def get_llm_prompt_file_handler():
+    """
+    Returns a file handler for LLM prompt logging.
+    """
+    llm_prompt_file_handler = LlmFileHandler('prompt')
+    llm_prompt_file_handler.setLevel(logging.DEBUG)
+    llm_prompt_file_handler.setFormatter(llm_formatter)
+    return llm_prompt_file_handler
+
+
+def get_llm_response_file_handler():
+    """
+    Returns a file handler for LLM response logging.
+    """
+    llm_response_file_handler = LlmFileHandler('response')
+    llm_response_file_handler.setLevel(logging.DEBUG)
+    llm_response_file_handler.setFormatter(llm_formatter)
+    return llm_response_file_handler
+
+
+llm_prompt_logger = logging.getLogger('prompt')
+llm_prompt_logger.setLevel(logging.DEBUG)
+llm_prompt_logger.propagate = False
+llm_prompt_logger.addHandler(get_llm_prompt_file_handler())
+
+llm_response_logger = logging.getLogger('response')
+llm_response_logger.setLevel(logging.DEBUG)
+llm_response_logger.propagate = False
+llm_response_logger.addHandler(get_llm_response_file_handler())
diff --git a/opendevin/plan.py b/opendevin/plan.py
index 907ebb3590..0162bea05c 100644
--- a/opendevin/plan.py
+++ b/opendevin/plan.py
@@ -1,4 +1,5 @@
 from typing import List
+from opendevin.logging import opendevin_logger as logger
 
 OPEN_STATE = 'open'
 COMPLETED_STATE = 'completed'
@@ -87,6 +88,7 @@ class Task:
             ValueError: If the provided state is invalid.
         """
         if state not in STATES:
+            logger.error('Invalid state: %s', state)
             raise ValueError('Invalid state:' + state)
         self.state = state
         if state == COMPLETED_STATE or state == ABANDONED_STATE or state == VERIFIED_STATE:
diff --git a/opendevin/sandbox/sandbox.py b/opendevin/sandbox/sandbox.py
index 4726e8597e..5e1a6a1093 100644
--- a/opendevin/sandbox/sandbox.py
+++ b/opendevin/sandbox/sandbox.py
@@ -11,22 +11,23 @@ import docker
 import concurrent.futures
 
 from opendevin import config
+from opendevin.logging import opendevin_logger as logger
 
-InputType = namedtuple("InputType", ["content"])
-OutputType = namedtuple("OutputType", ["content"])
+InputType = namedtuple('InputType', ['content'])
+OutputType = namedtuple('OutputType', ['content'])
 
 DIRECTORY_REWRITE = config.get(
-    "DIRECTORY_REWRITE"
+    'DIRECTORY_REWRITE'
 )  # helpful for docker-in-docker scenarios
-CONTAINER_IMAGE = config.get("SANDBOX_CONTAINER_IMAGE")
+CONTAINER_IMAGE = config.get('SANDBOX_CONTAINER_IMAGE')
 
 # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
 # How do we make this more flexible?
-RUN_AS_DEVIN = config.get("RUN_AS_DEVIN").lower() != "false"
+RUN_AS_DEVIN = config.get('RUN_AS_DEVIN').lower() != 'false'
 USER_ID = 1000
-if config.get_or_none("SANDBOX_USER_ID") is not None:
-    USER_ID = int(config.get_or_default("SANDBOX_USER_ID", ""))
-elif hasattr(os, "getuid"):
+if config.get_or_none('SANDBOX_USER_ID') is not None:
+    USER_ID = int(config.get_or_default('SANDBOX_USER_ID', ''))
+elif hasattr(os, 'getuid'):
     USER_ID = os.getuid()
 
 
@@ -38,47 +39,49 @@ class BackgroundCommand:
         self.pid = pid
 
     def parse_docker_exec_output(self, logs: bytes) -> Tuple[bytes, bytes]:
-        res = b""
-        tail = b""
+        res = b''
+        tail = b''
         i = 0
         byte_order = sys.byteorder
         while i < len(logs):
-            prefix = logs[i : i + 8]
+            prefix = logs[i: i + 8]
             if len(prefix) < 8:
                 msg_type = prefix[0:1]
-                if msg_type in [b"\x00", b"\x01", b"\x02", b"\x03"]:
+                if msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']:
                     tail = prefix
                 break
 
             msg_type = prefix[0:1]
             padding = prefix[1:4]
             if (
-                msg_type in [b"\x00", b"\x01", b"\x02", b"\x03"]
-                and padding == b"\x00\x00\x00"
+                msg_type in [b'\x00', b'\x01', b'\x02', b'\x03']
+                and padding == b'\x00\x00\x00'
             ):
                 msg_length = int.from_bytes(prefix[4:8], byteorder=byte_order)
-                res += logs[i + 8 : i + 8 + msg_length]
+                res += logs[i + 8: i + 8 + msg_length]
                 i += 8 + msg_length
             else:
-                res += logs[i : i + 1]
+                res += logs[i: i + 1]
                 i += 1
         return res, tail
 
     def read_logs(self) -> str:
         # TODO: get an exit code if process is exited
-        logs = b""
-        last_remains = b""
+        logs = b''
+        last_remains = b''
         while True:
-            ready_to_read, _, _ = select.select([self.result.output], [], [], 0.1)  # type: ignore[has-type]
+            ready_to_read, _, _ = select.select(
+                [self.result.output], [], [], 0.1)  # type: ignore[has-type]
             if ready_to_read:
                 data = self.result.output.read(4096)  # type: ignore[has-type]
                 if not data:
                     break
-                chunk, last_remains = self.parse_docker_exec_output(last_remains + data)
+                chunk, last_remains = self.parse_docker_exec_output(
+                    last_remains + data)
                 logs += chunk
             else:
                 break
-        return (logs + last_remains).decode("utf-8", errors="replace")
+        return (logs + last_remains).decode('utf-8', errors='replace')
 
 
 class DockerInteractive:
@@ -103,11 +106,15 @@ class DockerInteractive:
             self.workspace_dir = os.path.abspath(workspace_dir)
         else:
             self.workspace_dir = os.getcwd()
-            print(f"workspace unspecified, using current directory: {workspace_dir}")
-        if DIRECTORY_REWRITE != "":
-            parts = DIRECTORY_REWRITE.split(":")
+            logger.info(
+                'workspace unspecified, using current directory: %s', workspace_dir)
+        if DIRECTORY_REWRITE != '':
+            parts = DIRECTORY_REWRITE.split(':')
             self.workspace_dir = self.workspace_dir.replace(parts[0], parts[1])
-            print("Rewriting workspace directory to:", self.workspace_dir)
+            logger.info('Rewriting workspace directory to: %s',
+                        self.workspace_dir)
+        else:
+            logger.info('Using workspace directory: %s', self.workspace_dir)
 
         # TODO: this timeout is actually essential - need a better way to set it
         # if it is too short, the container may still waiting for previous
@@ -131,29 +138,29 @@ class DockerInteractive:
     def setup_devin_user(self):
         exit_code, logs = self.container.exec_run(
             [
-                "/bin/bash",
-                "-c",
+                '/bin/bash',
+                '-c',
                 f'useradd --shell /bin/bash -u {USER_ID} -o -c "" -m devin',
             ],
-            workdir="/workspace",
+            workdir='/workspace',
         )
 
     def get_exec_cmd(self, cmd: str) -> List[str]:
         if RUN_AS_DEVIN:
-            return ["su", "devin", "-c", cmd]
+            return ['su', 'devin', '-c', cmd]
         else:
-            return ["/bin/bash", "-c", cmd]
+            return ['/bin/bash', '-c', cmd]
 
     def read_logs(self, id) -> str:
         if id not in self.background_commands:
-            raise ValueError("Invalid background command id")
+            raise ValueError('Invalid background command id')
         bg_cmd = self.background_commands[id]
         return bg_cmd.read_logs()
 
     def execute(self, cmd: str) -> Tuple[int, str]:
         # TODO: each execute is not stateful! We need to keep track of the current working directory
         def run_command(container, command):
-            return container.exec_run(command, workdir="/workspace")
+            return container.exec_run(command, workdir='/workspace')
 
         # Use ThreadPoolExecutor to control command and set timeout
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -163,16 +170,18 @@ class DockerInteractive:
             try:
                 exit_code, logs = future.result(timeout=self.timeout)
             except concurrent.futures.TimeoutError:
-                print("Command timed out, killing process...")
+                logger.exception(
+                    'Command timed out, killing process...', exc_info=False)
                 pid = self.get_pid(cmd)
                 if pid is not None:
-                    self.container.exec_run(f"kill -9 {pid}", workdir="/workspace")
+                    self.container.exec_run(
+                        f"kill -9 {pid}", workdir='/workspace')
                 return -1, f'Command: "{cmd}" timed out'
-        return exit_code, logs.decode("utf-8")
+        return exit_code, logs.decode('utf-8')
 
     def execute_in_background(self, cmd: str) -> BackgroundCommand:
         result = self.container.exec_run(
-            self.get_exec_cmd(cmd), socket=True, workdir="/workspace"
+            self.get_exec_cmd(cmd), socket=True, workdir='/workspace'
         )
         result.output._sock.setblocking(0)
         pid = self.get_pid(cmd)
@@ -182,9 +191,9 @@ class DockerInteractive:
         return bg_cmd
 
     def get_pid(self, cmd):
-        exec_result = self.container.exec_run("ps aux")
-        processes = exec_result.output.decode("utf-8").splitlines()
-        cmd = " ".join(self.get_exec_cmd(cmd))
+        exec_result = self.container.exec_run('ps aux')
+        processes = exec_result.output.decode('utf-8').splitlines()
+        cmd = ' '.join(self.get_exec_cmd(cmd))
 
         for process in processes:
             if cmd in process:
@@ -194,10 +203,11 @@ class DockerInteractive:
 
     def kill_background(self, id: int) -> BackgroundCommand:
         if id not in self.background_commands:
-            raise ValueError("Invalid background command id")
+            raise ValueError('Invalid background command id')
         bg_cmd = self.background_commands[id]
         if bg_cmd.pid is not None:
-            self.container.exec_run(f"kill -9 {bg_cmd.pid}", workdir="/workspace")
+            self.container.exec_run(
+                f"kill -9 {bg_cmd.pid}", workdir='/workspace')
         bg_cmd.result.output.close()
         self.background_commands.pop(id)
         return bg_cmd
@@ -212,8 +222,8 @@ class DockerInteractive:
         try:
             docker_client = docker.from_env()
         except docker.errors.DockerException as e:
-            print("Please check Docker is running using `docker ps`.")
-            print(f"Error! {e}", flush=True)
+            logger.exception(
+                'Please check Docker is running using `docker ps`.', exc_info=False)
             raise e
 
         try:
@@ -221,7 +231,7 @@ class DockerInteractive:
             container.stop()
             container.remove()
             elapsed = 0
-            while container.status != "exited":
+            while container.status != 'exited':
                 time.sleep(1)
                 elapsed += 1
                 if elapsed > self.timeout:
@@ -234,7 +244,7 @@ class DockerInteractive:
         try:
             docker_client = docker.from_env()
             container = docker_client.containers.get(self.container_name)
-            if container.status == "running":
+            if container.status == 'running':
                 self.container = container
                 return True
             return False
@@ -244,8 +254,9 @@ class DockerInteractive:
     def restart_docker_container(self):
         try:
             self.stop_docker_container()
+            logger.info('Container stopped')
         except docker.errors.DockerException as e:
-            print(f"Failed to stop container: {e}")
+            logger.exception('Failed to stop container', exc_info=False)
             raise e
 
         try:
@@ -255,32 +266,34 @@ class DockerInteractive:
             # start the container
             self.container = docker_client.containers.run(
                 self.container_image,
-                command="tail -f /dev/null",
-                network_mode="host",
-                working_dir="/workspace",
+                command='tail -f /dev/null',
+                network_mode='host',
+                working_dir='/workspace',
                 name=self.container_name,
                 detach=True,
-                volumes={self.workspace_dir: {"bind": "/workspace", "mode": "rw"}},
+                volumes={self.workspace_dir: {
+                    'bind': '/workspace', 'mode': 'rw'}},
             )
+            logger.info('Container started')
         except Exception as e:
-            print(f"Failed to start container: {e}")
+            logger.exception('Failed to start container', exc_info=False)
             raise e
 
         # wait for container to be ready
         elapsed = 0
-        while self.container.status != "running":
-            if self.container.status == "exited":
-                print("container exited")
-                print("container logs:")
-                print(self.container.logs())
+        while self.container.status != 'running':
+            if self.container.status == 'exited':
+                logger.info('container exited')
+                logger.info('container logs:')
+                logger.info(self.container.logs())
                 break
             time.sleep(1)
             elapsed += 1
             self.container = docker_client.containers.get(self.container_name)
             if elapsed > self.timeout:
                 break
-        if self.container.status != "running":
-            raise Exception("Failed to start container")
+        if self.container.status != 'running':
+            raise Exception('Failed to start container')
 
     # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
     def cleanup(self):
@@ -292,16 +305,17 @@ class DockerInteractive:
             pass
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     import argparse
 
-    parser = argparse.ArgumentParser(description="Interactive Docker container")
+    parser = argparse.ArgumentParser(
+        description='Interactive Docker container')
     parser.add_argument(
-        "-d",
-        "--directory",
+        '-d',
+        '--directory',
         type=str,
         default=None,
-        help="The directory to mount as the workspace in the Docker container.",
+        help='The directory to mount as the workspace in the Docker container.',
     )
     args = parser.parse_args()
 
@@ -310,10 +324,11 @@ if __name__ == "__main__":
             workspace_dir=args.directory,
         )
     except Exception as e:
-        print(f"Failed to start Docker container: {e}")
+        logger.exception('Failed to start Docker container: %s', e)
         sys.exit(1)
 
-    print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
+    logger.info(
+        "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
 
     bg_cmd = docker_interactive.execute_in_background(
         "while true; do echo 'dot ' && sleep 1; done"
@@ -323,24 +338,24 @@ if __name__ == "__main__":
     try:
         while True:
             try:
-                user_input = input(">>> ")
+                user_input = input('>>> ')
             except EOFError:
-                print("\nExiting...")
+                logger.info('Exiting...')
                 break
-            if user_input.lower() == "exit":
-                print("Exiting...")
+            if user_input.lower() == 'exit':
+                logger.info('Exiting...')
                 break
-            if user_input.lower() == "kill":
+            if user_input.lower() == 'kill':
                 docker_interactive.kill_background(bg_cmd.id)
-                print("Background process killed")
+                logger.info('Background process killed')
                 continue
             exit_code, output = docker_interactive.execute(user_input)
-            print("exit code:", exit_code)
-            print(output + "\n", end="")
+            logger.info('exit code: %d', exit_code)
+            logger.info(output)
             if bg_cmd.id in docker_interactive.background_commands:
                 logs = docker_interactive.read_logs(bg_cmd.id)
-                print("background logs:", logs, "\n")
+                logger.info('background logs: %s', logs)
             sys.stdout.flush()
     except KeyboardInterrupt:
-        print("\nExiting...")
+        logger.info('Exiting...')
     docker_interactive.close()
diff --git a/opendevin/server/agent/manager.py b/opendevin/server/agent/manager.py
index 7cc3c5ba3a..2dbbe272ad 100644
--- a/opendevin/server/agent/manager.py
+++ b/opendevin/server/agent/manager.py
@@ -13,6 +13,7 @@ from opendevin.llm.llm import LLM
 from opendevin.observation import NullObservation, Observation, UserMessageObservation
 from opendevin.server.session import session_manager
 from opendevin.schema import ActionType
+from opendevin.logging import opendevin_logger as logger
 
 DEFAULT_API_KEY = config.get("LLM_API_KEY")
 DEFAULT_BASE_URL = config.get("LLM_BASE_URL")
@@ -121,7 +122,7 @@ class AgentManager:
             model = LLM_MODEL
 
         if not os.path.exists(directory):
-            print(f"Workspace directory {directory} does not exist. Creating it...")
+            logger.info("Workspace directory %s does not exist. Creating it...", directory)
             os.makedirs(directory)
         directory = os.path.relpath(directory, os.getcwd())
         llm = LLM(model=model, api_key=api_key, base_url=api_base)
@@ -137,7 +138,7 @@ class AgentManager:
                 callbacks=[self.on_agent_event],
             )
         except Exception:
-            print("Error creating controller.")
+            logger.exception("Error creating controller.")
             await self.send_error(
                 "Error creating controller. Please check Docker is running using `docker ps`."
             )
diff --git a/opendevin/server/auth/auth.py b/opendevin/server/auth/auth.py
index 9362bb2f7e..2ffc936808 100644
--- a/opendevin/server/auth/auth.py
+++ b/opendevin/server/auth/auth.py
@@ -1,6 +1,7 @@
 import os
 import jwt
 from typing import Dict
+from opendevin.logging import opendevin_logger as logger
 
 JWT_SECRET = os.getenv("JWT_SECRET", "5ecRe7")
 
@@ -10,11 +11,11 @@ def get_sid_from_token(token: str) -> str:
     try:
         payload = jwt.decode(token, JWT_SECRET, algorithms=["HS256"])
         if payload is None:
-            print("Invalid token")
+            logger.error("Invalid token")
             return ""
         return payload["sid"]
     except Exception as e:
-        print("Error decoding token:", e)
+        logger.exception("Error decoding token: %s", e)
         return ""
 
 
diff --git a/opendevin/server/session/msg_stack.py b/opendevin/server/session/msg_stack.py
index ed9daf5289..0cf89bcc80 100644
--- a/opendevin/server/session/msg_stack.py
+++ b/opendevin/server/session/msg_stack.py
@@ -6,6 +6,7 @@ import uuid
 from typing import Dict, List
 
 from opendevin.schema.action import ActionType
+from opendevin.logging import opendevin_logger as logger
 
 
 CACHE_DIR = os.getenv("CACHE_DIR", "cache")
@@ -44,7 +45,7 @@ class MessageStack:
         self._save_messages()
 
     def handle_signal(self, signum, _):
-        print(f"Received signal {signum}, exiting...")
+        logger.info("Received signal %s, exiting...", signum)
         self.close()
         exit(0)
 
diff --git a/opendevin/server/session/session.py b/opendevin/server/session/session.py
index 4746e5c158..04ad079842 100644
--- a/opendevin/server/session/session.py
+++ b/opendevin/server/session/session.py
@@ -3,6 +3,8 @@ from typing import Dict, Callable
 from fastapi import WebSocket, WebSocketDisconnect
 from .msg_stack import message_stack
 
+from opendevin.logging import opendevin_logger as logger
+
 DEL_DELT_SEC = 60 * 60 * 5
 
 
@@ -33,12 +35,12 @@ class Session:
                 await dispatch(action, data)
         except WebSocketDisconnect:
             self.is_alive = False
-            print(f"WebSocket disconnected, sid: {self.sid}")
+            logger.info("WebSocket disconnected, sid: %s", self.sid)
         except RuntimeError as e:
             # WebSocket is not connected
             if "WebSocket is not connected" in str(e):
                 self.is_alive = False
-            print(f"Error in loop_recv: {e}")
+            logger.exception("Error in loop_recv: %s", e)
 
     async def send(self, data: Dict[str, object]) -> bool:
         try: