From fc32efb52efe22a135a1bd6ab92c5e331f3f72a5 Mon Sep 17 00:00:00 2001
From: Chase <chase@p-1.ai>
Date: Sun, 4 May 2025 10:21:54 -0700
Subject: [PATCH] Small refactor to improve (CodeAct)Agent extensibility
 (#8244)

---
 .../agenthub/codeact_agent/codeact_agent.py   | 36 ++++++++++-------
 .../agenthub/readonly_agent/readonly_agent.py | 39 +++++++++++++------
 openhands/controller/agent.py                 | 12 ++++--
 tests/unit/test_agents.py                     |  3 --
 4 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 98e1d85cda..55de69df30 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -1,8 +1,12 @@
 import copy
 import os
 from collections import deque
+from typing import TYPE_CHECKING
 
-from litellm import ChatCompletionToolParam
+if TYPE_CHECKING:
+    from litellm import ChatCompletionToolParam
+    from openhands.events.action import Action
+    from openhands.llm.llm import ModelResponse
 
 import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
 from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
@@ -20,7 +24,7 @@ from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
-from openhands.events.action import Action, AgentFinishAction, MessageAction
+from openhands.events.action import AgentFinishAction, MessageAction
 from openhands.events.event import Event
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
@@ -75,23 +79,26 @@ class CodeActAgent(Agent):
         - config (AgentConfig): The configuration for this agent
         """
         super().__init__(llm, config)
-        self.pending_actions: deque[Action] = deque()
+        self.pending_actions: deque['Action'] = deque()
         self.reset()
         self.tools = self._get_tools()
 
-        self.prompt_manager = PromptManager(
-            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
-        )
-
         # Create a ConversationMemory instance
         self.conversation_memory = ConversationMemory(self.config, self.prompt_manager)
 
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {type(self.condenser)}')
 
-        self.response_to_actions_fn = codeact_function_calling.response_to_actions
+    @property
+    def prompt_manager(self) -> PromptManager:
+        if self._prompt_manager is None:
+            self._prompt_manager = PromptManager(
+                prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
+            )
 
-    def _get_tools(self) -> list[ChatCompletionToolParam]:
+        return self._prompt_manager
+
+    def _get_tools(self) -> list['ChatCompletionToolParam']:
         # For these models, we use short tool descriptions ( < 1024 tokens)
         # to avoid hitting the OpenAI token limit for tool descriptions.
         SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
@@ -130,7 +137,7 @@ class CodeActAgent(Agent):
         super().reset()
         self.pending_actions.clear()
 
-    def step(self, state: State) -> Action:
+    def step(self, state: State) -> 'Action':
         """Performs one step using the CodeAct Agent.
 
         This includes gathering info on previous steps and prompting the model to make a command to execute.
@@ -198,9 +205,7 @@ class CodeActAgent(Agent):
         params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
         response = self.llm.completion(**params)
         logger.debug(f'Response from LLM: {response}')
-        actions = self.response_to_actions_fn(
-            response, mcp_tool_names=list(self.mcp_tools.keys())
-        )
+        actions = self.response_to_actions(response)
         logger.debug(f'Actions after response_to_actions: {actions}')
         for action in actions:
             self.pending_actions.append(action)
@@ -274,3 +279,8 @@ class CodeActAgent(Agent):
             self.conversation_memory.apply_prompt_caching(messages)
 
         return messages
+
+    def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
+        return codeact_function_calling.response_to_actions(
+            response, mcp_tool_names=list(self.mcp_tools.keys())
+        )
diff --git a/openhands/agenthub/readonly_agent/readonly_agent.py b/openhands/agenthub/readonly_agent/readonly_agent.py
index ec03eb7c58..fdbb70a1b9 100644
--- a/openhands/agenthub/readonly_agent/readonly_agent.py
+++ b/openhands/agenthub/readonly_agent/readonly_agent.py
@@ -4,6 +4,13 @@ ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only t
 
 import os
 
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from litellm import ChatCompletionToolParam
+    from openhands.events.action import Action
+    from openhands.llm.llm import ModelResponse
+
 from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
 from openhands.agenthub.readonly_agent import (
     function_calling as readonly_function_calling,
@@ -41,24 +48,27 @@ class ReadOnlyAgent(CodeActAgent):
         - llm (LLM): The llm to be used by this agent
         - config (AgentConfig): The configuration for this agent
         """
-        # Initialize the CodeActAgent class but we'll override some of its behavior
+        # Initialize the CodeActAgent class; parts of its behavior are overridden by the methods and properties defined below
         super().__init__(llm, config)
 
-        # Override the tools to only include read-only tools
-        # Get the read-only tools from our own function_calling module
-        self.tools = readonly_function_calling.get_tools()
-
-        # Set up our own prompt manager
-        self.prompt_manager = PromptManager(
-            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
-        )
-
-        self.response_to_actions_fn = readonly_function_calling.response_to_actions
-
         logger.debug(
             f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
         )
 
+    @property
+    def prompt_manager(self) -> PromptManager:
+        # Set up our own prompt manager
+        if self._prompt_manager is None:
+            self._prompt_manager = PromptManager(
+                prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
+            )
+        return self._prompt_manager
+
+    def _get_tools(self) -> list['ChatCompletionToolParam']:
+        # Override the tools to only include read-only tools
+        # Get the read-only tools from our own function_calling module
+        return readonly_function_calling.get_tools()
+
     def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
         """Sets the list of MCP tools for the agent.
 
@@ -68,3 +78,8 @@ class ReadOnlyAgent(CodeActAgent):
         logger.warning(
             'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
         )
+
+    def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
+        return readonly_function_calling.response_to_actions(
+            response, mcp_tool_names=list(self.mcp_tools.keys())
+        )
diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py
index b5e70209ad..26de052b8c 100644
--- a/openhands/controller/agent.py
+++ b/openhands/controller/agent.py
@@ -8,6 +8,7 @@ if TYPE_CHECKING:
     from openhands.core.config import AgentConfig
     from openhands.events.action import Action
     from openhands.events.action.message import SystemMessageAction
+    from openhands.utils.prompt import PromptManager
 from litellm import ChatCompletionToolParam
 
 from openhands.core.exceptions import (
@@ -19,9 +20,6 @@ from openhands.events.event import EventSource
 from openhands.llm.llm import LLM
 from openhands.runtime.plugins import PluginRequirement
 
-if TYPE_CHECKING:
-    from openhands.utils.prompt import PromptManager
-
 
 class Agent(ABC):
     DEPRECATED = False
@@ -43,10 +41,16 @@ class Agent(ABC):
         self.llm = llm
         self.config = config
         self._complete = False
-        self.prompt_manager: 'PromptManager' | None = None
+        self._prompt_manager: 'PromptManager' | None = None
         self.mcp_tools: dict[str, ChatCompletionToolParam] = {}
         self.tools: list = []
 
+    @property
+    def prompt_manager(self) -> 'PromptManager':
+        if self._prompt_manager is None:
+            raise ValueError(f'Prompt manager not initialized for agent {self.name}')
+        return self._prompt_manager
+
     def get_system_message(self) -> 'SystemMessageAction | None':
         """
         Returns a SystemMessageAction containing the system message and tools.
diff --git a/tests/unit/test_agents.py b/tests/unit/test_agents.py
index 641411e03f..01b4bb0a6f 100644
--- a/tests/unit/test_agents.py
+++ b/tests/unit/test_agents.py
@@ -437,9 +437,6 @@ def test_enhance_messages_adds_newlines_between_consecutive_user_messages(
     agent: CodeActAgent,
 ):
     """Test that _enhance_messages adds newlines between consecutive user messages."""
-    # Set up the prompt manager
-    agent.prompt_manager = Mock()
-
     # Create consecutive user messages with various content types
     messages = [
         # First user message with TextContent only