Small refactor to improve (CodeAct)Agent extensibility (#8244)

This commit is contained in:
Chase
2025-05-04 10:21:54 -07:00
committed by GitHub
parent 2c085ae79e
commit fc32efb52e
4 changed files with 58 additions and 32 deletions

View File

@@ -1,8 +1,12 @@
import copy
import os
from collections import deque
from typing import TYPE_CHECKING
from litellm import ChatCompletionToolParam
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
@@ -20,7 +24,7 @@ from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.events.action import Action, AgentFinishAction, MessageAction
from openhands.events.action import AgentFinishAction, MessageAction
from openhands.events.event import Event
from openhands.llm.llm import LLM
from openhands.memory.condenser import Condenser
@@ -75,23 +79,26 @@ class CodeActAgent(Agent):
- config (AgentConfig): The configuration for this agent
"""
super().__init__(llm, config)
self.pending_actions: deque[Action] = deque()
self.pending_actions: deque['Action'] = deque()
self.reset()
self.tools = self._get_tools()
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
# Create a ConversationMemory instance
self.conversation_memory = ConversationMemory(self.config, self.prompt_manager)
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {type(self.condenser)}')
self.response_to_actions_fn = codeact_function_calling.response_to_actions
@property
def prompt_manager(self) -> PromptManager:
if self._prompt_manager is None:
self._prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
def _get_tools(self) -> list[ChatCompletionToolParam]:
return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
# For these models, we use short tool descriptions ( < 1024 tokens)
# to avoid hitting the OpenAI token limit for tool descriptions.
SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
@@ -130,7 +137,7 @@ class CodeActAgent(Agent):
super().reset()
self.pending_actions.clear()
def step(self, state: State) -> Action:
def step(self, state: State) -> 'Action':
"""Performs one step using the CodeAct Agent.
This includes gathering info on previous steps and prompting the model to make a command to execute.
@@ -198,9 +205,7 @@ class CodeActAgent(Agent):
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = self.response_to_actions_fn(
response, mcp_tool_names=list(self.mcp_tools.keys())
)
actions = self.response_to_actions(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)
@@ -274,3 +279,8 @@ class CodeActAgent(Agent):
self.conversation_memory.apply_prompt_caching(messages)
return messages
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Translate an LLM completion into a list of agent actions.

    Delegates to the module-level function-calling translator; subclasses
    may override this hook to supply their own translation layer.
    """
    mcp_names = list(self.mcp_tools.keys())
    return codeact_function_calling.response_to_actions(response, mcp_tool_names=mcp_names)

View File

@@ -4,6 +4,13 @@ ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only t
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.readonly_agent import (
function_calling as readonly_function_calling,
@@ -41,24 +48,27 @@ class ReadOnlyAgent(CodeActAgent):
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for this agent
"""
# Initialize the CodeActAgent class but we'll override some of its behavior
# Initialize the CodeActAgent class; some of it is overridden with class methods
super().__init__(llm, config)
# Override the tools to only include read-only tools
# Get the read-only tools from our own function_calling module
self.tools = readonly_function_calling.get_tools()
# Set up our own prompt manager
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
self.response_to_actions_fn = readonly_function_calling.response_to_actions
logger.debug(
f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
)
@property
def prompt_manager(self) -> PromptManager:
    """Lazily construct (and cache) the prompt manager for this agent.

    Uses the `prompts` directory that sits next to this module.
    """
    if self._prompt_manager is None:
        prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
        self._prompt_manager = PromptManager(prompt_dir=prompts_dir)
    return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
    """Return the toolset for this agent, restricted to read-only tools.

    Overrides the parent implementation by sourcing tools from this
    agent's own function_calling module.
    """
    readonly_tools = readonly_function_calling.get_tools()
    return readonly_tools
def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
"""Sets the list of MCP tools for the agent.
@@ -68,3 +78,8 @@ class ReadOnlyAgent(CodeActAgent):
logger.warning(
'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
)
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Translate an LLM completion into actions using the read-only translator.

    Mirrors the parent hook but routes through this agent's own
    function_calling module.
    """
    tool_names = list(self.mcp_tools.keys())
    return readonly_function_calling.response_to_actions(response, mcp_tool_names=tool_names)