From fc32efb52efe22a135a1bd6ab92c5e331f3f72a5 Mon Sep 17 00:00:00 2001 From: Chase Date: Sun, 4 May 2025 10:21:54 -0700 Subject: [PATCH] Small refactor to improve (CodeAct)Agent extensibility (#8244) --- .../agenthub/codeact_agent/codeact_agent.py | 36 ++++++++++------- .../agenthub/readonly_agent/readonly_agent.py | 39 +++++++++++++------ openhands/controller/agent.py | 12 ++++-- tests/unit/test_agents.py | 3 -- 4 files changed, 58 insertions(+), 32 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 98e1d85cda..55de69df30 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -1,8 +1,12 @@ import copy import os from collections import deque +from typing import TYPE_CHECKING -from litellm import ChatCompletionToolParam +if TYPE_CHECKING: + from litellm import ChatCompletionToolParam + from openhands.events.action import Action + from openhands.llm.llm import ModelResponse import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool @@ -20,7 +24,7 @@ from openhands.controller.state.state import State from openhands.core.config import AgentConfig from openhands.core.logger import openhands_logger as logger from openhands.core.message import Message -from openhands.events.action import Action, AgentFinishAction, MessageAction +from openhands.events.action import AgentFinishAction, MessageAction from openhands.events.event import Event from openhands.llm.llm import LLM from openhands.memory.condenser import Condenser @@ -75,23 +79,26 @@ class CodeActAgent(Agent): - config (AgentConfig): The configuration for this agent """ super().__init__(llm, config) - self.pending_actions: deque[Action] = deque() + self.pending_actions: deque['Action'] = deque() self.reset() self.tools = self._get_tools() - self.prompt_manager = PromptManager( - prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'), - ) - # Create a ConversationMemory instance self.conversation_memory = ConversationMemory(self.config, self.prompt_manager) self.condenser = Condenser.from_config(self.config.condenser) logger.debug(f'Using condenser: {type(self.condenser)}') - self.response_to_actions_fn = codeact_function_calling.response_to_actions + @property + def prompt_manager(self) -> PromptManager: + if self._prompt_manager is None: + self._prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'), + ) - def _get_tools(self) -> list[ChatCompletionToolParam]: + return self._prompt_manager + + def _get_tools(self) -> list['ChatCompletionToolParam']: # For these models, we use short tool descriptions ( < 1024 tokens) # to avoid hitting the OpenAI token limit for tool descriptions. SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4'] @@ -130,7 +137,7 @@ class CodeActAgent(Agent): super().reset() self.pending_actions.clear() - def step(self, state: State) -> Action: + def step(self, state: State) -> 'Action': """Performs one step using the CodeAct Agent. This includes gathering info on previous steps and prompting the model to make a command to execute. @@ -198,9 +205,7 @@ class CodeActAgent(Agent): params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)} response = self.llm.completion(**params) logger.debug(f'Response from LLM: {response}') - actions = self.response_to_actions_fn( - response, mcp_tool_names=list(self.mcp_tools.keys()) - ) + actions = self.response_to_actions(response) logger.debug(f'Actions after response_to_actions: {actions}') for action in actions: self.pending_actions.append(action) @@ -274,3 +279,8 @@ class CodeActAgent(Agent): self.conversation_memory.apply_prompt_caching(messages) return messages + + def response_to_actions(self, response: 'ModelResponse') -> list['Action']: + return codeact_function_calling.response_to_actions( + response, mcp_tool_names=list(self.mcp_tools.keys()) + ) diff --git a/openhands/agenthub/readonly_agent/readonly_agent.py b/openhands/agenthub/readonly_agent/readonly_agent.py index ec03eb7c58..fdbb70a1b9 100644 --- a/openhands/agenthub/readonly_agent/readonly_agent.py +++ b/openhands/agenthub/readonly_agent/readonly_agent.py @@ -4,6 +4,13 @@ ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only t import os +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from litellm import ChatCompletionToolParam + from openhands.events.action import Action + from openhands.llm.llm import ModelResponse + from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent from openhands.agenthub.readonly_agent import ( function_calling as readonly_function_calling, @@ -41,24 +48,27 @@ class ReadOnlyAgent(CodeActAgent): - llm (LLM): The llm to be used by this agent - config (AgentConfig): The configuration for this agent """ - # Initialize the CodeActAgent class but we'll override some of its behavior + # Initialize the CodeActAgent class; some of it is overridden with class methods super().__init__(llm, config) - # Override the tools to only include read-only tools - # Get the read-only tools from our own function_calling module - self.tools = readonly_function_calling.get_tools() - - # Set up our own prompt manager - self.prompt_manager = PromptManager( - prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'), - ) - - self.response_to_actions_fn = readonly_function_calling.response_to_actions - logger.debug( f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}" ) + @property + def prompt_manager(self) -> PromptManager: + # Set up our own prompt manager + if self._prompt_manager is None: + self._prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'), + ) + return self._prompt_manager + + def _get_tools(self) -> list['ChatCompletionToolParam']: + # Override the tools to only include read-only tools + # Get the read-only tools from our own function_calling module + return readonly_function_calling.get_tools() + def set_mcp_tools(self, mcp_tools: list[dict]) -> None: """Sets the list of MCP tools for the agent. @@ -68,3 +78,8 @@ class ReadOnlyAgent(CodeActAgent): logger.warning( 'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.' ) + + def response_to_actions(self, response: 'ModelResponse') -> list['Action']: + return readonly_function_calling.response_to_actions( + response, mcp_tool_names=list(self.mcp_tools.keys()) + ) diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py index b5e70209ad..26de052b8c 100644 --- a/openhands/controller/agent.py +++ b/openhands/controller/agent.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from openhands.core.config import AgentConfig from openhands.events.action import Action from openhands.events.action.message import SystemMessageAction + from openhands.utils.prompt import PromptManager from litellm import ChatCompletionToolParam from openhands.core.exceptions import ( @@ -19,9 +20,6 @@ from openhands.events.event import EventSource from openhands.llm.llm import LLM from openhands.runtime.plugins import PluginRequirement -if TYPE_CHECKING: - from openhands.utils.prompt import PromptManager - class Agent(ABC): DEPRECATED = False @@ -43,10 +41,16 @@ class Agent(ABC): self.llm = llm self.config = config self._complete = False - self.prompt_manager: 'PromptManager' | None = None + self._prompt_manager: 'PromptManager' | None = None self.mcp_tools: dict[str, ChatCompletionToolParam] = {} self.tools: list = [] + @property + def prompt_manager(self) -> 'PromptManager': + if self._prompt_manager is None: + raise ValueError(f'Prompt manager not initialized for agent {self.name}') + return self._prompt_manager + def get_system_message(self) -> 'SystemMessageAction | None': """ Returns a SystemMessageAction containing the system message and tools. diff --git a/tests/unit/test_agents.py b/tests/unit/test_agents.py index 641411e03f..01b4bb0a6f 100644 --- a/tests/unit/test_agents.py +++ b/tests/unit/test_agents.py @@ -437,9 +437,6 @@ def test_enhance_messages_adds_newlines_between_consecutive_user_messages( agent: CodeActAgent, ): """Test that _enhance_messages adds newlines between consecutive user messages.""" - # Set up the prompt manager - agent.prompt_manager = Mock() - # Create consecutive user messages with various content types messages = [ # First user message with TextContent only