Small refactor to improve (CodeAct)Agent extensibility (#8244)

This commit is contained in:
Chase
2025-05-04 10:21:54 -07:00
committed by GitHub
parent 2c085ae79e
commit fc32efb52e
4 changed files with 58 additions and 32 deletions

View File

@@ -1,8 +1,12 @@
import copy
import os
from collections import deque
from typing import TYPE_CHECKING
from litellm import ChatCompletionToolParam
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
@@ -20,7 +24,7 @@ from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.events.action import Action, AgentFinishAction, MessageAction
from openhands.events.action import AgentFinishAction, MessageAction
from openhands.events.event import Event
from openhands.llm.llm import LLM
from openhands.memory.condenser import Condenser
@@ -75,23 +79,26 @@ class CodeActAgent(Agent):
- config (AgentConfig): The configuration for this agent
"""
super().__init__(llm, config)
self.pending_actions: deque[Action] = deque()
self.pending_actions: deque['Action'] = deque()
self.reset()
self.tools = self._get_tools()
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
# Create a ConversationMemory instance
self.conversation_memory = ConversationMemory(self.config, self.prompt_manager)
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {type(self.condenser)}')
self.response_to_actions_fn = codeact_function_calling.response_to_actions
@property
def prompt_manager(self) -> PromptManager:
if self._prompt_manager is None:
self._prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
def _get_tools(self) -> list[ChatCompletionToolParam]:
return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
# For these models, we use short tool descriptions ( < 1024 tokens)
# to avoid hitting the OpenAI token limit for tool descriptions.
SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
@@ -130,7 +137,7 @@ class CodeActAgent(Agent):
super().reset()
self.pending_actions.clear()
def step(self, state: State) -> Action:
def step(self, state: State) -> 'Action':
"""Performs one step using the CodeAct Agent.
This includes gathering info on previous steps and prompting the model to make a command to execute.
@@ -198,9 +205,7 @@ class CodeActAgent(Agent):
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = self.response_to_actions_fn(
response, mcp_tool_names=list(self.mcp_tools.keys())
)
actions = self.response_to_actions(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)
@@ -274,3 +279,8 @@ class CodeActAgent(Agent):
self.conversation_memory.apply_prompt_caching(messages)
return messages
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Translate an LLM completion into a list of agent actions.

    Delegates to the module-level function-calling translator; subclasses
    may override this hook to supply their own translation layer.
    """
    mcp_names = list(self.mcp_tools.keys())
    return codeact_function_calling.response_to_actions(response, mcp_tool_names=mcp_names)

View File

@@ -4,6 +4,13 @@ ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only t
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.readonly_agent import (
function_calling as readonly_function_calling,
@@ -41,24 +48,27 @@ class ReadOnlyAgent(CodeActAgent):
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for this agent
"""
# Initialize the CodeActAgent class but we'll override some of its behavior
# Initialize the CodeActAgent class; some of it is overridden with class methods
super().__init__(llm, config)
# Override the tools to only include read-only tools
# Get the read-only tools from our own function_calling module
self.tools = readonly_function_calling.get_tools()
# Set up our own prompt manager
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
self.response_to_actions_fn = readonly_function_calling.response_to_actions
logger.debug(
f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
)
@property
def prompt_manager(self) -> PromptManager:
    """Lazily construct (and cache) the prompt manager for this agent.

    Uses the `prompts` directory that sits next to this module.
    """
    if self._prompt_manager is None:
        prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
        self._prompt_manager = PromptManager(prompt_dir=prompts_dir)
    return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
    """Return the toolset for this agent, restricted to read-only tools.

    Overrides the parent implementation by sourcing tools from this
    agent's own function_calling module.
    """
    readonly_tools = readonly_function_calling.get_tools()
    return readonly_tools
def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
"""Sets the list of MCP tools for the agent.
@@ -68,3 +78,8 @@ class ReadOnlyAgent(CodeActAgent):
logger.warning(
'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
)
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Translate an LLM completion into actions using the read-only translator.

    Mirrors the parent hook but routes through this agent's own
    function_calling module.
    """
    tool_names = list(self.mcp_tools.keys())
    return readonly_function_calling.response_to_actions(response, mcp_tool_names=tool_names)