From 30109e8f20fdc2e3431cf64bc76fbebdc392a2ab Mon Sep 17 00:00:00 2001 From: "Ryan H. Tran" Date: Sun, 16 Mar 2025 15:48:13 +0700 Subject: [PATCH] Separate tool descriptions to support models with limited description length (#7258) --- .../agenthub/codeact_agent/codeact_agent.py | 1 + .../codeact_agent/function_calling.py | 34 +++++- .../agenthub/codeact_agent/tools/__init__.py | 8 +- .../agenthub/codeact_agent/tools/bash.py | 56 +++++---- .../codeact_agent/tools/str_replace_editor.py | 111 +++++++++++------- tests/unit/test_codeact_agent.py | 32 ++++- 6 files changed, 167 insertions(+), 75 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index fa041afe25..bd7802e285 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -70,6 +70,7 @@ class CodeActAgent(Agent): codeact_enable_browsing=self.config.codeact_enable_browsing, codeact_enable_jupyter=self.config.codeact_enable_jupyter, codeact_enable_llm_editor=self.config.codeact_enable_llm_editor, + llm=self.llm, ) logger.debug( f'TOOLS loaded for CodeActAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}' diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index 58485f4e14..bb63cb8f13 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -12,13 +12,13 @@ from litellm import ( from openhands.agenthub.codeact_agent.tools import ( BrowserTool, - CmdRunTool, FinishTool, IPythonTool, LLMBasedFileEditTool, - StrReplaceEditorTool, ThinkTool, WebReadTool, + create_cmd_run_tool, + create_str_replace_editor_tool, ) from openhands.core.exceptions import ( FunctionCallNotExistsError, @@ -39,6 +39,7 @@ from openhands.events.action import ( ) from openhands.events.event import FileEditSource, FileReadSource from 
openhands.events.tool import ToolCallMetadata +from openhands.llm import LLM def combine_thought(action: Action, thought: str) -> Action: @@ -80,7 +81,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]: # CmdRunTool (Bash) # ================================================ - if tool_call.function.name == CmdRunTool['function']['name']: + if tool_call.function.name == create_cmd_run_tool()['function']['name']: if 'command' not in arguments: raise FunctionCallValidationError( f'Missing required argument "command" in tool call {tool_call.function.name}' @@ -131,7 +132,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]: start=arguments.get('start', 1), end=arguments.get('end', -1), ) - elif tool_call.function.name == StrReplaceEditorTool['function']['name']: + elif ( + tool_call.function.name + == create_str_replace_editor_tool()['function']['name'] + ): if 'command' not in arguments: raise FunctionCallValidationError( f'Missing required argument "command" in tool call {tool_call.function.name}' @@ -219,8 +223,22 @@ def get_tools( codeact_enable_browsing: bool = False, codeact_enable_llm_editor: bool = False, codeact_enable_jupyter: bool = False, + llm: LLM | None = None, ) -> list[ChatCompletionToolParam]: - tools = [CmdRunTool, ThinkTool, FinishTool] + SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1'] + + use_simplified_tool_desc = False + if llm is not None: + use_simplified_tool_desc = any( + model_substr in llm.config.model + for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS + ) + + tools = [ + create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc), + ThinkTool, + FinishTool, + ] if codeact_enable_browsing: tools.append(WebReadTool) tools.append(BrowserTool) @@ -229,5 +247,9 @@ def get_tools( if codeact_enable_llm_editor: tools.append(LLMBasedFileEditTool) else: - tools.append(StrReplaceEditorTool) + tools.append( + create_str_replace_editor_tool( + 
use_simplified_description=use_simplified_tool_desc + ) + ) return tools diff --git a/openhands/agenthub/codeact_agent/tools/__init__.py b/openhands/agenthub/codeact_agent/tools/__init__.py index 7895b2141f..49dcba2ebb 100644 --- a/openhands/agenthub/codeact_agent/tools/__init__.py +++ b/openhands/agenthub/codeact_agent/tools/__init__.py @@ -1,19 +1,19 @@ -from .bash import CmdRunTool +from .bash import create_cmd_run_tool from .browser import BrowserTool from .finish import FinishTool from .ipython import IPythonTool from .llm_based_edit import LLMBasedFileEditTool -from .str_replace_editor import StrReplaceEditorTool +from .str_replace_editor import create_str_replace_editor_tool from .think import ThinkTool from .web_read import WebReadTool __all__ = [ 'BrowserTool', - 'CmdRunTool', + 'create_cmd_run_tool', 'FinishTool', 'IPythonTool', 'LLMBasedFileEditTool', - 'StrReplaceEditorTool', + 'create_str_replace_editor_tool', 'WebReadTool', 'ThinkTool', ] diff --git a/openhands/agenthub/codeact_agent/tools/bash.py b/openhands/agenthub/codeact_agent/tools/bash.py index e09ff4053c..60265912a9 100644 --- a/openhands/agenthub/codeact_agent/tools/bash.py +++ b/openhands/agenthub/codeact_agent/tools/bash.py @@ -1,6 +1,6 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk -_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. +_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. ### Command Execution * One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together. @@ -22,25 +22,39 @@ _BASH_DESCRIPTION = """Execute a bash command in the terminal within a persisten * Output truncation: If the output exceeds a maximum length, it will be truncated before being returned. 
""" -CmdRunTool = ChatCompletionToolParam( - type='function', - function=ChatCompletionToolParamFunctionChunk( - name='execute_bash', - description=_BASH_DESCRIPTION, - parameters={ - 'type': 'object', - 'properties': { - 'command': { - 'type': 'string', - 'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.', - }, - 'is_input': { - 'type': 'string', - 'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.', - 'enum': ['true', 'false'], +_SIMPLIFIED_BASH_DESCRIPTION = """Execute a bash command in the terminal. +* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`. +* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process. +* One command at a time: You can only execute one bash command at a time. 
If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.""" + + +def create_cmd_run_tool( + use_simplified_description: bool = False, +) -> ChatCompletionToolParam: + description = ( + _SIMPLIFIED_BASH_DESCRIPTION + if use_simplified_description + else _DETAILED_BASH_DESCRIPTION + ) + return ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name='execute_bash', + description=description, + parameters={ + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.', + }, + 'is_input': { + 'type': 'string', + 'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. 
Default is False.', + 'enum': ['true', 'false'], + }, }, + 'required': ['command'], }, - 'required': ['command'], - }, - ), -) + ), + ) diff --git a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py index 599fb215f5..f6752a0121 100644 --- a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py +++ b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py @@ -1,6 +1,6 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk -_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format +_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format * State is persistent across command calls and discussions with the user * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep * The `create` command cannot be used if the specified `path` already exists as a file @@ -31,46 +31,73 @@ CRITICAL REQUIREMENTS FOR USING THIS TOOL: Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each. """ -StrReplaceEditorTool = ChatCompletionToolParam( - type='function', - function=ChatCompletionToolParamFunctionChunk( - name='str_replace_editor', - description=_STR_REPLACE_EDITOR_DESCRIPTION, - parameters={ - 'type': 'object', - 'properties': { - 'command': { - 'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.', - 'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'], - 'type': 'string', - }, - 'path': { - 'description': 'Absolute path to file or directory, e.g. 
`/workspace/file.py` or `/workspace`.', - 'type': 'string', - }, - 'file_text': { - 'description': 'Required parameter of `create` command, with the content of the file to be created.', - 'type': 'string', - }, - 'old_str': { - 'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.', - 'type': 'string', - }, - 'new_str': { - 'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.', - 'type': 'string', - }, - 'insert_line': { - 'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.', - 'type': 'integer', - }, - 'view_range': { - 'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.', - 'items': {'type': 'integer'}, - 'type': 'array', +_SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format +* State is persistent across command calls and discussions with the user +* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep +* The `create` command cannot be used if the specified `path` already exists as a file +* If a `command` generates a long output, it will be truncated and marked with `<response clipped>` +* The `undo_edit` command will revert the last edit made to the file at `path` +Notes for using the `str_replace` command: +* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces! 
+* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique +* The `new_str` parameter should contain the edited lines that should replace the `old_str` +""" + + +def create_str_replace_editor_tool( + use_simplified_description: bool = False, +) -> ChatCompletionToolParam: + description = ( + _SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION + if use_simplified_description + else _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION + ) + return ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name='str_replace_editor', + description=description, + parameters={ + 'type': 'object', + 'properties': { + 'command': { + 'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.', + 'enum': [ + 'view', + 'create', + 'str_replace', + 'insert', + 'undo_edit', + ], + 'type': 'string', + }, + 'path': { + 'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.', + 'type': 'string', + }, + 'file_text': { + 'description': 'Required parameter of `create` command, with the content of the file to be created.', + 'type': 'string', + }, + 'old_str': { + 'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.', + 'type': 'string', + }, + 'new_str': { + 'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.', + 'type': 'string', + }, + 'insert_line': { + 'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.', + 'type': 'integer', + }, + 'view_range': { + 'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. 
If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.', + 'items': {'type': 'integer'}, + 'type': 'array', + }, }, + 'required': ['command', 'path'], }, - 'required': ['command', 'path'], - }, - ), -) + ), + ) diff --git a/tests/unit/test_codeact_agent.py b/tests/unit/test_codeact_agent.py index 55cd1eb6bd..4b81d8babb 100644 --- a/tests/unit/test_codeact_agent.py +++ b/tests/unit/test_codeact_agent.py @@ -6,11 +6,11 @@ from litellm import ChatCompletionMessageToolCall from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent from openhands.agenthub.codeact_agent.function_calling import ( BrowserTool, - CmdRunTool, IPythonTool, LLMBasedFileEditTool, - StrReplaceEditorTool, WebReadTool, + create_cmd_run_tool, + create_str_replace_editor_tool, get_tools, response_to_actions, ) @@ -119,6 +119,7 @@ def test_get_tools_with_options(): def test_cmd_run_tool(): + CmdRunTool = create_cmd_run_tool() assert CmdRunTool['type'] == 'function' assert CmdRunTool['function']['name'] == 'execute_bash' assert 'command' in CmdRunTool['function']['parameters']['properties'] @@ -149,6 +150,7 @@ def test_llm_based_file_edit_tool(): def test_str_replace_editor_tool(): + StrReplaceEditorTool = create_str_replace_editor_tool() assert StrReplaceEditorTool['type'] == 'function' assert StrReplaceEditorTool['function']['name'] == 'str_replace_editor' @@ -236,7 +238,11 @@ def test_step_with_no_pending_actions(mock_state: State): mock_response.choices[0].message.content = 'Task completed' mock_response.choices[0].message.tool_calls = [] + mock_config = Mock() + mock_config.model = 'mock_model' + llm = Mock() + llm.config = mock_config llm.completion = Mock(return_value=mock_response) llm.is_function_calling_active = Mock(return_value=True) # Enable function calling llm.is_caching_prompt_active = 
Mock(return_value=False) @@ -260,6 +266,28 @@ def test_step_with_no_pending_actions(mock_state: State): assert action.content == 'Task completed' +def test_correct_tool_description_loaded_based_on_model_name(mock_state: State): + """Tests that the simplified tool descriptions are loaded for specific models.""" + o3_mock_config = Mock() + o3_mock_config.model = 'mock_o3_model' + + llm = Mock() + llm.config = o3_mock_config + + agent = CodeActAgent(llm=llm, config=AgentConfig()) + for tool in agent.tools: + # Assert all descriptions have less than 1024 characters + assert len(tool['function']['description']) < 1024 + + sonnet_mock_config = Mock() + sonnet_mock_config.model = 'mock_sonnet_model' + + llm.config = sonnet_mock_config + agent = CodeActAgent(llm=llm, config=AgentConfig()) + # Assert existence of the detailed tool descriptions that are longer than 1024 characters + assert any(len(tool['function']['description']) > 1024 for tool in agent.tools) + + def test_mismatched_tool_call_events(mock_state: State): """Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages.""" agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())