From 30109e8f20fdc2e3431cf64bc76fbebdc392a2ab Mon Sep 17 00:00:00 2001
From: "Ryan H. Tran" <descience.thh10@gmail.com>
Date: Sun, 16 Mar 2025 15:48:13 +0700
Subject: [PATCH] Separate tool descriptions to support models with limited
 description length (#7258)

---
 .../agenthub/codeact_agent/codeact_agent.py   |   1 +
 .../codeact_agent/function_calling.py         |  34 +++++-
 .../agenthub/codeact_agent/tools/__init__.py  |   8 +-
 .../agenthub/codeact_agent/tools/bash.py      |  56 +++++----
 .../codeact_agent/tools/str_replace_editor.py | 111 +++++++++++-------
 tests/unit/test_codeact_agent.py              |  32 ++++-
 6 files changed, 167 insertions(+), 75 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index fa041afe25..bd7802e285 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -70,6 +70,7 @@ class CodeActAgent(Agent):
             codeact_enable_browsing=self.config.codeact_enable_browsing,
             codeact_enable_jupyter=self.config.codeact_enable_jupyter,
             codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
+            llm=self.llm,
         )
         logger.debug(
             f'TOOLS loaded for CodeActAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}'
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
index 58485f4e14..bb63cb8f13 100644
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -12,13 +12,13 @@ from litellm import (
 
 from openhands.agenthub.codeact_agent.tools import (
     BrowserTool,
-    CmdRunTool,
     FinishTool,
     IPythonTool,
     LLMBasedFileEditTool,
-    StrReplaceEditorTool,
     ThinkTool,
     WebReadTool,
+    create_cmd_run_tool,
+    create_str_replace_editor_tool,
 )
 from openhands.core.exceptions import (
     FunctionCallNotExistsError,
@@ -39,6 +39,7 @@ from openhands.events.action import (
 )
 from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.tool import ToolCallMetadata
+from openhands.llm import LLM
 
 
 def combine_thought(action: Action, thought: str) -> Action:
@@ -80,7 +81,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
             # CmdRunTool (Bash)
             # ================================================
 
-            if tool_call.function.name == CmdRunTool['function']['name']:
+            if tool_call.function.name == create_cmd_run_tool()['function']['name']:
                 if 'command' not in arguments:
                     raise FunctionCallValidationError(
                         f'Missing required argument "command" in tool call {tool_call.function.name}'
@@ -131,7 +132,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                     start=arguments.get('start', 1),
                     end=arguments.get('end', -1),
                 )
-            elif tool_call.function.name == StrReplaceEditorTool['function']['name']:
+            elif (
+                tool_call.function.name
+                == create_str_replace_editor_tool()['function']['name']
+            ):
                 if 'command' not in arguments:
                     raise FunctionCallValidationError(
                         f'Missing required argument "command" in tool call {tool_call.function.name}'
@@ -219,8 +223,22 @@ def get_tools(
     codeact_enable_browsing: bool = False,
     codeact_enable_llm_editor: bool = False,
     codeact_enable_jupyter: bool = False,
+    llm: LLM | None = None,
 ) -> list[ChatCompletionToolParam]:
-    tools = [CmdRunTool, ThinkTool, FinishTool]
+    SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1']
+
+    use_simplified_tool_desc = False
+    if llm is not None:
+        use_simplified_tool_desc = any(
+            model_substr in llm.config.model
+            for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS
+        )
+
+    tools = [
+        create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc),
+        ThinkTool,
+        FinishTool,
+    ]
     if codeact_enable_browsing:
         tools.append(WebReadTool)
         tools.append(BrowserTool)
@@ -229,5 +247,9 @@ def get_tools(
     if codeact_enable_llm_editor:
         tools.append(LLMBasedFileEditTool)
     else:
-        tools.append(StrReplaceEditorTool)
+        tools.append(
+            create_str_replace_editor_tool(
+                use_simplified_description=use_simplified_tool_desc
+            )
+        )
     return tools
diff --git a/openhands/agenthub/codeact_agent/tools/__init__.py b/openhands/agenthub/codeact_agent/tools/__init__.py
index 7895b2141f..49dcba2ebb 100644
--- a/openhands/agenthub/codeact_agent/tools/__init__.py
+++ b/openhands/agenthub/codeact_agent/tools/__init__.py
@@ -1,19 +1,19 @@
-from .bash import CmdRunTool
+from .bash import create_cmd_run_tool
 from .browser import BrowserTool
 from .finish import FinishTool
 from .ipython import IPythonTool
 from .llm_based_edit import LLMBasedFileEditTool
-from .str_replace_editor import StrReplaceEditorTool
+from .str_replace_editor import create_str_replace_editor_tool
 from .think import ThinkTool
 from .web_read import WebReadTool
 
 __all__ = [
     'BrowserTool',
-    'CmdRunTool',
+    'create_cmd_run_tool',
     'FinishTool',
     'IPythonTool',
     'LLMBasedFileEditTool',
-    'StrReplaceEditorTool',
+    'create_str_replace_editor_tool',
     'WebReadTool',
     'ThinkTool',
 ]
diff --git a/openhands/agenthub/codeact_agent/tools/bash.py b/openhands/agenthub/codeact_agent/tools/bash.py
index e09ff4053c..60265912a9 100644
--- a/openhands/agenthub/codeact_agent/tools/bash.py
+++ b/openhands/agenthub/codeact_agent/tools/bash.py
@@ -1,6 +1,6 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
 
-_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
+_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
 
 ### Command Execution
 * One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together.
@@ -22,25 +22,39 @@ _BASH_DESCRIPTION = """Execute a bash command in the terminal within a persisten
 * Output truncation: If the output exceeds a maximum length, it will be truncated before being returned.
 """
 
-CmdRunTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='execute_bash',
-        description=_BASH_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'type': 'string',
-                    'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
-                },
-                'is_input': {
-                    'type': 'string',
-                    'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
-                    'enum': ['true', 'false'],
+_SIMPLIFIED_BASH_DESCRIPTION = """Execute a bash command in the terminal.
+* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
+* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
+* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together."""  # Short variant, used when use_simplified_description=True.
+
+
+def create_cmd_run_tool(  # Build the tool definition for `execute_bash`.
+    use_simplified_description: bool = False,  # True selects the short description text.
+) -> ChatCompletionToolParam:
+    description = (  # Only this text differs between variants; the schema below is fixed.
+        _SIMPLIFIED_BASH_DESCRIPTION
+        if use_simplified_description
+        else _DETAILED_BASH_DESCRIPTION
+    )
+    return ChatCompletionToolParam(
+        type='function',
+        function=ChatCompletionToolParamFunctionChunk(
+            name='execute_bash',
+            description=description,
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'command': {
+                        'type': 'string',
+                        'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
+                    },
+                    'is_input': {
+                        'type': 'string',  # Boolean flag is passed as the strings 'true'/'false' (see enum).
+                        'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
+                        'enum': ['true', 'false'],
+                    },
                 },
+                'required': ['command'],  # `is_input` is optional.
             },
-            'required': ['command'],
-        },
-    ),
-)
+        ),
+    )
diff --git a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
index 599fb215f5..f6752a0121 100644
--- a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
+++ b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
@@ -1,6 +1,6 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
 
-_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
+_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
 * State is persistent across command calls and discussions with the user
 * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
 * The `create` command cannot be used if the specified `path` already exists as a file
@@ -31,46 +31,73 @@ CRITICAL REQUIREMENTS FOR USING THIS TOOL:
 Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.
 """
 
-StrReplaceEditorTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='str_replace_editor',
-        description=_STR_REPLACE_EDITOR_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
-                    'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
-                    'type': 'string',
-                },
-                'path': {
-                    'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
-                    'type': 'string',
-                },
-                'file_text': {
-                    'description': 'Required parameter of `create` command, with the content of the file to be created.',
-                    'type': 'string',
-                },
-                'old_str': {
-                    'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
-                    'type': 'string',
-                },
-                'new_str': {
-                    'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
-                    'type': 'string',
-                },
-                'insert_line': {
-                    'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
-                    'type': 'integer',
-                },
-                'view_range': {
-                    'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
-                    'items': {'type': 'integer'},
-                    'type': 'array',
+_SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
+* State is persistent across command calls and discussions with the user
+* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
+* The `create` command cannot be used if the specified `path` already exists as a file
+* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
+* The `undo_edit` command will revert the last edit made to the file at `path`
+Notes for using the `str_replace` command:
+* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
+* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
+* The `new_str` parameter should contain the edited lines that should replace the `old_str`
+"""  # Short variant, used when use_simplified_description=True.
+
+
+def create_str_replace_editor_tool(  # Build the tool definition for `str_replace_editor`.
+    use_simplified_description: bool = False,  # True selects the short description text.
+) -> ChatCompletionToolParam:
+    description = (  # Only this text differs between variants; the schema below is fixed.
+        _SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION
+        if use_simplified_description
+        else _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION
+    )
+    return ChatCompletionToolParam(
+        type='function',
+        function=ChatCompletionToolParamFunctionChunk(
+            name='str_replace_editor',
+            description=description,
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'command': {
+                        'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
+                        'enum': [  # Must stay in sync with the options listed in the description above.
+                            'view',
+                            'create',
+                            'str_replace',
+                            'insert',
+                            'undo_edit',
+                        ],
+                        'type': 'string',
+                    },
+                    'path': {
+                        'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
+                        'type': 'string',
+                    },
+                    'file_text': {
+                        'description': 'Required parameter of `create` command, with the content of the file to be created.',
+                        'type': 'string',
+                    },
+                    'old_str': {
+                        'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
+                        'type': 'string',
+                    },
+                    'new_str': {
+                        'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
+                        'type': 'string',
+                    },
+                    'insert_line': {
+                        'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
+                        'type': 'integer',
+                    },
+                    'view_range': {
+                        'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
+                        'items': {'type': 'integer'},
+                        'type': 'array',
+                    },
                 },
+                'required': ['command', 'path'],  # Per-command arguments are only described as required in their text.
             },
-            'required': ['command', 'path'],
-        },
-    ),
-)
+        ),
+    )
diff --git a/tests/unit/test_codeact_agent.py b/tests/unit/test_codeact_agent.py
index 55cd1eb6bd..4b81d8babb 100644
--- a/tests/unit/test_codeact_agent.py
+++ b/tests/unit/test_codeact_agent.py
@@ -6,11 +6,11 @@ from litellm import ChatCompletionMessageToolCall
 from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
 from openhands.agenthub.codeact_agent.function_calling import (
     BrowserTool,
-    CmdRunTool,
     IPythonTool,
     LLMBasedFileEditTool,
-    StrReplaceEditorTool,
     WebReadTool,
+    create_cmd_run_tool,
+    create_str_replace_editor_tool,
     get_tools,
     response_to_actions,
 )
@@ -119,6 +119,7 @@ def test_get_tools_with_options():
 
 
 def test_cmd_run_tool():
+    CmdRunTool = create_cmd_run_tool()
     assert CmdRunTool['type'] == 'function'
     assert CmdRunTool['function']['name'] == 'execute_bash'
     assert 'command' in CmdRunTool['function']['parameters']['properties']
@@ -149,6 +150,7 @@ def test_llm_based_file_edit_tool():
 
 
 def test_str_replace_editor_tool():
+    StrReplaceEditorTool = create_str_replace_editor_tool()
     assert StrReplaceEditorTool['type'] == 'function'
     assert StrReplaceEditorTool['function']['name'] == 'str_replace_editor'
 
@@ -236,7 +238,11 @@ def test_step_with_no_pending_actions(mock_state: State):
     mock_response.choices[0].message.content = 'Task completed'
     mock_response.choices[0].message.tool_calls = []
 
+    mock_config = Mock()
+    mock_config.model = 'mock_model'
+
     llm = Mock()
+    llm.config = mock_config
     llm.completion = Mock(return_value=mock_response)
     llm.is_function_calling_active = Mock(return_value=True)  # Enable function calling
     llm.is_caching_prompt_active = Mock(return_value=False)
@@ -260,6 +266,28 @@ def test_step_with_no_pending_actions(mock_state: State):
     assert action.content == 'Task completed'
 
 
+def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
+    """Check that the LLM model name decides whether agent tools get short or detailed descriptions."""
+    o3_mock_config = Mock()
+    o3_mock_config.model = 'mock_o3_model'  # Name containing the 'o3' substring.
+
+    llm = Mock()
+    llm.config = o3_mock_config
+
+    agent = CodeActAgent(llm=llm, config=AgentConfig())
+    for tool in agent.tools:
+        # Every tool description must stay under 1024 characters for this model
+        assert len(tool['function']['description']) < 1024
+
+    sonnet_mock_config = Mock()
+    sonnet_mock_config.model = 'mock_sonnet_model'
+
+    llm.config = sonnet_mock_config  # Same Mock LLM, now reporting a different model name.
+    agent = CodeActAgent(llm=llm, config=AgentConfig())
+    # At least one tool should carry a detailed description longer than 1024 characters
+    assert any(len(tool['function']['description']) > 1024 for tool in agent.tools)
+
+
 def test_mismatched_tool_call_events(mock_state: State):
     """Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages."""
     agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())