Compare commits

...

79 Commits

Author SHA1 Message Date
Engel Nyst
6a49317c07 run precommit 2025-06-24 13:50:26 +02:00
Engel Nyst
99f30a3b24 tweaks 2025-06-23 22:46:03 +02:00
Engel Nyst
fb12270e83 Merge branch 'main' of https://github.com/All-Hands-AI/OpenHands into enyst/gemini 2025-06-23 22:39:20 +02:00
Engel Nyst
e823d88567 fix prompt, add line numbers notes 2025-05-19 15:37:09 +02:00
Engel Nyst
448262350c Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-05-17 19:13:26 +02:00
Engel Nyst
5daf947bbe Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-05-09 23:21:05 +02:00
Engel Nyst
d82930322d Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-05-07 17:46:00 +02:00
openhands
e9e5a5f6e6 Fix pr #7748: [Experiment] Gemini - API error tolerance, editing, prompting 2025-05-03 23:51:38 +00:00
Engel Nyst
17b6d87f82 Merge branch 'main' into enyst/gemini 2025-05-04 01:34:58 +02:00
Engel Nyst
dd0bc42e4f Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-28 09:41:27 +02:00
Engel Nyst
2d70af8bd5 refactor swe-bench instruction in template files 2025-04-22 16:42:10 +02:00
Engel Nyst
05478ad5b6 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-22 11:01:20 +02:00
Engel Nyst
fa2a5ab3cf Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-22 00:07:52 +02:00
Engel Nyst
824f30c89a Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-20 11:47:28 +02:00
Engel Nyst
051c033257 fix lint 2025-04-19 21:10:58 +02:00
Engel Nyst
1e67c5a10c Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-19 20:45:29 +02:00
Engel Nyst
709e0035a9 fix, add diff tests 2025-04-18 13:52:53 +02:00
Engel Nyst
0b96072604 fix thought, refactor diff 2025-04-18 13:41:27 +02:00
Engel Nyst
fcb56fb87f lint 2025-04-17 23:13:29 +02:00
Engel Nyst
c839573f4d adjust the reminder too 2025-04-17 18:45:04 +02:00
Engel Nyst
9b8633275f adjust prompts for comprehensiveness 2025-04-17 18:41:30 +02:00
Engel Nyst
9a88e7b5d0 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-17 17:50:43 +02:00
Engel Nyst
e27a5ef755 In-context reminder 2025-04-15 23:18:56 +02:00
Engel Nyst
d31171bb6d fix naming of new config options 2025-04-15 22:52:54 +02:00
Engel Nyst
a2aa1bcc93 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-15 22:45:30 +02:00
Engel Nyst
237b08c2a9 fix KeyError when the instance failed prematurely 2025-04-15 17:04:36 +02:00
Engel Nyst
7e888a6943 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-14 20:00:09 +02:00
Engel Nyst
a7b713883d md prompts 2025-04-13 20:52:27 +02:00
Engel Nyst
222242e148 tweak 2025-04-13 20:00:32 +02:00
Engel Nyst
d29b402ef4 Revert "prompt adjust take two" - too much emphasis
This reverts commit ab7739be15.
2025-04-13 19:59:16 +02:00
Engel Nyst
fe91cf627e add tests for multiple diffs 2025-04-13 19:44:00 +02:00
Engel Nyst
ab7739be15 prompt adjust take two 2025-04-13 15:16:24 +02:00
Engel Nyst
3952fa1fb7 Adjust instruction prompt 2025-04-13 14:28:37 +02:00
Engel Nyst
d021341568 fix instruction 2025-04-12 17:23:30 +02:00
Engel Nyst
75d1cb611b allow for different instructions 2025-04-12 17:21:07 +02:00
Engel Nyst
83b76d0a2b allow for more search in the instruction 2025-04-12 17:15:52 +02:00
Engel Nyst
eb966cb694 use Malformed exception for diff errors 2025-04-12 17:15:52 +02:00
OpenHands Bot
f689296897 🤖 Auto-fix Python linting issues 2025-04-11 21:38:51 +00:00
Engel Nyst
c987504145 Merge branch 'main' into enyst/gemini 2025-04-11 23:14:36 +02:00
Engel Nyst
b6a76a5a4a restore pre-commit 2025-04-11 23:12:46 +02:00
Engel Nyst
5bf81b4d1d Simplify the swe-bench instruction
Say what to do and what not to do, don't say how.
2025-04-11 22:51:23 +02:00
Engel Nyst
d30bf9622e fix diffs when the response contains both them and tool calls 2025-04-11 22:49:13 +02:00
Engel Nyst
cd6e716c74 fix import 2025-04-11 18:57:09 +02:00
Engel Nyst
3dbf74f400 add metadata 2025-04-11 18:42:39 +02:00
Engel Nyst
63b238a62b poetry lock 2025-04-11 18:10:41 +02:00
Engel Nyst
b39d6d0c17 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-11 18:10:21 +02:00
Engel Nyst
ebfe189e19 add editing examples 2025-04-11 18:02:41 +02:00
Engel Nyst
e6eb088272 add microagent 2025-04-11 15:48:45 +02:00
Engel Nyst
fd9aa8bb7b remove unlocalized strings script 2025-04-11 10:59:46 +02:00
Engel Nyst
072c7f026f Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-11 10:53:54 +02:00
Engel Nyst
37a6cba50a remove redundant check 2025-04-11 10:47:03 +02:00
Engel Nyst
14b5dd8bfa remove aci fenced diff 2025-04-11 10:39:35 +02:00
Engel Nyst
a47c2bf0d7 clean out tool 2025-04-11 10:33:53 +02:00
Engel Nyst
e05bb8b0e8 clean out the tool variant 2025-04-11 10:19:13 +02:00
Engel Nyst
28f0949e37 parse edit diffs from message.content 2025-04-11 10:02:14 +02:00
Engel Nyst
8e471d3f4a fix lock 2025-04-10 02:25:35 +02:00
Engel Nyst
4546a63320 Merge branch 'main' into enyst/gemini 2025-04-10 02:03:43 +02:00
Engel Nyst
1f08228905 poetry lock 2025-04-10 01:51:15 +02:00
Engel Nyst
5c8ecb72f5 update aci 2025-04-10 01:50:03 +02:00
Engel Nyst
99f0a91430 move edit to aci subclass 2025-04-10 01:45:50 +02:00
Engel Nyst
954569d29b move diff editor to oh_aci 2025-04-10 01:28:52 +02:00
openhands
4f1520cf6d add and re-route utility tools to aci 2025-04-09 22:59:47 +02:00
openhands
19d7077514 fix client, prompt, use simple names 2025-04-09 21:21:00 +02:00
Engel Nyst
223635a1d7 error tolerance: whitespace, best sequence match 2025-04-09 15:48:23 +02:00
Engel Nyst
31bd1597cd fix test 2025-04-09 02:12:18 +02:00
Engel Nyst
41149836d9 Merge branch 'enyst/gemini' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-09 01:39:16 +02:00
Engel Nyst
726cb43f3e add license information 2025-04-09 01:38:09 +02:00
Engel Nyst
39a838a972 implement fenced diff 2025-04-09 01:06:37 +02:00
Engel Nyst
99d1ee3429 Revert "server tweak"
This reverts commit 29960e2fce.
2025-04-09 00:52:51 +02:00
Engel Nyst
29960e2fce server tweak 2025-04-09 00:52:08 +02:00
Engel Nyst
41c3dd4b85 glue in the fenced edit tool 2025-04-08 23:32:19 +02:00
Engel Nyst
c1ba9f6a3c add fenced diff 2025-04-08 23:08:26 +02:00
Engel Nyst
a312fa9fb3 Delete tests/unit/test_log_streamer.py 2025-04-08 15:10:12 +02:00
Engel Nyst
13e673be8e Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/gemini 2025-04-08 14:02:32 +02:00
Engel Nyst
738f7d0497 re-run on LLM or runtime API error 2025-04-07 19:59:15 +02:00
Engel Nyst
a84a749a48 account for retry delay 2025-04-07 19:57:38 +02:00
Engel Nyst
e79c0259d3 add top_k 2025-04-07 19:57:07 +02:00
Engel Nyst
6ddf228477 Merge branch 'main' of github.com:All-Hands-AI/OpenHands into enyst/small-stuff 2025-04-06 14:22:45 +02:00
Engel Nyst
5c500ce7bf reduce severity 2025-04-06 14:22:24 +02:00
29 changed files with 2700 additions and 82 deletions

View File

@@ -14,7 +14,7 @@ OpenHands includes and adapts the following open source projects. We are gratefu
#### [Aider](https://github.com/paul-gauthier/aider)
- License: Apache License 2.0
- Description: AI pair programming tool. OpenHands has adapted and integrated its linter module for code-related tasks in [`agentskills utilities`](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills/utils/aider)
- Description: AI pair programming tool. OpenHands has adapted and integrated its linter module for code-related tasks in [`agentskills utilities`](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills/utils/aider) and its fenced diff edit.
#### [BrowserGym](https://github.com/ServiceNow/BrowserGym)
- License: Apache License 2.0

View File

@@ -247,6 +247,7 @@ def get_config(
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=ENABLE_LLM_EDITOR,
enable_mcp=False,
enable_llm_diff=True,
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
@@ -289,6 +290,17 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
# Set DEBUG environment variable
action = CmdRunAction(command="echo 'export DEBUG=1' >> ~/.bashrc")
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to export DEBUG=1: {str(obs)}',
)
# inject the init script
script_dir = os.path.dirname(__file__)

View File

@@ -39,6 +39,7 @@ from openhands.events.event import Event
from openhands.events.serialization.event import event_to_dict
from openhands.events.utils import get_pairs_from_events
from openhands.memory.condenser import get_condensation_metadata
from openhands.runtime.utils.request import RequestHTTPError
class EvalMetadata(BaseModel):
@@ -533,13 +534,14 @@ def compatibility_for_eval_history_pairs(
def is_fatal_evaluation_error(error: str | None) -> bool:
"""
The AgentController class overrides last error for certain exceptions
We want to ensure those exeption do not overlap with fatal exceptions defined here
This is because we do a comparisino against the stringified error
Check if the error is a fatal evaluation error, which triggers a rerun.
"""
if not error:
return False
# IMPORTANT: The AgentController class overrides last error for certain exceptions
# We need to ensure those exceptions do not overlap with fatal exceptions defined here
# This is because we do a comparison against the stringified error
FATAL_EXCEPTIONS = [
AgentRuntimeError,
AgentRuntimeBuildError,
@@ -549,6 +551,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
ConnectionError,
RequestHTTPError,
]
if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):

View File

@@ -14,11 +14,13 @@ from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
from openhands.agenthub.codeact_agent.tools.browser import BrowserTool
from openhands.agenthub.codeact_agent.tools.finish import FinishTool
from openhands.agenthub.codeact_agent.tools.ipython import IPythonTool
from openhands.agenthub.codeact_agent.tools.list_directory import ListDirectoryTool
from openhands.agenthub.codeact_agent.tools.llm_based_edit import LLMBasedFileEditTool
from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
create_str_replace_editor_tool,
)
from openhands.agenthub.codeact_agent.tools.think import ThinkTool
from openhands.agenthub.codeact_agent.tools.view_file import ViewFileTool
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
@@ -95,6 +97,7 @@ class CodeActAgent(Agent):
if self._prompt_manager is None:
self._prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
config=self.config,
system_prompt_filename=self.config.system_prompt_filename,
)
@@ -126,14 +129,30 @@ class CodeActAgent(Agent):
tools.append(BrowserTool)
if self.config.enable_jupyter:
tools.append(IPythonTool)
if self.config.enable_llm_editor:
# Determine which editor tool(s) and utility tools to add based on config
if self.config.enable_llm_diff:
# Use LLM Diff mode:
# - LLM generates diffs in text, only non-edit tools available
# - No UndoEditTool
tools.append(ViewFileTool)
tools.append(ListDirectoryTool)
# NO EDITING TOOLS (LLMBasedFileEditTool, str_replace_editor)
# NO UndoEditTool
elif self.config.enable_llm_editor:
# Use LLM-based editor tool + separate utils (NO Undo)
tools.append(LLMBasedFileEditTool)
tools.append(ViewFileTool)
tools.append(ListDirectoryTool)
# No UndoEditTool here
elif self.config.enable_editor:
# Fallback to the complete str_replace_editor (includes view, list, undo)
tools.append(
create_str_replace_editor_tool(
use_short_description=use_short_tool_desc
)
)
return tools
def reset(self) -> None:
@@ -270,4 +289,5 @@ class CodeActAgent(Agent):
return codeact_function_calling.response_to_actions(
response,
mcp_tool_names=list(self.mcp_tools.keys()),
is_llm_diff_enabled=self.config.enable_llm_diff,
)

View File

@@ -9,12 +9,19 @@ from litellm import (
ModelResponse,
)
from openhands.agenthub.codeact_agent.llm_diff_parser import (
DiffBlock,
parse_llm_response_for_diffs,
)
from openhands.agenthub.codeact_agent.tools import (
BrowserTool,
FinishTool,
IPythonTool,
ListDirectoryTool,
LLMBasedFileEditTool,
ThinkTool,
UndoEditTool,
ViewFileTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
)
@@ -51,25 +58,139 @@ def combine_thought(action: Action, thought: str) -> Action:
def response_to_actions(
response: ModelResponse, mcp_tool_names: list[str] | None = None
response: ModelResponse,
mcp_tool_names: list[str] | None = None,
is_llm_diff_enabled: bool = False,
) -> list[Action]:
actions: list[Action] = []
"""
Parses the LLM response and converts it into a list of OpenHands Actions.
The function handles two main types of responses:
1. Tool calls - When the LLM response contains function calls
2. Diff blocks - When is_llm_diff_enabled=True and the response contains code diffs
For tool calls, it converts each function call into a corresponding Action:
- execute_bash -> CmdRunAction
- execute_ipython_cell -> IPythonRunCellAction
- browser -> BrowseInteractiveAction
- web_read -> BrowseURLAction
- str_replace_editor -> FileEditAction/FileReadAction
- think -> AgentThinkAction
- finish -> AgentFinishAction
- delegate_to_browsing_agent -> AgentDelegateAction
For diff blocks (when is_llm_diff_enabled=True), it parses blocks in the format:
```language
filename.ext
<<<<<<< SEARCH
old code
=======
new code
>>>>>>> REPLACE
```
Each diff block is converted to a FileEditAction with:
- path = filename
- search = old code
- replace = new code
- impl_source = FileEditSource.LLM_DIFF
Examples:
# Tool call response
response = {
"tool_calls": [{
"function": {
"name": "execute_bash",
"arguments": {"command": "ls", "is_input": "false"}
}
}]
}
-> [CmdRunAction(command="ls", is_input=False)]
# Diff block response
response = {
"content": '''
```python
app.py
<<<<<<< SEARCH
old_code
=======
new_code
>>>>>>> REPLACE
```
'''
}
-> [FileEditAction(path="app.py", search="old_code", replace="new_code")]
Args:
response: The ModelResponse from the LLM.
mcp_tool_names: Optional list of MCP tool names to handle.
is_llm_diff_enabled: If True, will attempt to parse diff blocks from message
content if no tool calls exist. Defaults to False.
Returns:
A list of Action objects.
Note:
- If both tool calls and diff blocks exist, tool calls take precedence
- Message content outside of tool calls/diff blocks becomes MessageAction
- At least one action is always returned (AgentThinkAction as fallback)
"""
all_actions: list[Action] = []
parsed_llm_diff_actions: list[Action] = []
tool_call_actions: list[Action] = []
message_content: str = ''
diff_parse_error: Exception | None = None
action: Action
assert len(response.choices) == 1, 'Only one choice is supported for now'
choice = response.choices[0]
assistant_msg = choice.message
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
# Check if there's assistant_msg.content. If so, add it to the thought
thought = ''
if isinstance(assistant_msg.content, str):
thought = assistant_msg.content
elif isinstance(assistant_msg.content, list):
for msg in assistant_msg.content:
if msg['type'] == 'text':
thought += msg['text']
# Process each tool call to OpenHands action
# 1. Extract message content
if isinstance(assistant_msg.content, str):
message_content = assistant_msg.content
elif isinstance(assistant_msg.content, list):
for msg in assistant_msg.content:
if msg['type'] == 'text':
message_content += msg['text']
# 2. Try parsing diff blocks if enabled and content exists
if is_llm_diff_enabled and message_content:
logger.debug('LLM Diff mode enabled, attempting to parse response content.')
try:
# Call the updated parser, expecting list[DiffBlock]
parsed_blocks: list[DiffBlock] = parse_llm_response_for_diffs(
message_content
)
if parsed_blocks: # Check if the list is not empty
logger.info(
f'Parsed {len(parsed_blocks)} diff blocks from LLM response.'
)
# Create FileEditActions from the DiffBlock objects
for block in parsed_blocks:
action = FileEditAction(
path=block.filename,
search=block.search,
replace=block.replace,
impl_source=FileEditSource.LLM_DIFF,
)
parsed_llm_diff_actions.append(action)
except ValueError as e:
logger.error(f'Error parsing LLM diff blocks: {e}')
diff_parse_error = e # Store error for potential later use
except Exception as e:
logger.error(
f'Unexpected error during LLM diff parsing: {e}', exc_info=True
)
diff_parse_error = e # Store error
# 3. Process tool calls if they exist
tool_calls_exist = hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls
if tool_calls_exist:
# Process each tool call
for i, tool_call in enumerate(assistant_msg.tool_calls):
action: Action
logger.debug(f'Tool call in function_calling.py: {tool_call}')
try:
arguments = json.loads(tool_call.function.arguments)
@@ -198,6 +319,39 @@ def response_to_actions(
**valid_kwargs,
)
# ================================================
# New Utility Tools (Routing to OH_ACI)
# ================================================
elif tool_call.function.name == ViewFileTool['function']['name']:
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
action = FileReadAction(
path=arguments['path'],
impl_source=FileReadSource.OH_ACI,
view_range=arguments.get('view_range', None),
)
elif tool_call.function.name == ListDirectoryTool['function']['name']:
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
action = FileReadAction(
path=arguments['path'],
impl_source=FileReadSource.OH_ACI,
# view_range is not applicable for directories
)
elif tool_call.function.name == UndoEditTool['function']['name']:
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
action = FileEditAction(
path=arguments['path'],
command='undo_edit',
impl_source=FileEditSource.OH_ACI,
)
# ================================================
# AgentThinkAction
# ================================================
elif tool_call.function.name == ThinkTool['function']['name']:
@@ -226,9 +380,6 @@ def response_to_actions(
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
)
# We only add thought to the first action
if i == 0:
action = combine_thought(action, thought)
# Add metadata for tool calling
action.tool_call_metadata = ToolCallMetadata(
tool_call_id=tool_call.id,
@@ -236,21 +387,66 @@ def response_to_actions(
model_response=response,
total_calls_in_response=len(assistant_msg.tool_calls),
)
actions.append(action)
else:
actions.append(
MessageAction(
content=str(assistant_msg.content) if assistant_msg.content else '',
wait_for_response=True,
)
)
tool_call_actions.append(action)
# Add response id to actions
# This will ensure we can match both actions without tool calls (e.g. MessageAction)
# and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.)
# with the token usage data
for action in actions:
# 4. Combine actions: diffs first (if any), then tool calls (if any)
all_actions.extend(parsed_llm_diff_actions)
all_actions.extend(tool_call_actions)
# 5. Handle the case where no actions were generated (neither diffs nor tool calls)
if not all_actions:
# If there was a diff parsing error, report it (even if no blocks were parsed)
if diff_parse_error:
all_actions.append(
MessageAction(
content=f'[Error parsing diff blocks: {diff_parse_error}]\nOriginal Content:\n{message_content}',
wait_for_response=True,
)
)
# Otherwise, treat as a regular message if content exists
elif message_content:
all_actions.append(
MessageAction(
content=message_content,
wait_for_response=True,
)
)
# If no actions AND no content, add a default Think action
else:
logger.warning(
f'No actions generated and no message content for response: {response.id}'
)
all_actions.append(
AgentThinkAction(
thought='[No actionable content or tool calls found in response]'
)
)
# If actions *were* generated, determine and apply thought
else:
# 6. Determine contextual thought and apply to the first non-AgentThinkAction
contextual_thought = message_content.strip() if message_content else None
if contextual_thought:
for i, action in enumerate(all_actions):
if not isinstance(action, AgentThinkAction):
all_actions[i] = combine_thought(action, contextual_thought)
break # Apply thought only to the first eligible action
# 7. Add response id to all actions
for action in all_actions:
action.response_id = response.id
assert len(actions) >= 1
return actions
# 8. Final check (ensure at least one action exists)
if not all_actions:
# This case should ideally be handled by step 5, but as a safeguard:
logger.error(
f'CRITICAL: No actions generated for response {response.id} after all checks.'
)
# Create a default Think action if somehow we ended up with no actions
all_actions.append(
AgentThinkAction(thought='[Critical Error: Failed to generate any action]')
)
if all_actions[0]:
all_actions[0].response_id = response.id
return all_actions

View File

@@ -0,0 +1,408 @@
import difflib
import re
from dataclasses import dataclass
from openhands.core.exceptions import LLMMalformedActionError
from openhands.core.logger import openhands_logger as logger
# Regex patterns for search / replace
HEAD = r'^<{5,9} SEARCH\s*$'
DIVIDER = r'^={5,9}\s*$'
UPDATED = r'^>{5,9} REPLACE\s*$'
HEAD_ERR = '<<<<<<< SEARCH'
DIVIDER_ERR = '======='
UPDATED_ERR = '>>>>>>> REPLACE'
DEFAULT_FENCE = ('```', '```')
TRIPLE_BACKTICKS = '```'
@dataclass
class DiffBlock:
"""Represents a parsed SEARCH/REPLACE diff block."""
filename: str
search: str
replace: str
def strip_filename(filename: str, fence: tuple[str, str] = DEFAULT_FENCE) -> str | None:
"""
Strips potential wrapping characters from a filename line.
The function handles various common formats that LLMs might use to specify filenames:
- Markdown backticks: `filename.py`
- Emphasis markers: *filename.py*
- File prefixes: # filename.py
- Trailing colons: filename.py:
- Leading/trailing whitespace
Examples:
strip_filename(' file.py ') -> 'file.py'
strip_filename('`file.py`') -> 'file.py'
strip_filename('*file.py*') -> 'file.py'
strip_filename('file.py:') -> 'file.py'
strip_filename('# file.py') -> 'file.py'
strip_filename('`*# file.py:*` ') -> 'file.py'
strip_filename('...') -> None
strip_filename('```') -> None
Args:
filename: The string containing a potential filename
fence: tuple of opening and closing fence markers (e.g., ('```', '```'))
Returns:
The cleaned filename string, or None if no valid filename found
"""
filename = filename.strip()
if filename == '...':
return None
start_fence = fence[0]
if filename.startswith(start_fence) or filename.startswith(TRIPLE_BACKTICKS):
return None # Should not start with fence
filename = filename.rstrip(':')
filename = filename.lstrip('#')
filename = filename.strip()
filename = filename.strip('`')
filename = filename.strip('*')
return filename if filename else None
def find_filename(
lines: list[str],
fence: tuple[str, str] = DEFAULT_FENCE,
valid_fnames: list[str] | None = None,
) -> str | None:
"""
Searches preceding lines for a filename, handling potential fences.
Adapted from Aider's find_filename.
The function looks through the provided lines (up to 3 lines back) to find a filename,
using several strategies in order:
1. Direct filename search:
- Looks for a filename in each line
- Stops at non-fence lines if no filename found
- Continues past fences only if no filename found yet
2. Validation against valid_fnames (if provided):
a. Exact match: Returns first filename that exactly matches a valid filename
b. Fuzzy match: Uses difflib to find close matches (cutoff=0.8)
c. Extension heuristic: If no match, accepts any filename with an extension
3. Fallback: Returns the last potential filename found if no better match
Examples:
# Simple case
find_filename(['file.py', '```python']) -> 'file.py'
# With path
find_filename(['src/core/file.py', '```python']) -> 'src/core/file.py'
# With valid_fnames
find_filename(['appz.py', '```python'], valid_fnames=['app.py']) -> 'app.py'
# No valid filename
find_filename(['Just some text', '```python']) -> None
# Too far back
find_filename(['file.py', 'another line', 'yet another', '```python']) -> None
Args:
lines: List of lines to search through (usually preceding a diff block)
fence: tuple of opening and closing fence markers (e.g., ('```', '```'))
valid_fnames: Optional list of valid filenames to match against
Returns:
The best matching filename found, or None if no valid filename found
"""
if valid_fnames is None:
valid_fnames = []
# Go back through the 3 preceding lines (already reversed in caller)
lines = lines[:3]
filenames = []
for line in lines:
filename = strip_filename(line, fence)
if filename:
filenames.append(filename)
# Stop searching if we hit a non-fence line before finding a filename
if (
not filename
and not line.startswith(fence[0])
and not line.startswith(TRIPLE_BACKTICKS)
):
break
# Only continue searching past fences if we haven't found a filename yet
if (
not filenames
and not line.startswith(fence[0])
and not line.startswith(TRIPLE_BACKTICKS)
):
break
if not filenames:
return None
# Pick the *best* filename found
# Check for exact match first
for fname in filenames:
if fname in valid_fnames:
return fname
# Perform fuzzy matching with valid_fnames
for fname in filenames:
close_matches = difflib.get_close_matches(fname, valid_fnames, n=1, cutoff=0.8)
if len(close_matches) == 1:
return close_matches[0]
# If no fuzzy match, look for a file w/extension as a heuristic
for fname in filenames:
if '.' in fname:
return fname
# Fallback to the last potential filename found
if filenames:
return filenames[0]
return None
def parse_llm_response_for_diffs(
content: str,
fence: tuple[str, str] = DEFAULT_FENCE,
valid_fnames: list[str] | None = None,
) -> list[DiffBlock]:
"""
Parses LLM response content to find fenced diff blocks in the format:
```language
filename.ext
<<<<<<< SEARCH
old code
=======
new code
>>>>>>> REPLACE
```
The parser supports:
1. Multiple diff blocks in a single response
2. Different file types (indicated by fence language)
3. Empty SEARCH blocks for new files
4. Empty REPLACE blocks for deletions
5. Filename inference from context
6. Fuzzy filename matching with valid_fnames
Examples:
# Edit existing file
```python
app.py
<<<<<<< SEARCH
def old_func():
pass
=======
def new_func():
return True
>>>>>>> REPLACE
```
# Create new file
```python
new_file.py
<<<<<<< SEARCH
=======
def new_func():
return True
>>>>>>> REPLACE
```
# Delete content
```python
file.py
<<<<<<< SEARCH
def unused_func():
pass
=======
>>>>>>> REPLACE
```
Args:
content: The LLM response string.
fence: tuple of opening and closing fence markers (e.g., ('```', '```')).
valid_fnames: Optional list of valid filenames in the context for better matching.
Returns:
A list of DiffBlock objects representing the parsed blocks.
Raises:
LLMMalformedActionError: If malformed blocks are detected, such as:
- Missing filename before block
- Missing SEARCH/REPLACE markers
- Missing divider
- Unexpected divider in REPLACE block
"""
logger.info(f'Parsing LLM response for diffs:\n{content}')
edits: list[DiffBlock] = []
lines = content.splitlines(keepends=True)
i = 0
current_filename = None
head_pattern = re.compile(HEAD)
divider_pattern = re.compile(DIVIDER)
updated_pattern = re.compile(UPDATED)
fence_pattern = re.compile(
r'^' + re.escape(fence[0]) + r'\w*\s*$'
) # Pattern for opening fence
missing_filename_err = (
'Bad/missing filename. The filename must be alone on the line before the opening fence'
f' {fence[0]}'
)
while i < len(lines):
line = lines[i]
# Check for SEARCH/REPLACE blocks
if head_pattern.match(line.strip()):
block_start_line_index = i
try:
# Look backwards for filename and opening fence
fence_line_index = -1
temp_i = i - 1
while temp_i >= max(0, i - 4): # Look back up to 3 lines
prev_line = lines[temp_i]
if fence_pattern.match(prev_line.strip()):
fence_line_index = temp_i
break
temp_i -= 1
# If fence found, look for filename between fence and head
filename = None
if fence_line_index != -1:
filename_lines = lines[
fence_line_index + 1 : block_start_line_index
]
# Pass lines in original order for find_filename
filename = find_filename(filename_lines, fence, valid_fnames)
# Fallback / Contextual filename inference
if not filename:
if current_filename:
# Use filename from previous block if available
filename = current_filename # type: ignore [unreachable]
else:
# Raise error if no filename found and no fence/context
if fence_line_index == -1:
raise LLMMalformedActionError(missing_filename_err)
# If filename is still None after checks, raise error before proceeding
# unless it's potentially a new file block (empty search)
# We'll validate the empty search later.
if (
filename is None
and i + 1 < len(lines)
and not divider_pattern.match(lines[i + 1].strip())
):
raise LLMMalformedActionError(
missing_filename_err + ' (or fence not found)'
)
current_filename = filename # Remember for subsequent blocks
# --- Start parsing block content ---
original_text_lines = []
i += 1 # Move past HEAD line
while i < len(lines) and not divider_pattern.match(lines[i].strip()):
original_text_lines.append(lines[i])
i += 1
if i >= len(lines) or not divider_pattern.match(lines[i].strip()):
raise LLMMalformedActionError(
f'Expected `{DIVIDER_ERR}` after SEARCH block for {current_filename or "unknown file"}'
)
i += 1 # Move past DIVIDER line
updated_text_lines = []
while i < len(lines) and not updated_pattern.match(lines[i].strip()):
if divider_pattern.match(
lines[i].strip()
): # Error: unexpected divider
raise LLMMalformedActionError(
f'Unexpected `{DIVIDER_ERR}` inside REPLACE block for {current_filename or "unknown file"}'
)
updated_text_lines.append(lines[i])
i += 1
if i >= len(lines) or not updated_pattern.match(lines[i].strip()):
raise LLMMalformedActionError(
f'Expected `{UPDATED_ERR}` after REPLACE block for {current_filename or "unknown file"}'
)
i += 1 # Move past REPLACE marker line
# Check for closing fence (and move past it if found)
if i < len(lines) and lines[i].strip() == fence[1]:
i += 1 # Move past closing fence
else:
# Allow blocks without closing fence for flexibility
pass # Stay on the line after REPLACE marker
# Successfully parsed a block
original_text = ''.join(original_text_lines)
updated_text = ''.join(updated_text_lines)
# Final check for filename if it was initially None (new file case)
if filename is None:
if original_text.strip():
raise LLMMalformedActionError(
'SEARCH block must be empty when creating a new file (filename was missing before block).'
)
# Re-attempt filename finding if it was a new file block
if fence_line_index != -1:
filename_lines = lines[
fence_line_index + 1 : block_start_line_index
]
filename = find_filename(filename_lines, fence, valid_fnames)
if filename is None:
# If still None after re-check, it's an error
raise LLMMalformedActionError(
'Could not determine filename for new file block.'
)
# Create and add the DiffBlock object
edits.append(
DiffBlock(
filename=filename, search=original_text, replace=updated_text
)
)
continue # Continue to the next line
except (ValueError, LLMMalformedActionError) as e:
# Add context to the error message
processed_marker = len(edits) + 1 # Block number being processed
# Ensure we get the original message, whether it's ValueError or LLMMalformedActionError
original_message = e.args[0] if e.args else str(e)
err_context = f"Error parsing block #{processed_marker} for file '{current_filename or 'unknown'}': {original_message}"
# Add context to the error message
processed_marker = len(edits) + 1 # Block number being processed
# Ensure we get the original message, whether it's ValueError or LLMMalformedActionError
original_message = e.args[0] if e.args else str(e)
err_context = f"Error parsing block #{processed_marker} for file '{current_filename or 'unknown'}': {original_message}"
# Re-raise as LLMMalformedActionError
raise LLMMalformedActionError(err_context) from e
# If not a HEAD line, just advance line index
i += 1
return edits

View File

@@ -0,0 +1,208 @@
# OpenHands Agent
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
## Role and Core Function
Your primary role is to assist users by executing commands, interacting with tools, and solving technical problems effectively. You should be thorough, methodical, and prioritize **quality and correctness** over speed.
* You have access to a set of tools (like bash, python, browser, file viewer/lister). Use these tools when appropriate for tasks like running commands, executing code, browsing, or viewing/listing files.
* **IMPORTANT**: For **modifying files**, you MUST NOT use a tool. Instead, you MUST generate diff blocks directly in your response text, following the specific rules outlined in the **File Editing via LLM Diff** section below.
## Efficiency
* Where possible, combine multiple simple, related actions into a single action (e.g., combine multiple bash commands using `&&`). However, prioritize clarity and correctness for complex operations. You can fallback to multiple steps if needed.
* When exploring the codebase, use efficient tools like `find`, `grep`, and the provided file viewing/listing tools (`view_file`, `list_directory`) with appropriate filters to avoid unnecessary operations.
## File System Guidelines
* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system (e.g., using the `list_directory` tool) to locate the file before working on it.
* Use the `view_file` tool to examine file contents before deciding on edits.
* To propose changes to a file, follow the rules in **File Editing via LLM Diff**.
* To rename or delete files, use the `execute_bash` tool with `mv` or `rm` commands.
## The `view_file` Tool
* The `view_file` tool will show the file's contents in the indicated line number range, 1-indexed.
* The line numbers are added by the tool as an aid for you, they are NOT part of the file.
* The format of the output is:
```
1\tline1
2\tline2
...
n\tlinen
```
## File Editing via LLM Diff
* **IMPORTANT**:
To propose changes to a file, you MUST use fenced diff blocks embedded directly in your response. You do not need a tool for this.
* **Format Rules**: Every fenced diff block must strictly follow this format:
1. Three opening backticks and code language, e.g.: ```python
2. The **ABSOLUTE** file path alone on a line, verbatim. No bold asterisks, no quotes, etc.
3. The start of search block: `<<<<<<< SEARCH`
4. A contiguous chunk of lines to search for in the existing source code.
5. The dividing line: `=======`
6. The lines to replace the searched lines with.
7. The end of the replace block: `>>>>>>> REPLACE`
8. Three closing backticks: ```
* **Exact Matching**: The `SEARCH` section must **EXACTLY MATCH** the existing file content, character for character, including all whitespace, comments, indentation, etc. Use the `view_file` tool to confirm the exact content before creating a block. Be meticulous about whitespace.
* **Line Numbers are NOT part of the file**, so do NOT include them in the `SEARCH` or `REPLACE` section.
* **First Match Only**: When editing, *SEARCH/REPLACE* blocks will *only* replace the first match occurrence.
- include multiple unique *SEARCH/REPLACE* blocks if needed
- include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
* **Conciseness**: Keep blocks focused. Include just enough lines in `SEARCH` for a unique match. Break large changes into smaller, logical blocks.
* **Moving Code**: To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location.
* **Creating Files**: If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
- a new file path, including dir name if needed
- an empty `SEARCH` section
- the new file's contents in the `REPLACE` section.
### Clarity
When proposing changes, clearly state your plan and then provide the corresponding fenced diff blocks.
### LLM Diff Examples
{# Examples for LLM Diff Fenced Block Editing #}
Here are examples of how to use the fenced diff format:
**Example 1: Modifying a function**
*User Message:*
> Change get_factorial() to use math.factorial
*Assistant Response:*
Okay, I will modify `mathweb/flask/app.py` to:
1. Import the math package.
2. Remove the existing factorial() function.
3. Update get_factorial() to call math.factorial instead.
Here are the changes:
```python
mathweb/flask/app.py
<<<<<<< SEARCH
from flask import Flask
=======
import math
from flask import Flask
>>>>>>> REPLACE
```
```python
mathweb/flask/app.py
<<<<<<< SEARCH
def factorial(n):
"compute factorial"
if n == 0:
return 1
else:
return n * factorial(n-1)
=======
>>>>>>> REPLACE
```
```python
mathweb/flask/app.py
<<<<<<< SEARCH
return str(factorial(n))
=======
return str(math.factorial(n))
>>>>>>> REPLACE
```
**Example 2: Refactoring into a new file**
*User Message:*
> Refactor hello() into its own file.
*Assistant Response:*
Okay, I will modify `main.py` and create a new file `hello.py`:
1. Create a new hello.py file with hello() in it.
2. Remove hello() from main.py and replace it with an import.
Here are the changes:
```python
hello.py
<<<<<<< SEARCH
=======
def hello():
"print a greeting"
print("hello")
>>>>>>> REPLACE
```
```python
main.py
<<<<<<< SEARCH
def hello():
"print a greeting"
print("hello")
=======
from hello import hello
>>>>>>> REPLACE
```
## Code Quality, Robustness, and Defensiveness
* **Goal:** Your primary goal when writing or modifying code is to produce a **correct, robust, and reliable solution** that successfully addresses the user's request and passes all necessary verification steps (especially tests).
* **Scope of Changes:** Implement **focused changes** directly related to solving the problem. The scope should be sufficient to ensure correctness and robustness for the task.
* **Defensive Programming:** Actively practice defensive programming within the scope of your changes. Anticipate potential edge cases, handle errors gracefully (e.g., check for null values, unexpected types), and consider different ways the affected code might be called to ensure the fix works reliably. Reliability is more important than focused changes
* **Readability:** Write clean, efficient code. Use comments when the logic isn't self-evident.
* **Refactoring:** If implementing the required fix involves adding significant complexity to a function or file, consider proposing a refactor (e.g., splitting the function/file) as a separate step *after* confirming the primary fix, but only if it significantly improves maintainability and is appropriate for the context.
## Version Control (Git)
* **Credentials:** When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
* **Caution:** Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so.
* **Committing:** When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible for simplicity, but be mindful of what you are committing.
* **Ignoring Files:** Do NOT commit files that typically shouldn't go into version control (e.g., `node_modules/`, `.env` files, build directories, cache files, large binaries) unless explicitly instructed by the user. Check `.gitignore` files if unsure.
* **Clarity:** Ask the user for clarification if unsure about which files to commit.
## Pull Requests (GitHub)
* **Frequency:** Create only ONE pull request per session/issue unless explicitly instructed otherwise.
* **Updates:** When working with an existing PR, update it with new commits unless instructed otherwise.
* **Content:** Preserve the original PR title and purpose; update the description only when necessary to reflect significant changes.
## Problem Solving Workflow
1. **Understand & Explore:** Thoroughly read the request. Explore relevant files (using `view_file`, `list_directory`, `grep`) and understand the context, potential calling contexts, and potentially relevant existing test files/suites before proposing solutions. Do not hurry.
* If the request is vague, ask clarifying questions to understand the user's intent and the problem's context.
* If the request is complex, break it down into smaller tasks and address them one at a time.
* If the request involves multiple files, explore all relevant files before proposing changes. Use `grep` to find function calls or references in other files.
* If the request involves a bug, try to reproduce it first. Use `grep` to find relevant test cases or code paths that might be related to the issue.
* If the request involves a new feature, consider how it fits into the existing codebase and prioritize the codebase patterns.
* If the request involves a bug fix, consider whether it requires changes to multiple files or if it's a simple fix. If it's complex, break it down into smaller tasks and address them one at a time.
2. **Plan:** Consider multiple approaches. Select the most promising one that balances correctness, robustness, and focused changes. Outline your plan.
3. **Implement:** Propose changes via fenced diff blocks as described in **File Editing via LLM Diff**.
4. **Test & Verify:**
* **Bug Fixes:** Create tests to verify issues before implementing fixes, if feasible using available tools (bash, python).
* **Existing Tests:** **Crucially**, identify and execute the **complete set** of relevant existing tests to ensure your changes are correct and haven't introduced regressions. Follow the testing procedure provided in the repository-specific or task-specific instructions if available, otherwise apply the principles here (identify all relevant tests, run them, ensure they pass).
* **Consult User:** If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing significant time.
5. **Refine:** If verification fails, analyze the failures, revise your implementation, and repeat testing until successful.
## Security
* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect.
* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing.
## Environment Setup
* When asked to run an application, don't stop if it's not installed. Attempt to install it (using bash) and run the command again.
* If you encounter missing dependencies:
1. First, look for existing dependency files (`requirements.txt`, `pyproject.toml`, `package.json`, `Gemfile`, etc.).
2. If found, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`).
3. Only install individual packages directly if no dependency files are found or if only specific packages are needed.
* Similarly, install missing dependencies for essential tools requested by the user if possible.
## Troubleshooting
* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken:
1. Step back and reflect on 5-7 different possible sources of the problem.
2. Assess the likelihood of each possible cause.
3. Methodically address the most likely causes, starting with the highest probability.
4. Document your reasoning process.
* When you run into a major issue while executing a plan from the user, please propose a new plan and confirm with the user before proceeding.

View File

@@ -2,16 +2,22 @@ from .bash import create_cmd_run_tool
from .browser import BrowserTool
from .finish import FinishTool
from .ipython import IPythonTool
from .list_directory import ListDirectoryTool
from .llm_based_edit import LLMBasedFileEditTool
from .str_replace_editor import create_str_replace_editor_tool
from .think import ThinkTool
from .undo_edit import UndoEditTool
from .view_file import ViewFileTool
__all__ = [
'BrowserTool',
'create_cmd_run_tool',
'ListDirectoryTool',
'FinishTool',
'IPythonTool',
'LLMBasedFileEditTool',
'create_str_replace_editor_tool',
'UndoEditTool',
'ViewFileTool',
'ThinkTool',
]

View File

@@ -0,0 +1,19 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
ListDirectoryTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='list_directory',
description='List non-hidden files and directories within a specified directory path, up to 2 levels deep.',
parameters={
'type': 'object',
'properties': {
'path': {
'description': 'Absolute path to the directory, e.g. `/workspace/subdir`.',
'type': 'string',
},
},
'required': ['path'],
},
),
)

View File

@@ -0,0 +1,19 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
UndoEditTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='undo_edit',
description='Revert the last edit made to the specified file.',
parameters={
'type': 'object',
'properties': {
'path': {
'description': 'Absolute path to the file for which to undo the last edit, e.g. `/workspace/file.py`.',
'type': 'string',
},
},
'required': ['path'],
},
),
)

View File

@@ -0,0 +1,24 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
ViewFileTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='view_file',
description='View the content of a specified file, optionally within a line range.',
parameters={
'type': 'object',
'properties': {
'path': {
'description': 'Absolute path to the file, e.g. `/workspace/file.py`.',
'type': 'string',
},
'view_range': {
'description': 'Optional line range [start, end] (1-based, inclusive). Shows full file if omitted. `[start, -1]` shows from start to end.',
'items': {'type': 'integer'},
'type': 'array',
},
},
'required': ['path'],
},
),
)

View File

@@ -61,6 +61,8 @@ class ReadOnlyAgent(CodeActAgent):
if self._prompt_manager is None:
self._prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
config=self.config,
system_prompt_filename=self.config.system_prompt_filename,
)
return self._prompt_manager

View File

@@ -19,6 +19,7 @@ class AgentConfig(BaseModel):
"""Whether to enable browsing tool.
Note: If using CLIRuntime, browsing is not implemented and should be disabled."""
enable_llm_editor: bool = Field(default=False)
enable_llm_diff: bool = Field(default=False)
"""Whether to enable LLM editor tool"""
enable_editor: bool = Field(default=True)
"""Whether to enable the standard editor tool (str_replace_editor), only has an effect if enable_llm_editor is False."""

View File

@@ -82,11 +82,12 @@ class FileEditAction(Action):
action (str): The type of action being performed (always ActionType.EDIT).
runnable (bool): Indicates if the action can be executed (always True).
security_risk (ActionSecurityRisk | None): Indicates any security risks associated with the action.
impl_source (FileEditSource): The source of the implementation (LLM_BASED_EDIT or OH_ACI).
impl_source (FileEditSource): The source of the implementation (LLM_BASED_EDIT, OH_ACI, or FENCED_DIFF).
Usage:
- For LLM-based editing: Use path, content, start, and end attributes.
- For ACI-based editing: Use path, command, and the appropriate attributes for the specific command.
- For LLM Diff editing: Use path, search, and replace attributes.
Note:
- If start is set to -1 in LLM-based editing, the content will be appended to the file.
@@ -107,6 +108,10 @@ class FileEditAction(Action):
start: int = 1
end: int = -1
# Fenced Diff arguments
search: str | None = None
replace: str | None = None
# Shared arguments
thought: str = ''
action: str = ActionType.EDIT
@@ -122,6 +127,9 @@ class FileEditAction(Action):
if self.impl_source == FileEditSource.LLM_BASED_EDIT:
ret += f'Range: [L{self.start}:L{self.end}]\n'
ret += f'Content:\n```\n{self.content}\n```\n'
elif self.impl_source == FileEditSource.LLM_DIFF:
ret += f'SEARCH:\n```\n{self.search}\n```\n'
ret += f'REPLACE:\n```\n{self.replace}\n```\n'
else: # OH_ACI mode
ret += f'Command: {self.command}\n'
if self.command == 'create':

View File

@@ -15,6 +15,7 @@ class EventSource(str, Enum):
class FileEditSource(str, Enum):
LLM_BASED_EDIT = 'llm_based_edit'
OH_ACI = 'oh_aci' # openhands-aci
LLM_DIFF = 'llm_diff' # Aider-style SEARCH/REPLACE diffs parsed directly from LLM message content
class FileReadSource(str, Enum):

View File

@@ -199,6 +199,7 @@ class LLM(RetryMixin, DebugMixin):
custom_llm_provider=self.config.custom_llm_provider,
timeout=self.config.timeout,
top_p=self.config.top_p,
top_k=self.config.top_k,
drop_params=self.config.drop_params,
seed=self.config.seed,
**kwargs,
@@ -433,10 +434,6 @@ class LLM(RetryMixin, DebugMixin):
)
resp_json = response.json()
if 'data' not in resp_json:
logger.error(
f'Error getting model info from LiteLLM proxy: {resp_json}'
)
all_model_info = resp_json.get('data', [])
current_model_info = next(
(

View File

@@ -1,3 +1,5 @@
import json
import re
from typing import Any, Callable
from tenacity import (
@@ -15,6 +17,80 @@ from openhands.utils.tenacity_stop import stop_if_should_exit
class RetryMixin:
"""Mixin class for retry logic."""
def _extract_retry_delay(self, exception) -> float | None:
"""
Extract retry delay in seconds from exception message if available.
Args:
exception: The exception object to extract retry delay from
Returns:
Retry delay in seconds or None if not found
"""
if not hasattr(exception, '__str__'):
return None
error_msg = str(exception)
# Try to extract "retryDelay": "Xs" pattern
retry_match = re.search(r'"retryDelay":\s*"(\d+)s"', error_msg)
if retry_match:
return float(retry_match.group(1))
# Try to extract any JSON containing retryDelay
try:
# Look for JSON objects in the string
json_match = re.search(r'({.*})', error_msg)
if json_match:
json_data = json.loads(json_match.group(1))
# Navigate through nested JSON to find retryDelay
if 'error' in json_data and 'details' in json_data['error']:
for detail in json_data['error']['details']:
if (
'@type' in detail
and 'RetryInfo' in detail['@type']
and 'retryDelay' in detail
):
delay_str = detail['retryDelay']
if isinstance(delay_str, str) and delay_str.endswith('s'):
return float(
delay_str[:-1]
) # Remove the 's' and convert to float
except Exception:
# If JSON parsing fails, continue with default retry behavior
pass
return None
def custom_wait_strategy(self, retry_state) -> float:
"""
Custom wait strategy that uses provider's retry delay when available.
Args:
retry_state: The current retry state
Returns:
Wait time in seconds
"""
# Get the fallback exponential wait
default_wait = wait_exponential(
multiplier=self.retry_multiplier,
min=self.retry_min_wait,
max=self.retry_max_wait,
)(retry_state)
# Check if there's an exception with retry delay info
if retry_state.outcome.failed:
exception = retry_state.outcome.exception()
retry_delay = self._extract_retry_delay(exception)
if retry_delay is not None:
logger.info(f'Using provider-suggested retry delay: {retry_delay}s')
return retry_delay
return default_wait
def retry_decorator(self, **kwargs: Any) -> Callable:
"""
Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes.
@@ -26,17 +102,18 @@ class RetryMixin:
Returns:
A retry decorator with the parameters customizable in configuration.
"""
num_retries = kwargs.get('num_retries')
retry_exceptions: tuple = kwargs.get('retry_exceptions', ())
retry_min_wait = kwargs.get('retry_min_wait')
retry_max_wait = kwargs.get('retry_max_wait')
retry_multiplier = kwargs.get('retry_multiplier')
# Store these values as instance variables so they can be accessed by custom_wait_strategy
self.num_retries = kwargs.get('num_retries')
self.retry_exceptions = kwargs.get('retry_exceptions', ())
self.retry_min_wait = kwargs.get('retry_min_wait')
self.retry_max_wait = kwargs.get('retry_max_wait')
self.retry_multiplier = kwargs.get('retry_multiplier')
retry_listener = kwargs.get('retry_listener')
def before_sleep(retry_state: Any) -> None:
self.log_retry_attempt(retry_state)
if retry_listener:
retry_listener(retry_state.attempt_number, num_retries)
retry_listener(retry_state.attempt_number, self.num_retries)
# Check if the exception is LLMNoResponseError
exception = retry_state.outcome.exception()
@@ -56,16 +133,10 @@ class RetryMixin:
retry_decorator: Callable = retry(
before_sleep=before_sleep,
stop=stop_after_attempt(num_retries) | stop_if_should_exit(),
stop=stop_after_attempt(self.num_retries) | stop_if_should_exit(),
reraise=True,
retry=(
retry_if_exception_type(retry_exceptions)
), # retry only for these types
wait=wait_exponential(
multiplier=retry_multiplier,
min=retry_min_wait,
max=retry_max_wait,
),
retry=retry_if_exception_type(self.retry_exceptions),
wait=self.custom_wait_strategy,
)
return retry_decorator

View File

@@ -21,7 +21,7 @@ from openhands.events.action import (
)
from openhands.events.action.mcp import MCPAction
from openhands.events.action.message import SystemMessageAction
from openhands.events.event import Event, RecallType
from openhands.events.event import Event, FileEditSource, RecallType
from openhands.events.observation import (
AgentCondensationObservation,
AgentDelegateObservation,
@@ -216,8 +216,32 @@ class ConversationMemory:
rather than being returned immediately. They will be processed later when all corresponding
tool call results are available.
"""
# create a regular message from an event
if isinstance(
# Handle LLM_DIFF FileEditAction specifically FIRST: create an assistant message
# containing the thought (if any) and the formatted diff block.
# This bypasses the tool_metadata check for this action type.
if (
isinstance(action, FileEditAction)
and action.impl_source == FileEditSource.LLM_DIFF
):
# TODO: Determine language hint from path extension if possible
lang_hint = 'diff'
# Ensure newline separation, handle None for search/replace
search_block = action.search or ''
replace_block = action.replace or ''
block = (
f'\n```{lang_hint}\n'
f'{action.path}\n'
f'<<<<<<< SEARCH\n{search_block}\n'
f'=======\n{replace_block}\n'
f'>>>>>>> REPLACE\n```'
)
# Combine thought and block
content_str = (action.thought + block) if action.thought else block.lstrip()
# Return the message directly, DO NOT add tool_calls structure
return [Message(role='assistant', content=[TextContent(text=content_str)])]
# Now handle other actions that might have tool metadata
elif isinstance(
action,
(
AgentDelegateAction,
@@ -252,7 +276,6 @@ class ConversationMemory:
else [],
tool_calls=assistant_msg.tool_calls,
)
return []
elif isinstance(action, AgentFinishAction):
role = 'user' if action.source == 'user' else 'assistant'
@@ -630,7 +653,7 @@ class ConversationMemory:
# when the LLM tries to return the next message
raise ValueError(f'Unknown observation type: {type(obs)}')
# Update the message as tool response properly
# Update the message as tool response properly (for non-LLM_DIFF observations with metadata)
if (tool_call_metadata := getattr(obs, 'tool_call_metadata', None)) is not None:
tool_call_id_to_message[tool_call_metadata.tool_call_id] = Message(
role='tool',

View File

@@ -0,0 +1,41 @@
# Plan: Decoupling File Utilities from Editing Mode
## Problem
The OpenHands CodeAct agent offers three primary ways to handle file edits:
1. **`str_replace_editor` (Default, OH_ACI):** A comprehensive tool (loaded via `create_str_replace_editor_tool`) with commands for viewing files/dirs, creating, replacing, inserting, and undoing edits. This is the default mode.
2. **`LLMBasedFileEditTool` (Optional):** An alternative editor tool enabled by `enable_llm_editor=True`, using line ranges and LLM refinement.
3. **`LLM_DIFF` Mode (Optional):** An alternative mode enabled by `enable_llm_diff=True`, where the agent parses Aider-style fenced diffs directly from the LLM's text response (not a tool call).
When either of the optional modes (`LLMBasedFileEditTool` or `LLM_DIFF`) is enabled, the default `str_replace_editor` tool (and its bundled utilities like view, list, undo) is not loaded. This creates a need to provide viewing/listing capabilities separately in these optional modes, while acknowledging that `undo` is inherently tied to the default mode's server-side implementation.
## Goal
Allow the use of the optional editing modes (`LLMBasedFileEditTool` or `LLM_DIFF` mode) while still providing access to the viewing (file/directory) capabilities originally part of the default `str_replace_editor`.
Important: the original `str_replace_editor` tool must remain available *unchanged* when neither alternative editor/mode is enabled, to maintain compatibility with models post-trained on it like Sonnet.
**Challenge:** The `undo_edit` command relies on the internal history of the `OHEditor` instance on the server, which is only populated when *it* performs edits using the `OH_ACI` source. Edits made client-side (`LLMBasedFileEditTool`, `LLM_DIFF` mode) bypass this history, rendering `undo_edit` ineffective for them.
## Current Solution: Conditional Tool Loading
The current implementation addresses the goal of providing viewing/listing utilities by conditionally loading tools based on the agent configuration flags in `openhands/agenthub/codeact_agent/function_calling.py: get_tools`:
1. **Default Mode (`enable_llm_editor=False`, `enable_llm_diff=False`):**
* Loads the full `create_str_replace_editor_tool()`. This single tool definition provides commands for editing (`replace`, `insert`, etc.), viewing (`view` file/directory), and `undo_edit`. All operations use `impl_source=OH_ACI` and are executed server-side by `OHEditor`.
2. **LLM-Based Editor Mode (`enable_llm_editor=True`):**
* Loads the `LLMBasedFileEditTool` for editing (client-side execution).
* Loads separate `ViewFileTool` and `ListDirectoryTool`. These tools generate `FileReadAction`s with `impl_source=OH_ACI`, ensuring they are still executed server-side by the `OHEditor`'s viewing capabilities.
* Does **not** load `UndoEditTool`.
3. **LLM_DIFF Mode (`enable_llm_diff=True`):**
* Loads **no** editing tools (edits are parsed from LLM text response and executed client-side).
* Loads separate `ViewFileTool` and `ListDirectoryTool` (same as above, executed server-side via `OH_ACI`).
* Does **not** load `UndoEditTool`.
This approach successfully decouples the viewing/listing functionality from the chosen editing method by falling back to the `OH_ACI` implementation for viewing/listing even when alternative editing modes are active.
## Conclusion
The current conditional tool loading mechanism effectively provides file/directory viewing capabilities regardless of the selected editing mode (`str_replace_editor`, `LLMBasedFileEditTool`, or `LLM_DIFF`). However, the `undo_edit` functionality remains intrinsically linked to the server-side edit history managed by `OHEditor` and is therefore only available when using the default `str_replace_editor` tool. Edits performed via the client-side `LLMBasedFileEditTool` or `LLM_DIFF` mode cannot be undone using the `undo_edit` command.

View File

@@ -31,8 +31,8 @@ from openhands.events.action import (
IPythonRunCellAction,
)
from openhands.events.action.action import Action
from openhands.events.action.files import FileEditSource
from openhands.events.action.mcp import MCPAction
from openhands.events.event import FileEditSource
from openhands.events.observation import (
AgentThinkObservation,
ErrorObservation,
@@ -267,11 +267,8 @@ class ActionExecutionClient(Runtime):
return ''
def send_action_for_execution(self, action: Action) -> Observation:
if (
isinstance(action, FileEditAction)
and action.impl_source == FileEditSource.LLM_BASED_EDIT
):
return self.llm_based_edit(action)
# NOTE: Dispatch for LLM_BASED_EDIT and FENCED_DIFF now happens in the `edit` method.
# This method now only forwards actions to the server.
# set timeout to default if not set
if action.timeout is None:
@@ -343,7 +340,16 @@ class ActionExecutionClient(Runtime):
return self.send_action_for_execution(action)
def edit(self, action: FileEditAction) -> Observation:
return self.send_action_for_execution(action)
# Dispatch based on impl_source
if action.impl_source == FileEditSource.LLM_BASED_EDIT:
# LLM-based edits are handled client-side (requires LLM)
return self.llm_based_edit(action)
elif action.impl_source == FileEditSource.LLM_DIFF:
# LLM Diff edits (parsed from message) are handled client-side
return self.llm_diff_edit(action)
else:
# OH_ACI edits (from tool calls) are handled server-side
return self.send_action_for_execution(action)
def browse(self, action: BrowseURLAction) -> Observation:
return self.send_action_for_execution(action)

View File

@@ -3,4 +3,11 @@ from openhands.runtime.utils.system import (
find_available_tcp_port,
)
"""
Utils for the client-side runtime.
The fenced diff editing logic, primarily in `edit.py`, is adapted from Aider (Apache 2.0 License).
- Original source: https://github.com/paul-gauthier/aider/blob/main/aider/coders/editblock_fenced_coder.py
- Please see fenced edit at https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/utils/edit.py
"""
__all__ = ['display_number_matrix', 'find_available_tcp_port']

View File

@@ -1,3 +1,4 @@
import difflib
import os
import re
import tempfile
@@ -345,3 +346,278 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
)
ret_obs.llm_metrics = self.draft_editor_llm.metrics
return ret_obs
# ==========================================================================
# Fenced Diff / LLM Diff Edit Logic (Adapted from Aider principles)
# ==========================================================================
def _exact_replace(
self,
original_lines: list[str],
search_lines: list[str],
replace_lines: list[str],
) -> list[str] | None:
"""Attempts an exact character-for-character replacement."""
search_tuple = tuple(search_lines)
search_len = len(search_lines)
if search_len == 0:
return None # Cannot replace empty search block exactly
for i in range(len(original_lines) - search_len + 1):
original_tuple = tuple(original_lines[i : i + search_len])
if search_tuple == original_tuple:
return (
original_lines[:i]
+ replace_lines
+ original_lines[i + search_len :]
)
return None
def _whitespace_flexible_replace(
self,
original_lines: list[str],
search_lines: list[str],
replace_lines: list[str],
) -> list[str] | None:
"""Attempts replacement ignoring consistent leading whitespace differences."""
search_len = len(search_lines)
if search_len == 0:
return None
# Calculate min leading whitespace in search block (ignoring blank lines)
search_leading_spaces = [
len(s) - len(s.lstrip(' ')) for s in search_lines if s.strip()
]
min_search_leading = min(search_leading_spaces) if search_leading_spaces else 0
stripped_search_lines = [
s[min_search_leading:] if s.strip() else s for s in search_lines
]
stripped_search_tuple = tuple(stripped_search_lines)
for i in range(len(original_lines) - search_len + 1):
original_chunk = original_lines[i : i + search_len]
# Calculate min leading whitespace in original chunk
original_leading_spaces = [
len(s) - len(s.lstrip(' ')) for s in original_chunk if s.strip()
]
min_original_leading = (
min(original_leading_spaces) if original_leading_spaces else 0
)
leading_whitespace_prefix = ' ' * min_original_leading
# Strip original chunk consistently
stripped_original_lines = [
s[min_original_leading:] if s.strip() else s for s in original_chunk
]
stripped_original_tuple = tuple(stripped_original_lines)
if stripped_search_tuple == stripped_original_tuple:
# Match found! Apply original leading whitespace to replace_lines
adjusted_replace_lines = [
leading_whitespace_prefix + rline[min_search_leading:]
if rline.strip()
else rline
for rline in replace_lines
]
return (
original_lines[:i]
+ adjusted_replace_lines
+ original_lines[i + search_len :]
)
return None
def _find_most_similar_block(
self, original_lines: list[str], search_lines: list[str], context_lines: int = 5
) -> tuple[str | None, float]:
"""Finds the block in original_lines most similar to search_lines."""
if not search_lines or not original_lines:
return None, 0.0
search_len = len(search_lines)
best_ratio = 0.0
best_match_start_idx = -1
matcher = difflib.SequenceMatcher(isjunk=None, a=search_lines)
for i in range(len(original_lines) - search_len + 1):
chunk = original_lines[i : i + search_len]
matcher.set_seq2(chunk)
ratio = matcher.ratio()
if ratio > best_ratio:
best_ratio = ratio
best_match_start_idx = i
if best_match_start_idx != -1:
start = max(0, best_match_start_idx - context_lines)
end = min(
len(original_lines), best_match_start_idx + search_len + context_lines
)
context_chunk = original_lines[start:end]
# Add markers to indicate the likely match within the context
marked_chunk = []
for idx, line in enumerate(context_chunk):
current_original_line_idx = start + idx
if (
best_match_start_idx
<= current_original_line_idx
< best_match_start_idx + search_len
):
marked_chunk.append(
f'> {line}'
) # Mark lines within the best match range
else:
marked_chunk.append(f' {line}')
return '\n'.join(marked_chunk), best_ratio
else:
return None, 0.0
def _apply_llm_diff_edit(
self, action: FileEditAction, original_content: str
) -> tuple[str | None, Observation | None]:
"""Helper to apply a single LLM_DIFF edit block."""
# Use standard splitlines (no keepends)
original_lines = original_content.splitlines()
# Use getattr to safely access search/replace, defaulting to empty string if not present
search_content = getattr(action, 'search', '')
replace_content = getattr(action, 'replace', '')
search_lines = search_content.splitlines()
replace_lines = replace_content.splitlines()
# Handle empty search block (create file / append)
if not search_content.strip():
# Append if file exists, otherwise content is replace_content
# Ensure single newline separation if original content exists and doesn't end with newline
separator = (
'\n' if original_content and not original_content.endswith('\n') else ''
)
updated_content = original_content + separator + replace_content
# Preserve original trailing newline status
if original_content.endswith('\n') and not updated_content.endswith('\n'):
updated_content += '\n'
elif not original_content.endswith('\n') and updated_content.endswith('\n'):
# This case is less likely, but if replace_content added a newline, remove it if original didn't have one
updated_content = updated_content.rstrip('\n')
return updated_content, None
# Attempt exact match
updated_lines = self._exact_replace(original_lines, search_lines, replace_lines)
# Attempt whitespace flexible match if exact failed
if updated_lines is None:
updated_lines = self._whitespace_flexible_replace(
original_lines, search_lines, replace_lines
)
# Handle match failure
if updated_lines is None:
logger.warning(f'Failed to apply diff to {action.path}')
similar_block, ratio = self._find_most_similar_block(
original_lines, search_lines
)
error_msg = (
f'Failed to apply edit to {action.path}.\n'
'The SEARCH block did not match the file content exactly or with flexible whitespace.\n\n'
'--- FAILED SEARCH BLOCK ---\n'
f'{search_content}\n'
'--- END FAILED SEARCH BLOCK ---\n'
)
if similar_block and ratio > 0.6: # Threshold from Aider
error_msg += (
f'\n--- MOST SIMILAR BLOCK FOUND (similarity: {ratio:.2f}) ---\n'
f'{similar_block}\n'
'--- END MOST SIMILAR BLOCK ---\n\n'
'Please check the SEARCH block and the file content and try again.'
)
else:
error_msg += '\nNo similar block found.'
return None, ErrorObservation(error_msg)
# Match successful - join with newline
final_content = '\n'.join(updated_lines)
return final_content, None
def llm_diff_edit(self, action: FileEditAction) -> Observation:
"""Applies a fenced diff edit parsed directly from LLM output."""
if action.impl_source != 'llm_diff':
# Should not happen if dispatch is correct, but safety check
raise ValueError(
f'llm_diff_edit called with incorrect action source: {action.impl_source}'
)
# Read original file or handle creation
read_obs = self.read(FileReadAction(path=action.path))
original_content = ''
prev_exist = True
if (
isinstance(read_obs, ErrorObservation)
and 'File not found'.lower() in read_obs.content.lower()
):
logger.info(
f'File {action.path} not found. Attempting to create based on edit.'
)
prev_exist = False
# Check if it's a valid creation (empty search block)
search_content = getattr(action, 'search', '')
if search_content.strip():
return ErrorObservation(
f'File {action.path} not found and SEARCH block is not empty. Cannot apply edit.'
)
# Content will be handled by _apply_llm_diff_edit
elif isinstance(read_obs, ErrorObservation):
# Other read error
return read_obs
elif isinstance(read_obs, FileReadObservation):
original_content = read_obs.content
else:
return ErrorObservation(
f'Unexpected observation type received when reading {action.path}: {type(read_obs)}'
)
# Apply the edit logic
updated_content, error_obs = self._apply_llm_diff_edit(action, original_content)
if error_obs:
return error_obs
if updated_content is None:
# Should have been caught by error_obs, but safety check
return ErrorObservation(
f'Edit application failed for {action.path}. Be more careful! You can try smaller or larger edit blocks.'
)
# Lint the updated content (optional)
diff = get_diff(original_content, updated_content, action.path)
if self.config.sandbox.enable_auto_lint:
suffix = os.path.splitext(action.path)[1]
lint_error_obs = self._get_lint_error(
suffix, original_content, updated_content, action.path, diff
)
if lint_error_obs is not None:
# Attach original diff to lint error message for context
lint_error_obs.content += (
'\n\n--- ORIGINAL DIFF ATTEMPTED ---\n'
f'{diff}\n'
'--- END ORIGINAL DIFF ATTEMPTED ---'
)
return lint_error_obs
# Write the final content
write_obs = self.write(
FileWriteAction(path=action.path, content=updated_content)
)
if isinstance(write_obs, ErrorObservation):
return write_obs # Return write error if it occurs
# Return success observation
return FileEditObservation(
content=diff, # Return the calculated diff
path=action.path,
prev_exist=prev_exist,
old_content=original_content,
new_content=updated_content,
impl_source=action.impl_source, # Pass source along
)

View File

@@ -5,6 +5,7 @@ from itertools import islice
from jinja2 import Template
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.message import Message, TextContent
from openhands.events.observation.agent import MicroagentKnowledge
@@ -52,12 +53,20 @@ class PromptManager:
def __init__(
self,
prompt_dir: str,
config: AgentConfig,
system_prompt_filename: str = 'system_prompt.j2',
):
self.prompt_dir: str = prompt_dir
self.system_template: Template = self._load_system_template(
system_prompt_filename
)
self.config: AgentConfig = config
# Load the correct system prompt based on the config
if self.config.enable_llm_diff:
system_template_name = 'system_prompt_llm_diff'
else:
system_template_name = 'system_prompt'
self.system_template: Template = self._load_template(system_template_name)
# Load other templates
self.user_template: Template = self._load_template('user_prompt')
self.additional_info_template: Template = self._load_template('additional_info')
self.microagent_info_template: Template = self._load_template('microagent_info')
@@ -147,5 +156,43 @@ class PromptManager:
None,
)
if latest_user_message:
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.iteration_flag.max_value - state.iteration_flag.current_value} turns left to complete the task. When finished reply with <finish></finish>.'
reminder_text = ''
# Every 10 steps
if state.iteration_flag.current_value % 10 == 0:
reminder_text += """\n\n## WORKFLOW REMINDER: ## Key Requirements & Constraints
1. **Understand the problem** very well: it is a bug report, and you know humans don't always write good descriptions. Explore the codebase to understand the related code and the problem in depth. It is possible that the solution needs to be a bit more extensive than just the stated text. Don't exagerate though: don't do unrelated refactoring, but also don't interpret the description too strictly.
2. **Focus on the issues:** Implement the fix focusing on non-test files related to the issue.
2. **Environment Ready:** The Python environment is pre-configured with all dependencies. Do not install packages.
3. **Mandatory Testing Procedure:**
* **Create Test to Reproduce the Issue:** *Before* implementing any fix, you MUST create a *new test* (separate from existing tests) that specifically reproduces the issue.
* Take existing tests as example to understand the testing format/structure.
* Enhance this test with edge cases.
* Run this test to confirm reproduction.
* **Verify Fix:** After implementing the fix, run your test again to verify the issue is resolved.
* **Identify ALL Relevant Tests:** You MUST perform a **dedicated search and analysis** to identify **all** existing unit tests potentially affected by your changes. This includes:
* Tests in the same module/directory as the changed files (e.g., `tests/` subdirectories).
* Tests explicitly importing or using the modified code/classes/functions.
* Tests mentioned in the issue description or related documentation.
* Tests covering functionalities that *depend on* the modified code (analyze callers/dependencies if necessary).
**If you cannot confidently identify a specific subset, you MUST identify and plan to run the entire test suite for the modified application or module(s). State your identified test scope clearly.**
* **Run Identified Relevant Tests:** You MUST execute the **complete set** of relevant existing unit tests you identified in the previous step. Ensure you are running the *correct and comprehensive set* of tests. You MUST NOT modify these existing tests.
* **Final Check & Verification:** Before finishing, ensure **all** identified relevant existing tests pass. **Explicitly confirm that you have considered potential omissions in your test selection and believe the executed tests comprehensively cover the impact of your changes.** Failing to identify and run the *complete* relevant set constitutes a failure. If any identified tests fail, revise your fix. Passing all relevant tests is the primary measure of success.
4. **Defensive Programming:** Actively practice defensive programming: anticipate and handle potential edge cases, unexpected inputs, and different ways the affected code might be called **to ensure the fix works reliably and allows relevant tests to pass.** Analyze the potential impact on other parts of the codebase.
5. **Final Review:** Compare your solution against the original issue and the base commit ({instance["base_commit"]}) to ensure completeness and test passage.
## General Workflow Guidance
* Prioritize understanding the problem, exploring the code, planning your fix, implementing it carefully using the required diff format, and **thoroughly testing** according to the **Mandatory Testing Procedure**.
* Consider trade-offs between different solutions. The goal is a **robust change that makes the relevant tests pass.** Quality, correctness, and reliability are key.
* Actively practice defensive programming: anticipate and handle potential edge cases, unexpected inputs, and different ways the affected code might be called **to ensure the fix works reliably and allows relevant tests to pass.** Analyze the potential impact on other parts of the codebase.
* IMPORTANT: Your solution will be tested by additional hidden tests, so do not assume the task is complete just because visible tests pass! Refine the solution until you are confident that it is robust and comprehensive according to the **Defensive Programming** requirement.
## Final Note
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
---
"""
reminder_text += f'ENVIRONMENT REMINDER: You have {state.iteration_flag.max_value - state.iteration_flag.current_value} turns left to complete the task. When finished reply with <finish></finish>.'
latest_user_message.content.append(TextContent(text=reminder_text))

2
poetry.lock generated
View File

@@ -6484,7 +6484,7 @@ name = "openhands-aci"
version = "0.3.0"
description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
optional = false
python-versions = "<4.0,>=3.12"
python-versions = "^3.12"
groups = ["main"]
files = [
{file = "openhands_aci-0.3.0-py3-none-any.whl", hash = "sha256:4f060aa1c2ae11a14f08eb72e9bf09f81db916766aab10ccea6df3bc52faafd1"},

View File

@@ -176,6 +176,8 @@ def test_file_edit_action_aci_serialization_deserialization():
'end': -1,
'thought': 'Replacing text',
'impl_source': 'oh_aci',
'search': None,
'replace': None,
},
}
serialization_deserialization(original_action_dict, FileEditAction)
@@ -196,6 +198,8 @@ def test_file_edit_action_llm_serialization_deserialization():
'end': 10,
'thought': 'Updating file content',
'impl_source': 'llm_based_edit',
'search': None,
'replace': None,
},
}
serialization_deserialization(original_action_dict, FileEditAction)

View File

@@ -6,18 +6,56 @@ from unittest.mock import patch
import pytest
from litellm import ModelResponse
from openhands.agenthub.codeact_agent.function_calling import response_to_actions
from openhands.agenthub.codeact_agent.function_calling import (
get_tools,
response_to_actions,
)
from openhands.agenthub.codeact_agent.tools import (
BrowserTool,
FencedDiffEditTool,
FinishTool,
IPythonTool,
ListDirectoryTool,
LLMBasedFileEditTool,
ThinkTool,
UndoEditTool,
ViewFileTool,
WebReadTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
)
from openhands.core.config import AgentConfig
from openhands.core.exceptions import FunctionCallValidationError
from openhands.events.action import (
AgentThinkAction,
BrowseInteractiveAction,
CmdRunAction,
FileEditAction,
FileReadAction,
IPythonRunCellAction,
MessageAction,
)
from openhands.events.event import FileEditSource, FileReadSource
# Helper to create AgentConfig with specific flags
def create_test_config(**kwargs) -> AgentConfig:
defaults = {
'enable_browsing': False,
'enable_llm_editor': False,
'enable_llm_diff': False,
'enable_jupyter': False,
# Add other AgentConfig defaults if necessary for validation
'disabled_microagents': [],
'enable_prompt_extensions': True,
'enable_history_truncation': True,
'enable_som_visual_browsing': True,
}
defaults.update(kwargs)
# Use model_validate to ensure proper Pydantic model creation
return AgentConfig.model_validate(defaults)
def create_mock_response(function_name: str, arguments: dict) -> ModelResponse:
"""Helper function to create a mock response with a tool call."""
return ModelResponse(
@@ -42,6 +80,8 @@ def create_mock_response(function_name: str, arguments: dict) -> ModelResponse:
'finish_reason': 'tool_calls',
}
],
# Add dummy usage if needed by downstream processing
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
)
@@ -98,9 +138,10 @@ def test_execute_ipython_cell_missing_code():
def test_edit_file_valid():
"""Test edit_file with valid arguments."""
"""Test edit_file with valid arguments (LLMBasedFileEditTool)."""
# This tests the LLMBasedFileEditTool specifically
response = create_mock_response(
'edit_file',
LLMBasedFileEditTool['function']['name'], # Use tool name
{'path': '/path/to/file', 'content': 'file content', 'start': 1, 'end': 10},
)
actions = response_to_actions(response)
@@ -110,12 +151,17 @@ def test_edit_file_valid():
assert actions[0].content == 'file content'
assert actions[0].start == 1
assert actions[0].end == 10
assert (
actions[0].impl_source == FileEditSource.LLM_BASED_EDIT
) # Default when tool called
def test_edit_file_missing_required():
"""Test edit_file with missing required arguments."""
"""Test edit_file with missing required arguments (LLMBasedFileEditTool)."""
# Missing path
response = create_mock_response('edit_file', {'content': 'content'})
response = create_mock_response(
LLMBasedFileEditTool['function']['name'], {'content': 'content'}
)
with pytest.raises(FunctionCallValidationError) as exc_info:
response_to_actions(response)
assert 'Missing required argument "path"' in str(exc_info.value)
@@ -139,7 +185,7 @@ def test_str_replace_editor_valid():
assert actions[0].path == '/path/to/file'
assert actions[0].impl_source == FileReadSource.OH_ACI
# Test other commands
# Test other commands (e.g., replace)
response = create_mock_response(
'str_replace_editor',
{
@@ -173,7 +219,9 @@ def test_str_replace_editor_missing_required():
def test_browser_valid():
"""Test browser with valid arguments."""
response = create_mock_response('browser', {'code': "click('button-1')"})
response = create_mock_response(
BrowserTool['function']['name'], {'code': "click('button-1')"}
)
actions = response_to_actions(response)
assert len(actions) == 1
assert isinstance(actions[0], BrowseInteractiveAction)
@@ -183,7 +231,7 @@ def test_browser_valid():
def test_browser_missing_code():
"""Test browser with missing code argument."""
response = create_mock_response('browser', {})
response = create_mock_response(BrowserTool['function']['name'], {})
with pytest.raises(FunctionCallValidationError) as exc_info:
response_to_actions(response)
assert 'Missing required argument "code"' in str(exc_info.value)
@@ -213,6 +261,7 @@ def test_invalid_json_arguments():
'finish_reason': 'tool_calls',
}
],
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
)
with pytest.raises(FunctionCallValidationError) as exc_info:
response_to_actions(response)
@@ -243,3 +292,300 @@ def test_unexpected_argument_handling():
# Verify the error message mentions the unexpected argument
assert 'old_str_prefix' in str(exc_info.value)
assert 'Unexpected argument' in str(exc_info.value)
# =============================================
# Tests for get_tools based on AgentConfig
# =============================================
def test_get_tools_default():
"""Test default tools when no specific editor is enabled."""
config = create_test_config()
tools = get_tools(config)
tool_names = {t['function']['name'] for t in tools}
assert create_str_replace_editor_tool()['function']['name'] in tool_names
assert FencedDiffEditTool['function']['name'] not in tool_names
assert LLMBasedFileEditTool['function']['name'] not in tool_names
assert ViewFileTool['function']['name'] not in tool_names # Included in str_replace
assert (
ListDirectoryTool['function']['name'] not in tool_names
) # Included in str_replace
assert UndoEditTool['function']['name'] not in tool_names # Included in str_replace
def test_get_tools_llm_diff_enabled():
"""Test tools when LLM Diff mode is enabled."""
config = create_test_config(
enable_llm_diff=True,
enable_browsing=True,
enable_jupyter=True,
)
tools = get_tools(config)
tool_names = {t['function']['name'] for t in tools}
# Should include non-edit tools
assert create_cmd_run_tool()['function']['name'] in tool_names
assert ThinkTool['function']['name'] in tool_names
assert FinishTool['function']['name'] in tool_names
assert WebReadTool['function']['name'] in tool_names
assert BrowserTool['function']['name'] in tool_names
assert IPythonTool['function']['name'] in tool_names
assert ViewFileTool['function']['name'] in tool_names
assert ListDirectoryTool['function']['name'] in tool_names
# Should NOT include edit tools or Undo
assert create_str_replace_editor_tool()['function']['name'] not in tool_names
assert FencedDiffEditTool['function']['name'] not in tool_names
assert LLMBasedFileEditTool['function']['name'] not in tool_names
assert UndoEditTool['function']['name'] not in tool_names
def test_get_tools_llm_editor_enabled():
"""Test tools when LLM Editor tool is enabled."""
config = create_test_config(enable_llm_editor=True)
tools = get_tools(config)
tool_names = {t['function']['name'] for t in tools}
assert LLMBasedFileEditTool['function']['name'] in tool_names
assert ViewFileTool['function']['name'] in tool_names
assert ListDirectoryTool['function']['name'] in tool_names
assert UndoEditTool['function']['name'] not in tool_names # Undo not compatible
assert create_str_replace_editor_tool()['function']['name'] not in tool_names
assert FencedDiffEditTool['function']['name'] not in tool_names
# =============================================
# Tests for response_to_actions with LLM_DIFF
# =============================================
# Mock ModelResponse without tool calls, but with content
def create_mock_response_no_tools(content: str) -> ModelResponse:
"""Helper function to create a mock response without tool calls."""
return ModelResponse(
id='mock-no-tools-id',
choices=[
{
'message': {
'tool_calls': None, # Explicitly None
'content': content,
'role': 'assistant',
},
'index': 0,
'finish_reason': 'stop', # Or other non-tool reason
}
],
# Add dummy usage if needed by downstream processing
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
)
def test_response_to_actions_llm_diff_mode_with_blocks():
"""Test parsing diff blocks when llm_diff is enabled and no tool calls."""
content = """
Thinking about the changes...
```python
file.py
<<<<<<< SEARCH
old line
=======
new line
>>>>>>> REPLACE
```
Another block:
```python
file.py
<<<<<<< SEARCH
second old
=======
second new
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
assert len(actions) == 3 # Think + 2 Edits
assert isinstance(actions[0], AgentThinkAction)
assert actions[0].thought == 'Thinking about the changes...'
assert isinstance(actions[1], FileEditAction)
assert actions[1].path == 'file.py'
assert actions[1].search == 'old line\n'
assert actions[1].replace == 'new line\n'
assert actions[1].impl_source == FileEditSource.LLM_DIFF
assert isinstance(actions[2], FileEditAction)
assert actions[2].path == 'file.py'
assert actions[2].search == 'second old\n'
assert actions[2].replace == 'second new\n'
assert actions[2].impl_source == FileEditSource.LLM_DIFF
def test_response_to_actions_llm_diff_mode_no_blocks():
"""Test llm_diff mode when response content has no blocks."""
content = 'Just a simple message.'
response = create_mock_response_no_tools(content)
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
assert len(actions) == 1
assert isinstance(actions[0], MessageAction)
assert actions[0].content == content
def test_response_to_actions_llm_diff_mode_parse_error(mocker):
"""Test llm_diff mode when parsing fails."""
content = """
```python
file.py
<<<<<<< SEARCH
old
# Malformed block - missing divider
new
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
# Mock the parser to raise ValueError
mocker.patch(
'openhands.agenthub.codeact_agent.function_calling.parse_llm_response_for_diffs',
side_effect=ValueError('Test parse error'),
)
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
assert len(actions) == 1
assert isinstance(actions[0], MessageAction)
assert 'Error parsing diff blocks: Test parse error' in actions[0].content
def test_response_to_actions_llm_diff_mode_with_tool_calls():
"""Test llm_diff mode is ignored if tool calls are present."""
# Create a response that HAS tool calls, even though llm_diff is enabled
response = create_mock_response('execute_bash', {'command': 'echo hello'})
# Add some content that looks like a diff block, to ensure it's ignored
response.choices[0]['message']['content'] = """
```python
file.py
<<<<<<< SEARCH
ignored old
=======
ignored new
>>>>>>> REPLACE
```
"""
actions = response_to_actions(response, is_llm_diff_enabled=True)
assert len(actions) == 1
assert isinstance(actions[0], CmdRunAction)
assert actions[0].command == 'echo hello'
def test_response_to_actions_llm_diff_mode_multiple_errors():
"""Test handling of multiple errors in diff blocks."""
content = """
```python
file1.py
<<<<<<< SEARCH
old content
# Missing divider
new content
>>>>>>> REPLACE
```
```python
# Missing filename
<<<<<<< SEARCH
more old content
=======
more new content
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
actions = response_to_actions(response, is_llm_diff_enabled=True)
assert len(actions) == 1
assert isinstance(actions[0], MessageAction)
assert 'Error parsing block #1' in actions[0].content
assert 'Expected `=======`' in actions[0].content
def test_response_to_actions_llm_diff_mode_whitespace_edge_cases():
"""Test handling of whitespace edge cases in diff blocks."""
content = """
```python
file.py
<<<<<<< SEARCH
indented line
trailing space
=======
new indented line
new trailing space
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
actions = response_to_actions(response, is_llm_diff_enabled=True)
assert len(actions) == 1
assert isinstance(actions[0], FileEditAction)
assert actions[0].path == 'file.py'
assert actions[0].search == '\n indented line\ntrailing space \n'
assert actions[0].replace == '\n new indented line\nnew trailing space \n'
def test_response_to_actions_llm_diff_mode_empty_lines():
"""Test handling of empty lines in diff blocks."""
content = """
```python
file.py
<<<<<<< SEARCH
=======
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
actions = response_to_actions(response, is_llm_diff_enabled=True)
assert len(actions) == 1
assert isinstance(actions[0], FileEditAction)
assert actions[0].path == 'file.py'
assert actions[0].search == '\n\n\n'
assert actions[0].replace == '\n\n\n'
# Should process the tool call, not the diff block
assert len(actions) == 1
assert isinstance(actions[0], CmdRunAction)
assert actions[0].command == 'echo hello'
# Check that the thought includes the content that was ignored for parsing
assert 'ignored old' in actions[0].thought
def test_response_to_actions_llm_diff_disabled_no_tools():
"""Test response parsing when llm_diff is disabled and no tool calls."""
content = """
Thinking about the changes...
```python
file.py
<<<<<<< SEARCH
old line
=======
new line
>>>>>>> REPLACE
```
"""
response = create_mock_response_no_tools(content)
actions = response_to_actions(
response, is_llm_diff_enabled=False
) # Explicitly disable
# Should just be a MessageAction, no parsing attempted
assert len(actions) == 1
assert isinstance(actions[0], MessageAction)
assert actions[0].content == content

View File

@@ -0,0 +1,640 @@
# tests/unit/test_llm_diff_parser.py
import pytest
from openhands.agenthub.codeact_agent.llm_diff_parser import (
DiffBlock,
LLMMalformedActionError,
find_filename, # Import if you add tests for this
parse_llm_response_for_diffs,
strip_filename, # Import if you add tests for this
)
# Test cases for parse_llm_response_for_diffs
def test_parse_single_valid_block():
content = """
Some text before.
```python
path/to/file.py
<<<<<<< SEARCH
print("Hello")
=======
print("Hello, World!")
>>>>>>> REPLACE
```
Some text after.
"""
expected_blocks = [
DiffBlock(
filename='path/to/file.py',
search='print("Hello")\n',
replace='print("Hello, World!")\n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_multiple_valid_blocks():
content = """
Block 1:
```python
file1.py
<<<<<<< SEARCH
old line 1
=======
new line 1
>>>>>>> REPLACE
```
Some intermediate text.
Block 2:
```javascript
file2.js
<<<<<<< SEARCH
old js line
=======
new js line
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(filename='file1.py', search='old line 1\n', replace='new line 1\n'),
DiffBlock(filename='file2.js', search='old js line\n', replace='new js line\n'),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_empty_search_block():
content = """
Creating a new file.
```python
new_file.py
<<<<<<< SEARCH
=======
# This is a new file
print("Created!")
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='new_file.py',
search='',
replace='# This is a new file\nprint("Created!")\n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_empty_replace_block():
content = """
Deleting content.
```python
file_to_edit.py
<<<<<<< SEARCH
# Line to delete
print("Delete me")
=======
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='file_to_edit.py',
search='# Line to delete\nprint("Delete me")\n',
replace='',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_no_blocks():
content = 'This is just a regular message without any diff blocks.'
expected_blocks = []
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_malformed_missing_divider():
content = """
```python
file.py
<<<<<<< SEARCH
old content
# Missing divider here
new content
>>>>>>> REPLACE
```
"""
# The implementation raises LLMMalformedActionError for parsing issues
with pytest.raises(LLMMalformedActionError, match='Expected `=======`'):
parse_llm_response_for_diffs(content)
def test_parse_malformed_missing_replace_marker():
content = """
```python
file.py
<<<<<<< SEARCH
old content
=======
new content
# Missing replace marker
```
"""
# The implementation raises LLMMalformedActionError for parsing issues
with pytest.raises(LLMMalformedActionError, match='Expected `>>>>>>> REPLACE`'):
parse_llm_response_for_diffs(content)
def test_parse_filename_inference():
content = """
Editing file1.py
```python
file1.py
<<<<<<< SEARCH
content1
=======
new content1
>>>>>>> REPLACE
```
Now editing the same file again.
```python
<<<<<<< SEARCH
content2
=======
new content2
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(filename='file1.py', search='content1\n', replace='new content1\n'),
DiffBlock(filename='file1.py', search='content2\n', replace='new content2\n'),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_valid_fnames_exact_match():
content = """
```python
src/app.py
<<<<<<< SEARCH
old
=======
new
>>>>>>> REPLACE
```
"""
valid_fnames = ['src/app.py', 'src/utils.py']
expected_blocks = [
DiffBlock(filename='src/app.py', search='old\n', replace='new\n')
]
blocks = parse_llm_response_for_diffs(content, valid_fnames=valid_fnames)
assert blocks == expected_blocks
def test_parse_valid_fnames_fuzzy_match():
content = """
```python
src/appz.py
<<<<<<< SEARCH
old
=======
new
>>>>>>> REPLACE
```
"""
valid_fnames = ['src/app.py', 'src/utils.py']
expected_blocks = [
DiffBlock(filename='src/app.py', search='old\n', replace='new\n')
] # Expects fuzzy match to correct filename
blocks = parse_llm_response_for_diffs(content, valid_fnames=valid_fnames)
assert blocks == expected_blocks
def test_parse_missing_filename_error_if_search_not_empty():
# If search is not empty, a filename MUST be present or inferrable
content = """
```python
<<<<<<< SEARCH
old content requires a filename
=======
new
>>>>>>> REPLACE
```
"""
with pytest.raises(LLMMalformedActionError, match='Bad/missing filename'):
parse_llm_response_for_diffs(content)
def test_parse_missing_filename_ok_if_search_empty_but_no_filename_line():
# If search is empty (new file), but no filename line is found *at all* before the block, it's an error
content = """
```python
<<<<<<< SEARCH
=======
new file content
>>>>>>> REPLACE
```
"""
with pytest.raises(
LLMMalformedActionError, match='Could not determine filename for new file block'
):
parse_llm_response_for_diffs(content)
def test_parse_missing_filename_ok_if_search_empty_and_filename_line_present():
# If search is empty (new file), and a filename line *is* present, it should work
content = """
new_file.py
```python
<<<<<<< SEARCH
=======
new file content
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(filename='new_file.py', search='', replace='new file content\n')
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_two_new_files():
content = """
First new file:
```python
new_file_1.py
<<<<<<< SEARCH
=======
# Content for file 1
print("File 1")
>>>>>>> REPLACE
```
Second new file:
```text
new_file_2.txt
<<<<<<< SEARCH
=======
Content for file 2.
This is plain text.
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='new_file_1.py',
search='',
replace='# Content for file 1\nprint("File 1")\n',
),
DiffBlock(
filename='new_file_2.txt',
search='',
replace='Content for file 2.\nThis is plain text.\n',
),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_one_edit_one_new_file():
content = """
Editing an existing file:
```python
existing_file.py
<<<<<<< SEARCH
# Old line
print("Old")
=======
# New line
print("New")
>>>>>>> REPLACE
```
Creating a new file:
```python
new_file.py
<<<<<<< SEARCH
=======
# New file content
print("Newly created")
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='existing_file.py',
search='# Old line\nprint("Old")\n',
replace='# New line\nprint("New")\n',
),
DiffBlock(
filename='new_file.py',
search='',
replace='# New file content\nprint("Newly created")\n',
),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_two_edits_different_files_explicit():
content = """
First edit:
```python
file_a.py
<<<<<<< SEARCH
# Original line in file A
a = 1
=======
# Modified line in file A
a = 2
>>>>>>> REPLACE
```
Some text between blocks.
Second edit:
```python
file_b.py
<<<<<<< SEARCH
# Original line in file B
b = 'hello'
=======
# Modified line in file B
b = 'world'
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='file_a.py',
search='# Original line in file A\na = 1\n',
replace='# Modified line in file A\na = 2\n',
),
DiffBlock(
filename='file_b.py',
search="# Original line in file B\nb = 'hello'\n",
replace="# Modified line in file B\nb = 'world'\n",
),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_block_without_closing_fence():
content = """
```python
path/to/file.py
<<<<<<< SEARCH
print("Hello")
=======
print("Hello, World!")
>>>>>>> REPLACE
Some text after, no closing fence."""
expected_blocks = [
DiffBlock(
filename='path/to/file.py',
search='print("Hello")\n',
replace='print("Hello, World!")\n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_block_with_extra_content_after_replace():
# Content after REPLACE but before closing fence should be ignored by parser
content = """
```python
path/to/file.py
<<<<<<< SEARCH
print("Hello")
=======
print("Hello, World!")
>>>>>>> REPLACE
This should be ignored.
```
More text."""
expected_blocks = [
DiffBlock(
filename='path/to/file.py',
search='print("Hello")\n',
replace='print("Hello, World!")\n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_malformed_unexpected_divider():
content = """
```python
file.py
<<<<<<< SEARCH
old content
=======
new content
========= UNEXPECTED DIVIDER
>>>>>>> REPLACE
```
"""
with pytest.raises(LLMMalformedActionError, match='Unexpected `=======`'):
parse_llm_response_for_diffs(content)
def test_find_filename_simple():
lines = ['file.py', '```python']
assert find_filename(lines) == 'file.py'
def test_find_filename_with_path():
lines = ['src/core/file.py', '```python']
assert find_filename(lines) == 'src/core/file.py'
def test_find_filename_strip_chars():
lines = ['*`src/app.py`*', '```python']
assert find_filename(lines) == 'src/app.py'
def test_find_filename_strip_colon():
lines = ['File: src/app.py:', '```python']
assert (
find_filename(lines) == 'src/app.py'
) # Assuming strip_filename handles 'File: ' prefix
def test_find_filename_no_filename():
lines = ['Just some text', '```python']
assert find_filename(lines) is None
def test_find_filename_too_far():
lines = [
'file.py',
'another line',
'yet another',
'```python',
] # file.py is too far back
assert find_filename(lines) is None
def test_find_filename_valid_fnames_exact():
lines = ['app.py', '```python']
valid = ['src/app.py', 'app.py']
assert find_filename(lines, valid_fnames=valid) == 'app.py'
def test_find_filename_valid_fnames_fuzzy():
lines = ['appz.py', '```python']
valid = ['src/app.py', 'app.py']
assert find_filename(lines, valid_fnames=valid) == 'app.py'
def test_find_filename_valid_fnames_no_match():
lines = ['other.py', '```python']
valid = ['src/app.py', 'app.py']
# Should still return the best guess if no valid match
assert find_filename(lines, valid_fnames=valid) == 'other.py'
def test_strip_filename_basic():
assert strip_filename(' file.py ') == 'file.py'
assert strip_filename('`file.py`') == 'file.py'
assert strip_filename('*file.py*') == 'file.py'
assert strip_filename('file.py:') == 'file.py'
assert strip_filename('# file.py') == 'file.py'
assert strip_filename('`*# file.py:*` ') == 'file.py'
def test_strip_filename_no_strip():
assert strip_filename('file.py') == 'file.py'
def test_strip_filename_none():
assert strip_filename(' ') is None
assert strip_filename('```') is None
assert strip_filename('...') is None
def test_parse_with_different_fence():
content = """
~~~markdown
path/to/file.md
<<<<<<< SEARCH
Old text
=======
New text
>>>>>>> REPLACE
~~~
"""
expected_blocks = [
DiffBlock(filename='path/to/file.md', search='Old text\n', replace='New text\n')
]
blocks = parse_llm_response_for_diffs(content, fence=('~~~', '~~~'))
assert blocks == expected_blocks
def test_parse_whitespace_handling():
# Tests if leading/trailing whitespace inside blocks is preserved
content = """
```python
file_with_whitespace.py
<<<<<<< SEARCH
leading whitespace
trailing whitespace
=======
new leading whitespace
new trailing whitespace
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='file_with_whitespace.py',
search=' leading whitespace\ntrailing whitespace \n',
replace=' new leading whitespace\nnew trailing whitespace \n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_consecutive_blocks():
# Tests parsing blocks immediately following each other
content = """
```python
file_a.py
<<<<<<< SEARCH
block 1 search
=======
block 1 replace
>>>>>>> REPLACE
```
```python
file_b.py
<<<<<<< SEARCH
block 2 search
=======
block 2 replace
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='file_a.py', search='block 1 search\n', replace='block 1 replace\n'
),
DiffBlock(
filename='file_b.py', search='block 2 search\n', replace='block 2 replace\n'
),
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks
def test_parse_filename_like_text_in_search():
# Tests that text resembling a filename inside SEARCH doesn't confuse the parser
content = """
```python
real_file.py
<<<<<<< SEARCH
This line contains fake_file.py
=======
This is the replacement.
>>>>>>> REPLACE
```
"""
expected_blocks = [
DiffBlock(
filename='real_file.py',
search='This line contains fake_file.py\n',
replace='This is the replacement.\n',
)
]
blocks = parse_llm_response_for_diffs(content)
assert blocks == expected_blocks

View File

@@ -5,6 +5,7 @@ import pytest
from openhands.controller.state.control_flags import IterationControlFlag
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.message import Message, TextContent
from openhands.events.observation.agent import MicroagentKnowledge
from openhands.microagent import BaseMicroagent
@@ -268,7 +269,28 @@ Today's date is {{ runtime_info.date }}
def test_prompt_manager_initialization_error():
"""Test that PromptManager raises an error if the prompt directory is not set."""
with pytest.raises(ValueError, match='Prompt directory is not set'):
PromptManager(None)
PromptManager(None, AgentConfig()) # Need config even for this error check
def test_prompt_manager_system_prompt_selection(prompt_dir):
"""Test that PromptManager loads the correct system prompt based on config."""
# Ensure both prompt files exist in the temp directory (copied by fixture)
assert os.path.exists(os.path.join(prompt_dir, 'system_prompt.j2'))
assert os.path.exists(os.path.join(prompt_dir, 'system_prompt_llm_diff.j2'))
# Case 1: llm_diff is False (default)
config_default = AgentConfig() # Defaults to enable_llm_diff=False
manager_default = PromptManager(prompt_dir=prompt_dir, config=config_default)
# Check the loaded template's origin filename
assert 'system_prompt.j2' in manager_default.system_template.filename
assert 'system_prompt_llm_diff.j2' not in manager_default.system_template.filename
# Case 2: llm_diff is True
config_llm_diff = AgentConfig(enable_llm_diff=True)
manager_llm_diff = PromptManager(prompt_dir=prompt_dir, config=config_llm_diff)
# Check the loaded template's origin filename
assert 'system_prompt_llm_diff.j2' in manager_llm_diff.system_template.filename
assert 'system_prompt.j2' not in manager_llm_diff.system_template.filename
def test_prompt_manager_custom_system_prompt_filename(prompt_dir):

View File

@@ -0,0 +1,211 @@
# tests/unit/test_runtime_edit.py
from unittest.mock import MagicMock
import pytest
from openhands.core.config import ( # Assuming AppConfig is needed
AppConfig,
SandboxConfig,
)
from openhands.events.action import FileEditAction, FileWriteAction
from openhands.events.event import FileEditSource
from openhands.events.observation import (
ErrorObservation,
FileEditObservation,
FileReadObservation,
FileWriteObservation,
)
from openhands.runtime.utils.edit import FileEditRuntimeMixin
# Mock AppConfig if necessary for FileEditRuntimeMixin initialization or methods
@pytest.fixture
def mock_config():
config = MagicMock(spec=AppConfig)
config.sandbox = MagicMock(spec=SandboxConfig)
config.sandbox.enable_auto_lint = (
False # Disable linting for simplicity unless testing it
)
return config
# Create a dummy class that inherits the mixin for testing
class MockRuntime(FileEditRuntimeMixin):
def __init__(self, config, *args, **kwargs):
# Initialize FileEditRuntimeMixin part
# Assuming enable_llm_editor=False as we are testing llm_diff_edit
super().__init__(enable_llm_editor=False, *args, **kwargs) # noqa: B026
self.config = config
# Mock essential methods used by llm_diff_edit
self.read = MagicMock()
self.write = MagicMock()
# Mock _get_lint_error if linting is enabled in tests
self._get_lint_error = MagicMock(return_value=None)
# Implement abstract methods if FileEditRuntimeInterface requires them
def run_ipython(self, action):
pass
# Add other abstract methods if needed
@pytest.fixture
def mock_runtime(mock_config):
return MockRuntime(config=mock_config)
# Test cases for llm_diff_edit
def test_llm_diff_edit_exact_match(mock_runtime):
action = FileEditAction(
path='test.py',
search="print('old')\nline2\n",
replace="print('new')\nline2\n",
impl_source=FileEditSource.LLM_DIFF,
)
original_content = "line0\nprint('old')\nline2\nline3\n"
expected_new_content = "line0\nprint('new')\nline2\nline3\n"
mock_runtime.read.return_value = FileReadObservation(
content=original_content, path='test.py'
)
mock_runtime.write.return_value = FileWriteObservation(
content='', path='test.py'
) # Assume write succeeds
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, FileEditObservation)
assert obs.path == 'test.py'
assert obs.old_content == original_content
assert obs.new_content == expected_new_content
mock_runtime.write.assert_called_once_with(
FileWriteAction(path='test.py', content=expected_new_content)
)
def test_llm_diff_edit_whitespace_flexible_match(mock_runtime):
action = FileEditAction(
path='test.py',
search=" print('old')\n line2\n", # Search has 2 spaces
replace=" print('new')\n line2\n", # Replace also has 2 spaces (relative indent)
impl_source=FileEditSource.LLM_DIFF,
)
original_content = (
"line0\n print('old')\n line2\nline3\n" # Original has 4 spaces
)
expected_new_content = (
"line0\n print('new')\n line2\nline3\n" # Expect 4 spaces preserved
)
mock_runtime.read.return_value = FileReadObservation(
content=original_content, path='test.py'
)
mock_runtime.write.return_value = FileWriteObservation(content='', path='test.py')
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, FileEditObservation)
assert obs.new_content == expected_new_content
mock_runtime.write.assert_called_once_with(
FileWriteAction(path='test.py', content=expected_new_content)
)
def test_llm_diff_edit_create_file(mock_runtime):
action = FileEditAction(
path='new_file.py',
search='', # Empty search for creation
replace="print('hello new file')\n",
impl_source=FileEditSource.LLM_DIFF,
)
original_content = '' # File doesn't exist
expected_new_content = "print('hello new file')\n"
mock_runtime.read.return_value = ErrorObservation(
content='File not found: new_file.py'
)
mock_runtime.write.return_value = FileWriteObservation(
content='', path='new_file.py'
)
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, FileEditObservation)
assert obs.path == 'new_file.py'
assert obs.old_content == original_content
assert obs.new_content == expected_new_content
assert obs.prev_exist is False
mock_runtime.write.assert_called_once_with(
FileWriteAction(path='new_file.py', content=expected_new_content)
)
def test_llm_diff_edit_append_file(mock_runtime):
action = FileEditAction(
path='append.py',
search='', # Empty search for append
replace="print('appended')\n",
impl_source=FileEditSource.LLM_DIFF,
)
original_content = "print('original')\n"
expected_new_content = "print('original')\nprint('appended')\n"
mock_runtime.read.return_value = FileReadObservation(
content=original_content, path='append.py'
)
mock_runtime.write.return_value = FileWriteObservation(content='', path='append.py')
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, FileEditObservation)
assert obs.new_content == expected_new_content
assert obs.prev_exist is True
mock_runtime.write.assert_called_once_with(
FileWriteAction(path='append.py', content=expected_new_content)
)
def test_llm_diff_edit_no_match(mock_runtime):
action = FileEditAction(
path='test.py',
search="print('not found')\n",
replace="print('should not apply')\n",
impl_source=FileEditSource.LLM_DIFF,
)
original_content = "print('something else')\n"
mock_runtime.read.return_value = FileReadObservation(
content=original_content, path='test.py'
)
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, ErrorObservation)
assert 'SEARCH block did not match' in obs.content
assert 'FAILED SEARCH BLOCK' in obs.content
assert "print('not found')" in obs.content
mock_runtime.write.assert_not_called() # Write should not be called on failure
def test_llm_diff_edit_create_file_non_empty_search_error(mock_runtime):
action = FileEditAction(
path='new_file.py',
search='some search content', # Non-empty search
replace="print('hello new file')\n",
impl_source=FileEditSource.LLM_DIFF,
)
mock_runtime.read.return_value = ErrorObservation(
content='File not found: new_file.py'
)
obs = mock_runtime.llm_diff_edit(action)
assert isinstance(obs, ErrorObservation)
assert 'File new_file.py not found and SEARCH block is not empty' in obs.content
mock_runtime.write.assert_not_called()
# TODO: Add tests for linting interaction if needed