mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
79 Commits
feat/gemin
...
enyst/gemi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a49317c07 | ||
|
|
99f30a3b24 | ||
|
|
fb12270e83 | ||
|
|
e823d88567 | ||
|
|
448262350c | ||
|
|
5daf947bbe | ||
|
|
d82930322d | ||
|
|
e9e5a5f6e6 | ||
|
|
17b6d87f82 | ||
|
|
dd0bc42e4f | ||
|
|
2d70af8bd5 | ||
|
|
05478ad5b6 | ||
|
|
fa2a5ab3cf | ||
|
|
824f30c89a | ||
|
|
051c033257 | ||
|
|
1e67c5a10c | ||
|
|
709e0035a9 | ||
|
|
0b96072604 | ||
|
|
fcb56fb87f | ||
|
|
c839573f4d | ||
|
|
9b8633275f | ||
|
|
9a88e7b5d0 | ||
|
|
e27a5ef755 | ||
|
|
d31171bb6d | ||
|
|
a2aa1bcc93 | ||
|
|
237b08c2a9 | ||
|
|
7e888a6943 | ||
|
|
a7b713883d | ||
|
|
222242e148 | ||
|
|
d29b402ef4 | ||
|
|
fe91cf627e | ||
|
|
ab7739be15 | ||
|
|
3952fa1fb7 | ||
|
|
d021341568 | ||
|
|
75d1cb611b | ||
|
|
83b76d0a2b | ||
|
|
eb966cb694 | ||
|
|
f689296897 | ||
|
|
c987504145 | ||
|
|
b6a76a5a4a | ||
|
|
5bf81b4d1d | ||
|
|
d30bf9622e | ||
|
|
cd6e716c74 | ||
|
|
3dbf74f400 | ||
|
|
63b238a62b | ||
|
|
b39d6d0c17 | ||
|
|
ebfe189e19 | ||
|
|
e6eb088272 | ||
|
|
fd9aa8bb7b | ||
|
|
072c7f026f | ||
|
|
37a6cba50a | ||
|
|
14b5dd8bfa | ||
|
|
a47c2bf0d7 | ||
|
|
e05bb8b0e8 | ||
|
|
28f0949e37 | ||
|
|
8e471d3f4a | ||
|
|
4546a63320 | ||
|
|
1f08228905 | ||
|
|
5c8ecb72f5 | ||
|
|
99f0a91430 | ||
|
|
954569d29b | ||
|
|
4f1520cf6d | ||
|
|
19d7077514 | ||
|
|
223635a1d7 | ||
|
|
31bd1597cd | ||
|
|
41149836d9 | ||
|
|
726cb43f3e | ||
|
|
39a838a972 | ||
|
|
99d1ee3429 | ||
|
|
29960e2fce | ||
|
|
41c3dd4b85 | ||
|
|
c1ba9f6a3c | ||
|
|
a312fa9fb3 | ||
|
|
13e673be8e | ||
|
|
738f7d0497 | ||
|
|
a84a749a48 | ||
|
|
e79c0259d3 | ||
|
|
6ddf228477 | ||
|
|
5c500ce7bf |
@@ -14,7 +14,7 @@ OpenHands includes and adapts the following open source projects. We are gratefu
|
||||
|
||||
#### [Aider](https://github.com/paul-gauthier/aider)
|
||||
- License: Apache License 2.0
|
||||
- Description: AI pair programming tool. OpenHands has adapted and integrated its linter module for code-related tasks in [`agentskills utilities`](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills/utils/aider)
|
||||
- Description: AI pair programming tool. OpenHands has adapted and integrated its linter module for code-related tasks in [`agentskills utilities`](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills/utils/aider) and its fenced diff edit.
|
||||
|
||||
#### [BrowserGym](https://github.com/ServiceNow/BrowserGym)
|
||||
- License: Apache License 2.0
|
||||
|
||||
@@ -247,6 +247,7 @@ def get_config(
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=ENABLE_LLM_EDITOR,
|
||||
enable_mcp=False,
|
||||
enable_llm_diff=True,
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
@@ -289,6 +290,17 @@ def initialize_runtime(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
|
||||
|
||||
# Set DEBUG environment variable
|
||||
action = CmdRunAction(command="echo 'export DEBUG=1' >> ~/.bashrc")
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0,
|
||||
f'Failed to export DEBUG=1: {str(obs)}',
|
||||
)
|
||||
|
||||
# inject the init script
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ from openhands.events.event import Event
|
||||
from openhands.events.serialization.event import event_to_dict
|
||||
from openhands.events.utils import get_pairs_from_events
|
||||
from openhands.memory.condenser import get_condensation_metadata
|
||||
from openhands.runtime.utils.request import RequestHTTPError
|
||||
|
||||
|
||||
class EvalMetadata(BaseModel):
|
||||
@@ -533,13 +534,14 @@ def compatibility_for_eval_history_pairs(
|
||||
|
||||
def is_fatal_evaluation_error(error: str | None) -> bool:
|
||||
"""
|
||||
The AgentController class overrides last error for certain exceptions
|
||||
We want to ensure those exeption do not overlap with fatal exceptions defined here
|
||||
This is because we do a comparisino against the stringified error
|
||||
Check if the error is a fatal evaluation error, which triggers a rerun.
|
||||
"""
|
||||
if not error:
|
||||
return False
|
||||
|
||||
# IMPORTANT: The AgentController class overrides last error for certain exceptions
|
||||
# We need to ensure those exceptions do not overlap with fatal exceptions defined here
|
||||
# This is because we do a comparison against the stringified error
|
||||
FATAL_EXCEPTIONS = [
|
||||
AgentRuntimeError,
|
||||
AgentRuntimeBuildError,
|
||||
@@ -549,6 +551,7 @@ def is_fatal_evaluation_error(error: str | None) -> bool:
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
ConnectionError,
|
||||
RequestHTTPError,
|
||||
]
|
||||
|
||||
if any(exception.__name__ in error for exception in FATAL_EXCEPTIONS):
|
||||
|
||||
@@ -14,11 +14,13 @@ from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
|
||||
from openhands.agenthub.codeact_agent.tools.browser import BrowserTool
|
||||
from openhands.agenthub.codeact_agent.tools.finish import FinishTool
|
||||
from openhands.agenthub.codeact_agent.tools.ipython import IPythonTool
|
||||
from openhands.agenthub.codeact_agent.tools.list_directory import ListDirectoryTool
|
||||
from openhands.agenthub.codeact_agent.tools.llm_based_edit import LLMBasedFileEditTool
|
||||
from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
|
||||
create_str_replace_editor_tool,
|
||||
)
|
||||
from openhands.agenthub.codeact_agent.tools.think import ThinkTool
|
||||
from openhands.agenthub.codeact_agent.tools.view_file import ViewFileTool
|
||||
from openhands.controller.agent import Agent
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
@@ -95,6 +97,7 @@ class CodeActAgent(Agent):
|
||||
if self._prompt_manager is None:
|
||||
self._prompt_manager = PromptManager(
|
||||
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
|
||||
config=self.config,
|
||||
system_prompt_filename=self.config.system_prompt_filename,
|
||||
)
|
||||
|
||||
@@ -126,14 +129,30 @@ class CodeActAgent(Agent):
|
||||
tools.append(BrowserTool)
|
||||
if self.config.enable_jupyter:
|
||||
tools.append(IPythonTool)
|
||||
if self.config.enable_llm_editor:
|
||||
|
||||
# Determine which editor tool(s) and utility tools to add based on config
|
||||
if self.config.enable_llm_diff:
|
||||
# Use LLM Diff mode:
|
||||
# - LLM generates diffs in text, only non-edit tools available
|
||||
# - No UndoEditTool
|
||||
tools.append(ViewFileTool)
|
||||
tools.append(ListDirectoryTool)
|
||||
# NO EDITING TOOLS (LLMBasedFileEditTool, str_replace_editor)
|
||||
# NO UndoEditTool
|
||||
elif self.config.enable_llm_editor:
|
||||
# Use LLM-based editor tool + separate utils (NO Undo)
|
||||
tools.append(LLMBasedFileEditTool)
|
||||
tools.append(ViewFileTool)
|
||||
tools.append(ListDirectoryTool)
|
||||
# No UndoEditTool here
|
||||
elif self.config.enable_editor:
|
||||
# Fallback to the complete str_replace_editor (includes view, list, undo)
|
||||
tools.append(
|
||||
create_str_replace_editor_tool(
|
||||
use_short_description=use_short_tool_desc
|
||||
)
|
||||
)
|
||||
|
||||
return tools
|
||||
|
||||
def reset(self) -> None:
|
||||
@@ -270,4 +289,5 @@ class CodeActAgent(Agent):
|
||||
return codeact_function_calling.response_to_actions(
|
||||
response,
|
||||
mcp_tool_names=list(self.mcp_tools.keys()),
|
||||
is_llm_diff_enabled=self.config.enable_llm_diff,
|
||||
)
|
||||
|
||||
@@ -9,12 +9,19 @@ from litellm import (
|
||||
ModelResponse,
|
||||
)
|
||||
|
||||
from openhands.agenthub.codeact_agent.llm_diff_parser import (
|
||||
DiffBlock,
|
||||
parse_llm_response_for_diffs,
|
||||
)
|
||||
from openhands.agenthub.codeact_agent.tools import (
|
||||
BrowserTool,
|
||||
FinishTool,
|
||||
IPythonTool,
|
||||
ListDirectoryTool,
|
||||
LLMBasedFileEditTool,
|
||||
ThinkTool,
|
||||
UndoEditTool,
|
||||
ViewFileTool,
|
||||
create_cmd_run_tool,
|
||||
create_str_replace_editor_tool,
|
||||
)
|
||||
@@ -51,25 +58,139 @@ def combine_thought(action: Action, thought: str) -> Action:
|
||||
|
||||
|
||||
def response_to_actions(
|
||||
response: ModelResponse, mcp_tool_names: list[str] | None = None
|
||||
response: ModelResponse,
|
||||
mcp_tool_names: list[str] | None = None,
|
||||
is_llm_diff_enabled: bool = False,
|
||||
) -> list[Action]:
|
||||
actions: list[Action] = []
|
||||
"""
|
||||
Parses the LLM response and converts it into a list of OpenHands Actions.
|
||||
|
||||
The function handles two main types of responses:
|
||||
1. Tool calls - When the LLM response contains function calls
|
||||
2. Diff blocks - When is_llm_diff_enabled=True and the response contains code diffs
|
||||
|
||||
For tool calls, it converts each function call into a corresponding Action:
|
||||
- execute_bash -> CmdRunAction
|
||||
- execute_ipython_cell -> IPythonRunCellAction
|
||||
- browser -> BrowseInteractiveAction
|
||||
- web_read -> BrowseURLAction
|
||||
- str_replace_editor -> FileEditAction/FileReadAction
|
||||
- think -> AgentThinkAction
|
||||
- finish -> AgentFinishAction
|
||||
- delegate_to_browsing_agent -> AgentDelegateAction
|
||||
|
||||
For diff blocks (when is_llm_diff_enabled=True), it parses blocks in the format:
|
||||
```language
|
||||
filename.ext
|
||||
<<<<<<< SEARCH
|
||||
old code
|
||||
=======
|
||||
new code
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Each diff block is converted to a FileEditAction with:
|
||||
- path = filename
|
||||
- search = old code
|
||||
- replace = new code
|
||||
- impl_source = FileEditSource.LLM_DIFF
|
||||
|
||||
Examples:
|
||||
# Tool call response
|
||||
response = {
|
||||
"tool_calls": [{
|
||||
"function": {
|
||||
"name": "execute_bash",
|
||||
"arguments": {"command": "ls", "is_input": "false"}
|
||||
}
|
||||
}]
|
||||
}
|
||||
-> [CmdRunAction(command="ls", is_input=False)]
|
||||
|
||||
# Diff block response
|
||||
response = {
|
||||
"content": '''
|
||||
```python
|
||||
app.py
|
||||
<<<<<<< SEARCH
|
||||
old_code
|
||||
=======
|
||||
new_code
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
'''
|
||||
}
|
||||
-> [FileEditAction(path="app.py", search="old_code", replace="new_code")]
|
||||
|
||||
Args:
|
||||
response: The ModelResponse from the LLM.
|
||||
mcp_tool_names: Optional list of MCP tool names to handle.
|
||||
is_llm_diff_enabled: If True, will attempt to parse diff blocks from message
|
||||
content if no tool calls exist. Defaults to False.
|
||||
|
||||
Returns:
|
||||
A list of Action objects.
|
||||
|
||||
Note:
|
||||
- If both tool calls and diff blocks exist, tool calls take precedence
|
||||
- Message content outside of tool calls/diff blocks becomes MessageAction
|
||||
- At least one action is always returned (AgentThinkAction as fallback)
|
||||
"""
|
||||
all_actions: list[Action] = []
|
||||
parsed_llm_diff_actions: list[Action] = []
|
||||
tool_call_actions: list[Action] = []
|
||||
message_content: str = ''
|
||||
diff_parse_error: Exception | None = None
|
||||
action: Action
|
||||
|
||||
assert len(response.choices) == 1, 'Only one choice is supported for now'
|
||||
choice = response.choices[0]
|
||||
assistant_msg = choice.message
|
||||
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
|
||||
# Check if there's assistant_msg.content. If so, add it to the thought
|
||||
thought = ''
|
||||
if isinstance(assistant_msg.content, str):
|
||||
thought = assistant_msg.content
|
||||
elif isinstance(assistant_msg.content, list):
|
||||
for msg in assistant_msg.content:
|
||||
if msg['type'] == 'text':
|
||||
thought += msg['text']
|
||||
|
||||
# Process each tool call to OpenHands action
|
||||
# 1. Extract message content
|
||||
if isinstance(assistant_msg.content, str):
|
||||
message_content = assistant_msg.content
|
||||
elif isinstance(assistant_msg.content, list):
|
||||
for msg in assistant_msg.content:
|
||||
if msg['type'] == 'text':
|
||||
message_content += msg['text']
|
||||
|
||||
# 2. Try parsing diff blocks if enabled and content exists
|
||||
if is_llm_diff_enabled and message_content:
|
||||
logger.debug('LLM Diff mode enabled, attempting to parse response content.')
|
||||
try:
|
||||
# Call the updated parser, expecting list[DiffBlock]
|
||||
parsed_blocks: list[DiffBlock] = parse_llm_response_for_diffs(
|
||||
message_content
|
||||
)
|
||||
|
||||
if parsed_blocks: # Check if the list is not empty
|
||||
logger.info(
|
||||
f'Parsed {len(parsed_blocks)} diff blocks from LLM response.'
|
||||
)
|
||||
# Create FileEditActions from the DiffBlock objects
|
||||
for block in parsed_blocks:
|
||||
action = FileEditAction(
|
||||
path=block.filename,
|
||||
search=block.search,
|
||||
replace=block.replace,
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
parsed_llm_diff_actions.append(action)
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f'Error parsing LLM diff blocks: {e}')
|
||||
diff_parse_error = e # Store error for potential later use
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f'Unexpected error during LLM diff parsing: {e}', exc_info=True
|
||||
)
|
||||
diff_parse_error = e # Store error
|
||||
|
||||
# 3. Process tool calls if they exist
|
||||
tool_calls_exist = hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls
|
||||
if tool_calls_exist:
|
||||
# Process each tool call
|
||||
for i, tool_call in enumerate(assistant_msg.tool_calls):
|
||||
action: Action
|
||||
logger.debug(f'Tool call in function_calling.py: {tool_call}')
|
||||
try:
|
||||
arguments = json.loads(tool_call.function.arguments)
|
||||
@@ -198,6 +319,39 @@ def response_to_actions(
|
||||
**valid_kwargs,
|
||||
)
|
||||
# ================================================
|
||||
# New Utility Tools (Routing to OH_ACI)
|
||||
# ================================================
|
||||
elif tool_call.function.name == ViewFileTool['function']['name']:
|
||||
if 'path' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "path" in tool call {tool_call.function.name}'
|
||||
)
|
||||
action = FileReadAction(
|
||||
path=arguments['path'],
|
||||
impl_source=FileReadSource.OH_ACI,
|
||||
view_range=arguments.get('view_range', None),
|
||||
)
|
||||
elif tool_call.function.name == ListDirectoryTool['function']['name']:
|
||||
if 'path' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "path" in tool call {tool_call.function.name}'
|
||||
)
|
||||
action = FileReadAction(
|
||||
path=arguments['path'],
|
||||
impl_source=FileReadSource.OH_ACI,
|
||||
# view_range is not applicable for directories
|
||||
)
|
||||
elif tool_call.function.name == UndoEditTool['function']['name']:
|
||||
if 'path' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "path" in tool call {tool_call.function.name}'
|
||||
)
|
||||
action = FileEditAction(
|
||||
path=arguments['path'],
|
||||
command='undo_edit',
|
||||
impl_source=FileEditSource.OH_ACI,
|
||||
)
|
||||
# ================================================
|
||||
# AgentThinkAction
|
||||
# ================================================
|
||||
elif tool_call.function.name == ThinkTool['function']['name']:
|
||||
@@ -226,9 +380,6 @@ def response_to_actions(
|
||||
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
|
||||
)
|
||||
|
||||
# We only add thought to the first action
|
||||
if i == 0:
|
||||
action = combine_thought(action, thought)
|
||||
# Add metadata for tool calling
|
||||
action.tool_call_metadata = ToolCallMetadata(
|
||||
tool_call_id=tool_call.id,
|
||||
@@ -236,21 +387,66 @@ def response_to_actions(
|
||||
model_response=response,
|
||||
total_calls_in_response=len(assistant_msg.tool_calls),
|
||||
)
|
||||
actions.append(action)
|
||||
else:
|
||||
actions.append(
|
||||
MessageAction(
|
||||
content=str(assistant_msg.content) if assistant_msg.content else '',
|
||||
wait_for_response=True,
|
||||
)
|
||||
)
|
||||
tool_call_actions.append(action)
|
||||
|
||||
# Add response id to actions
|
||||
# This will ensure we can match both actions without tool calls (e.g. MessageAction)
|
||||
# and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.)
|
||||
# with the token usage data
|
||||
for action in actions:
|
||||
# 4. Combine actions: diffs first (if any), then tool calls (if any)
|
||||
all_actions.extend(parsed_llm_diff_actions)
|
||||
all_actions.extend(tool_call_actions)
|
||||
|
||||
# 5. Handle the case where no actions were generated (neither diffs nor tool calls)
|
||||
if not all_actions:
|
||||
# If there was a diff parsing error, report it (even if no blocks were parsed)
|
||||
if diff_parse_error:
|
||||
all_actions.append(
|
||||
MessageAction(
|
||||
content=f'[Error parsing diff blocks: {diff_parse_error}]\nOriginal Content:\n{message_content}',
|
||||
wait_for_response=True,
|
||||
)
|
||||
)
|
||||
# Otherwise, treat as a regular message if content exists
|
||||
elif message_content:
|
||||
all_actions.append(
|
||||
MessageAction(
|
||||
content=message_content,
|
||||
wait_for_response=True,
|
||||
)
|
||||
)
|
||||
# If no actions AND no content, add a default Think action
|
||||
else:
|
||||
logger.warning(
|
||||
f'No actions generated and no message content for response: {response.id}'
|
||||
)
|
||||
all_actions.append(
|
||||
AgentThinkAction(
|
||||
thought='[No actionable content or tool calls found in response]'
|
||||
)
|
||||
)
|
||||
|
||||
# If actions *were* generated, determine and apply thought
|
||||
else:
|
||||
# 6. Determine contextual thought and apply to the first non-AgentThinkAction
|
||||
contextual_thought = message_content.strip() if message_content else None
|
||||
if contextual_thought:
|
||||
for i, action in enumerate(all_actions):
|
||||
if not isinstance(action, AgentThinkAction):
|
||||
all_actions[i] = combine_thought(action, contextual_thought)
|
||||
break # Apply thought only to the first eligible action
|
||||
|
||||
# 7. Add response id to all actions
|
||||
for action in all_actions:
|
||||
action.response_id = response.id
|
||||
|
||||
assert len(actions) >= 1
|
||||
return actions
|
||||
# 8. Final check (ensure at least one action exists)
|
||||
if not all_actions:
|
||||
# This case should ideally be handled by step 5, but as a safeguard:
|
||||
logger.error(
|
||||
f'CRITICAL: No actions generated for response {response.id} after all checks.'
|
||||
)
|
||||
# Create a default Think action if somehow we ended up with no actions
|
||||
all_actions.append(
|
||||
AgentThinkAction(thought='[Critical Error: Failed to generate any action]')
|
||||
)
|
||||
if all_actions[0]:
|
||||
all_actions[0].response_id = response.id
|
||||
|
||||
return all_actions
|
||||
|
||||
408
openhands/agenthub/codeact_agent/llm_diff_parser.py
Normal file
408
openhands/agenthub/codeact_agent/llm_diff_parser.py
Normal file
@@ -0,0 +1,408 @@
|
||||
import difflib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from openhands.core.exceptions import LLMMalformedActionError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Regex patterns for search / replace
|
||||
HEAD = r'^<{5,9} SEARCH\s*$'
|
||||
DIVIDER = r'^={5,9}\s*$'
|
||||
UPDATED = r'^>{5,9} REPLACE\s*$'
|
||||
|
||||
HEAD_ERR = '<<<<<<< SEARCH'
|
||||
DIVIDER_ERR = '======='
|
||||
UPDATED_ERR = '>>>>>>> REPLACE'
|
||||
|
||||
DEFAULT_FENCE = ('```', '```')
|
||||
TRIPLE_BACKTICKS = '```'
|
||||
|
||||
|
||||
@dataclass
|
||||
class DiffBlock:
|
||||
"""Represents a parsed SEARCH/REPLACE diff block."""
|
||||
|
||||
filename: str
|
||||
search: str
|
||||
replace: str
|
||||
|
||||
|
||||
def strip_filename(filename: str, fence: tuple[str, str] = DEFAULT_FENCE) -> str | None:
|
||||
"""
|
||||
Strips potential wrapping characters from a filename line.
|
||||
|
||||
The function handles various common formats that LLMs might use to specify filenames:
|
||||
- Markdown backticks: `filename.py`
|
||||
- Emphasis markers: *filename.py*
|
||||
- File prefixes: # filename.py
|
||||
- Trailing colons: filename.py:
|
||||
- Leading/trailing whitespace
|
||||
|
||||
Examples:
|
||||
strip_filename(' file.py ') -> 'file.py'
|
||||
strip_filename('`file.py`') -> 'file.py'
|
||||
strip_filename('*file.py*') -> 'file.py'
|
||||
strip_filename('file.py:') -> 'file.py'
|
||||
strip_filename('# file.py') -> 'file.py'
|
||||
strip_filename('`*# file.py:*` ') -> 'file.py'
|
||||
strip_filename('...') -> None
|
||||
strip_filename('```') -> None
|
||||
|
||||
Args:
|
||||
filename: The string containing a potential filename
|
||||
fence: tuple of opening and closing fence markers (e.g., ('```', '```'))
|
||||
|
||||
Returns:
|
||||
The cleaned filename string, or None if no valid filename found
|
||||
"""
|
||||
filename = filename.strip()
|
||||
|
||||
if filename == '...':
|
||||
return None
|
||||
|
||||
start_fence = fence[0]
|
||||
if filename.startswith(start_fence) or filename.startswith(TRIPLE_BACKTICKS):
|
||||
return None # Should not start with fence
|
||||
|
||||
filename = filename.rstrip(':')
|
||||
filename = filename.lstrip('#')
|
||||
filename = filename.strip()
|
||||
filename = filename.strip('`')
|
||||
filename = filename.strip('*')
|
||||
|
||||
return filename if filename else None
|
||||
|
||||
|
||||
def find_filename(
|
||||
lines: list[str],
|
||||
fence: tuple[str, str] = DEFAULT_FENCE,
|
||||
valid_fnames: list[str] | None = None,
|
||||
) -> str | None:
|
||||
"""
|
||||
Searches preceding lines for a filename, handling potential fences.
|
||||
Adapted from Aider's find_filename.
|
||||
|
||||
The function looks through the provided lines (up to 3 lines back) to find a filename,
|
||||
using several strategies in order:
|
||||
|
||||
1. Direct filename search:
|
||||
- Looks for a filename in each line
|
||||
- Stops at non-fence lines if no filename found
|
||||
- Continues past fences only if no filename found yet
|
||||
|
||||
2. Validation against valid_fnames (if provided):
|
||||
a. Exact match: Returns first filename that exactly matches a valid filename
|
||||
b. Fuzzy match: Uses difflib to find close matches (cutoff=0.8)
|
||||
c. Extension heuristic: If no match, accepts any filename with an extension
|
||||
|
||||
3. Fallback: Returns the last potential filename found if no better match
|
||||
|
||||
Examples:
|
||||
# Simple case
|
||||
find_filename(['file.py', '```python']) -> 'file.py'
|
||||
|
||||
# With path
|
||||
find_filename(['src/core/file.py', '```python']) -> 'src/core/file.py'
|
||||
|
||||
# With valid_fnames
|
||||
find_filename(['appz.py', '```python'], valid_fnames=['app.py']) -> 'app.py'
|
||||
|
||||
# No valid filename
|
||||
find_filename(['Just some text', '```python']) -> None
|
||||
|
||||
# Too far back
|
||||
find_filename(['file.py', 'another line', 'yet another', '```python']) -> None
|
||||
|
||||
Args:
|
||||
lines: List of lines to search through (usually preceding a diff block)
|
||||
fence: tuple of opening and closing fence markers (e.g., ('```', '```'))
|
||||
valid_fnames: Optional list of valid filenames to match against
|
||||
|
||||
Returns:
|
||||
The best matching filename found, or None if no valid filename found
|
||||
"""
|
||||
if valid_fnames is None:
|
||||
valid_fnames = []
|
||||
|
||||
# Go back through the 3 preceding lines (already reversed in caller)
|
||||
lines = lines[:3]
|
||||
|
||||
filenames = []
|
||||
for line in lines:
|
||||
filename = strip_filename(line, fence)
|
||||
if filename:
|
||||
filenames.append(filename)
|
||||
|
||||
# Stop searching if we hit a non-fence line before finding a filename
|
||||
if (
|
||||
not filename
|
||||
and not line.startswith(fence[0])
|
||||
and not line.startswith(TRIPLE_BACKTICKS)
|
||||
):
|
||||
break
|
||||
# Only continue searching past fences if we haven't found a filename yet
|
||||
if (
|
||||
not filenames
|
||||
and not line.startswith(fence[0])
|
||||
and not line.startswith(TRIPLE_BACKTICKS)
|
||||
):
|
||||
break
|
||||
|
||||
if not filenames:
|
||||
return None
|
||||
|
||||
# Pick the *best* filename found
|
||||
|
||||
# Check for exact match first
|
||||
for fname in filenames:
|
||||
if fname in valid_fnames:
|
||||
return fname
|
||||
|
||||
# Perform fuzzy matching with valid_fnames
|
||||
for fname in filenames:
|
||||
close_matches = difflib.get_close_matches(fname, valid_fnames, n=1, cutoff=0.8)
|
||||
if len(close_matches) == 1:
|
||||
return close_matches[0]
|
||||
|
||||
# If no fuzzy match, look for a file w/extension as a heuristic
|
||||
for fname in filenames:
|
||||
if '.' in fname:
|
||||
return fname
|
||||
|
||||
# Fallback to the last potential filename found
|
||||
if filenames:
|
||||
return filenames[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def parse_llm_response_for_diffs(
|
||||
content: str,
|
||||
fence: tuple[str, str] = DEFAULT_FENCE,
|
||||
valid_fnames: list[str] | None = None,
|
||||
) -> list[DiffBlock]:
|
||||
"""
|
||||
Parses LLM response content to find fenced diff blocks in the format:
|
||||
|
||||
```language
|
||||
filename.ext
|
||||
<<<<<<< SEARCH
|
||||
old code
|
||||
=======
|
||||
new code
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
The parser supports:
|
||||
1. Multiple diff blocks in a single response
|
||||
2. Different file types (indicated by fence language)
|
||||
3. Empty SEARCH blocks for new files
|
||||
4. Empty REPLACE blocks for deletions
|
||||
5. Filename inference from context
|
||||
6. Fuzzy filename matching with valid_fnames
|
||||
|
||||
Examples:
|
||||
# Edit existing file
|
||||
```python
|
||||
app.py
|
||||
<<<<<<< SEARCH
|
||||
def old_func():
|
||||
pass
|
||||
=======
|
||||
def new_func():
|
||||
return True
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
# Create new file
|
||||
```python
|
||||
new_file.py
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
def new_func():
|
||||
return True
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
# Delete content
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
def unused_func():
|
||||
pass
|
||||
=======
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
Args:
|
||||
content: The LLM response string.
|
||||
fence: tuple of opening and closing fence markers (e.g., ('```', '```')).
|
||||
valid_fnames: Optional list of valid filenames in the context for better matching.
|
||||
|
||||
Returns:
|
||||
A list of DiffBlock objects representing the parsed blocks.
|
||||
|
||||
Raises:
|
||||
LLMMalformedActionError: If malformed blocks are detected, such as:
|
||||
- Missing filename before block
|
||||
- Missing SEARCH/REPLACE markers
|
||||
- Missing divider
|
||||
- Unexpected divider in REPLACE block
|
||||
"""
|
||||
logger.info(f'Parsing LLM response for diffs:\n{content}')
|
||||
|
||||
edits: list[DiffBlock] = []
|
||||
lines = content.splitlines(keepends=True)
|
||||
i = 0
|
||||
current_filename = None
|
||||
|
||||
head_pattern = re.compile(HEAD)
|
||||
divider_pattern = re.compile(DIVIDER)
|
||||
updated_pattern = re.compile(UPDATED)
|
||||
fence_pattern = re.compile(
|
||||
r'^' + re.escape(fence[0]) + r'\w*\s*$'
|
||||
) # Pattern for opening fence
|
||||
|
||||
missing_filename_err = (
|
||||
'Bad/missing filename. The filename must be alone on the line before the opening fence'
|
||||
f' {fence[0]}'
|
||||
)
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# Check for SEARCH/REPLACE blocks
|
||||
if head_pattern.match(line.strip()):
|
||||
block_start_line_index = i
|
||||
try:
|
||||
# Look backwards for filename and opening fence
|
||||
fence_line_index = -1
|
||||
temp_i = i - 1
|
||||
while temp_i >= max(0, i - 4): # Look back up to 3 lines
|
||||
prev_line = lines[temp_i]
|
||||
if fence_pattern.match(prev_line.strip()):
|
||||
fence_line_index = temp_i
|
||||
break
|
||||
temp_i -= 1
|
||||
|
||||
# If fence found, look for filename between fence and head
|
||||
filename = None
|
||||
if fence_line_index != -1:
|
||||
filename_lines = lines[
|
||||
fence_line_index + 1 : block_start_line_index
|
||||
]
|
||||
# Pass lines in original order for find_filename
|
||||
filename = find_filename(filename_lines, fence, valid_fnames)
|
||||
|
||||
# Fallback / Contextual filename inference
|
||||
if not filename:
|
||||
if current_filename:
|
||||
# Use filename from previous block if available
|
||||
filename = current_filename # type: ignore [unreachable]
|
||||
else:
|
||||
# Raise error if no filename found and no fence/context
|
||||
if fence_line_index == -1:
|
||||
raise LLMMalformedActionError(missing_filename_err)
|
||||
|
||||
# If filename is still None after checks, raise error before proceeding
|
||||
# unless it's potentially a new file block (empty search)
|
||||
# We'll validate the empty search later.
|
||||
if (
|
||||
filename is None
|
||||
and i + 1 < len(lines)
|
||||
and not divider_pattern.match(lines[i + 1].strip())
|
||||
):
|
||||
raise LLMMalformedActionError(
|
||||
missing_filename_err + ' (or fence not found)'
|
||||
)
|
||||
|
||||
current_filename = filename # Remember for subsequent blocks
|
||||
|
||||
# --- Start parsing block content ---
|
||||
original_text_lines = []
|
||||
i += 1 # Move past HEAD line
|
||||
while i < len(lines) and not divider_pattern.match(lines[i].strip()):
|
||||
original_text_lines.append(lines[i])
|
||||
i += 1
|
||||
|
||||
if i >= len(lines) or not divider_pattern.match(lines[i].strip()):
|
||||
raise LLMMalformedActionError(
|
||||
f'Expected `{DIVIDER_ERR}` after SEARCH block for {current_filename or "unknown file"}'
|
||||
)
|
||||
|
||||
i += 1 # Move past DIVIDER line
|
||||
|
||||
updated_text_lines = []
|
||||
while i < len(lines) and not updated_pattern.match(lines[i].strip()):
|
||||
if divider_pattern.match(
|
||||
lines[i].strip()
|
||||
): # Error: unexpected divider
|
||||
raise LLMMalformedActionError(
|
||||
f'Unexpected `{DIVIDER_ERR}` inside REPLACE block for {current_filename or "unknown file"}'
|
||||
)
|
||||
updated_text_lines.append(lines[i])
|
||||
i += 1
|
||||
|
||||
if i >= len(lines) or not updated_pattern.match(lines[i].strip()):
|
||||
raise LLMMalformedActionError(
|
||||
f'Expected `{UPDATED_ERR}` after REPLACE block for {current_filename or "unknown file"}'
|
||||
)
|
||||
|
||||
i += 1 # Move past REPLACE marker line
|
||||
|
||||
# Check for closing fence (and move past it if found)
|
||||
if i < len(lines) and lines[i].strip() == fence[1]:
|
||||
i += 1 # Move past closing fence
|
||||
else:
|
||||
# Allow blocks without closing fence for flexibility
|
||||
pass # Stay on the line after REPLACE marker
|
||||
|
||||
# Successfully parsed a block
|
||||
original_text = ''.join(original_text_lines)
|
||||
updated_text = ''.join(updated_text_lines)
|
||||
|
||||
# Final check for filename if it was initially None (new file case)
|
||||
if filename is None:
|
||||
if original_text.strip():
|
||||
raise LLMMalformedActionError(
|
||||
'SEARCH block must be empty when creating a new file (filename was missing before block).'
|
||||
)
|
||||
# Re-attempt filename finding if it was a new file block
|
||||
if fence_line_index != -1:
|
||||
filename_lines = lines[
|
||||
fence_line_index + 1 : block_start_line_index
|
||||
]
|
||||
filename = find_filename(filename_lines, fence, valid_fnames)
|
||||
if filename is None:
|
||||
# If still None after re-check, it's an error
|
||||
raise LLMMalformedActionError(
|
||||
'Could not determine filename for new file block.'
|
||||
)
|
||||
|
||||
# Create and add the DiffBlock object
|
||||
edits.append(
|
||||
DiffBlock(
|
||||
filename=filename, search=original_text, replace=updated_text
|
||||
)
|
||||
)
|
||||
|
||||
continue # Continue to the next line
|
||||
|
||||
except (ValueError, LLMMalformedActionError) as e:
|
||||
# Add context to the error message
|
||||
processed_marker = len(edits) + 1 # Block number being processed
|
||||
# Ensure we get the original message, whether it's ValueError or LLMMalformedActionError
|
||||
original_message = e.args[0] if e.args else str(e)
|
||||
err_context = f"Error parsing block #{processed_marker} for file '{current_filename or 'unknown'}': {original_message}"
|
||||
# Add context to the error message
|
||||
processed_marker = len(edits) + 1 # Block number being processed
|
||||
# Ensure we get the original message, whether it's ValueError or LLMMalformedActionError
|
||||
original_message = e.args[0] if e.args else str(e)
|
||||
err_context = f"Error parsing block #{processed_marker} for file '{current_filename or 'unknown'}': {original_message}"
|
||||
# Re-raise as LLMMalformedActionError
|
||||
raise LLMMalformedActionError(err_context) from e
|
||||
|
||||
# If not a HEAD line, just advance line index
|
||||
i += 1
|
||||
|
||||
return edits
|
||||
@@ -0,0 +1,208 @@
|
||||
# OpenHands Agent
|
||||
|
||||
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
|
||||
|
||||
## Role and Core Function
|
||||
|
||||
Your primary role is to assist users by executing commands, interacting with tools, and solving technical problems effectively. You should be thorough, methodical, and prioritize **quality and correctness** over speed.
|
||||
|
||||
* You have access to a set of tools (like bash, python, browser, file viewer/lister). Use these tools when appropriate for tasks like running commands, executing code, browsing, or viewing/listing files.
|
||||
* **IMPORTANT**: For **modifying files**, you MUST NOT use a tool. Instead, you MUST generate diff blocks directly in your response text, following the specific rules outlined in the **File Editing via LLM Diff** section below.
|
||||
|
||||
## Efficiency
|
||||
|
||||
* Where possible, combine multiple simple, related actions into a single action (e.g., combine multiple bash commands using `&&`). However, prioritize clarity and correctness for complex operations. You can fallback to multiple steps if needed.
|
||||
* When exploring the codebase, use efficient tools like `find`, `grep`, and the provided file viewing/listing tools (`view_file`, `list_directory`) with appropriate filters to avoid unnecessary operations.
|
||||
|
||||
## File System Guidelines
|
||||
|
||||
* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system (e.g., using the `list_directory` tool) to locate the file before working on it.
|
||||
* Use the `view_file` tool to examine file contents before deciding on edits.
|
||||
* To propose changes to a file, follow the rules in **File Editing via LLM Diff**.
|
||||
* To rename or delete files, use the `execute_bash` tool with `mv` or `rm` commands.
|
||||
|
||||
## The `view_file` Tool
|
||||
* The `view_file` tool will show the file's contents in the indicated line number range, 1-indexed.
|
||||
* The line numbers are added by the tool as an aid for you, they are NOT part of the file.
|
||||
* The format of the output is:
|
||||
```
|
||||
1\tline1
|
||||
2\tline2
|
||||
...
|
||||
n\tlinen
|
||||
```
|
||||
|
||||
## File Editing via LLM Diff
|
||||
|
||||
* **IMPORTANT**:
|
||||
To propose changes to a file, you MUST use fenced diff blocks embedded directly in your response. You do not need a tool for this.
|
||||
* **Format Rules**: Every fenced diff block must strictly follow this format:
|
||||
1. Three opening backticks and code language, e.g.: ```python
|
||||
2. The **ABSOLUTE** file path alone on a line, verbatim. No bold asterisks, no quotes, etc.
|
||||
3. The start of search block: `<<<<<<< SEARCH`
|
||||
4. A contiguous chunk of lines to search for in the existing source code.
|
||||
5. The dividing line: `=======`
|
||||
6. The lines to replace the searched lines with.
|
||||
7. The end of the replace block: `>>>>>>> REPLACE`
|
||||
8. Three closing backticks: ```
|
||||
* **Exact Matching**: The `SEARCH` section must **EXACTLY MATCH** the existing file content, character for character, including all whitespace, comments, indentation, etc. Use the `view_file` tool to confirm the exact content before creating a block. Be meticulous about whitespace.
|
||||
* **Line Numbers are NOT part of the file**, so do NOT include them in the `SEARCH` or `REPLACE` section.
|
||||
* **First Match Only**: When editing, *SEARCH/REPLACE* blocks will *only* replace the first match occurrence.
|
||||
- include multiple unique *SEARCH/REPLACE* blocks if needed
|
||||
- include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
|
||||
* **Conciseness**: Keep blocks focused. Include just enough lines in `SEARCH` for a unique match. Break large changes into smaller, logical blocks.
|
||||
* **Moving Code**: To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location.
|
||||
* **Creating Files**: If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
|
||||
- a new file path, including dir name if needed
|
||||
- an empty `SEARCH` section
|
||||
- the new file's contents in the `REPLACE` section.
|
||||
|
||||
|
||||
### Clarity
|
||||
|
||||
When proposing changes, clearly state your plan and then provide the corresponding fenced diff blocks.
|
||||
|
||||
### LLM Diff Examples
|
||||
|
||||
{# Examples for LLM Diff Fenced Block Editing #}
|
||||
Here are examples of how to use the fenced diff format:
|
||||
|
||||
**Example 1: Modifying a function**
|
||||
*User Message:*
|
||||
> Change get_factorial() to use math.factorial
|
||||
|
||||
*Assistant Response:*
|
||||
Okay, I will modify `mathweb/flask/app.py` to:
|
||||
1. Import the math package.
|
||||
2. Remove the existing factorial() function.
|
||||
3. Update get_factorial() to call math.factorial instead.
|
||||
|
||||
Here are the changes:
|
||||
```python
|
||||
mathweb/flask/app.py
|
||||
<<<<<<< SEARCH
|
||||
from flask import Flask
|
||||
=======
|
||||
import math
|
||||
from flask import Flask
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
```python
|
||||
mathweb/flask/app.py
|
||||
<<<<<<< SEARCH
|
||||
def factorial(n):
|
||||
"compute factorial"
|
||||
|
||||
if n == 0:
|
||||
return 1
|
||||
else:
|
||||
return n * factorial(n-1)
|
||||
|
||||
=======
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
```python
|
||||
mathweb/flask/app.py
|
||||
<<<<<<< SEARCH
|
||||
return str(factorial(n))
|
||||
=======
|
||||
return str(math.factorial(n))
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
**Example 2: Refactoring into a new file**
|
||||
*User Message:*
|
||||
> Refactor hello() into its own file.
|
||||
|
||||
*Assistant Response:*
|
||||
Okay, I will modify `main.py` and create a new file `hello.py`:
|
||||
1. Create a new hello.py file with hello() in it.
|
||||
2. Remove hello() from main.py and replace it with an import.
|
||||
|
||||
Here are the changes:
|
||||
```python
|
||||
hello.py
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
def hello():
|
||||
"print a greeting"
|
||||
|
||||
print("hello")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
```python
|
||||
main.py
|
||||
<<<<<<< SEARCH
|
||||
def hello():
|
||||
"print a greeting"
|
||||
|
||||
print("hello")
|
||||
=======
|
||||
from hello import hello
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
|
||||
## Code Quality, Robustness, and Defensiveness
|
||||
|
||||
* **Goal:** Your primary goal when writing or modifying code is to produce a **correct, robust, and reliable solution** that successfully addresses the user's request and passes all necessary verification steps (especially tests).
|
||||
* **Scope of Changes:** Implement **focused changes** directly related to solving the problem. The scope should be sufficient to ensure correctness and robustness for the task.
|
||||
* **Defensive Programming:** Actively practice defensive programming within the scope of your changes. Anticipate potential edge cases, handle errors gracefully (e.g., check for null values, unexpected types), and consider different ways the affected code might be called to ensure the fix works reliably. Reliability is more important than focused changes
|
||||
* **Readability:** Write clean, efficient code. Use comments when the logic isn't self-evident.
|
||||
* **Refactoring:** If implementing the required fix involves adding significant complexity to a function or file, consider proposing a refactor (e.g., splitting the function/file) as a separate step *after* confirming the primary fix, but only if it significantly improves maintainability and is appropriate for the context.
|
||||
|
||||
## Version Control (Git)
|
||||
|
||||
* **Credentials:** When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
|
||||
* **Caution:** Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so.
|
||||
* **Committing:** When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible for simplicity, but be mindful of what you are committing.
|
||||
* **Ignoring Files:** Do NOT commit files that typically shouldn't go into version control (e.g., `node_modules/`, `.env` files, build directories, cache files, large binaries) unless explicitly instructed by the user. Check `.gitignore` files if unsure.
|
||||
* **Clarity:** Ask the user for clarification if unsure about which files to commit.
|
||||
|
||||
## Pull Requests (GitHub)
|
||||
|
||||
* **Frequency:** Create only ONE pull request per session/issue unless explicitly instructed otherwise.
|
||||
* **Updates:** When working with an existing PR, update it with new commits unless instructed otherwise.
|
||||
* **Content:** Preserve the original PR title and purpose; update the description only when necessary to reflect significant changes.
|
||||
|
||||
## Problem Solving Workflow
|
||||
|
||||
1. **Understand & Explore:** Thoroughly read the request. Explore relevant files (using `view_file`, `list_directory`, `grep`) and understand the context, potential calling contexts, and potentially relevant existing test files/suites before proposing solutions. Do not hurry.
|
||||
* If the request is vague, ask clarifying questions to understand the user's intent and the problem's context.
|
||||
* If the request is complex, break it down into smaller tasks and address them one at a time.
|
||||
* If the request involves multiple files, explore all relevant files before proposing changes. Use `grep` to find function calls or references in other files.
|
||||
* If the request involves a bug, try to reproduce it first. Use `grep` to find relevant test cases or code paths that might be related to the issue.
|
||||
* If the request involves a new feature, consider how it fits into the existing codebase and prioritize the codebase patterns.
|
||||
* If the request involves a bug fix, consider whether it requires changes to multiple files or if it's a simple fix. If it's complex, break it down into smaller tasks and address them one at a time.
|
||||
2. **Plan:** Consider multiple approaches. Select the most promising one that balances correctness, robustness, and focused changes. Outline your plan.
|
||||
3. **Implement:** Propose changes via fenced diff blocks as described in **File Editing via LLM Diff**.
|
||||
4. **Test & Verify:**
|
||||
* **Bug Fixes:** Create tests to verify issues before implementing fixes, if feasible using available tools (bash, python).
|
||||
* **Existing Tests:** **Crucially**, identify and execute the **complete set** of relevant existing tests to ensure your changes are correct and haven't introduced regressions. Follow the testing procedure provided in the repository-specific or task-specific instructions if available, otherwise apply the principles here (identify all relevant tests, run them, ensure they pass).
|
||||
* **Consult User:** If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing significant time.
|
||||
5. **Refine:** If verification fails, analyze the failures, revise your implementation, and repeat testing until successful.
|
||||
|
||||
## Security
|
||||
|
||||
* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect.
|
||||
* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing.
|
||||
|
||||
## Environment Setup
|
||||
|
||||
* When asked to run an application, don't stop if it's not installed. Attempt to install it (using bash) and run the command again.
|
||||
* If you encounter missing dependencies:
|
||||
1. First, look for existing dependency files (`requirements.txt`, `pyproject.toml`, `package.json`, `Gemfile`, etc.).
|
||||
2. If found, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`).
|
||||
3. Only install individual packages directly if no dependency files are found or if only specific packages are needed.
|
||||
* Similarly, install missing dependencies for essential tools requested by the user if possible.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken:
|
||||
1. Step back and reflect on 5-7 different possible sources of the problem.
|
||||
2. Assess the likelihood of each possible cause.
|
||||
3. Methodically address the most likely causes, starting with the highest probability.
|
||||
4. Document your reasoning process.
|
||||
* When you run into a major issue while executing a plan from the user, please propose a new plan and confirm with the user before proceeding.
|
||||
@@ -2,16 +2,22 @@ from .bash import create_cmd_run_tool
|
||||
from .browser import BrowserTool
|
||||
from .finish import FinishTool
|
||||
from .ipython import IPythonTool
|
||||
from .list_directory import ListDirectoryTool
|
||||
from .llm_based_edit import LLMBasedFileEditTool
|
||||
from .str_replace_editor import create_str_replace_editor_tool
|
||||
from .think import ThinkTool
|
||||
from .undo_edit import UndoEditTool
|
||||
from .view_file import ViewFileTool
|
||||
|
||||
__all__ = [
|
||||
'BrowserTool',
|
||||
'create_cmd_run_tool',
|
||||
'ListDirectoryTool',
|
||||
'FinishTool',
|
||||
'IPythonTool',
|
||||
'LLMBasedFileEditTool',
|
||||
'create_str_replace_editor_tool',
|
||||
'UndoEditTool',
|
||||
'ViewFileTool',
|
||||
'ThinkTool',
|
||||
]
|
||||
|
||||
19
openhands/agenthub/codeact_agent/tools/list_directory.py
Normal file
19
openhands/agenthub/codeact_agent/tools/list_directory.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
ListDirectoryTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='list_directory',
|
||||
description='List non-hidden files and directories within a specified directory path, up to 2 levels deep.',
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'path': {
|
||||
'description': 'Absolute path to the directory, e.g. `/workspace/subdir`.',
|
||||
'type': 'string',
|
||||
},
|
||||
},
|
||||
'required': ['path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
19
openhands/agenthub/codeact_agent/tools/undo_edit.py
Normal file
19
openhands/agenthub/codeact_agent/tools/undo_edit.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
UndoEditTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='undo_edit',
|
||||
description='Revert the last edit made to the specified file.',
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'path': {
|
||||
'description': 'Absolute path to the file for which to undo the last edit, e.g. `/workspace/file.py`.',
|
||||
'type': 'string',
|
||||
},
|
||||
},
|
||||
'required': ['path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
24
openhands/agenthub/codeact_agent/tools/view_file.py
Normal file
24
openhands/agenthub/codeact_agent/tools/view_file.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
ViewFileTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='view_file',
|
||||
description='View the content of a specified file, optionally within a line range.',
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'path': {
|
||||
'description': 'Absolute path to the file, e.g. `/workspace/file.py`.',
|
||||
'type': 'string',
|
||||
},
|
||||
'view_range': {
|
||||
'description': 'Optional line range [start, end] (1-based, inclusive). Shows full file if omitted. `[start, -1]` shows from start to end.',
|
||||
'items': {'type': 'integer'},
|
||||
'type': 'array',
|
||||
},
|
||||
},
|
||||
'required': ['path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -61,6 +61,8 @@ class ReadOnlyAgent(CodeActAgent):
|
||||
if self._prompt_manager is None:
|
||||
self._prompt_manager = PromptManager(
|
||||
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
|
||||
config=self.config,
|
||||
system_prompt_filename=self.config.system_prompt_filename,
|
||||
)
|
||||
return self._prompt_manager
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ class AgentConfig(BaseModel):
|
||||
"""Whether to enable browsing tool.
|
||||
Note: If using CLIRuntime, browsing is not implemented and should be disabled."""
|
||||
enable_llm_editor: bool = Field(default=False)
|
||||
enable_llm_diff: bool = Field(default=False)
|
||||
"""Whether to enable LLM editor tool"""
|
||||
enable_editor: bool = Field(default=True)
|
||||
"""Whether to enable the standard editor tool (str_replace_editor), only has an effect if enable_llm_editor is False."""
|
||||
|
||||
@@ -82,11 +82,12 @@ class FileEditAction(Action):
|
||||
action (str): The type of action being performed (always ActionType.EDIT).
|
||||
runnable (bool): Indicates if the action can be executed (always True).
|
||||
security_risk (ActionSecurityRisk | None): Indicates any security risks associated with the action.
|
||||
impl_source (FileEditSource): The source of the implementation (LLM_BASED_EDIT or OH_ACI).
|
||||
impl_source (FileEditSource): The source of the implementation (LLM_BASED_EDIT, OH_ACI, or FENCED_DIFF).
|
||||
|
||||
Usage:
|
||||
- For LLM-based editing: Use path, content, start, and end attributes.
|
||||
- For ACI-based editing: Use path, command, and the appropriate attributes for the specific command.
|
||||
- For LLM Diff editing: Use path, search, and replace attributes.
|
||||
|
||||
Note:
|
||||
- If start is set to -1 in LLM-based editing, the content will be appended to the file.
|
||||
@@ -107,6 +108,10 @@ class FileEditAction(Action):
|
||||
start: int = 1
|
||||
end: int = -1
|
||||
|
||||
# Fenced Diff arguments
|
||||
search: str | None = None
|
||||
replace: str | None = None
|
||||
|
||||
# Shared arguments
|
||||
thought: str = ''
|
||||
action: str = ActionType.EDIT
|
||||
@@ -122,6 +127,9 @@ class FileEditAction(Action):
|
||||
if self.impl_source == FileEditSource.LLM_BASED_EDIT:
|
||||
ret += f'Range: [L{self.start}:L{self.end}]\n'
|
||||
ret += f'Content:\n```\n{self.content}\n```\n'
|
||||
elif self.impl_source == FileEditSource.LLM_DIFF:
|
||||
ret += f'SEARCH:\n```\n{self.search}\n```\n'
|
||||
ret += f'REPLACE:\n```\n{self.replace}\n```\n'
|
||||
else: # OH_ACI mode
|
||||
ret += f'Command: {self.command}\n'
|
||||
if self.command == 'create':
|
||||
|
||||
@@ -15,6 +15,7 @@ class EventSource(str, Enum):
|
||||
class FileEditSource(str, Enum):
|
||||
LLM_BASED_EDIT = 'llm_based_edit'
|
||||
OH_ACI = 'oh_aci' # openhands-aci
|
||||
LLM_DIFF = 'llm_diff' # Aider-style SEARCH/REPLACE diffs parsed directly from LLM message content
|
||||
|
||||
|
||||
class FileReadSource(str, Enum):
|
||||
|
||||
@@ -199,6 +199,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
custom_llm_provider=self.config.custom_llm_provider,
|
||||
timeout=self.config.timeout,
|
||||
top_p=self.config.top_p,
|
||||
top_k=self.config.top_k,
|
||||
drop_params=self.config.drop_params,
|
||||
seed=self.config.seed,
|
||||
**kwargs,
|
||||
@@ -433,10 +434,6 @@ class LLM(RetryMixin, DebugMixin):
|
||||
)
|
||||
|
||||
resp_json = response.json()
|
||||
if 'data' not in resp_json:
|
||||
logger.error(
|
||||
f'Error getting model info from LiteLLM proxy: {resp_json}'
|
||||
)
|
||||
all_model_info = resp_json.get('data', [])
|
||||
current_model_info = next(
|
||||
(
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Callable
|
||||
|
||||
from tenacity import (
|
||||
@@ -15,6 +17,80 @@ from openhands.utils.tenacity_stop import stop_if_should_exit
|
||||
class RetryMixin:
|
||||
"""Mixin class for retry logic."""
|
||||
|
||||
def _extract_retry_delay(self, exception) -> float | None:
|
||||
"""
|
||||
Extract retry delay in seconds from exception message if available.
|
||||
|
||||
Args:
|
||||
exception: The exception object to extract retry delay from
|
||||
|
||||
Returns:
|
||||
Retry delay in seconds or None if not found
|
||||
"""
|
||||
if not hasattr(exception, '__str__'):
|
||||
return None
|
||||
|
||||
error_msg = str(exception)
|
||||
|
||||
# Try to extract "retryDelay": "Xs" pattern
|
||||
retry_match = re.search(r'"retryDelay":\s*"(\d+)s"', error_msg)
|
||||
if retry_match:
|
||||
return float(retry_match.group(1))
|
||||
|
||||
# Try to extract any JSON containing retryDelay
|
||||
try:
|
||||
# Look for JSON objects in the string
|
||||
json_match = re.search(r'({.*})', error_msg)
|
||||
if json_match:
|
||||
json_data = json.loads(json_match.group(1))
|
||||
|
||||
# Navigate through nested JSON to find retryDelay
|
||||
if 'error' in json_data and 'details' in json_data['error']:
|
||||
for detail in json_data['error']['details']:
|
||||
if (
|
||||
'@type' in detail
|
||||
and 'RetryInfo' in detail['@type']
|
||||
and 'retryDelay' in detail
|
||||
):
|
||||
delay_str = detail['retryDelay']
|
||||
if isinstance(delay_str, str) and delay_str.endswith('s'):
|
||||
return float(
|
||||
delay_str[:-1]
|
||||
) # Remove the 's' and convert to float
|
||||
except Exception:
|
||||
# If JSON parsing fails, continue with default retry behavior
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
def custom_wait_strategy(self, retry_state) -> float:
|
||||
"""
|
||||
Custom wait strategy that uses provider's retry delay when available.
|
||||
|
||||
Args:
|
||||
retry_state: The current retry state
|
||||
|
||||
Returns:
|
||||
Wait time in seconds
|
||||
"""
|
||||
# Get the fallback exponential wait
|
||||
default_wait = wait_exponential(
|
||||
multiplier=self.retry_multiplier,
|
||||
min=self.retry_min_wait,
|
||||
max=self.retry_max_wait,
|
||||
)(retry_state)
|
||||
|
||||
# Check if there's an exception with retry delay info
|
||||
if retry_state.outcome.failed:
|
||||
exception = retry_state.outcome.exception()
|
||||
retry_delay = self._extract_retry_delay(exception)
|
||||
|
||||
if retry_delay is not None:
|
||||
logger.info(f'Using provider-suggested retry delay: {retry_delay}s')
|
||||
return retry_delay
|
||||
|
||||
return default_wait
|
||||
|
||||
def retry_decorator(self, **kwargs: Any) -> Callable:
|
||||
"""
|
||||
Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes.
|
||||
@@ -26,17 +102,18 @@ class RetryMixin:
|
||||
Returns:
|
||||
A retry decorator with the parameters customizable in configuration.
|
||||
"""
|
||||
num_retries = kwargs.get('num_retries')
|
||||
retry_exceptions: tuple = kwargs.get('retry_exceptions', ())
|
||||
retry_min_wait = kwargs.get('retry_min_wait')
|
||||
retry_max_wait = kwargs.get('retry_max_wait')
|
||||
retry_multiplier = kwargs.get('retry_multiplier')
|
||||
# Store these values as instance variables so they can be accessed by custom_wait_strategy
|
||||
self.num_retries = kwargs.get('num_retries')
|
||||
self.retry_exceptions = kwargs.get('retry_exceptions', ())
|
||||
self.retry_min_wait = kwargs.get('retry_min_wait')
|
||||
self.retry_max_wait = kwargs.get('retry_max_wait')
|
||||
self.retry_multiplier = kwargs.get('retry_multiplier')
|
||||
retry_listener = kwargs.get('retry_listener')
|
||||
|
||||
def before_sleep(retry_state: Any) -> None:
|
||||
self.log_retry_attempt(retry_state)
|
||||
if retry_listener:
|
||||
retry_listener(retry_state.attempt_number, num_retries)
|
||||
retry_listener(retry_state.attempt_number, self.num_retries)
|
||||
|
||||
# Check if the exception is LLMNoResponseError
|
||||
exception = retry_state.outcome.exception()
|
||||
@@ -56,16 +133,10 @@ class RetryMixin:
|
||||
|
||||
retry_decorator: Callable = retry(
|
||||
before_sleep=before_sleep,
|
||||
stop=stop_after_attempt(num_retries) | stop_if_should_exit(),
|
||||
stop=stop_after_attempt(self.num_retries) | stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=(
|
||||
retry_if_exception_type(retry_exceptions)
|
||||
), # retry only for these types
|
||||
wait=wait_exponential(
|
||||
multiplier=retry_multiplier,
|
||||
min=retry_min_wait,
|
||||
max=retry_max_wait,
|
||||
),
|
||||
retry=retry_if_exception_type(self.retry_exceptions),
|
||||
wait=self.custom_wait_strategy,
|
||||
)
|
||||
return retry_decorator
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ from openhands.events.action import (
|
||||
)
|
||||
from openhands.events.action.mcp import MCPAction
|
||||
from openhands.events.action.message import SystemMessageAction
|
||||
from openhands.events.event import Event, RecallType
|
||||
from openhands.events.event import Event, FileEditSource, RecallType
|
||||
from openhands.events.observation import (
|
||||
AgentCondensationObservation,
|
||||
AgentDelegateObservation,
|
||||
@@ -216,8 +216,32 @@ class ConversationMemory:
|
||||
rather than being returned immediately. They will be processed later when all corresponding
|
||||
tool call results are available.
|
||||
"""
|
||||
# create a regular message from an event
|
||||
if isinstance(
|
||||
# Handle LLM_DIFF FileEditAction specifically FIRST: create an assistant message
|
||||
# containing the thought (if any) and the formatted diff block.
|
||||
# This bypasses the tool_metadata check for this action type.
|
||||
if (
|
||||
isinstance(action, FileEditAction)
|
||||
and action.impl_source == FileEditSource.LLM_DIFF
|
||||
):
|
||||
# TODO: Determine language hint from path extension if possible
|
||||
lang_hint = 'diff'
|
||||
# Ensure newline separation, handle None for search/replace
|
||||
search_block = action.search or ''
|
||||
replace_block = action.replace or ''
|
||||
block = (
|
||||
f'\n```{lang_hint}\n'
|
||||
f'{action.path}\n'
|
||||
f'<<<<<<< SEARCH\n{search_block}\n'
|
||||
f'=======\n{replace_block}\n'
|
||||
f'>>>>>>> REPLACE\n```'
|
||||
)
|
||||
# Combine thought and block
|
||||
content_str = (action.thought + block) if action.thought else block.lstrip()
|
||||
# Return the message directly, DO NOT add tool_calls structure
|
||||
return [Message(role='assistant', content=[TextContent(text=content_str)])]
|
||||
|
||||
# Now handle other actions that might have tool metadata
|
||||
elif isinstance(
|
||||
action,
|
||||
(
|
||||
AgentDelegateAction,
|
||||
@@ -252,7 +276,6 @@ class ConversationMemory:
|
||||
else [],
|
||||
tool_calls=assistant_msg.tool_calls,
|
||||
)
|
||||
return []
|
||||
elif isinstance(action, AgentFinishAction):
|
||||
role = 'user' if action.source == 'user' else 'assistant'
|
||||
|
||||
@@ -630,7 +653,7 @@ class ConversationMemory:
|
||||
# when the LLM tries to return the next message
|
||||
raise ValueError(f'Unknown observation type: {type(obs)}')
|
||||
|
||||
# Update the message as tool response properly
|
||||
# Update the message as tool response properly (for non-LLM_DIFF observations with metadata)
|
||||
if (tool_call_metadata := getattr(obs, 'tool_call_metadata', None)) is not None:
|
||||
tool_call_id_to_message[tool_call_metadata.tool_call_id] = Message(
|
||||
role='tool',
|
||||
|
||||
41
openhands/microagents/task/plan_functionality.md
Normal file
41
openhands/microagents/task/plan_functionality.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# Plan: Decoupling File Utilities from Editing Mode
|
||||
|
||||
## Problem
|
||||
|
||||
The OpenHands CodeAct agent offers three primary ways to handle file edits:
|
||||
1. **`str_replace_editor` (Default, OH_ACI):** A comprehensive tool (loaded via `create_str_replace_editor_tool`) with commands for viewing files/dirs, creating, replacing, inserting, and undoing edits. This is the default mode.
|
||||
2. **`LLMBasedFileEditTool` (Optional):** An alternative editor tool enabled by `enable_llm_editor=True`, using line ranges and LLM refinement.
|
||||
3. **`LLM_DIFF` Mode (Optional):** An alternative mode enabled by `enable_llm_diff=True`, where the agent parses Aider-style fenced diffs directly from the LLM's text response (not a tool call).
|
||||
|
||||
When either of the optional modes (`LLMBasedFileEditTool` or `LLM_DIFF`) is enabled, the default `str_replace_editor` tool (and its bundled utilities like view, list, undo) is not loaded. This creates a need to provide viewing/listing capabilities separately in these optional modes, while acknowledging that `undo` is inherently tied to the default mode's server-side implementation.
|
||||
|
||||
## Goal
|
||||
|
||||
Allow the use of the optional editing modes (`LLMBasedFileEditTool` or `LLM_DIFF` mode) while still providing access to the viewing (file/directory) capabilities originally part of the default `str_replace_editor`.
|
||||
|
||||
Important: the original `str_replace_editor` tool must remain available *unchanged* when neither alternative editor/mode is enabled, to maintain compatibility with models post-trained on it like Sonnet.
|
||||
|
||||
**Challenge:** The `undo_edit` command relies on the internal history of the `OHEditor` instance on the server, which is only populated when *it* performs edits using the `OH_ACI` source. Edits made client-side (`LLMBasedFileEditTool`, `LLM_DIFF` mode) bypass this history, rendering `undo_edit` ineffective for them.
|
||||
|
||||
## Current Solution: Conditional Tool Loading
|
||||
|
||||
The current implementation addresses the goal of providing viewing/listing utilities by conditionally loading tools based on the agent configuration flags in `openhands/agenthub/codeact_agent/function_calling.py: get_tools`:
|
||||
|
||||
1. **Default Mode (`enable_llm_editor=False`, `enable_llm_diff=False`):**
|
||||
* Loads the full `create_str_replace_editor_tool()`. This single tool definition provides commands for editing (`replace`, `insert`, etc.), viewing (`view` file/directory), and `undo_edit`. All operations use `impl_source=OH_ACI` and are executed server-side by `OHEditor`.
|
||||
|
||||
2. **LLM-Based Editor Mode (`enable_llm_editor=True`):**
|
||||
* Loads the `LLMBasedFileEditTool` for editing (client-side execution).
|
||||
* Loads separate `ViewFileTool` and `ListDirectoryTool`. These tools generate `FileReadAction`s with `impl_source=OH_ACI`, ensuring they are still executed server-side by the `OHEditor`'s viewing capabilities.
|
||||
* Does **not** load `UndoEditTool`.
|
||||
|
||||
3. **LLM_DIFF Mode (`enable_llm_diff=True`):**
|
||||
* Loads **no** editing tools (edits are parsed from LLM text response and executed client-side).
|
||||
* Loads separate `ViewFileTool` and `ListDirectoryTool` (same as above, executed server-side via `OH_ACI`).
|
||||
* Does **not** load `UndoEditTool`.
|
||||
|
||||
This approach successfully decouples the viewing/listing functionality from the chosen editing method by falling back to the `OH_ACI` implementation for viewing/listing even when alternative editing modes are active.
|
||||
|
||||
## Conclusion
|
||||
|
||||
The current conditional tool loading mechanism effectively provides file/directory viewing capabilities regardless of the selected editing mode (`str_replace_editor`, `LLMBasedFileEditTool`, or `LLM_DIFF`). However, the `undo_edit` functionality remains intrinsically linked to the server-side edit history managed by `OHEditor` and is therefore only available when using the default `str_replace_editor` tool. Edits performed via the client-side `LLMBasedFileEditTool` or `LLM_DIFF` mode cannot be undone using the `undo_edit` command.
|
||||
@@ -31,8 +31,8 @@ from openhands.events.action import (
|
||||
IPythonRunCellAction,
|
||||
)
|
||||
from openhands.events.action.action import Action
|
||||
from openhands.events.action.files import FileEditSource
|
||||
from openhands.events.action.mcp import MCPAction
|
||||
from openhands.events.event import FileEditSource
|
||||
from openhands.events.observation import (
|
||||
AgentThinkObservation,
|
||||
ErrorObservation,
|
||||
@@ -267,11 +267,8 @@ class ActionExecutionClient(Runtime):
|
||||
return ''
|
||||
|
||||
def send_action_for_execution(self, action: Action) -> Observation:
|
||||
if (
|
||||
isinstance(action, FileEditAction)
|
||||
and action.impl_source == FileEditSource.LLM_BASED_EDIT
|
||||
):
|
||||
return self.llm_based_edit(action)
|
||||
# NOTE: Dispatch for LLM_BASED_EDIT and FENCED_DIFF now happens in the `edit` method.
|
||||
# This method now only forwards actions to the server.
|
||||
|
||||
# set timeout to default if not set
|
||||
if action.timeout is None:
|
||||
@@ -343,7 +340,16 @@ class ActionExecutionClient(Runtime):
|
||||
return self.send_action_for_execution(action)
|
||||
|
||||
def edit(self, action: FileEditAction) -> Observation:
|
||||
return self.send_action_for_execution(action)
|
||||
# Dispatch based on impl_source
|
||||
if action.impl_source == FileEditSource.LLM_BASED_EDIT:
|
||||
# LLM-based edits are handled client-side (requires LLM)
|
||||
return self.llm_based_edit(action)
|
||||
elif action.impl_source == FileEditSource.LLM_DIFF:
|
||||
# LLM Diff edits (parsed from message) are handled client-side
|
||||
return self.llm_diff_edit(action)
|
||||
else:
|
||||
# OH_ACI edits (from tool calls) are handled server-side
|
||||
return self.send_action_for_execution(action)
|
||||
|
||||
def browse(self, action: BrowseURLAction) -> Observation:
|
||||
return self.send_action_for_execution(action)
|
||||
|
||||
@@ -3,4 +3,11 @@ from openhands.runtime.utils.system import (
|
||||
find_available_tcp_port,
|
||||
)
|
||||
|
||||
"""
|
||||
Utils for the client-side runtime.
|
||||
|
||||
The fenced diff editing logic, primarily in `edit.py`, is adapted from Aider (Apache 2.0 License).
|
||||
- Original source: https://github.com/paul-gauthier/aider/blob/main/aider/coders/editblock_fenced_coder.py
|
||||
- Please see fenced edit at https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/utils/edit.py
|
||||
"""
|
||||
__all__ = ['display_number_matrix', 'find_available_tcp_port']
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import difflib
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
@@ -345,3 +346,278 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
|
||||
)
|
||||
ret_obs.llm_metrics = self.draft_editor_llm.metrics
|
||||
return ret_obs
|
||||
|
||||
# ==========================================================================
|
||||
# Fenced Diff / LLM Diff Edit Logic (Adapted from Aider principles)
|
||||
# ==========================================================================
|
||||
|
||||
def _exact_replace(
|
||||
self,
|
||||
original_lines: list[str],
|
||||
search_lines: list[str],
|
||||
replace_lines: list[str],
|
||||
) -> list[str] | None:
|
||||
"""Attempts an exact character-for-character replacement."""
|
||||
search_tuple = tuple(search_lines)
|
||||
search_len = len(search_lines)
|
||||
if search_len == 0:
|
||||
return None # Cannot replace empty search block exactly
|
||||
|
||||
for i in range(len(original_lines) - search_len + 1):
|
||||
original_tuple = tuple(original_lines[i : i + search_len])
|
||||
if search_tuple == original_tuple:
|
||||
return (
|
||||
original_lines[:i]
|
||||
+ replace_lines
|
||||
+ original_lines[i + search_len :]
|
||||
)
|
||||
return None
|
||||
|
||||
def _whitespace_flexible_replace(
|
||||
self,
|
||||
original_lines: list[str],
|
||||
search_lines: list[str],
|
||||
replace_lines: list[str],
|
||||
) -> list[str] | None:
|
||||
"""Attempts replacement ignoring consistent leading whitespace differences."""
|
||||
search_len = len(search_lines)
|
||||
if search_len == 0:
|
||||
return None
|
||||
|
||||
# Calculate min leading whitespace in search block (ignoring blank lines)
|
||||
search_leading_spaces = [
|
||||
len(s) - len(s.lstrip(' ')) for s in search_lines if s.strip()
|
||||
]
|
||||
min_search_leading = min(search_leading_spaces) if search_leading_spaces else 0
|
||||
stripped_search_lines = [
|
||||
s[min_search_leading:] if s.strip() else s for s in search_lines
|
||||
]
|
||||
stripped_search_tuple = tuple(stripped_search_lines)
|
||||
|
||||
for i in range(len(original_lines) - search_len + 1):
|
||||
original_chunk = original_lines[i : i + search_len]
|
||||
|
||||
# Calculate min leading whitespace in original chunk
|
||||
original_leading_spaces = [
|
||||
len(s) - len(s.lstrip(' ')) for s in original_chunk if s.strip()
|
||||
]
|
||||
min_original_leading = (
|
||||
min(original_leading_spaces) if original_leading_spaces else 0
|
||||
)
|
||||
leading_whitespace_prefix = ' ' * min_original_leading
|
||||
|
||||
# Strip original chunk consistently
|
||||
stripped_original_lines = [
|
||||
s[min_original_leading:] if s.strip() else s for s in original_chunk
|
||||
]
|
||||
stripped_original_tuple = tuple(stripped_original_lines)
|
||||
|
||||
if stripped_search_tuple == stripped_original_tuple:
|
||||
# Match found! Apply original leading whitespace to replace_lines
|
||||
adjusted_replace_lines = [
|
||||
leading_whitespace_prefix + rline[min_search_leading:]
|
||||
if rline.strip()
|
||||
else rline
|
||||
for rline in replace_lines
|
||||
]
|
||||
return (
|
||||
original_lines[:i]
|
||||
+ adjusted_replace_lines
|
||||
+ original_lines[i + search_len :]
|
||||
)
|
||||
return None
|
||||
|
||||
def _find_most_similar_block(
|
||||
self, original_lines: list[str], search_lines: list[str], context_lines: int = 5
|
||||
) -> tuple[str | None, float]:
|
||||
"""Finds the block in original_lines most similar to search_lines."""
|
||||
if not search_lines or not original_lines:
|
||||
return None, 0.0
|
||||
|
||||
search_len = len(search_lines)
|
||||
best_ratio = 0.0
|
||||
best_match_start_idx = -1
|
||||
|
||||
matcher = difflib.SequenceMatcher(isjunk=None, a=search_lines)
|
||||
for i in range(len(original_lines) - search_len + 1):
|
||||
chunk = original_lines[i : i + search_len]
|
||||
matcher.set_seq2(chunk)
|
||||
ratio = matcher.ratio()
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_match_start_idx = i
|
||||
|
||||
if best_match_start_idx != -1:
|
||||
start = max(0, best_match_start_idx - context_lines)
|
||||
end = min(
|
||||
len(original_lines), best_match_start_idx + search_len + context_lines
|
||||
)
|
||||
context_chunk = original_lines[start:end]
|
||||
|
||||
# Add markers to indicate the likely match within the context
|
||||
marked_chunk = []
|
||||
for idx, line in enumerate(context_chunk):
|
||||
current_original_line_idx = start + idx
|
||||
if (
|
||||
best_match_start_idx
|
||||
<= current_original_line_idx
|
||||
< best_match_start_idx + search_len
|
||||
):
|
||||
marked_chunk.append(
|
||||
f'> {line}'
|
||||
) # Mark lines within the best match range
|
||||
else:
|
||||
marked_chunk.append(f' {line}')
|
||||
|
||||
return '\n'.join(marked_chunk), best_ratio
|
||||
else:
|
||||
return None, 0.0
|
||||
|
||||
def _apply_llm_diff_edit(
|
||||
self, action: FileEditAction, original_content: str
|
||||
) -> tuple[str | None, Observation | None]:
|
||||
"""Helper to apply a single LLM_DIFF edit block."""
|
||||
# Use standard splitlines (no keepends)
|
||||
original_lines = original_content.splitlines()
|
||||
# Use getattr to safely access search/replace, defaulting to empty string if not present
|
||||
search_content = getattr(action, 'search', '')
|
||||
replace_content = getattr(action, 'replace', '')
|
||||
search_lines = search_content.splitlines()
|
||||
replace_lines = replace_content.splitlines()
|
||||
|
||||
# Handle empty search block (create file / append)
|
||||
if not search_content.strip():
|
||||
# Append if file exists, otherwise content is replace_content
|
||||
# Ensure single newline separation if original content exists and doesn't end with newline
|
||||
separator = (
|
||||
'\n' if original_content and not original_content.endswith('\n') else ''
|
||||
)
|
||||
updated_content = original_content + separator + replace_content
|
||||
# Preserve original trailing newline status
|
||||
if original_content.endswith('\n') and not updated_content.endswith('\n'):
|
||||
updated_content += '\n'
|
||||
elif not original_content.endswith('\n') and updated_content.endswith('\n'):
|
||||
# This case is less likely, but if replace_content added a newline, remove it if original didn't have one
|
||||
updated_content = updated_content.rstrip('\n')
|
||||
return updated_content, None
|
||||
|
||||
# Attempt exact match
|
||||
updated_lines = self._exact_replace(original_lines, search_lines, replace_lines)
|
||||
|
||||
# Attempt whitespace flexible match if exact failed
|
||||
if updated_lines is None:
|
||||
updated_lines = self._whitespace_flexible_replace(
|
||||
original_lines, search_lines, replace_lines
|
||||
)
|
||||
|
||||
# Handle match failure
|
||||
if updated_lines is None:
|
||||
logger.warning(f'Failed to apply diff to {action.path}')
|
||||
similar_block, ratio = self._find_most_similar_block(
|
||||
original_lines, search_lines
|
||||
)
|
||||
error_msg = (
|
||||
f'Failed to apply edit to {action.path}.\n'
|
||||
'The SEARCH block did not match the file content exactly or with flexible whitespace.\n\n'
|
||||
'--- FAILED SEARCH BLOCK ---\n'
|
||||
f'{search_content}\n'
|
||||
'--- END FAILED SEARCH BLOCK ---\n'
|
||||
)
|
||||
if similar_block and ratio > 0.6: # Threshold from Aider
|
||||
error_msg += (
|
||||
f'\n--- MOST SIMILAR BLOCK FOUND (similarity: {ratio:.2f}) ---\n'
|
||||
f'{similar_block}\n'
|
||||
'--- END MOST SIMILAR BLOCK ---\n\n'
|
||||
'Please check the SEARCH block and the file content and try again.'
|
||||
)
|
||||
else:
|
||||
error_msg += '\nNo similar block found.'
|
||||
|
||||
return None, ErrorObservation(error_msg)
|
||||
|
||||
# Match successful - join with newline
|
||||
final_content = '\n'.join(updated_lines)
|
||||
|
||||
return final_content, None
|
||||
|
||||
def llm_diff_edit(self, action: FileEditAction) -> Observation:
|
||||
"""Applies a fenced diff edit parsed directly from LLM output."""
|
||||
if action.impl_source != 'llm_diff':
|
||||
# Should not happen if dispatch is correct, but safety check
|
||||
raise ValueError(
|
||||
f'llm_diff_edit called with incorrect action source: {action.impl_source}'
|
||||
)
|
||||
|
||||
# Read original file or handle creation
|
||||
read_obs = self.read(FileReadAction(path=action.path))
|
||||
original_content = ''
|
||||
prev_exist = True
|
||||
|
||||
if (
|
||||
isinstance(read_obs, ErrorObservation)
|
||||
and 'File not found'.lower() in read_obs.content.lower()
|
||||
):
|
||||
logger.info(
|
||||
f'File {action.path} not found. Attempting to create based on edit.'
|
||||
)
|
||||
prev_exist = False
|
||||
# Check if it's a valid creation (empty search block)
|
||||
search_content = getattr(action, 'search', '')
|
||||
if search_content.strip():
|
||||
return ErrorObservation(
|
||||
f'File {action.path} not found and SEARCH block is not empty. Cannot apply edit.'
|
||||
)
|
||||
# Content will be handled by _apply_llm_diff_edit
|
||||
elif isinstance(read_obs, ErrorObservation):
|
||||
# Other read error
|
||||
return read_obs
|
||||
elif isinstance(read_obs, FileReadObservation):
|
||||
original_content = read_obs.content
|
||||
else:
|
||||
return ErrorObservation(
|
||||
f'Unexpected observation type received when reading {action.path}: {type(read_obs)}'
|
||||
)
|
||||
|
||||
# Apply the edit logic
|
||||
updated_content, error_obs = self._apply_llm_diff_edit(action, original_content)
|
||||
|
||||
if error_obs:
|
||||
return error_obs
|
||||
if updated_content is None:
|
||||
# Should have been caught by error_obs, but safety check
|
||||
return ErrorObservation(
|
||||
f'Edit application failed for {action.path}. Be more careful! You can try smaller or larger edit blocks.'
|
||||
)
|
||||
|
||||
# Lint the updated content (optional)
|
||||
diff = get_diff(original_content, updated_content, action.path)
|
||||
if self.config.sandbox.enable_auto_lint:
|
||||
suffix = os.path.splitext(action.path)[1]
|
||||
lint_error_obs = self._get_lint_error(
|
||||
suffix, original_content, updated_content, action.path, diff
|
||||
)
|
||||
if lint_error_obs is not None:
|
||||
# Attach original diff to lint error message for context
|
||||
lint_error_obs.content += (
|
||||
'\n\n--- ORIGINAL DIFF ATTEMPTED ---\n'
|
||||
f'{diff}\n'
|
||||
'--- END ORIGINAL DIFF ATTEMPTED ---'
|
||||
)
|
||||
return lint_error_obs
|
||||
|
||||
# Write the final content
|
||||
write_obs = self.write(
|
||||
FileWriteAction(path=action.path, content=updated_content)
|
||||
)
|
||||
if isinstance(write_obs, ErrorObservation):
|
||||
return write_obs # Return write error if it occurs
|
||||
|
||||
# Return success observation
|
||||
return FileEditObservation(
|
||||
content=diff, # Return the calculated diff
|
||||
path=action.path,
|
||||
prev_exist=prev_exist,
|
||||
old_content=original_content,
|
||||
new_content=updated_content,
|
||||
impl_source=action.impl_source, # Pass source along
|
||||
)
|
||||
|
||||
@@ -5,6 +5,7 @@ from itertools import islice
|
||||
from jinja2 import Template
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.events.observation.agent import MicroagentKnowledge
|
||||
|
||||
@@ -52,12 +53,20 @@ class PromptManager:
|
||||
def __init__(
|
||||
self,
|
||||
prompt_dir: str,
|
||||
config: AgentConfig,
|
||||
system_prompt_filename: str = 'system_prompt.j2',
|
||||
):
|
||||
self.prompt_dir: str = prompt_dir
|
||||
self.system_template: Template = self._load_system_template(
|
||||
system_prompt_filename
|
||||
)
|
||||
self.config: AgentConfig = config
|
||||
|
||||
# Load the correct system prompt based on the config
|
||||
if self.config.enable_llm_diff:
|
||||
system_template_name = 'system_prompt_llm_diff'
|
||||
else:
|
||||
system_template_name = 'system_prompt'
|
||||
self.system_template: Template = self._load_template(system_template_name)
|
||||
|
||||
# Load other templates
|
||||
self.user_template: Template = self._load_template('user_prompt')
|
||||
self.additional_info_template: Template = self._load_template('additional_info')
|
||||
self.microagent_info_template: Template = self._load_template('microagent_info')
|
||||
@@ -147,5 +156,43 @@ class PromptManager:
|
||||
None,
|
||||
)
|
||||
if latest_user_message:
|
||||
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.iteration_flag.max_value - state.iteration_flag.current_value} turns left to complete the task. When finished reply with <finish></finish>.'
|
||||
reminder_text = ''
|
||||
# Every 10 steps
|
||||
if state.iteration_flag.current_value % 10 == 0:
|
||||
reminder_text += """\n\n## WORKFLOW REMINDER: ## Key Requirements & Constraints
|
||||
|
||||
1. **Understand the problem** very well: it is a bug report, and you know humans don't always write good descriptions. Explore the codebase to understand the related code and the problem in depth. It is possible that the solution needs to be a bit more extensive than just the stated text. Don't exagerate though: don't do unrelated refactoring, but also don't interpret the description too strictly.
|
||||
2. **Focus on the issues:** Implement the fix focusing on non-test files related to the issue.
|
||||
2. **Environment Ready:** The Python environment is pre-configured with all dependencies. Do not install packages.
|
||||
3. **Mandatory Testing Procedure:**
|
||||
* **Create Test to Reproduce the Issue:** *Before* implementing any fix, you MUST create a *new test* (separate from existing tests) that specifically reproduces the issue.
|
||||
* Take existing tests as example to understand the testing format/structure.
|
||||
* Enhance this test with edge cases.
|
||||
* Run this test to confirm reproduction.
|
||||
* **Verify Fix:** After implementing the fix, run your test again to verify the issue is resolved.
|
||||
* **Identify ALL Relevant Tests:** You MUST perform a **dedicated search and analysis** to identify **all** existing unit tests potentially affected by your changes. This includes:
|
||||
* Tests in the same module/directory as the changed files (e.g., `tests/` subdirectories).
|
||||
* Tests explicitly importing or using the modified code/classes/functions.
|
||||
* Tests mentioned in the issue description or related documentation.
|
||||
* Tests covering functionalities that *depend on* the modified code (analyze callers/dependencies if necessary).
|
||||
**If you cannot confidently identify a specific subset, you MUST identify and plan to run the entire test suite for the modified application or module(s). State your identified test scope clearly.**
|
||||
* **Run Identified Relevant Tests:** You MUST execute the **complete set** of relevant existing unit tests you identified in the previous step. Ensure you are running the *correct and comprehensive set* of tests. You MUST NOT modify these existing tests.
|
||||
* **Final Check & Verification:** Before finishing, ensure **all** identified relevant existing tests pass. **Explicitly confirm that you have considered potential omissions in your test selection and believe the executed tests comprehensively cover the impact of your changes.** Failing to identify and run the *complete* relevant set constitutes a failure. If any identified tests fail, revise your fix. Passing all relevant tests is the primary measure of success.
|
||||
4. **Defensive Programming:** Actively practice defensive programming: anticipate and handle potential edge cases, unexpected inputs, and different ways the affected code might be called **to ensure the fix works reliably and allows relevant tests to pass.** Analyze the potential impact on other parts of the codebase.
|
||||
5. **Final Review:** Compare your solution against the original issue and the base commit ({instance["base_commit"]}) to ensure completeness and test passage.
|
||||
|
||||
## General Workflow Guidance
|
||||
|
||||
* Prioritize understanding the problem, exploring the code, planning your fix, implementing it carefully using the required diff format, and **thoroughly testing** according to the **Mandatory Testing Procedure**.
|
||||
* Consider trade-offs between different solutions. The goal is a **robust change that makes the relevant tests pass.** Quality, correctness, and reliability are key.
|
||||
* Actively practice defensive programming: anticipate and handle potential edge cases, unexpected inputs, and different ways the affected code might be called **to ensure the fix works reliably and allows relevant tests to pass.** Analyze the potential impact on other parts of the codebase.
|
||||
|
||||
* IMPORTANT: Your solution will be tested by additional hidden tests, so do not assume the task is complete just because visible tests pass! Refine the solution until you are confident that it is robust and comprehensive according to the **Defensive Programming** requirement.
|
||||
|
||||
## Final Note
|
||||
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
|
||||
|
||||
---
|
||||
"""
|
||||
reminder_text += f'ENVIRONMENT REMINDER: You have {state.iteration_flag.max_value - state.iteration_flag.current_value} turns left to complete the task. When finished reply with <finish></finish>.'
|
||||
latest_user_message.content.append(TextContent(text=reminder_text))
|
||||
|
||||
2
poetry.lock
generated
2
poetry.lock
generated
@@ -6484,7 +6484,7 @@ name = "openhands-aci"
|
||||
version = "0.3.0"
|
||||
description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.12"
|
||||
python-versions = "^3.12"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openhands_aci-0.3.0-py3-none-any.whl", hash = "sha256:4f060aa1c2ae11a14f08eb72e9bf09f81db916766aab10ccea6df3bc52faafd1"},
|
||||
|
||||
@@ -176,6 +176,8 @@ def test_file_edit_action_aci_serialization_deserialization():
|
||||
'end': -1,
|
||||
'thought': 'Replacing text',
|
||||
'impl_source': 'oh_aci',
|
||||
'search': None,
|
||||
'replace': None,
|
||||
},
|
||||
}
|
||||
serialization_deserialization(original_action_dict, FileEditAction)
|
||||
@@ -196,6 +198,8 @@ def test_file_edit_action_llm_serialization_deserialization():
|
||||
'end': 10,
|
||||
'thought': 'Updating file content',
|
||||
'impl_source': 'llm_based_edit',
|
||||
'search': None,
|
||||
'replace': None,
|
||||
},
|
||||
}
|
||||
serialization_deserialization(original_action_dict, FileEditAction)
|
||||
|
||||
@@ -6,18 +6,56 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
from litellm import ModelResponse
|
||||
|
||||
from openhands.agenthub.codeact_agent.function_calling import response_to_actions
|
||||
from openhands.agenthub.codeact_agent.function_calling import (
|
||||
get_tools,
|
||||
response_to_actions,
|
||||
)
|
||||
from openhands.agenthub.codeact_agent.tools import (
|
||||
BrowserTool,
|
||||
FencedDiffEditTool,
|
||||
FinishTool,
|
||||
IPythonTool,
|
||||
ListDirectoryTool,
|
||||
LLMBasedFileEditTool,
|
||||
ThinkTool,
|
||||
UndoEditTool,
|
||||
ViewFileTool,
|
||||
WebReadTool,
|
||||
create_cmd_run_tool,
|
||||
create_str_replace_editor_tool,
|
||||
)
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.core.exceptions import FunctionCallValidationError
|
||||
from openhands.events.action import (
|
||||
AgentThinkAction,
|
||||
BrowseInteractiveAction,
|
||||
CmdRunAction,
|
||||
FileEditAction,
|
||||
FileReadAction,
|
||||
IPythonRunCellAction,
|
||||
MessageAction,
|
||||
)
|
||||
from openhands.events.event import FileEditSource, FileReadSource
|
||||
|
||||
|
||||
# Helper to create AgentConfig with specific flags
|
||||
def create_test_config(**kwargs) -> AgentConfig:
|
||||
defaults = {
|
||||
'enable_browsing': False,
|
||||
'enable_llm_editor': False,
|
||||
'enable_llm_diff': False,
|
||||
'enable_jupyter': False,
|
||||
# Add other AgentConfig defaults if necessary for validation
|
||||
'disabled_microagents': [],
|
||||
'enable_prompt_extensions': True,
|
||||
'enable_history_truncation': True,
|
||||
'enable_som_visual_browsing': True,
|
||||
}
|
||||
defaults.update(kwargs)
|
||||
# Use model_validate to ensure proper Pydantic model creation
|
||||
return AgentConfig.model_validate(defaults)
|
||||
|
||||
|
||||
def create_mock_response(function_name: str, arguments: dict) -> ModelResponse:
|
||||
"""Helper function to create a mock response with a tool call."""
|
||||
return ModelResponse(
|
||||
@@ -42,6 +80,8 @@ def create_mock_response(function_name: str, arguments: dict) -> ModelResponse:
|
||||
'finish_reason': 'tool_calls',
|
||||
}
|
||||
],
|
||||
# Add dummy usage if needed by downstream processing
|
||||
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
|
||||
)
|
||||
|
||||
|
||||
@@ -98,9 +138,10 @@ def test_execute_ipython_cell_missing_code():
|
||||
|
||||
|
||||
def test_edit_file_valid():
|
||||
"""Test edit_file with valid arguments."""
|
||||
"""Test edit_file with valid arguments (LLMBasedFileEditTool)."""
|
||||
# This tests the LLMBasedFileEditTool specifically
|
||||
response = create_mock_response(
|
||||
'edit_file',
|
||||
LLMBasedFileEditTool['function']['name'], # Use tool name
|
||||
{'path': '/path/to/file', 'content': 'file content', 'start': 1, 'end': 10},
|
||||
)
|
||||
actions = response_to_actions(response)
|
||||
@@ -110,12 +151,17 @@ def test_edit_file_valid():
|
||||
assert actions[0].content == 'file content'
|
||||
assert actions[0].start == 1
|
||||
assert actions[0].end == 10
|
||||
assert (
|
||||
actions[0].impl_source == FileEditSource.LLM_BASED_EDIT
|
||||
) # Default when tool called
|
||||
|
||||
|
||||
def test_edit_file_missing_required():
|
||||
"""Test edit_file with missing required arguments."""
|
||||
"""Test edit_file with missing required arguments (LLMBasedFileEditTool)."""
|
||||
# Missing path
|
||||
response = create_mock_response('edit_file', {'content': 'content'})
|
||||
response = create_mock_response(
|
||||
LLMBasedFileEditTool['function']['name'], {'content': 'content'}
|
||||
)
|
||||
with pytest.raises(FunctionCallValidationError) as exc_info:
|
||||
response_to_actions(response)
|
||||
assert 'Missing required argument "path"' in str(exc_info.value)
|
||||
@@ -139,7 +185,7 @@ def test_str_replace_editor_valid():
|
||||
assert actions[0].path == '/path/to/file'
|
||||
assert actions[0].impl_source == FileReadSource.OH_ACI
|
||||
|
||||
# Test other commands
|
||||
# Test other commands (e.g., replace)
|
||||
response = create_mock_response(
|
||||
'str_replace_editor',
|
||||
{
|
||||
@@ -173,7 +219,9 @@ def test_str_replace_editor_missing_required():
|
||||
|
||||
def test_browser_valid():
|
||||
"""Test browser with valid arguments."""
|
||||
response = create_mock_response('browser', {'code': "click('button-1')"})
|
||||
response = create_mock_response(
|
||||
BrowserTool['function']['name'], {'code': "click('button-1')"}
|
||||
)
|
||||
actions = response_to_actions(response)
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], BrowseInteractiveAction)
|
||||
@@ -183,7 +231,7 @@ def test_browser_valid():
|
||||
|
||||
def test_browser_missing_code():
|
||||
"""Test browser with missing code argument."""
|
||||
response = create_mock_response('browser', {})
|
||||
response = create_mock_response(BrowserTool['function']['name'], {})
|
||||
with pytest.raises(FunctionCallValidationError) as exc_info:
|
||||
response_to_actions(response)
|
||||
assert 'Missing required argument "code"' in str(exc_info.value)
|
||||
@@ -213,6 +261,7 @@ def test_invalid_json_arguments():
|
||||
'finish_reason': 'tool_calls',
|
||||
}
|
||||
],
|
||||
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
|
||||
)
|
||||
with pytest.raises(FunctionCallValidationError) as exc_info:
|
||||
response_to_actions(response)
|
||||
@@ -243,3 +292,300 @@ def test_unexpected_argument_handling():
|
||||
# Verify the error message mentions the unexpected argument
|
||||
assert 'old_str_prefix' in str(exc_info.value)
|
||||
assert 'Unexpected argument' in str(exc_info.value)
|
||||
|
||||
|
||||
# =============================================
|
||||
# Tests for get_tools based on AgentConfig
|
||||
# =============================================
|
||||
|
||||
|
||||
def test_get_tools_default():
|
||||
"""Test default tools when no specific editor is enabled."""
|
||||
config = create_test_config()
|
||||
tools = get_tools(config)
|
||||
tool_names = {t['function']['name'] for t in tools}
|
||||
assert create_str_replace_editor_tool()['function']['name'] in tool_names
|
||||
assert FencedDiffEditTool['function']['name'] not in tool_names
|
||||
assert LLMBasedFileEditTool['function']['name'] not in tool_names
|
||||
assert ViewFileTool['function']['name'] not in tool_names # Included in str_replace
|
||||
assert (
|
||||
ListDirectoryTool['function']['name'] not in tool_names
|
||||
) # Included in str_replace
|
||||
assert UndoEditTool['function']['name'] not in tool_names # Included in str_replace
|
||||
|
||||
|
||||
def test_get_tools_llm_diff_enabled():
|
||||
"""Test tools when LLM Diff mode is enabled."""
|
||||
config = create_test_config(
|
||||
enable_llm_diff=True,
|
||||
enable_browsing=True,
|
||||
enable_jupyter=True,
|
||||
)
|
||||
tools = get_tools(config)
|
||||
tool_names = {t['function']['name'] for t in tools}
|
||||
|
||||
# Should include non-edit tools
|
||||
assert create_cmd_run_tool()['function']['name'] in tool_names
|
||||
assert ThinkTool['function']['name'] in tool_names
|
||||
assert FinishTool['function']['name'] in tool_names
|
||||
assert WebReadTool['function']['name'] in tool_names
|
||||
assert BrowserTool['function']['name'] in tool_names
|
||||
assert IPythonTool['function']['name'] in tool_names
|
||||
assert ViewFileTool['function']['name'] in tool_names
|
||||
assert ListDirectoryTool['function']['name'] in tool_names
|
||||
|
||||
# Should NOT include edit tools or Undo
|
||||
assert create_str_replace_editor_tool()['function']['name'] not in tool_names
|
||||
assert FencedDiffEditTool['function']['name'] not in tool_names
|
||||
assert LLMBasedFileEditTool['function']['name'] not in tool_names
|
||||
assert UndoEditTool['function']['name'] not in tool_names
|
||||
|
||||
|
||||
def test_get_tools_llm_editor_enabled():
|
||||
"""Test tools when LLM Editor tool is enabled."""
|
||||
config = create_test_config(enable_llm_editor=True)
|
||||
tools = get_tools(config)
|
||||
tool_names = {t['function']['name'] for t in tools}
|
||||
assert LLMBasedFileEditTool['function']['name'] in tool_names
|
||||
assert ViewFileTool['function']['name'] in tool_names
|
||||
assert ListDirectoryTool['function']['name'] in tool_names
|
||||
assert UndoEditTool['function']['name'] not in tool_names # Undo not compatible
|
||||
assert create_str_replace_editor_tool()['function']['name'] not in tool_names
|
||||
assert FencedDiffEditTool['function']['name'] not in tool_names
|
||||
|
||||
|
||||
# =============================================
|
||||
# Tests for response_to_actions with LLM_DIFF
|
||||
# =============================================
|
||||
|
||||
|
||||
# Mock ModelResponse without tool calls, but with content
|
||||
def create_mock_response_no_tools(content: str) -> ModelResponse:
|
||||
"""Helper function to create a mock response without tool calls."""
|
||||
return ModelResponse(
|
||||
id='mock-no-tools-id',
|
||||
choices=[
|
||||
{
|
||||
'message': {
|
||||
'tool_calls': None, # Explicitly None
|
||||
'content': content,
|
||||
'role': 'assistant',
|
||||
},
|
||||
'index': 0,
|
||||
'finish_reason': 'stop', # Or other non-tool reason
|
||||
}
|
||||
],
|
||||
# Add dummy usage if needed by downstream processing
|
||||
usage={'prompt_tokens': 10, 'completion_tokens': 10, 'total_tokens': 20},
|
||||
)
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_with_blocks():
|
||||
"""Test parsing diff blocks when llm_diff is enabled and no tool calls."""
|
||||
content = """
|
||||
Thinking about the changes...
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old line
|
||||
=======
|
||||
new line
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Another block:
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
second old
|
||||
=======
|
||||
second new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
|
||||
|
||||
assert len(actions) == 3 # Think + 2 Edits
|
||||
assert isinstance(actions[0], AgentThinkAction)
|
||||
assert actions[0].thought == 'Thinking about the changes...'
|
||||
|
||||
assert isinstance(actions[1], FileEditAction)
|
||||
assert actions[1].path == 'file.py'
|
||||
assert actions[1].search == 'old line\n'
|
||||
assert actions[1].replace == 'new line\n'
|
||||
assert actions[1].impl_source == FileEditSource.LLM_DIFF
|
||||
|
||||
assert isinstance(actions[2], FileEditAction)
|
||||
assert actions[2].path == 'file.py'
|
||||
assert actions[2].search == 'second old\n'
|
||||
assert actions[2].replace == 'second new\n'
|
||||
assert actions[2].impl_source == FileEditSource.LLM_DIFF
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_no_blocks():
|
||||
"""Test llm_diff mode when response content has no blocks."""
|
||||
content = 'Just a simple message.'
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
|
||||
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], MessageAction)
|
||||
assert actions[0].content == content
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_parse_error(mocker):
|
||||
"""Test llm_diff mode when parsing fails."""
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old
|
||||
# Malformed block - missing divider
|
||||
new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
# Mock the parser to raise ValueError
|
||||
mocker.patch(
|
||||
'openhands.agenthub.codeact_agent.function_calling.parse_llm_response_for_diffs',
|
||||
side_effect=ValueError('Test parse error'),
|
||||
)
|
||||
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True) # Enable flag
|
||||
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], MessageAction)
|
||||
assert 'Error parsing diff blocks: Test parse error' in actions[0].content
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_with_tool_calls():
|
||||
"""Test llm_diff mode is ignored if tool calls are present."""
|
||||
# Create a response that HAS tool calls, even though llm_diff is enabled
|
||||
response = create_mock_response('execute_bash', {'command': 'echo hello'})
|
||||
# Add some content that looks like a diff block, to ensure it's ignored
|
||||
response.choices[0]['message']['content'] = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
ignored old
|
||||
=======
|
||||
ignored new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True)
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], CmdRunAction)
|
||||
assert actions[0].command == 'echo hello'
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_multiple_errors():
|
||||
"""Test handling of multiple errors in diff blocks."""
|
||||
content = """
|
||||
```python
|
||||
file1.py
|
||||
<<<<<<< SEARCH
|
||||
old content
|
||||
# Missing divider
|
||||
new content
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
```python
|
||||
# Missing filename
|
||||
<<<<<<< SEARCH
|
||||
more old content
|
||||
=======
|
||||
more new content
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True)
|
||||
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], MessageAction)
|
||||
assert 'Error parsing block #1' in actions[0].content
|
||||
assert 'Expected `=======`' in actions[0].content
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_whitespace_edge_cases():
|
||||
"""Test handling of whitespace edge cases in diff blocks."""
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
|
||||
indented line
|
||||
trailing space
|
||||
=======
|
||||
|
||||
new indented line
|
||||
new trailing space
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True)
|
||||
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], FileEditAction)
|
||||
assert actions[0].path == 'file.py'
|
||||
assert actions[0].search == '\n indented line\ntrailing space \n'
|
||||
assert actions[0].replace == '\n new indented line\nnew trailing space \n'
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_mode_empty_lines():
|
||||
"""Test handling of empty lines in diff blocks."""
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
|
||||
|
||||
=======
|
||||
|
||||
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(response, is_llm_diff_enabled=True)
|
||||
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], FileEditAction)
|
||||
assert actions[0].path == 'file.py'
|
||||
assert actions[0].search == '\n\n\n'
|
||||
assert actions[0].replace == '\n\n\n'
|
||||
|
||||
# Should process the tool call, not the diff block
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], CmdRunAction)
|
||||
assert actions[0].command == 'echo hello'
|
||||
# Check that the thought includes the content that was ignored for parsing
|
||||
assert 'ignored old' in actions[0].thought
|
||||
|
||||
|
||||
def test_response_to_actions_llm_diff_disabled_no_tools():
|
||||
"""Test response parsing when llm_diff is disabled and no tool calls."""
|
||||
content = """
|
||||
Thinking about the changes...
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old line
|
||||
=======
|
||||
new line
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
response = create_mock_response_no_tools(content)
|
||||
actions = response_to_actions(
|
||||
response, is_llm_diff_enabled=False
|
||||
) # Explicitly disable
|
||||
|
||||
# Should just be a MessageAction, no parsing attempted
|
||||
assert len(actions) == 1
|
||||
assert isinstance(actions[0], MessageAction)
|
||||
assert actions[0].content == content
|
||||
|
||||
640
tests/unit/test_llm_diff_parser.py
Normal file
640
tests/unit/test_llm_diff_parser.py
Normal file
@@ -0,0 +1,640 @@
|
||||
# tests/unit/test_llm_diff_parser.py
|
||||
import pytest
|
||||
|
||||
from openhands.agenthub.codeact_agent.llm_diff_parser import (
|
||||
DiffBlock,
|
||||
LLMMalformedActionError,
|
||||
find_filename, # Import if you add tests for this
|
||||
parse_llm_response_for_diffs,
|
||||
strip_filename, # Import if you add tests for this
|
||||
)
|
||||
|
||||
# Test cases for parse_llm_response_for_diffs
|
||||
|
||||
|
||||
def test_parse_single_valid_block():
|
||||
content = """
|
||||
Some text before.
|
||||
```python
|
||||
path/to/file.py
|
||||
<<<<<<< SEARCH
|
||||
print("Hello")
|
||||
=======
|
||||
print("Hello, World!")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Some text after.
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='path/to/file.py',
|
||||
search='print("Hello")\n',
|
||||
replace='print("Hello, World!")\n',
|
||||
)
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_multiple_valid_blocks():
|
||||
content = """
|
||||
Block 1:
|
||||
```python
|
||||
file1.py
|
||||
<<<<<<< SEARCH
|
||||
old line 1
|
||||
=======
|
||||
new line 1
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Some intermediate text.
|
||||
Block 2:
|
||||
```javascript
|
||||
file2.js
|
||||
<<<<<<< SEARCH
|
||||
old js line
|
||||
=======
|
||||
new js line
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='file1.py', search='old line 1\n', replace='new line 1\n'),
|
||||
DiffBlock(filename='file2.js', search='old js line\n', replace='new js line\n'),
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_empty_search_block():
|
||||
content = """
|
||||
Creating a new file.
|
||||
```python
|
||||
new_file.py
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
# This is a new file
|
||||
print("Created!")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='new_file.py',
|
||||
search='',
|
||||
replace='# This is a new file\nprint("Created!")\n',
|
||||
)
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_empty_replace_block():
|
||||
content = """
|
||||
Deleting content.
|
||||
```python
|
||||
file_to_edit.py
|
||||
<<<<<<< SEARCH
|
||||
# Line to delete
|
||||
print("Delete me")
|
||||
=======
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='file_to_edit.py',
|
||||
search='# Line to delete\nprint("Delete me")\n',
|
||||
replace='',
|
||||
)
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_no_blocks():
|
||||
content = 'This is just a regular message without any diff blocks.'
|
||||
expected_blocks = []
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_malformed_missing_divider():
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old content
|
||||
# Missing divider here
|
||||
new content
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
# The implementation raises LLMMalformedActionError for parsing issues
|
||||
with pytest.raises(LLMMalformedActionError, match='Expected `=======`'):
|
||||
parse_llm_response_for_diffs(content)
|
||||
|
||||
|
||||
def test_parse_malformed_missing_replace_marker():
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old content
|
||||
=======
|
||||
new content
|
||||
# Missing replace marker
|
||||
```
|
||||
"""
|
||||
# The implementation raises LLMMalformedActionError for parsing issues
|
||||
with pytest.raises(LLMMalformedActionError, match='Expected `>>>>>>> REPLACE`'):
|
||||
parse_llm_response_for_diffs(content)
|
||||
|
||||
|
||||
def test_parse_filename_inference():
|
||||
content = """
|
||||
Editing file1.py
|
||||
```python
|
||||
file1.py
|
||||
<<<<<<< SEARCH
|
||||
content1
|
||||
=======
|
||||
new content1
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Now editing the same file again.
|
||||
```python
|
||||
<<<<<<< SEARCH
|
||||
content2
|
||||
=======
|
||||
new content2
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='file1.py', search='content1\n', replace='new content1\n'),
|
||||
DiffBlock(filename='file1.py', search='content2\n', replace='new content2\n'),
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_valid_fnames_exact_match():
|
||||
content = """
|
||||
```python
|
||||
src/app.py
|
||||
<<<<<<< SEARCH
|
||||
old
|
||||
=======
|
||||
new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
valid_fnames = ['src/app.py', 'src/utils.py']
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='src/app.py', search='old\n', replace='new\n')
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content, valid_fnames=valid_fnames)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_valid_fnames_fuzzy_match():
|
||||
content = """
|
||||
```python
|
||||
src/appz.py
|
||||
<<<<<<< SEARCH
|
||||
old
|
||||
=======
|
||||
new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
valid_fnames = ['src/app.py', 'src/utils.py']
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='src/app.py', search='old\n', replace='new\n')
|
||||
] # Expects fuzzy match to correct filename
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content, valid_fnames=valid_fnames)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_missing_filename_error_if_search_not_empty():
|
||||
# If search is not empty, a filename MUST be present or inferrable
|
||||
content = """
|
||||
```python
|
||||
<<<<<<< SEARCH
|
||||
old content requires a filename
|
||||
=======
|
||||
new
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
with pytest.raises(LLMMalformedActionError, match='Bad/missing filename'):
|
||||
parse_llm_response_for_diffs(content)
|
||||
|
||||
|
||||
def test_parse_missing_filename_ok_if_search_empty_but_no_filename_line():
|
||||
# If search is empty (new file), but no filename line is found *at all* before the block, it's an error
|
||||
content = """
|
||||
```python
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
new file content
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
with pytest.raises(
|
||||
LLMMalformedActionError, match='Could not determine filename for new file block'
|
||||
):
|
||||
parse_llm_response_for_diffs(content)
|
||||
|
||||
|
||||
def test_parse_missing_filename_ok_if_search_empty_and_filename_line_present():
|
||||
# If search is empty (new file), and a filename line *is* present, it should work
|
||||
content = """
|
||||
new_file.py
|
||||
```python
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
new file content
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='new_file.py', search='', replace='new file content\n')
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_two_new_files():
|
||||
content = """
|
||||
First new file:
|
||||
```python
|
||||
new_file_1.py
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
# Content for file 1
|
||||
print("File 1")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Second new file:
|
||||
```text
|
||||
new_file_2.txt
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
Content for file 2.
|
||||
This is plain text.
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='new_file_1.py',
|
||||
search='',
|
||||
replace='# Content for file 1\nprint("File 1")\n',
|
||||
),
|
||||
DiffBlock(
|
||||
filename='new_file_2.txt',
|
||||
search='',
|
||||
replace='Content for file 2.\nThis is plain text.\n',
|
||||
),
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_one_edit_one_new_file():
|
||||
content = """
|
||||
Editing an existing file:
|
||||
```python
|
||||
existing_file.py
|
||||
<<<<<<< SEARCH
|
||||
# Old line
|
||||
print("Old")
|
||||
=======
|
||||
# New line
|
||||
print("New")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Creating a new file:
|
||||
```python
|
||||
new_file.py
|
||||
<<<<<<< SEARCH
|
||||
=======
|
||||
# New file content
|
||||
print("Newly created")
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='existing_file.py',
|
||||
search='# Old line\nprint("Old")\n',
|
||||
replace='# New line\nprint("New")\n',
|
||||
),
|
||||
DiffBlock(
|
||||
filename='new_file.py',
|
||||
search='',
|
||||
replace='# New file content\nprint("Newly created")\n',
|
||||
),
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_two_edits_different_files_explicit():
|
||||
content = """
|
||||
First edit:
|
||||
```python
|
||||
file_a.py
|
||||
<<<<<<< SEARCH
|
||||
# Original line in file A
|
||||
a = 1
|
||||
=======
|
||||
# Modified line in file A
|
||||
a = 2
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
Some text between blocks.
|
||||
Second edit:
|
||||
```python
|
||||
file_b.py
|
||||
<<<<<<< SEARCH
|
||||
# Original line in file B
|
||||
b = 'hello'
|
||||
=======
|
||||
# Modified line in file B
|
||||
b = 'world'
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='file_a.py',
|
||||
search='# Original line in file A\na = 1\n',
|
||||
replace='# Modified line in file A\na = 2\n',
|
||||
),
|
||||
DiffBlock(
|
||||
filename='file_b.py',
|
||||
search="# Original line in file B\nb = 'hello'\n",
|
||||
replace="# Modified line in file B\nb = 'world'\n",
|
||||
),
|
||||
]
|
||||
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_block_without_closing_fence():
|
||||
content = """
|
||||
```python
|
||||
path/to/file.py
|
||||
<<<<<<< SEARCH
|
||||
print("Hello")
|
||||
=======
|
||||
print("Hello, World!")
|
||||
>>>>>>> REPLACE
|
||||
Some text after, no closing fence."""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='path/to/file.py',
|
||||
search='print("Hello")\n',
|
||||
replace='print("Hello, World!")\n',
|
||||
)
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_block_with_extra_content_after_replace():
|
||||
# Content after REPLACE but before closing fence should be ignored by parser
|
||||
content = """
|
||||
```python
|
||||
path/to/file.py
|
||||
<<<<<<< SEARCH
|
||||
print("Hello")
|
||||
=======
|
||||
print("Hello, World!")
|
||||
>>>>>>> REPLACE
|
||||
This should be ignored.
|
||||
```
|
||||
More text."""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='path/to/file.py',
|
||||
search='print("Hello")\n',
|
||||
replace='print("Hello, World!")\n',
|
||||
)
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_malformed_unexpected_divider():
|
||||
content = """
|
||||
```python
|
||||
file.py
|
||||
<<<<<<< SEARCH
|
||||
old content
|
||||
=======
|
||||
new content
|
||||
========= UNEXPECTED DIVIDER
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
with pytest.raises(LLMMalformedActionError, match='Unexpected `=======`'):
|
||||
parse_llm_response_for_diffs(content)
|
||||
|
||||
|
||||
def test_find_filename_simple():
|
||||
lines = ['file.py', '```python']
|
||||
assert find_filename(lines) == 'file.py'
|
||||
|
||||
|
||||
def test_find_filename_with_path():
|
||||
lines = ['src/core/file.py', '```python']
|
||||
assert find_filename(lines) == 'src/core/file.py'
|
||||
|
||||
|
||||
def test_find_filename_strip_chars():
|
||||
lines = ['*`src/app.py`*', '```python']
|
||||
assert find_filename(lines) == 'src/app.py'
|
||||
|
||||
|
||||
def test_find_filename_strip_colon():
|
||||
lines = ['File: src/app.py:', '```python']
|
||||
assert (
|
||||
find_filename(lines) == 'src/app.py'
|
||||
) # Assuming strip_filename handles 'File: ' prefix
|
||||
|
||||
|
||||
def test_find_filename_no_filename():
|
||||
lines = ['Just some text', '```python']
|
||||
assert find_filename(lines) is None
|
||||
|
||||
|
||||
def test_find_filename_too_far():
|
||||
lines = [
|
||||
'file.py',
|
||||
'another line',
|
||||
'yet another',
|
||||
'```python',
|
||||
] # file.py is too far back
|
||||
assert find_filename(lines) is None
|
||||
|
||||
|
||||
def test_find_filename_valid_fnames_exact():
|
||||
lines = ['app.py', '```python']
|
||||
valid = ['src/app.py', 'app.py']
|
||||
assert find_filename(lines, valid_fnames=valid) == 'app.py'
|
||||
|
||||
|
||||
def test_find_filename_valid_fnames_fuzzy():
|
||||
lines = ['appz.py', '```python']
|
||||
valid = ['src/app.py', 'app.py']
|
||||
assert find_filename(lines, valid_fnames=valid) == 'app.py'
|
||||
|
||||
|
||||
def test_find_filename_valid_fnames_no_match():
|
||||
lines = ['other.py', '```python']
|
||||
valid = ['src/app.py', 'app.py']
|
||||
# Should still return the best guess if no valid match
|
||||
assert find_filename(lines, valid_fnames=valid) == 'other.py'
|
||||
|
||||
|
||||
def test_strip_filename_basic():
|
||||
assert strip_filename(' file.py ') == 'file.py'
|
||||
assert strip_filename('`file.py`') == 'file.py'
|
||||
assert strip_filename('*file.py*') == 'file.py'
|
||||
assert strip_filename('file.py:') == 'file.py'
|
||||
assert strip_filename('# file.py') == 'file.py'
|
||||
assert strip_filename('`*# file.py:*` ') == 'file.py'
|
||||
|
||||
|
||||
def test_strip_filename_no_strip():
|
||||
assert strip_filename('file.py') == 'file.py'
|
||||
|
||||
|
||||
def test_strip_filename_none():
|
||||
assert strip_filename(' ') is None
|
||||
assert strip_filename('```') is None
|
||||
assert strip_filename('...') is None
|
||||
|
||||
|
||||
def test_parse_with_different_fence():
|
||||
content = """
|
||||
~~~markdown
|
||||
path/to/file.md
|
||||
<<<<<<< SEARCH
|
||||
Old text
|
||||
=======
|
||||
New text
|
||||
>>>>>>> REPLACE
|
||||
~~~
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(filename='path/to/file.md', search='Old text\n', replace='New text\n')
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content, fence=('~~~', '~~~'))
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_whitespace_handling():
|
||||
# Tests if leading/trailing whitespace inside blocks is preserved
|
||||
content = """
|
||||
```python
|
||||
file_with_whitespace.py
|
||||
<<<<<<< SEARCH
|
||||
leading whitespace
|
||||
trailing whitespace
|
||||
=======
|
||||
new leading whitespace
|
||||
new trailing whitespace
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='file_with_whitespace.py',
|
||||
search=' leading whitespace\ntrailing whitespace \n',
|
||||
replace=' new leading whitespace\nnew trailing whitespace \n',
|
||||
)
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_consecutive_blocks():
|
||||
# Tests parsing blocks immediately following each other
|
||||
content = """
|
||||
```python
|
||||
file_a.py
|
||||
<<<<<<< SEARCH
|
||||
block 1 search
|
||||
=======
|
||||
block 1 replace
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
```python
|
||||
file_b.py
|
||||
<<<<<<< SEARCH
|
||||
block 2 search
|
||||
=======
|
||||
block 2 replace
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='file_a.py', search='block 1 search\n', replace='block 1 replace\n'
|
||||
),
|
||||
DiffBlock(
|
||||
filename='file_b.py', search='block 2 search\n', replace='block 2 replace\n'
|
||||
),
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
|
||||
|
||||
def test_parse_filename_like_text_in_search():
|
||||
# Tests that text resembling a filename inside SEARCH doesn't confuse the parser
|
||||
content = """
|
||||
```python
|
||||
real_file.py
|
||||
<<<<<<< SEARCH
|
||||
This line contains fake_file.py
|
||||
=======
|
||||
This is the replacement.
|
||||
>>>>>>> REPLACE
|
||||
```
|
||||
"""
|
||||
expected_blocks = [
|
||||
DiffBlock(
|
||||
filename='real_file.py',
|
||||
search='This line contains fake_file.py\n',
|
||||
replace='This is the replacement.\n',
|
||||
)
|
||||
]
|
||||
blocks = parse_llm_response_for_diffs(content)
|
||||
assert blocks == expected_blocks
|
||||
@@ -5,6 +5,7 @@ import pytest
|
||||
|
||||
from openhands.controller.state.control_flags import IterationControlFlag
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.events.observation.agent import MicroagentKnowledge
|
||||
from openhands.microagent import BaseMicroagent
|
||||
@@ -268,7 +269,28 @@ Today's date is {{ runtime_info.date }}
|
||||
def test_prompt_manager_initialization_error():
|
||||
"""Test that PromptManager raises an error if the prompt directory is not set."""
|
||||
with pytest.raises(ValueError, match='Prompt directory is not set'):
|
||||
PromptManager(None)
|
||||
PromptManager(None, AgentConfig()) # Need config even for this error check
|
||||
|
||||
|
||||
def test_prompt_manager_system_prompt_selection(prompt_dir):
|
||||
"""Test that PromptManager loads the correct system prompt based on config."""
|
||||
# Ensure both prompt files exist in the temp directory (copied by fixture)
|
||||
assert os.path.exists(os.path.join(prompt_dir, 'system_prompt.j2'))
|
||||
assert os.path.exists(os.path.join(prompt_dir, 'system_prompt_llm_diff.j2'))
|
||||
|
||||
# Case 1: llm_diff is False (default)
|
||||
config_default = AgentConfig() # Defaults to enable_llm_diff=False
|
||||
manager_default = PromptManager(prompt_dir=prompt_dir, config=config_default)
|
||||
# Check the loaded template's origin filename
|
||||
assert 'system_prompt.j2' in manager_default.system_template.filename
|
||||
assert 'system_prompt_llm_diff.j2' not in manager_default.system_template.filename
|
||||
|
||||
# Case 2: llm_diff is True
|
||||
config_llm_diff = AgentConfig(enable_llm_diff=True)
|
||||
manager_llm_diff = PromptManager(prompt_dir=prompt_dir, config=config_llm_diff)
|
||||
# Check the loaded template's origin filename
|
||||
assert 'system_prompt_llm_diff.j2' in manager_llm_diff.system_template.filename
|
||||
assert 'system_prompt.j2' not in manager_llm_diff.system_template.filename
|
||||
|
||||
|
||||
def test_prompt_manager_custom_system_prompt_filename(prompt_dir):
|
||||
|
||||
211
tests/unit/test_runtime_edit.py
Normal file
211
tests/unit/test_runtime_edit.py
Normal file
@@ -0,0 +1,211 @@
|
||||
# tests/unit/test_runtime_edit.py
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.core.config import ( # Assuming AppConfig is needed
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
)
|
||||
from openhands.events.action import FileEditAction, FileWriteAction
|
||||
from openhands.events.event import FileEditSource
|
||||
from openhands.events.observation import (
|
||||
ErrorObservation,
|
||||
FileEditObservation,
|
||||
FileReadObservation,
|
||||
FileWriteObservation,
|
||||
)
|
||||
from openhands.runtime.utils.edit import FileEditRuntimeMixin
|
||||
|
||||
|
||||
# Mock AppConfig if necessary for FileEditRuntimeMixin initialization or methods
|
||||
@pytest.fixture
|
||||
def mock_config():
|
||||
config = MagicMock(spec=AppConfig)
|
||||
config.sandbox = MagicMock(spec=SandboxConfig)
|
||||
config.sandbox.enable_auto_lint = (
|
||||
False # Disable linting for simplicity unless testing it
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
# Create a dummy class that inherits the mixin for testing
|
||||
class MockRuntime(FileEditRuntimeMixin):
|
||||
def __init__(self, config, *args, **kwargs):
|
||||
# Initialize FileEditRuntimeMixin part
|
||||
# Assuming enable_llm_editor=False as we are testing llm_diff_edit
|
||||
super().__init__(enable_llm_editor=False, *args, **kwargs) # noqa: B026
|
||||
self.config = config
|
||||
# Mock essential methods used by llm_diff_edit
|
||||
self.read = MagicMock()
|
||||
self.write = MagicMock()
|
||||
# Mock _get_lint_error if linting is enabled in tests
|
||||
self._get_lint_error = MagicMock(return_value=None)
|
||||
|
||||
# Implement abstract methods if FileEditRuntimeInterface requires them
|
||||
def run_ipython(self, action):
|
||||
pass
|
||||
|
||||
# Add other abstract methods if needed
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_runtime(mock_config):
|
||||
return MockRuntime(config=mock_config)
|
||||
|
||||
|
||||
# Test cases for llm_diff_edit
|
||||
|
||||
|
||||
def test_llm_diff_edit_exact_match(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='test.py',
|
||||
search="print('old')\nline2\n",
|
||||
replace="print('new')\nline2\n",
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
original_content = "line0\nprint('old')\nline2\nline3\n"
|
||||
expected_new_content = "line0\nprint('new')\nline2\nline3\n"
|
||||
|
||||
mock_runtime.read.return_value = FileReadObservation(
|
||||
content=original_content, path='test.py'
|
||||
)
|
||||
mock_runtime.write.return_value = FileWriteObservation(
|
||||
content='', path='test.py'
|
||||
) # Assume write succeeds
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, FileEditObservation)
|
||||
assert obs.path == 'test.py'
|
||||
assert obs.old_content == original_content
|
||||
assert obs.new_content == expected_new_content
|
||||
mock_runtime.write.assert_called_once_with(
|
||||
FileWriteAction(path='test.py', content=expected_new_content)
|
||||
)
|
||||
|
||||
|
||||
def test_llm_diff_edit_whitespace_flexible_match(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='test.py',
|
||||
search=" print('old')\n line2\n", # Search has 2 spaces
|
||||
replace=" print('new')\n line2\n", # Replace also has 2 spaces (relative indent)
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
original_content = (
|
||||
"line0\n print('old')\n line2\nline3\n" # Original has 4 spaces
|
||||
)
|
||||
expected_new_content = (
|
||||
"line0\n print('new')\n line2\nline3\n" # Expect 4 spaces preserved
|
||||
)
|
||||
|
||||
mock_runtime.read.return_value = FileReadObservation(
|
||||
content=original_content, path='test.py'
|
||||
)
|
||||
mock_runtime.write.return_value = FileWriteObservation(content='', path='test.py')
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, FileEditObservation)
|
||||
assert obs.new_content == expected_new_content
|
||||
mock_runtime.write.assert_called_once_with(
|
||||
FileWriteAction(path='test.py', content=expected_new_content)
|
||||
)
|
||||
|
||||
|
||||
def test_llm_diff_edit_create_file(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='new_file.py',
|
||||
search='', # Empty search for creation
|
||||
replace="print('hello new file')\n",
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
original_content = '' # File doesn't exist
|
||||
expected_new_content = "print('hello new file')\n"
|
||||
|
||||
mock_runtime.read.return_value = ErrorObservation(
|
||||
content='File not found: new_file.py'
|
||||
)
|
||||
mock_runtime.write.return_value = FileWriteObservation(
|
||||
content='', path='new_file.py'
|
||||
)
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, FileEditObservation)
|
||||
assert obs.path == 'new_file.py'
|
||||
assert obs.old_content == original_content
|
||||
assert obs.new_content == expected_new_content
|
||||
assert obs.prev_exist is False
|
||||
mock_runtime.write.assert_called_once_with(
|
||||
FileWriteAction(path='new_file.py', content=expected_new_content)
|
||||
)
|
||||
|
||||
|
||||
def test_llm_diff_edit_append_file(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='append.py',
|
||||
search='', # Empty search for append
|
||||
replace="print('appended')\n",
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
original_content = "print('original')\n"
|
||||
expected_new_content = "print('original')\nprint('appended')\n"
|
||||
|
||||
mock_runtime.read.return_value = FileReadObservation(
|
||||
content=original_content, path='append.py'
|
||||
)
|
||||
mock_runtime.write.return_value = FileWriteObservation(content='', path='append.py')
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, FileEditObservation)
|
||||
assert obs.new_content == expected_new_content
|
||||
assert obs.prev_exist is True
|
||||
mock_runtime.write.assert_called_once_with(
|
||||
FileWriteAction(path='append.py', content=expected_new_content)
|
||||
)
|
||||
|
||||
|
||||
def test_llm_diff_edit_no_match(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='test.py',
|
||||
search="print('not found')\n",
|
||||
replace="print('should not apply')\n",
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
original_content = "print('something else')\n"
|
||||
|
||||
mock_runtime.read.return_value = FileReadObservation(
|
||||
content=original_content, path='test.py'
|
||||
)
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, ErrorObservation)
|
||||
assert 'SEARCH block did not match' in obs.content
|
||||
assert 'FAILED SEARCH BLOCK' in obs.content
|
||||
assert "print('not found')" in obs.content
|
||||
mock_runtime.write.assert_not_called() # Write should not be called on failure
|
||||
|
||||
|
||||
def test_llm_diff_edit_create_file_non_empty_search_error(mock_runtime):
|
||||
action = FileEditAction(
|
||||
path='new_file.py',
|
||||
search='some search content', # Non-empty search
|
||||
replace="print('hello new file')\n",
|
||||
impl_source=FileEditSource.LLM_DIFF,
|
||||
)
|
||||
|
||||
mock_runtime.read.return_value = ErrorObservation(
|
||||
content='File not found: new_file.py'
|
||||
)
|
||||
|
||||
obs = mock_runtime.llm_diff_edit(action)
|
||||
|
||||
assert isinstance(obs, ErrorObservation)
|
||||
assert 'File new_file.py not found and SEARCH block is not empty' in obs.content
|
||||
mock_runtime.write.assert_not_called()
|
||||
|
||||
|
||||
# TODO: Add tests for linting interaction if needed
|
||||
Reference in New Issue
Block a user