feat(agent): CodeAct with function calling (#4537)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: tobitege <10787084+tobitege@users.noreply.github.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: tofarr <tofarr@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
+    AgentConfig,
     AppConfig,
     SandboxConfig,
     get_llm_config_arg,
@@ -24,6 +25,7 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
@@ -60,6 +62,12 @@ def get_config(
         f'{metadata.llm_config.log_completions_folder}'
     )
     config.set_llm_config(metadata.llm_config)
+    agent_config = AgentConfig(
+        codeact_enable_jupyter=True,
+        codeact_enable_browsing_delegate=True,
+        codeact_enable_llm_editor=False,
+    )
+    config.set_agent_config(agent_config)
     return config
@@ -122,7 +130,7 @@ def process_instance(
     # # result evaluation
     # # =============================================

-    histories = state.history.get_events()
+    histories = [event_to_dict(event) for event in state.history.get_events()]
     test_result: TestResult = test_class.verify_result(runtime, histories)
     metrics = state.metrics.get() if state.metrics else None
@@ -23,6 +23,7 @@ from evaluation.utils.shared import (
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
+    AgentConfig,
     AppConfig,
     SandboxConfig,
     get_llm_config_arg,
@@ -45,11 +46,6 @@ AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActSWEAgent': codeact_user_response,
 }

-AGENT_CLS_TO_INST_SUFFIX = {
-    'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
-    'CodeActSWEAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
-}
-

 def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
     return f'{instance.repo}__{instance.version}'.replace('/', '__')
@@ -71,25 +67,27 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
         )
         instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name)
     else:
         # Testing general agents
+        # Instruction based on Anthropic's official trajectory
+        # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
         instruction = (
-            f'Please fix the following issue for the repository in /workspace/{workspace_dir_name}.\n'
-            'Environment has been set up for you to start working. You may assume all necessary tools are installed.\n\n'
-            '# Problem Statement\n'
-            f'{instance.problem_statement}\n\n'
+            '<uploaded_files>\n'
+            f'/workspace/{workspace_dir_name}\n'
+            '</uploaded_files>\n'
+            f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
+            f'<pr_description>\n'
+            f'{instance.problem_statement}\n'
+            '</pr_description>\n\n'
+            'Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?\n'
+            "I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
+            'Your task is to make the minimal changes to non-tests files in the /repo directory to ensure the <pr_description> is satisfied.\n'
+            'Follow these steps to resolve the issue:\n'
+            '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
+            '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
+            '3. Edit the sourcecode of the repo to resolve the issue\n'
+            '4. Rerun your reproduce script and confirm that the error is fixed!\n'
+            '5. Think about edgecases and make sure your fix handles them as well\n'
+            "Your thinking should be thorough and so it's fine if it's very long.\n"
         )
-        if USE_HINT_TEXT and instance.hints_text:
-            instruction += f'# Hints\n{instance.hints_text}\n\n'
-        instruction += (
-            'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
-            'You should NOT modify any existing test case files. You SHOULD add new test in a NEW file to reproduce the issue.\n'
-            'You should verify that the issue is resolved and any new tests you create pass successfully.\n'
-            'You should NEVER use web browsing or any other web-based tools.\n'
-            'You should ALWAYS use the default Python interpreter available in the <execute_bash> environment to run code related to the provided issue and/or repository.\n'
-        )
-
-        # NOTE: You can actually set slightly different instruction for different agents
-        instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
     return instruction
@@ -153,6 +151,12 @@ def get_config(
         f'{metadata.llm_config.log_completions_folder}'
     )
     config.set_llm_config(metadata.llm_config)
+    agent_config = AgentConfig(
+        codeact_enable_jupyter=False,
+        codeact_enable_browsing_delegate=False,
+        codeact_enable_llm_editor=False,
+    )
+    config.set_agent_config(agent_config)
     return config
@@ -312,7 +316,7 @@ def complete_runtime(
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert_and_raise(
-        obs.exit_code == 0,
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
         f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
     )
@@ -322,7 +326,7 @@ def complete_runtime(
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert_and_raise(
-        obs.exit_code == 0,
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
         f'Failed to git config --global core.pager "": {str(obs)}',
     )
@@ -331,7 +335,10 @@ def complete_runtime(
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {str(obs)}')
+    assert_and_raise(
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+        f'Failed to git add -A: {str(obs)}',
+    )

     n_retries = 0
     git_patch = None
@@ -404,6 +411,7 @@ def process_instance(
     if (
         state.last_error
         and 'fatal error during agent execution' in state.last_error
+        and 'stuck in a loop' not in state.last_error
     ):
         raise EvalException('Fatal error detected: ' + state.last_error)
@@ -488,6 +496,7 @@ if __name__ == '__main__':
     llm_config = None
     if args.llm_config:
         llm_config = get_llm_config_arg(args.llm_config)
+        llm_config.log_completions = True

     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
@@ -0,0 +1,28 @@
#!/bin/bash

FOLDER_PATH=$1
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
mkdir -p $NEW_FOLDER_PATH

# Build all_preds.jsonl
poetry run python evaluation/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl

# Build trajs/
mkdir -p $NEW_FOLDER_PATH/trajs
for instance_dir in $FOLDER_PATH/llm_completions/*/; do
    instance_id=$(basename "$instance_dir")
    latest_json=$(ls -t "$instance_dir"/*.json | head -n1)
    if [ -n "$latest_json" ]; then
        cat "$latest_json" | jq -r '.messages' > "$NEW_FOLDER_PATH/trajs/$instance_id.json"
    fi
done

# Build logs/
# check if $FOLDER_PATH/eval_outputs exists, if so copy over - else raise error
if [ -d "$FOLDER_PATH/eval_outputs" ]; then
    cp -r $FOLDER_PATH/eval_outputs $NEW_FOLDER_PATH/logs
else
    echo "Error: $FOLDER_PATH/eval_outputs does not exist. You should run the local docker eval_infer.sh first."
    exit 1
fi
@@ -104,7 +104,7 @@ def codeact_user_response(
     )
     msg = (
         'Please continue working on the task on whatever approach you think is suitable.\n'
-        'If you think you have solved the task, please first send your answer to user through message and then <execute_bash> exit </execute_bash>.\n'
+        'If you think you have solved the task, please first send your answer to user through message and then finish the interaction.\n'
         f'{encaps_str}'
         'IMPORTANT: YOU SHOULD NEVER ASK FOR HUMAN HELP.\n'
     )
@@ -64,6 +64,23 @@ class CodeActResponseParser(ResponseParser):
             return action_parser.parse(action_str)
         return self.default_parser.parse(action_str)

+    def action_to_str(self, action: Action) -> str:
+        if isinstance(action, CmdRunAction):
+            return (
+                f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
+            )
+        elif isinstance(action, IPythonRunCellAction):
+            return f'{action.thought}\n<execute_ipython>\n{action.code}\n</execute_ipython>'
+        elif isinstance(action, AgentDelegateAction):
+            return f'{action.thought}\n<execute_browse>\n{action.inputs["task"]}\n</execute_browse>'
+        elif isinstance(action, FileEditAction):
+            return f'{action.thought}\n<file_edit path={action.path}>\n{action.content}\n</file_edit>'
+        elif isinstance(action, MessageAction):
+            return action.content
+        elif isinstance(action, AgentFinishAction) and action.source == 'agent':
+            return action.thought
+        return ''
+

 class CodeActActionParserFinish(ActionParser):
     """Parser action:
@@ -1,10 +1,16 @@
+import json
 import os
+from collections import deque
+from itertools import islice
+
+from litellm import ModelResponse

+import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
 from openhands.agenthub.codeact_agent.action_parser import CodeActResponseParser
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.events.action import (
     Action,
@@ -36,7 +42,7 @@ from openhands.utils.prompt import PromptManager


 class CodeActAgent(Agent):
-    VERSION = '2.0'
+    VERSION = '2.1'
     """
     The Code Act Agent is a minimalist agent.
     The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
@@ -63,8 +69,7 @@ class CodeActAgent(Agent):
         AgentSkillsRequirement(),
         JupyterRequirement(),
     ]

-    action_parser = CodeActResponseParser()
-    obs_prefix = 'OBSERVATION:\n'

     def __init__(
         self,
@@ -88,66 +93,165 @@ class CodeActAgent(Agent):
             if config.micro_agent_name
             else None
         )

-        self.prompt_manager = PromptManager(
-            prompt_dir=os.path.join(os.path.dirname(__file__)),
-            agent_skills_docs=AgentSkillsRequirement.documentation,
-            micro_agent=self.micro_agent,
-        )
-
-    def action_to_str(self, action: Action) -> str:
-        if isinstance(action, CmdRunAction):
-            return (
-                f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
-            )
-        elif isinstance(action, IPythonRunCellAction):
-            return f'{action.thought}\n<execute_ipython>\n{action.code}\n</execute_ipython>'
-        elif isinstance(action, AgentDelegateAction):
-            return f'{action.thought}\n<execute_browse>\n{action.inputs["task"]}\n</execute_browse>'
-        elif isinstance(action, FileEditAction):
-            return f'{action.thought}\n<file_edit path={action.path}>\n{action.content}\n</file_edit>'
-        elif isinstance(action, MessageAction):
-            return action.content
-        elif isinstance(action, AgentFinishAction) and action.source == 'agent':
-            return action.thought
-        return ''
+        if (
+            self.config.function_calling
+            and not self.llm.config.supports_function_calling
+        ):
+            logger.warning(
+                f'Function calling not supported for model {self.llm.config.model}. '
+                'Disabling function calling.'
+            )
+            self.config.function_calling = False
+
+        if self.config.function_calling:
+            # Function calling mode
+            self.tools = codeact_function_calling.get_tools(
+                codeact_enable_browsing_delegate=self.config.codeact_enable_browsing_delegate,
+                codeact_enable_jupyter=self.config.codeact_enable_jupyter,
+                codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
+            )
+            logger.info(
+                f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
+            )
+            self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
+            self.initial_user_message = None
+        else:
+            # Non-function-calling mode
+            self.action_parser = CodeActResponseParser()
+            self.prompt_manager = PromptManager(
+                prompt_dir=os.path.join(os.path.dirname(__file__)),
+                agent_skills_docs=AgentSkillsRequirement.documentation,
+                micro_agent=self.micro_agent,
+            )
+            self.system_prompt = self.prompt_manager.system_message
+            self.initial_user_message = self.prompt_manager.initial_user_message
+
+        self.pending_actions: deque[Action] = deque()

-    def get_action_message(self, action: Action) -> Message | None:
+    def get_action_message(
+        self,
+        action: Action,
+        pending_tool_call_action_messages: dict[str, Message],
+    ) -> list[Message]:
+        """Converts an action into a message format that can be sent to the LLM.
+
+        This method handles different types of actions and formats them appropriately:
+        1. For tool-based actions (AgentDelegate, CmdRun, IPythonRunCell, FileEdit) and agent-sourced AgentFinish:
+           - In function calling mode: Stores the LLM's response in pending_tool_call_action_messages
+           - In non-function calling mode: Creates a message with the action string
+        2. For MessageActions: Creates a message with the text content and optional image content
+
+        Args:
+            action (Action): The action to convert. Can be one of:
+                - AgentDelegateAction: For delegating tasks to other agents
+                - CmdRunAction: For executing bash commands
+                - IPythonRunCellAction: For running IPython code
+                - FileEditAction: For editing files
+                - AgentFinishAction: For ending the interaction
+                - MessageAction: For sending messages
+            pending_tool_call_action_messages (dict[str, Message]): Dictionary mapping response IDs
+                to their corresponding messages. Used in function calling mode to track tool calls
+                that are waiting for their results.
+
+        Returns:
+            list[Message]: A list containing the formatted message(s) for the action.
+                May be empty if the action is handled as a tool call in function calling mode.
+
+        Note:
+            In function calling mode, tool-based actions are stored in pending_tool_call_action_messages
+            rather than being returned immediately. They will be processed later when all corresponding
+            tool call results are available.
+        """
+        # create a regular message from an event
         if isinstance(
             action,
             (
                 AgentDelegateAction,
                 CmdRunAction,
                 IPythonRunCellAction,
-                MessageAction,
                 FileEditAction,
             ),
         ) or (isinstance(action, AgentFinishAction) and action.source == 'agent'):
-            content = [TextContent(text=self.action_to_str(action))]
-            if (
-                self.llm.vision_is_active()
-                and isinstance(action, MessageAction)
-                and action.images_urls
-            ):
-                content.append(ImageContent(image_urls=action.images_urls))
-            return Message(
-                role='user' if action.source == 'user' else 'assistant', content=content
-            )
-        return None
+            if self.config.function_calling:
+                tool_metadata = action.tool_call_metadata
+                assert tool_metadata is not None, (
+                    'Tool call metadata should NOT be None when function calling is enabled. Action: '
+                    + str(action)
+                )
+                llm_response: ModelResponse = tool_metadata.model_response
+                assistant_msg = llm_response.choices[0].message
+                # Add the LLM message (assistant) that initiated the tool calls
+                # (overwrites any previous message with the same response_id)
+                pending_tool_call_action_messages[llm_response.id] = Message(
+                    role=assistant_msg.role,
+                    # tool call content SHOULD BE a string
+                    content=[TextContent(text=assistant_msg.content)]
+                    if assistant_msg.content is not None
+                    else [],
+                    tool_calls=assistant_msg.tool_calls,
+                )
+                return []
+            else:
+                content = [TextContent(text=self.action_parser.action_to_str(action))]
+                return [
+                    Message(
+                        role='user' if action.source == 'user' else 'assistant',
+                        content=content,
+                    )
+                ]
+        elif isinstance(action, MessageAction):
+            role = 'user' if action.source == 'user' else 'assistant'
+            content = [TextContent(text=action.content)]
+            if self.llm.vision_is_active() and action.images_urls:
+                content.append(ImageContent(image_urls=action.images_urls))
+            return [
+                Message(
+                    role=role,
+                    content=content,
+                )
+            ]
+        return []
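To see what the new two-phase conversion does in practice, here is a minimal usage sketch (illustrative only, not part of the diff; it assumes a CodeActAgent `agent` with function calling enabled and a CmdRunAction `cmd_action` produced by response_to_actions):

# Hypothetical sketch: tool-based actions are parked, not emitted directly.
pending: dict[str, Message] = {}
messages = agent.get_action_message(
    action=cmd_action,
    pending_tool_call_action_messages=pending,
)
# messages == []: the assistant message that issued the tool call now sits in
# `pending`, keyed by the LLM response id, until all its tool results arrive.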
-    def get_observation_message(self, obs: Observation) -> Message | None:
+    def get_observation_message(
+        self,
+        obs: Observation,
+        tool_call_id_to_message: dict[str, Message],
+    ) -> list[Message]:
+        """Converts an observation into a message format that can be sent to the LLM.
+
+        This method handles different types of observations and formats them appropriately:
+        - CmdOutputObservation: Formats command execution results with exit codes
+        - IPythonRunCellObservation: Formats IPython cell execution results, replacing base64 images
+        - FileEditObservation: Formats file editing results
+        - AgentDelegateObservation: Formats results from delegated agent tasks
+        - ErrorObservation: Formats error messages from failed actions
+        - UserRejectObservation: Formats user rejection messages
+
+        In function calling mode, observations with tool_call_metadata are stored in
+        tool_call_id_to_message for later processing instead of being returned immediately.
+
+        Args:
+            obs (Observation): The observation to convert
+            tool_call_id_to_message (dict[str, Message]): Dictionary mapping tool call IDs
+                to their corresponding messages (used in function calling mode)
+
+        Returns:
+            list[Message]: A list containing the formatted message(s) for the observation.
+                May be empty if the observation is handled as a tool response in function calling mode.
+
+        Raises:
+            ValueError: If the observation type is unknown
+        """
+        message: Message
         max_message_chars = self.llm.config.max_message_chars
+        obs_prefix = 'OBSERVATION:\n'
         if isinstance(obs, CmdOutputObservation):
             text = obs_prefix + truncate_content(
                 obs.content + obs.interpreter_details, max_message_chars
             )
-            text += (
-                f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
-            )
-            return Message(role='user', content=[TextContent(text=text)])
+            text += f'\n[Command finished with exit code {obs.exit_code}]'
+            message = Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, IPythonRunCellObservation):
             text = obs_prefix + obs.content
             # replace base64 images with a placeholder
@@ -159,29 +263,45 @@ class CodeActAgent(Agent):
             )
             text = '\n'.join(splitted)
             text = truncate_content(text, max_message_chars)
-            return Message(role='user', content=[TextContent(text=text)])
+            message = Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, FileEditObservation):
             text = obs_prefix + truncate_content(str(obs), max_message_chars)
-            return Message(role='user', content=[TextContent(text=text)])
+            message = Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, AgentDelegateObservation):
             text = obs_prefix + truncate_content(
                 obs.outputs['content'] if 'content' in obs.outputs else '',
                 max_message_chars,
             )
-            return Message(role='user', content=[TextContent(text=text)])
+            message = Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, ErrorObservation):
             text = obs_prefix + truncate_content(obs.content, max_message_chars)
             text += '\n[Error occurred in processing last action]'
-            return Message(role='user', content=[TextContent(text=text)])
+            message = Message(role='user', content=[TextContent(text=text)])
         elif isinstance(obs, UserRejectObservation):
             text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
             text += '\n[Last action has been rejected by the user]'
-            return Message(role='user', content=[TextContent(text=text)])
+            message = Message(role='user', content=[TextContent(text=text)])
         else:
             # If an observation message is not returned, it will cause an error
             # when the LLM tries to return the next message
             raise ValueError(f'Unknown observation type: {type(obs)}')

+        if self.config.function_calling:
+            # Update the message as tool response properly
+            if (tool_call_metadata := obs.tool_call_metadata) is not None:
+                tool_call_id_to_message[tool_call_metadata.tool_call_id] = Message(
+                    role='tool',
+                    content=message.content,
+                    tool_call_id=tool_call_metadata.tool_call_id,
+                    name=tool_call_metadata.function_name,
+                )
+                # No need to return the observation message
+                # because it will be added by get_action_message when all the corresponding
+                # tool calls in the SAME request are processed
+                return []
+
+        return [message]

     def reset(self) -> None:
         """Resets the CodeAct Agent."""
         super().reset()
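The observation side mirrors this (again an illustrative sketch, assuming `agent` and a CmdOutputObservation `cmd_obs` that carries tool_call_metadata):

# Hypothetical sketch: in function calling mode the observation becomes a
# role='tool' message stored for get_action_message to pair up later.
tool_results: dict[str, Message] = {}
messages = agent.get_observation_message(
    obs=cmd_obs,
    tool_call_id_to_message=tool_results,
)
# messages == []; tool_results maps the tool_call_id to a Message with
# role='tool', the truncated output as content, and the tool's function name.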
@@ -200,6 +320,10 @@ class CodeActAgent(Agent):
         - MessageAction(content) - Message action to run (e.g. ask for clarification)
         - AgentFinishAction() - end the interaction
         """
+        # Continue with pending actions if any
+        if self.pending_actions:
+            return self.pending_actions.popleft()
+
         # if we're done, go back
         latest_user_message = state.history.get_last_user_message()
         if latest_user_message and latest_user_message.strip() == '/exit':
@@ -207,87 +331,172 @@ class CodeActAgent(Agent):

         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params = {
+        params: dict = {
             'messages': self.llm.format_messages_for_llm(messages),
-            'stop': [
-                '</execute_ipython>',
-                '</execute_bash>',
-                '</execute_browse>',
-                '</file_edit>',
-            ],
-        }
+        }
+        if self.config.function_calling:
+            params['tools'] = self.tools
+        else:
+            params['stop'] = [
+                '</execute_ipython>',
+                '</execute_bash>',
+                '</execute_browse>',
+                '</file_edit>',
+            ]

         response = self.llm.completion(**params)

-        return self.action_parser.parse(response)
+        if self.config.function_calling:
+            actions = codeact_function_calling.response_to_actions(response)
+            for action in actions:
+                self.pending_actions.append(action)
+            return self.pending_actions.popleft()
+        else:
+            return self.action_parser.parse(response)

     def _get_messages(self, state: State) -> list[Message]:
+        """Constructs the message history for the LLM conversation.
+
+        This method builds a structured conversation history by processing events from the state
+        and formatting them into messages that the LLM can understand. It handles both regular
+        message flow and function-calling scenarios.
+
+        The method performs the following steps:
+        1. Initializes with system prompt and optional initial user message
+        2. Processes events (Actions and Observations) into messages
+        3. Handles tool calls and their responses in function-calling mode
+        4. Manages message role alternation (user/assistant/tool)
+        5. Applies caching for specific LLM providers (e.g., Anthropic)
+        6. Adds environment reminders for non-function-calling mode
+
+        Args:
+            state (State): The current state object containing conversation history and other metadata
+
+        Returns:
+            list[Message]: A list of formatted messages ready for LLM consumption, including:
+                - System message with prompt
+                - Initial user message (if configured)
+                - Action messages (from both user and assistant)
+                - Observation messages (including tool responses)
+                - Environment reminders (in non-function-calling mode)
+
+        Note:
+            - In function-calling mode, tool calls and their responses are carefully tracked
+              to maintain proper conversation flow
+            - Messages from the same role are combined to prevent consecutive same-role messages
+            - For Anthropic models, specific messages are cached according to their documentation
+        """
         messages: list[Message] = [
             Message(
                 role='system',
                 content=[
                     TextContent(
-                        text=self.prompt_manager.system_message,
+                        text=self.system_prompt,
                         cache_prompt=self.llm.is_caching_prompt_active(),  # Cache system prompt
                     )
                 ],
             ),
-            Message(
-                role='user',
-                content=[
-                    TextContent(
-                        text=self.prompt_manager.initial_user_message,
-                        cache_prompt=self.llm.is_caching_prompt_active(),  # if the user asks the same query,
-                    )
-                ],
-            ),
         ]
+        if self.initial_user_message:
+            messages.append(
+                Message(
+                    role='user',
+                    content=[TextContent(text=self.initial_user_message)],
+                )
+            )

-        for event in state.history.get_events():
+        pending_tool_call_action_messages: dict[str, Message] = {}
+        tool_call_id_to_message: dict[str, Message] = {}
+        events = list(state.history.get_events())
+        for event in events:
             # create a regular message from an event
             if isinstance(event, Action):
-                message = self.get_action_message(event)
+                messages_to_add = self.get_action_message(
+                    action=event,
+                    pending_tool_call_action_messages=pending_tool_call_action_messages,
+                )
             elif isinstance(event, Observation):
-                message = self.get_observation_message(event)
+                messages_to_add = self.get_observation_message(
+                    obs=event,
+                    tool_call_id_to_message=tool_call_id_to_message,
+                )
             else:
                 raise ValueError(f'Unknown event type: {type(event)}')

-            # add regular message
-            if message:
-                # handle error if the message is the SAME role as the previous message
-                # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
-                # there shouldn't be two consecutive messages from the same role
-                if messages and messages[-1].role == message.role:
-                    messages[-1].content.extend(message.content)
-                else:
-                    messages.append(message)
+            # Check pending tool call action messages and see if they are complete
+            _response_ids_to_remove = []
+            for (
+                response_id,
+                pending_message,
+            ) in pending_tool_call_action_messages.items():
+                assert pending_message.tool_calls is not None, (
+                    'Tool calls should NOT be None when function calling is enabled & the message is considered pending tool call. '
+                    f'Pending message: {pending_message}'
+                )
+                if all(
+                    tool_call.id in tool_call_id_to_message
+                    for tool_call in pending_message.tool_calls
+                ):
+                    # If complete:
+                    # -- 1. Add the message that **initiated** the tool calls
+                    messages_to_add.append(pending_message)
+                    # -- 2. Add the tool calls **results**
+                    for tool_call in pending_message.tool_calls:
+                        messages_to_add.append(tool_call_id_to_message[tool_call.id])
+                        tool_call_id_to_message.pop(tool_call.id)
+                    _response_ids_to_remove.append(response_id)
+            # Cleanup the processed pending tool messages
+            for response_id in _response_ids_to_remove:
+                pending_tool_call_action_messages.pop(response_id)
+
+            for message in messages_to_add:
+                # add regular message
+                if message:
+                    # handle error if the message is the SAME role as the previous message
+                    # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
+                    # there shouldn't be two consecutive messages from the same role
+                    # NOTE: we shouldn't combine tool messages because each of them has a different tool_call_id
+                    if (
+                        messages
+                        and messages[-1].role == message.role
+                        and message.role != 'tool'
+                    ):
+                        messages[-1].content.extend(message.content)
+                    else:
+                        messages.append(message)

         # Add caching to the last 2 user messages
         if self.llm.is_caching_prompt_active():
-            user_turns_processed = 0
+            # NOTE: this is only needed for anthropic
+            # following logic here:
+            # https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
+            breakpoints_remaining = 3  # remaining 1 for system/tool
             for message in reversed(messages):
-                if message.role == 'user' and user_turns_processed < 2:
-                    message.content[
-                        -1
-                    ].cache_prompt = True  # Last item inside the message content
-                    user_turns_processed += 1
+                if message.role == 'user' or message.role == 'tool':
+                    if breakpoints_remaining > 0:
+                        message.content[
+                            -1
+                        ].cache_prompt = True  # Last item inside the message content
+                        breakpoints_remaining -= 1
+                    else:
+                        break

-        # The latest user message is important:
-        # we want to remind the agent of the environment constraints
-        latest_user_message = next(
-            islice(
-                (
-                    m
-                    for m in reversed(messages)
-                    if m.role == 'user'
-                    and any(isinstance(c, TextContent) for c in m.content)
-                ),
-                1,
-            ),
-            None,
-        )
-        if latest_user_message:
-            reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
-            latest_user_message.content.append(TextContent(text=reminder_text))
+        if not self.config.function_calling:
+            # The latest user message is important:
+            # we want to remind the agent of the environment constraints
+            latest_user_message = next(
+                islice(
+                    (
+                        m
+                        for m in reversed(messages)
+                        if m.role == 'user'
+                        and any(isinstance(c, TextContent) for c in m.content)
+                    ),
+                    1,
+                ),
+                None,
+            )
+            # do not add this for function calling
+            if latest_user_message:
+                reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
+                latest_user_message.content.append(TextContent(text=reminder_text))

         return messages
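Taken together, one function-calling turn of step() works roughly like this (a sketch under the assumption that `agent` and `state` are set up as in the code above):

# Hypothetical sketch of one step() round trip in function calling mode.
messages = agent._get_messages(state)
response = agent.llm.completion(
    messages=agent.llm.format_messages_for_llm(messages),
    tools=agent.tools,
)
actions = codeact_function_calling.response_to_actions(response)
# step() returns the first action immediately and serves the remaining ones
# from self.pending_actions on later calls before querying the LLM again.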
openhands/agenthub/codeact_agent/function_calling.py (new file, 397 lines)
@@ -0,0 +1,397 @@
"""This file contains the function calling implementation for different actions.

This is similar to the functionality of `CodeActResponseParser`.
"""

import json

from litellm import (
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
    ModelResponse,
)

from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
    Action,
    AgentDelegateAction,
    AgentFinishAction,
    CmdRunAction,
    FileEditAction,
    IPythonRunCellAction,
    MessageAction,
)
from openhands.events.tool import ToolCallMetadata

SYSTEM_PROMPT = """You are a helpful assistant that can interact with a computer to solve tasks.
<IMPORTANT>
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
</IMPORTANT>
"""

_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
"""

CmdRunTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='execute_bash',
        description=_BASH_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'command': {
                    'type': 'string',
                    'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
                },
            },
            'required': ['command'],
        },
    ),
)

_IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment.
* The assistant should define variables and import packages before using them.
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
"""
# We are not using agentskills's file_ops for viewing files now because StrReplaceEditorTool already supports viewing files
# """* Apart from the standard Python library, the assistant can also use the following functions (already imported):
# {AgentSkillsRequirement.documentation}"""

IPythonTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='execute_ipython_cell',
        description=_IPYTHON_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'code': {
                    'type': 'string',
                    'description': 'The Python code to execute. Supports magic commands like %pip.',
                },
            },
            'required': ['code'],
        },
    ),
)

_FILE_EDIT_DESCRIPTION = """Edit a file.
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
* To append to a file, set both `start` and `end` to `-1`.
* If the file doesn't exist, a new file will be created with the provided content.

**Example 1: general edit for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the end of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|        self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)

The assistant wants to edit the file to look like this:
(this is the end of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|
6|print(MyClass().y)
(this is the end of the file)

The assistant may produce an edit action like this:
path="/path/to/file.txt" start=1 end=-1
content=```
class MyClass:
    def __init__(self):
        # no changes before
        self.y = 2
        # self.z is removed

# MyClass().z is removed
print(MyClass().y)
```

**Example 2: append to file for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the end of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|        self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)

To append the following lines to the file:
```python
print(MyClass().y)
```

The assistant may produce an edit action like this:
path="/path/to/file.txt" start=-1 end=-1
content=```
print(MyClass().y)
```

**Example 3: edit for long files**

Given an existing file `/path/to/file.py` that looks like this:
(1000 more lines above)
1001|class MyClass:
1002|    def __init__(self):
1003|        self.x = 1
1004|        self.y = 2
1005|        self.z = 3
1006|
1007|print(MyClass().z)
1008|print(MyClass().x)
(2000 more lines below)

The assistant wants to edit the file to look like this:

(1000 more lines above)
1001|class MyClass:
1002|    def __init__(self):
1003|        self.x = 1
1004|        self.y = 2
1005|
1006|print(MyClass().y)
(2000 more lines below)

The assistant may produce an edit action like this:
path="/path/to/file.txt" start=1001 end=1008
content=```
class MyClass:
    def __init__(self):
        # no changes before
        self.y = 2
        # self.z is removed

# MyClass().z is removed
print(MyClass().y)
```
"""

LLMBasedFileEditTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='edit_file',
        description=_FILE_EDIT_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'path': {
                    'type': 'string',
                    'description': 'The absolute path to the file to be edited.',
                },
                'new_content_draft': {
                    'type': 'string',
                    'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
                },
                'start': {
                    'type': 'integer',
                    'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
                },
                'end': {
                    'type': 'integer',
                    'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
                },
            },
            'required': ['path', 'new_content_draft'],
        },
    ),
)

_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
* State is persistent across command calls and discussions with the user
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
* The `create` command cannot be used if the specified `path` already exists as a file
* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
* The `undo_edit` command will revert the last edit made to the file at `path`

Notes for using the `str_replace` command:
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
"""

StrReplaceEditorTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='str_replace_editor',
        description=_STR_REPLACE_EDITOR_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'command': {
                    'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
                    'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
                    'type': 'string',
                },
                'path': {
                    'description': 'Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.',
                    'type': 'string',
                },
                'file_text': {
                    'description': 'Required parameter of `create` command, with the content of the file to be created.',
                    'type': 'string',
                },
                'old_str': {
                    'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
                    'type': 'string',
                },
                'new_str': {
                    'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
                    'type': 'string',
                },
                'insert_line': {
                    'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
                    'type': 'integer',
                },
                'view_range': {
                    'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
                    'items': {'type': 'integer'},
                    'type': 'array',
                },
            },
            'required': ['command', 'path'],
        },
    ),
)
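For reference, a tool call against this schema might carry arguments like the following (an illustrative example, not taken from the diff):

# Hypothetical arguments a model could emit for the str_replace_editor tool;
# response_to_actions() below routes them to agentskills' file_editor via IPython.
arguments = {
    'command': 'str_replace',
    'path': '/workspace/repo/module.py',  # example path
    'old_str': 'self.z = 3',
    'new_str': 'self.z = 4',
}
code = f'print(file_editor(**{arguments}))'  # what the agent actually executes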
_BROWSER_DELEGATION = """Delegate the task to another browsing agent.
The assistant should delegate the task if it needs to browse the Internet.
"""

BrowserDelegationTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='delegate_to_browsing_agent',
        description=_BROWSER_DELEGATION,
        parameters={
            'type': 'object',
            'properties': {
                'task': {
                    'type': 'string',
                    'description': 'The task for the browsing agent to execute. It should include all the necessary context and specify what information the browsing agent should return.',
                },
            },
            'required': ['task'],
        },
    ),
)

_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""

FinishTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='finish',
        description=_FINISH_DESCRIPTION,
    ),
)


def combine_thought(action: Action, thought: str) -> Action:
    if not hasattr(action, 'thought'):
        return action
    if thought:
        action.thought = thought
    return action


def response_to_actions(response: ModelResponse) -> list[Action]:
    actions: list[Action] = []
    assert len(response.choices) == 1, 'Only one choice is supported for now'
    assistant_msg = response.choices[0].message
    if assistant_msg.tool_calls:
        # Check if there's assistant_msg.content. If so, add it to the thought
        thought = ''
        if isinstance(assistant_msg.content, str):
            thought = assistant_msg.content
        elif isinstance(assistant_msg.content, list):
            for msg in assistant_msg.content:
                if msg['type'] == 'text':
                    thought += msg['text']

        # Process each tool call to OpenHands action
        for i, tool_call in enumerate(assistant_msg.tool_calls):
            action: Action
            try:
                arguments = json.loads(tool_call.function.arguments)
            except json.decoder.JSONDecodeError as e:
                raise RuntimeError(
                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                ) from e
            if tool_call.function.name == 'execute_bash':
                action = CmdRunAction(**arguments)
            elif tool_call.function.name == 'execute_ipython_cell':
                action = IPythonRunCellAction(**arguments)
            elif tool_call.function.name == 'delegate_to_browsing_agent':
                action = AgentDelegateAction(
                    agent='BrowsingAgent',
                    inputs=arguments,
                )
            elif tool_call.function.name == 'finish':
                action = AgentFinishAction()
            elif tool_call.function.name == 'edit_file':
                action = FileEditAction(**arguments)
            elif tool_call.function.name == 'str_replace_editor':
                # We implement this in agent_skills, which can be used via Jupyter
                # convert tool_call.function.arguments to kwargs that can be passed to file_editor
                code = f'print(file_editor(**{arguments}))'
                logger.debug(
                    f'TOOL CALL: str_replace_editor -> file_editor with code: {code}'
                )
                action = IPythonRunCellAction(code=code, include_extra=False)
            else:
                raise RuntimeError(f'Unknown tool call: {tool_call.function.name}')

            # We only add thought to the first action
            if i == 0:
                action = combine_thought(action, thought)
            # Add metadata for tool calling
            action.tool_call_metadata = ToolCallMetadata(
                tool_call_id=tool_call.id,
                function_name=tool_call.function.name,
                model_response=response,
                total_calls_in_response=len(assistant_msg.tool_calls),
            )
            actions.append(action)
    else:
        actions.append(
            MessageAction(content=assistant_msg.content, wait_for_response=True)
        )

    assert len(actions) >= 1
    return actions

def get_tools(
    codeact_enable_browsing_delegate: bool = False,
    codeact_enable_llm_editor: bool = False,
    codeact_enable_jupyter: bool = False,
) -> list[ChatCompletionToolParam]:
    tools = [CmdRunTool, FinishTool]
    if codeact_enable_browsing_delegate:
        tools.append(BrowserDelegationTool)
    if codeact_enable_jupyter:
        tools.append(IPythonTool)
    if codeact_enable_llm_editor:
        tools.append(LLMBasedFileEditTool)
    else:
        tools.append(StrReplaceEditorTool)
    return tools
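A minimal sketch of how these helpers fit together (illustrative; assumes an OpenHands LLM wrapper `llm` and a prepared `messages` list):

# Hypothetical end-to-end sketch: build the tool list, pass it to the LLM,
# then convert the returned tool calls into OpenHands actions.
tools = get_tools(
    codeact_enable_jupyter=True,
    codeact_enable_llm_editor=False,  # falls back to StrReplaceEditorTool
)
response = llm.completion(messages=messages, tools=tools)
for action in response_to_actions(response):
    # tool-call actions carry ToolCallMetadata linking them to the response
    print(type(action).__name__)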
@@ -453,6 +453,12 @@ class AgentController:
                 # and send the underlying exception to the LLM for self-correction
                 await self.report_error(str(e))
                 return
+            # FIXME: more graceful handling of litellm.exceptions.ContextWindowExceededError
+            # e.g. try to condense the memory and try again
+            except litellm.exceptions.ContextWindowExceededError as e:
+                self.state.last_error = str(e)
+                await self.set_agent_state_to(AgentState.ERROR)
+                return

         if action.runnable:
             if self.state.confirmation_mode and (
@@ -8,12 +8,20 @@ class AgentConfig:
     """Configuration for the agent.

     Attributes:
+        function_calling: Whether function calling is enabled. Default is True.
+        codeact_enable_browsing_delegate: Whether browsing delegate is enabled in the action space. Default is False. Only works with function calling.
+        codeact_enable_llm_editor: Whether LLM editor is enabled in the action space. Default is False. Only works with function calling.
+        codeact_enable_jupyter: Whether Jupyter is enabled in the action space. Default is False.
         micro_agent_name: The name of the micro agent to use for this agent.
         memory_enabled: Whether long-term memory (embeddings) is enabled.
         memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
         llm_config: The name of the llm config to use. If specified, this will override global llm config.
     """

+    function_calling: bool = True
+    codeact_enable_browsing_delegate: bool = False
+    codeact_enable_llm_editor: bool = False
+    codeact_enable_jupyter: bool = False
     micro_agent_name: str | None = None
     memory_enabled: bool = False
     memory_max_threads: int = 3
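For example, enabling the function-calling action space with Jupyter only would look like this (a sketch; assumes an AppConfig `config` as in the evaluation scripts above):

# Sketch: function calling is on by default; pick the tools explicitly.
agent_config = AgentConfig(
    function_calling=True,
    codeact_enable_jupyter=True,
    codeact_enable_browsing_delegate=False,
    codeact_enable_llm_editor=False,
)
config.set_agent_config(agent_config)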
@@ -42,6 +42,7 @@ class LLMConfig:
         log_completions: Whether to log LLM completions to the state.
         log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
         draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
+        supports_function_calling: Whether the model supports function calling.
     """

     model: str = 'gpt-4o'
@@ -61,7 +62,7 @@ class LLMConfig:
     retry_min_wait: int = 15
     retry_max_wait: int = 120
     timeout: int | None = None
-    max_message_chars: int = 10_000  # maximum number of characters in an observation's content when sent to the llm
+    max_message_chars: int = 30_000  # maximum number of characters in an observation's content when sent to the llm
     temperature: float = 0.0
     top_p: float = 1.0
     custom_llm_provider: str | None = None
@@ -76,6 +77,7 @@ class LLMConfig:
     log_completions: bool = False
     log_completions_folder: str | None = None
     draft_editor: Optional['LLMConfig'] = None
+    supports_function_calling: bool = False

     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
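A model is opted into the new path via this flag (sketch; the model name is only an example):

# Sketch: without supports_function_calling=True the agent logs a warning in
# __init__ and falls back to the stop-token parser.
llm_config = LLMConfig(
    model='claude-3-5-sonnet-20241022',  # example model name
    supports_function_calling=True,
)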
@@ -12,6 +12,9 @@ LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO').upper()
 DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
 if DEBUG:
     LOG_LEVEL = 'DEBUG'
+    import litellm
+
+    litellm.set_verbose = True

 LOG_TO_FILE = os.getenv('LOG_TO_FILE', 'False').lower() in ['true', '1', 'yes']
 DISABLE_COLOR_PRINTING = False
@@ -70,11 +73,6 @@ class ColoredFormatter(logging.Formatter):
         return super().format(record)


-console_formatter = ColoredFormatter(
-    '\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s',
-    datefmt='%H:%M:%S',
-)
-
 file_formatter = logging.Formatter(
     '%(asctime)s - %(name)s:%(levelname)s: %(filename)s:%(lineno)s - %(message)s',
     datefmt='%H:%M:%S',
@@ -123,10 +121,10 @@ def get_console_handler(log_level=logging.INFO, extra_info: str | None = None):
     """Returns a console handler for logging."""
     console_handler = logging.StreamHandler()
     console_handler.setLevel(log_level)
-    formatter_str = '%(asctime)s - %(levelname)s - %(message)s'
+    formatter_str = '\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s'
     if extra_info:
         formatter_str = f'{extra_info} - ' + formatter_str
-    console_handler.setFormatter(logging.Formatter(formatter_str))
+    console_handler.setFormatter(ColoredFormatter(formatter_str, datefmt='%H:%M:%S'))
     return console_handler
@@ -1,6 +1,7 @@
 from enum import Enum
 from typing import Literal

+from litellm import ChatCompletionMessageToolCall
 from pydantic import BaseModel, Field, model_serializer
@@ -48,10 +49,16 @@ class ImageContent(Content):


 class Message(BaseModel):
-    role: Literal['user', 'system', 'assistant']
-    content: list[TextContent | ImageContent] = Field(default=list)
+    role: Literal['user', 'system', 'assistant', 'tool']
+    content: list[TextContent | ImageContent] = Field(default_factory=list)
     cache_enabled: bool = False
     vision_enabled: bool = False
+    # function calling
+    # - tool calls (from LLM)
+    tool_calls: list[ChatCompletionMessageToolCall] | None = None
+    # - tool execution result (to LLM)
+    tool_call_id: str | None = None
+    name: str | None = None  # name of the tool

     @property
     def contains_image(self) -> bool:
@@ -59,23 +66,31 @@ class Message(BaseModel):

     @model_serializer
     def serialize_model(self) -> dict:
-        content: list[dict] | str
-        # two kinds of serializer:
-        # 1. vision serializer: when prompt caching or vision is enabled
-        # 2. single text serializer: for other cases
-        # remove this when liteLLM or providers support this format translation
-        if self.cache_enabled or self.vision_enabled:
-            # when prompt caching or vision is enabled, use vision serializer
-            content = []
-            for item in self.content:
-                if isinstance(item, TextContent):
-                    content.append(item.model_dump())
-                elif isinstance(item, ImageContent):
-                    content.extend(item.model_dump())
-        else:
-            # for other cases, concatenate all text content
-            # into a single string per message
-            content = '\n'.join(
-                item.text for item in self.content if isinstance(item, TextContent)
-            )
-        return {'content': content, 'role': self.role}
+        content: list[dict] = []
+        role_tool_with_prompt_caching = False
+        for item in self.content:
+            d = item.model_dump()
+            # We have to remove cache_prompt for tool content and move it up to the message level
+            # See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
+            if self.role == 'tool' and item.cache_prompt:
+                role_tool_with_prompt_caching = True
+                d.pop('cache_control')
+            if isinstance(item, TextContent):
+                content.append(d)
+            elif isinstance(item, ImageContent) and self.vision_enabled:
+                content.extend(d)
+
+        ret: dict = {'content': content, 'role': self.role}
+
+        if role_tool_with_prompt_caching:
+            ret['cache_control'] = {'type': 'ephemeral'}
+
+        if self.tool_call_id is not None:
+            assert (
+                self.name is not None
+            ), 'name is required when tool_call_id is not None'
+            ret['tool_call_id'] = self.tool_call_id
+            ret['name'] = self.name
+        if self.tool_calls:
+            ret['tool_calls'] = self.tool_calls
+        return ret
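Note: a minimal sketch (not part of the diff) of what the new serializer emits for a tool-result message; the id and tool name are made-up values, and the `{'type': 'text', ...}` item shape matches the serialization tests updated later in this commit:

    msg = Message(
        role='tool',
        content=[TextContent(text='total 0')],
        tool_call_id='call_abc123',  # hypothetical id from the LLM's tool call
        name='execute_bash',
    )
    print(msg.model_dump())
    # {'content': [{'type': 'text', 'text': 'total 0'}],
    #  'role': 'tool', 'tool_call_id': 'call_abc123', 'name': 'execute_bash'}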
@@ -2,6 +2,7 @@ import json
 from datetime import datetime

 from json_repair import repair_json
+from litellm.types.utils import ModelResponse

 from openhands.core.exceptions import LLMResponseError
 from openhands.events.event import Event
@@ -17,6 +18,8 @@ def my_default_encoder(obj):
         return event_to_dict(obj)
     if isinstance(obj, Metrics):
         return obj.get()
+    if isinstance(obj, ModelResponse):
+        return obj.model_dump()
     return json.JSONEncoder().default(obj)
@@ -47,6 +47,9 @@ class CmdRunAction(Action):
 class IPythonRunCellAction(Action):
     code: str
     thought: str = ''
+    include_extra: bool = (
+        True  # whether to include CWD & Python interpreter in the output
+    )
     action: str = ActionType.RUN_IPYTHON
     runnable: ClassVar[bool] = True
     confirmation_state: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum

+from openhands.events.tool import ToolCallMetadata
 from openhands.llm.metrics import Metrics


@@ -71,3 +72,14 @@ class Event:
     @llm_metrics.setter
     def llm_metrics(self, value: Metrics) -> None:
         self._llm_metrics = value
+
+    # optional field
+    @property
+    def tool_call_metadata(self) -> ToolCallMetadata | None:
+        if hasattr(self, '_tool_call_metadata'):
+            return self._tool_call_metadata  # type: ignore[attr-defined]
+        return None
+
+    @tool_call_metadata.setter
+    def tool_call_metadata(self, value: ToolCallMetadata) -> None:
+        self._tool_call_metadata = value
@@ -6,10 +6,20 @@ from openhands.events.observation.observation import Observation
 from openhands.events.serialization.action import action_from_dict
 from openhands.events.serialization.observation import observation_from_dict
 from openhands.events.serialization.utils import remove_fields
+from openhands.events.tool import ToolCallMetadata

 # TODO: move `content` into `extras`
-TOP_KEYS = ['id', 'timestamp', 'source', 'message', 'cause', 'action', 'observation']
-UNDERSCORE_KEYS = ['id', 'timestamp', 'source', 'cause']
+TOP_KEYS = [
+    'id',
+    'timestamp',
+    'source',
+    'message',
+    'cause',
+    'action',
+    'observation',
+    'tool_call_metadata',
+]
+UNDERSCORE_KEYS = ['id', 'timestamp', 'source', 'cause', 'tool_call_metadata']

 DELETE_FROM_TRAJECTORY_EXTRAS = {
     'screenshot',
@@ -40,6 +50,8 @@ def event_from_dict(data) -> 'Event':
             value = value.isoformat()
         if key == 'source':
             value = EventSource(value)
+        if key == 'tool_call_metadata':
+            value = ToolCallMetadata(**value)
         setattr(evt, '_' + key, value)
     return evt
@@ -59,6 +71,8 @@ def event_to_dict(event: 'Event') -> dict:
             d['timestamp'] = d['timestamp'].isoformat()
         if key == 'source' and 'source' in d:
             d['source'] = d['source'].value
+        if key == 'tool_call_metadata' and 'tool_call_metadata' in d:
+            d['tool_call_metadata'] = d['tool_call_metadata'].model_dump()
         props.pop(key, None)
     if 'security_risk' in props and props['security_risk'] is None:
         props.pop('security_risk')
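Note: a minimal sketch (not part of the diff) of the round trip these two hunks enable, assuming `evt` is any Event and `model_response` is a litellm ModelResponse from an earlier completion call:

    evt.tool_call_metadata = ToolCallMetadata(
        function_name='execute_bash',   # made-up example values
        tool_call_id='call_abc123',
        model_response=model_response,
        total_calls_in_response=1,
    )
    d = event_to_dict(evt)        # metadata serialized via model_dump()
    evt2 = event_from_dict(d)     # ...and rebuilt into a ToolCallMetadata
    assert evt2.tool_call_metadata.tool_call_id == 'call_abc123'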
openhands/events/tool.py (new file, 11 lines)
@@ -0,0 +1,11 @@
from litellm import ModelResponse
from pydantic import BaseModel


class ToolCallMetadata(BaseModel):
    # See https://docs.litellm.ai/docs/completion/function_call#step-3---second-litellmcompletion-call
    function_name: str  # Name of the function that was called
    tool_call_id: str  # ID of the tool call

    model_response: ModelResponse
    total_calls_in_response: int
@@ -1,11 +1,12 @@
 import copy
 import json
 import os
 import time
 import warnings
 from functools import partial
 from typing import Any

+import requests

 from openhands.core.config import LLMConfig

 with warnings.catch_warnings():
@@ -81,16 +82,47 @@ class LLM(RetryMixin, DebugMixin):

         # litellm actually uses base Exception here for unknown model
         self.model_info: ModelInfo | None = None
-        try:
-            if self.config.model.startswith('openrouter'):
-                self.model_info = litellm.get_model_info(self.config.model)
-            else:
-                self.model_info = litellm.get_model_info(
-                    self.config.model.split(':')[0]
-                )
-        # noinspection PyBroadException
-        except Exception as e:
-            logger.warning(f'Could not get model info for {config.model}:\n{e}')
+
+        if self.config.model.startswith('openrouter'):
+            self.model_info = litellm.get_model_info(self.config.model)
+        elif self.config.model.startswith('litellm_proxy/'):
+            # IF we are using LiteLLM proxy, get model info from LiteLLM proxy
+            # GET {base_url}/v1/model/info with litellm_model_id as path param
+            response = requests.get(
+                f'{self.config.base_url}/v1/model/info',
+                headers={'Authorization': f'Bearer {self.config.api_key}'},
+            )
+            all_model_info = response.json()['data']
+            current_model_info = next(
+                (
+                    info
+                    for info in all_model_info
+                    if info['model_name']
+                    == self.config.model.removeprefix('litellm_proxy/')
+                ),
+                None,
+            )
+            if current_model_info:
+                self.model_info = current_model_info['model_info']
+
+        # Last two attempts to get model info from NAME
+        if not self.model_info:
+            try:
+                self.model_info = litellm.get_model_info(
+                    self.config.model.split(':')[0]
+                )
+            # noinspection PyBroadException
+            except Exception:
+                pass
+        if not self.model_info:
+            try:
+                self.model_info = litellm.get_model_info(
+                    self.config.model.split('/')[-1]
+                )
+            # noinspection PyBroadException
+            except Exception:
+                pass
+        logger.info(f'Model info: {self.model_info}')

         if self.config.log_completions:
             if self.config.log_completions_folder is None:
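Note: a minimal sketch (not part of the diff) of the proxy lookup added above, with a placeholder base URL and API key:

    import requests

    base_url = 'http://localhost:4000'  # hypothetical LiteLLM proxy address
    resp = requests.get(
        f'{base_url}/v1/model/info',
        headers={'Authorization': 'Bearer sk-placeholder'},
    )
    for info in resp.json()['data']:
        print(info['model_name'], info.get('model_info'))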
@@ -126,6 +158,11 @@ class LLM(RetryMixin, DebugMixin):
         ):
             self.config.max_output_tokens = self.model_info['max_tokens']

+        self.config.supports_function_calling = (
+            self.model_info is not None
+            and self.model_info.get('supports_function_calling', False)
+        )
+
         self._completion = partial(
             litellm_completion,
             model=self.config.model,
@@ -144,6 +181,8 @@ class LLM(RetryMixin, DebugMixin):
             logger.debug('LLM: model has vision enabled')
         if self.is_caching_prompt_active():
             logger.debug('LLM: caching prompt enabled')
+        if self.config.supports_function_calling:
+            logger.debug('LLM: model supports function calling')

         completion_unwrapped = self._completion
@@ -195,26 +234,32 @@ class LLM(RetryMixin, DebugMixin):
         try:
             # we don't support streaming here, thus we get a ModelResponse
             resp: ModelResponse = completion_unwrapped(*args, **kwargs)

             # log for evals or other scripts that need the raw completion
             if self.config.log_completions:
                 assert self.config.log_completions_folder is not None
                 log_file = os.path.join(
                     self.config.log_completions_folder,
                     # use the metric model name (for draft editor)
-                    f'{self.metrics.model_name}-{time.time()}.json',
+                    f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json',
                 )
                 from openhands.core.utils import json

                 with open(log_file, 'w') as f:
-                    json.dump(
-                        {
-                            'messages': messages,
-                            'response': resp,
-                            'args': args,
-                            'kwargs': kwargs,
-                            'timestamp': time.time(),
-                            'cost': self._completion_cost(resp),
-                        },
-                        f,
-                    )
+                    f.write(
+                        json.dumps(
+                            {
+                                'messages': messages,
+                                'response': resp,
+                                'args': args,
+                                'kwargs': {
+                                    k: v
+                                    for k, v in kwargs.items()
+                                    if k != 'messages'
+                                },
+                                'timestamp': time.time(),
+                                'cost': self._completion_cost(resp),
+                            },
+                        )
+                    )

             message_back: str = resp['choices'][0]['message']['content']
@@ -390,16 +435,18 @@ class LLM(RetryMixin, DebugMixin):
             logger.info(f'Using custom cost per token: {cost_per_token}')
             extra_kwargs['custom_cost_per_token'] = cost_per_token

-        if not self._is_local():
-            try:
-                cost = litellm_completion_cost(
-                    completion_response=response, **extra_kwargs
-                )
-                self.metrics.add_cost(cost)
-                return cost
-            except Exception:
-                self.cost_metric_supported = False
-                logger.warning('Cost calculation not supported for this model.')
+        try:
+            # try directly get response_cost from response
+            cost = getattr(response, '_hidden_params', {}).get('response_cost', None)
+            if cost is None:
+                cost = litellm_completion_cost(
+                    completion_response=response, **extra_kwargs
+                )
+            self.metrics.add_cost(cost)
+            return cost
+        except Exception:
+            self.cost_metric_supported = False
+            logger.warning('Cost calculation not supported for this model.')
         return 0.0

     def __str__(self):
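Note: the new cost path first trusts litellm's per-response accounting and only then falls back to table-based pricing; a minimal sketch (not part of the diff), assuming `response` is a litellm ModelResponse:

    cost = getattr(response, '_hidden_params', {}).get('response_cost', None)
    if cost is None:
        # fall back to litellm's model pricing table
        cost = litellm_completion_cost(completion_response=response)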
@@ -194,10 +194,11 @@ class ActionExecutor:

             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             obs.content = obs.content.rstrip()
-            obs.content += (
-                f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
-            )
-            obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
+            if action.include_extra:
+                obs.content += (
+                    f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
+                )
+                obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
             return obs
         else:
             raise RuntimeError(
@@ -127,8 +127,11 @@ class Runtime(FileEditRuntimeMixin):
         if event.timeout is None:
             event.timeout = self.config.sandbox.timeout
         assert event.timeout is not None
-        observation = await call_sync_from_async(self.run_action, event)
+        observation: Observation = await call_sync_from_async(
+            self.run_action, event
+        )
         observation._cause = event.id  # type: ignore[attr-defined]
+        observation.tool_call_metadata = event.tool_call_metadata
         source = event.source if event.source else EventSource.AGENT
         await self.event_stream.async_add_event(observation, source)  # type: ignore[arg-type]
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import threading
+import traceback
 from functools import lru_cache
 from typing import Callable
 from zipfile import ZipFile
@@ -496,7 +497,9 @@ class EventStreamRuntime(Runtime):
             )
         except Exception as e:
             logger.error(f'Error during action execution: {e}')
-            obs = FatalErrorObservation(f'Action execution failed: {str(e)}')
+            obs = FatalErrorObservation(
+                f'Action execution failed: {str(e)}.\n{traceback.format_exc()}'
+            )
         self._refresh_logs()
         return obs
@@ -23,3 +23,9 @@ for func_name in __all__:

     fn_signature = f'{func.__name__}' + str(signature(func))
     DOCUMENTATION += f'{fn_signature}:\n{cur_doc}\n\n'
+
+
+# Add file_editor (a function)
+from openhands.runtime.plugins.agent_skills.file_editor import file_editor  # noqa: E402
+
+__all__ += ['file_editor']
openhands/runtime/plugins/agent_skills/file_editor/README.md (new file)
@@ -0,0 +1,3 @@
# File Editor

This file editor is largely based on Anthropic's released [`str_replace_editor`](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo/computer_use_demo/tools/edit.py). The original code was released under the [MIT license](https://github.com/anthropics/anthropic-quickstarts/blob/e373524f07594d48c3f9563248ea282a4c306c0c/LICENSE).
openhands/runtime/plugins/agent_skills/file_editor/__init__.py (new file, 60 lines)
@@ -0,0 +1,60 @@
"""This file contains a global singleton of the `EditTool` class as well as raw functions that expose its __call__."""

from .base import CLIResult, ToolError, ToolResult
from .impl import Command, EditTool

_GLOBAL_EDITOR = EditTool()


def _make_api_tool_result(
    result: ToolResult,
) -> str:
    """Convert an agent ToolResult to an API ToolResultBlockParam."""
    tool_result_content: str = ''
    is_error = False
    if result.error:
        is_error = True
        tool_result_content = _maybe_prepend_system_tool_result(result, result.error)
    else:
        assert result.output, 'Expecting output in file_editor'
        tool_result_content = _maybe_prepend_system_tool_result(result, result.output)
    assert (
        not result.base64_image
    ), 'Not expecting base64_image as output in file_editor'
    if is_error:
        return f'ERROR:\n{tool_result_content}'
    else:
        return tool_result_content


def _maybe_prepend_system_tool_result(result: ToolResult, result_text: str) -> str:
    if result.system:
        result_text = f'<system>{result.system}</system>\n{result_text}'
    return result_text


def file_editor(
    command: Command,
    path: str,
    file_text: str | None = None,
    view_range: list[int] | None = None,
    old_str: str | None = None,
    new_str: str | None = None,
    insert_line: int | None = None,
) -> str:
    try:
        result: CLIResult = _GLOBAL_EDITOR(
            command=command,
            path=path,
            file_text=file_text,
            view_range=view_range,
            old_str=old_str,
            new_str=new_str,
            insert_line=insert_line,
        )
    except ToolError as e:
        return _make_api_tool_result(ToolResult(error=e.message))
    return _make_api_tool_result(result)


__all__ = ['file_editor']
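Note: a minimal usage sketch (not part of the diff); the path is a placeholder, and the expected output strings mirror the tests added later in this commit:

    from openhands.runtime.plugins.agent_skills.agentskills import file_editor

    print(file_editor(command='create', path='/tmp/demo.txt', file_text='hello\n'))
    # File created successfully at: /tmp/demo.txt
    print(file_editor(command='str_replace', path='/tmp/demo.txt',
                      old_str='hello', new_str='goodbye'))
    print(file_editor(command='undo_edit', path='/tmp/demo.txt'))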
openhands/runtime/plugins/agent_skills/file_editor/base.py (new file, 50 lines)
@@ -0,0 +1,50 @@
from dataclasses import dataclass, fields, replace


@dataclass(kw_only=True, frozen=True)
class ToolResult:
    """Represents the result of a tool execution."""

    output: str | None = None
    error: str | None = None
    base64_image: str | None = None
    system: str | None = None

    def __bool__(self):
        return any(getattr(self, field.name) for field in fields(self))

    def __add__(self, other: 'ToolResult'):
        def combine_fields(
            field: str | None, other_field: str | None, concatenate: bool = True
        ):
            if field and other_field:
                if concatenate:
                    return field + other_field
                raise ValueError('Cannot combine tool results')
            return field or other_field

        return ToolResult(
            output=combine_fields(self.output, other.output),
            error=combine_fields(self.error, other.error),
            base64_image=combine_fields(self.base64_image, other.base64_image, False),
            system=combine_fields(self.system, other.system),
        )

    def replace(self, **kwargs):
        """Returns a new ToolResult with the given fields replaced."""
        return replace(self, **kwargs)


class CLIResult(ToolResult):
    """A ToolResult that can be rendered as a CLI output."""


class ToolFailure(ToolResult):
    """A ToolResult that represents a failure."""


class ToolError(Exception):
    """Raised when a tool encounters an error."""

    def __init__(self, message):
        self.message = message
openhands/runtime/plugins/agent_skills/file_editor/impl.py (new file, 279 lines)
@@ -0,0 +1,279 @@
from collections import defaultdict
from pathlib import Path
from typing import Literal, get_args

from .base import CLIResult, ToolError, ToolResult
from .run import maybe_truncate, run

Command = Literal[
    'view',
    'create',
    'str_replace',
    'insert',
    'undo_edit',
]
SNIPPET_LINES: int = 4


class EditTool:
    """
    A filesystem editor tool that allows the agent to view, create, and edit files.
    The tool parameters are defined by Anthropic and are not editable.

    Original implementation: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/tools/edit.py
    """

    _file_history: dict[Path, list[str]]

    def __init__(self):
        self._file_history = defaultdict(list)
        super().__init__()

    def __call__(
        self,
        *,
        command: Command,
        path: str,
        file_text: str | None = None,
        view_range: list[int] | None = None,
        old_str: str | None = None,
        new_str: str | None = None,
        insert_line: int | None = None,
        **kwargs,
    ):
        _path = Path(path)
        self.validate_path(command, _path)
        if command == 'view':
            return self.view(_path, view_range)
        elif command == 'create':
            if not file_text:
                raise ToolError('Parameter `file_text` is required for command: create')
            self.write_file(_path, file_text)
            self._file_history[_path].append(file_text)
            return ToolResult(output=f'File created successfully at: {_path}')
        elif command == 'str_replace':
            if not old_str:
                raise ToolError(
                    'Parameter `old_str` is required for command: str_replace'
                )
            return self.str_replace(_path, old_str, new_str)
        elif command == 'insert':
            if insert_line is None:
                raise ToolError(
                    'Parameter `insert_line` is required for command: insert'
                )
            if not new_str:
                raise ToolError('Parameter `new_str` is required for command: insert')
            return self.insert(_path, insert_line, new_str)
        elif command == 'undo_edit':
            return self.undo_edit(_path)
        raise ToolError(
            f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}'
        )

    def validate_path(self, command: str, path: Path):
        """
        Check that the path/command combination is valid.
        """
        # Check if its an absolute path
        if not path.is_absolute():
            suggested_path = Path('') / path
            raise ToolError(
                f'The path {path} is not an absolute path, it should start with `/`. Maybe you meant {suggested_path}?'
            )
        # Check if path exists
        if not path.exists() and command != 'create':
            raise ToolError(
                f'The path {path} does not exist. Please provide a valid path.'
            )
        if path.exists() and command == 'create':
            raise ToolError(
                f'File already exists at: {path}. Cannot overwrite files using command `create`.'
            )
        # Check if the path points to a directory
        if path.is_dir():
            if command != 'view':
                raise ToolError(
                    f'The path {path} is a directory and only the `view` command can be used on directories'
                )

    def view(self, path: Path, view_range: list[int] | None = None):
        """Implement the view command"""
        if path.is_dir():
            if view_range:
                raise ToolError(
                    'The `view_range` parameter is not allowed when `path` points to a directory.'
                )

            _, stdout, stderr = run(rf"find {path} -maxdepth 2 -not -path '*/\.*'")
            if not stderr:
                stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n"
            return CLIResult(output=stdout, error=stderr)

        file_content = self.read_file(path)
        init_line = 1
        if view_range:
            if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
                raise ToolError(
                    'Invalid `view_range`. It should be a list of two integers.'
                )
            file_lines = file_content.split('\n')
            n_lines_file = len(file_lines)
            init_line, final_line = view_range
            if init_line < 1 or init_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
                )
            if final_line > n_lines_file:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
                )
            if final_line != -1 and final_line < init_line:
                raise ToolError(
                    f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be larger or equal than its first `{init_line}`"
                )

            if final_line == -1:
                file_content = '\n'.join(file_lines[init_line - 1 :])
            else:
                file_content = '\n'.join(file_lines[init_line - 1 : final_line])

        return CLIResult(
            output=self._make_output(file_content, str(path), init_line=init_line)
        )

    def str_replace(self, path: Path, old_str: str, new_str: str | None):
        """Implement the str_replace command, which replaces old_str with new_str in the file content"""
        # Read the file content
        file_content = self.read_file(path).expandtabs()
        old_str = old_str.expandtabs()
        new_str = new_str.expandtabs() if new_str is not None else ''

        # Check if old_str is unique in the file
        occurrences = file_content.count(old_str)
        if occurrences == 0:
            raise ToolError(
                f'No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}.'
            )
        elif occurrences > 1:
            file_content_lines = file_content.split('\n')
            lines = [
                idx + 1
                for idx, line in enumerate(file_content_lines)
                if old_str in line
            ]
            raise ToolError(
                f'No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique'
            )

        # Replace old_str with new_str
        new_file_content = file_content.replace(old_str, new_str)

        # Write the new content to the file
        self.write_file(path, new_file_content)

        # Save the content to history
        self._file_history[path].append(file_content)

        # Create a snippet of the edited section
        replacement_line = file_content.split(old_str)[0].count('\n')
        start_line = max(0, replacement_line - SNIPPET_LINES)
        end_line = replacement_line + SNIPPET_LINES + new_str.count('\n')
        snippet = '\n'.join(new_file_content.split('\n')[start_line : end_line + 1])

        # Prepare the success message
        success_msg = f'The file {path} has been edited. '
        success_msg += self._make_output(
            snippet, f'a snippet of {path}', start_line + 1
        )
        success_msg += 'Review the changes and make sure they are as expected. Edit the file again if necessary.'

        return CLIResult(output=success_msg)

    def insert(self, path: Path, insert_line: int, new_str: str):
        """Implement the insert command, which inserts new_str at the specified line in the file content."""
        file_text = self.read_file(path).expandtabs()
        new_str = new_str.expandtabs()
        file_text_lines = file_text.split('\n')
        n_lines_file = len(file_text_lines)

        if insert_line < 0 or insert_line > n_lines_file:
            raise ToolError(
                f'Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}'
            )

        new_str_lines = new_str.split('\n')
        new_file_text_lines = (
            file_text_lines[:insert_line]
            + new_str_lines
            + file_text_lines[insert_line:]
        )
        snippet_lines = (
            file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
            + new_str_lines
            + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
        )

        new_file_text = '\n'.join(new_file_text_lines)
        snippet = '\n'.join(snippet_lines)

        self.write_file(path, new_file_text)
        self._file_history[path].append(file_text)

        success_msg = f'The file {path} has been edited. '
        success_msg += self._make_output(
            snippet,
            'a snippet of the edited file',
            max(1, insert_line - SNIPPET_LINES + 1),
        )
        success_msg += 'Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary.'
        return CLIResult(output=success_msg)

    def undo_edit(self, path: Path):
        """Implement the undo_edit command."""
        if not self._file_history[path]:
            raise ToolError(f'No edit history found for {path}.')

        old_text = self._file_history[path].pop()
        self.write_file(path, old_text)

        return CLIResult(
            output=f'Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}'
        )

    def read_file(self, path: Path):
        """Read the content of a file from a given path; raise a ToolError if an error occurs."""
        try:
            return path.read_text()
        except Exception as e:
            raise ToolError(f'Ran into {e} while trying to read {path}') from None

    def write_file(self, path: Path, file: str):
        """Write the content of a file to a given path; raise a ToolError if an error occurs."""
        try:
            path.write_text(file)
        except Exception as e:
            raise ToolError(f'Ran into {e} while trying to write to {path}') from None

    def _make_output(
        self,
        file_content: str,
        file_descriptor: str,
        init_line: int = 1,
        expand_tabs: bool = True,
    ):
        """Generate output for the CLI based on the content of a file."""
        file_content = maybe_truncate(file_content)
        if expand_tabs:
            file_content = file_content.expandtabs()
        file_content = '\n'.join(
            [
                f'{i + init_line:6}\t{line}'
                for i, line in enumerate(file_content.split('\n'))
            ]
        )
        return (
            f"Here's the result of running `cat -n` on {file_descriptor}:\n"
            + file_content
            + '\n'
        )
openhands/runtime/plugins/agent_skills/file_editor/run.py (new file, 44 lines)
@@ -0,0 +1,44 @@
"""Utility to run shell commands synchronously with a timeout."""

import subprocess
import time

TRUNCATED_MESSAGE: str = '<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>'
MAX_RESPONSE_LEN: int = 16000


def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
    """Truncate content and append a notice if content exceeds the specified length."""
    return (
        content
        if not truncate_after or len(content) <= truncate_after
        else content[:truncate_after] + TRUNCATED_MESSAGE
    )


def run(
    cmd: str,
    timeout: float | None = 120.0,  # seconds
    truncate_after: int | None = MAX_RESPONSE_LEN,
):
    """Run a shell command synchronously with a timeout."""
    start_time = time.time()

    try:
        process = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )

        stdout, stderr = process.communicate(timeout=timeout)

        return (
            process.returncode or 0,
            maybe_truncate(stdout, truncate_after=truncate_after),
            maybe_truncate(stderr, truncate_after=truncate_after),
        )
    except subprocess.TimeoutExpired:
        process.kill()
        elapsed_time = time.time() - start_time
        raise TimeoutError(
            f"Command '{cmd}' timed out after {elapsed_time:.2f} seconds"
        )
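Note: a minimal sketch (not part of the diff) of the helper's return shape:

    returncode, stdout, stderr = run('echo hi', timeout=5.0)
    # returncode == 0, stdout == 'hi\n', stderr == ''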
poetry.lock (generated, 26 lines changed)
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

 [[package]]
 name = "aenum"
@@ -3901,22 +3901,20 @@ name = "litellm"
 version = "1.50.4"
 description = "Library to easily interface with LLM API providers"
 optional = false
-python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
-files = [
-    {file = "litellm-1.50.4-py3-none-any.whl", hash = "sha256:cc6992275e24a0bbb4a3b377e6842d45a8510fc85d7f255930a64bb872980a36"},
-    {file = "litellm-1.50.4.tar.gz", hash = "sha256:a7e68ef614f631b58969c2c7a5154a565ba5974558d437c8cd6c8623654880ea"},
-]
+python-versions = ">=3.8.1,<4.0, !=3.9.7"
+files = []
+develop = false

 [package.dependencies]
 aiohttp = "*"
 click = "*"
 importlib-metadata = ">=6.8.0"
-jinja2 = ">=3.1.2,<4.0.0"
-jsonschema = ">=4.22.0,<5.0.0"
+jinja2 = "^3.1.2"
+jsonschema = "^4.22.0"
 openai = ">=1.52.0"
-pydantic = ">=2.0.0,<3.0.0"
+pydantic = "^2.0.0"
 python-dotenv = ">=0.2.0"
-requests = ">=2.31.0,<3.0.0"
+requests = "^2.31.0"
 tiktoken = ">=0.7.0"
 tokenizers = "*"

@@ -3924,6 +3922,12 @@ tokenizers = "*"
 extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"]
 proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"]
+
+[package.source]
+type = "git"
+url = "https://github.com/BerriAI/litellm.git"
+reference = "58fe6610601297c5a90367fd4583469e2df3fcf9"
+resolved_reference = "58fe6610601297c5a90367fd4583469e2df3fcf9"

 [[package]]
 name = "llama-index"
 version = "0.10.45.post1"
@@ -10095,4 +10099,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "aeb09e429a789c3f8ced605e7e1a5932fd6cce7f7f4ce30a960da77fba18b9a3"
+content-hash = "880b0251ec1ac83a7a8f6b1637b0860f75553d1f9c7c67313af9f3bb686c166a"
@@ -14,7 +14,7 @@ packages = [
 python = "^3.12"
 datasets = "*"
 pandas = "*"
-litellm = "*"
+litellm = { git = "https://github.com/BerriAI/litellm.git", rev = "58fe6610601297c5a90367fd4583469e2df3fcf9" }
 google-generativeai = "*" # To use litellm with Gemini Pro API
 termcolor = "*"
 seaborn = "*"
@@ -89,6 +89,7 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]

+
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -119,6 +120,7 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"

+
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"
@@ -5,6 +5,7 @@ import sys
 import docx
 import pytest

+from openhands.runtime.plugins.agent_skills.agentskills import file_editor
 from openhands.runtime.plugins.agent_skills.file_ops.file_ops import (
     WINDOW,
     _print_window,
@@ -715,3 +716,302 @@ def test_parse_pptx(tmp_path):
         'Hello, this is the second test PPTX slide.\n\n'
     )
     assert output == expected_output, f'Expected output does not match. Got: {output}'
+
+
+# =============================================================================
+
+
+def test_file_editor_view(tmp_path):
+    # generate a random directory
+    random_dir = tmp_path / 'dir_1'
+    random_dir.mkdir()
+    # create a file in the directory
+    random_file = random_dir / 'a.txt'
+    random_file.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
+    random_dir_2 = tmp_path / 'dir_2'
+    random_dir_2.mkdir()
+    random_file_2 = random_dir_2 / 'b.txt'
+    random_file_2.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
+
+    from openhands.runtime.plugins.agent_skills.agentskills import file_editor
+
+    # view the file
+    result = file_editor(command='view', path=str(random_file))
+    print('\n', result)
+    assert result is not None
+    assert (
+        result.split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tLine 2
+     3\tLine 3
+     4\tLine 4
+     5\tLine 5
+""".split('\n')
+    )
+
+    # view the directory
+    result = file_editor(command='view', path=str(tmp_path))
+    print('\n', result)
+    assert result is not None
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the files and directories up to 2 levels deep in {tmp_path}, excluding hidden items:
+{tmp_path}
+{tmp_path}/dir_2
+{tmp_path}/dir_2/b.txt
+{tmp_path}/dir_1
+{tmp_path}/dir_1/a.txt
+""".strip().split('\n')
+    )
+
+
+def test_file_editor_create(tmp_path):
+    # generate a random directory
+    random_dir = tmp_path / 'dir_1'
+    random_dir.mkdir()
+    # create a file in the directory
+    random_file = random_dir / 'a.txt'
+
+    from openhands.runtime.plugins.agent_skills.agentskills import file_editor
+
+    # view a non-existent file
+    result = file_editor(command='view', path=str(random_file))
+    print(result)
+    assert result is not None
+    assert (
+        result
+        == f'ERROR:\nThe path {random_file} does not exist. Please provide a valid path.'
+    )
+
+    # create a file
+    result = file_editor(command='create', path=str(random_file), file_text='Line 6')
+    print(result)
+    assert result is not None
+    assert result == f'File created successfully at: {random_file}'
+
+    # view again
+    result = file_editor(command='view', path=str(random_file))
+    print(result)
+    assert result is not None
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 6
+""".strip().split('\n')
+    )
+
+
+@pytest.fixture
+def setup_file(tmp_path):
+    random_dir = tmp_path / 'dir_1'
+    random_dir.mkdir()
+    random_file = random_dir / 'a.txt'
+    return random_file
+
+
+def test_file_editor_create_and_view(setup_file):
+    random_file = setup_file
+
+    # Test create command
+    result = file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+    print(result)
+    assert result == f'File created successfully at: {random_file}'
+
+    # Test view command for file
+    result = file_editor(command='view', path=str(random_file))
+    print(result)
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tLine 2
+     3\tLine 3
+""".strip().split('\n')
+    )
+
+    # Test view command for directory
+    result = file_editor(command='view', path=str(random_file.parent))
+    assert f'{random_file.parent}' in result
+    assert f'{random_file.name}' in result
+
+
+def test_file_editor_view_nonexistent(setup_file):
+    random_file = setup_file
+
+    # Test view command for non-existent file
+    result = file_editor(command='view', path=str(random_file))
+    assert (
+        result
+        == f'ERROR:\nThe path {random_file} does not exist. Please provide a valid path.'
+    )
+
+
+def test_file_editor_str_replace(setup_file):
+    random_file = setup_file
+    file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+
+    # Test str_replace command
+    result = file_editor(
+        command='str_replace',
+        path=str(random_file),
+        old_str='Line 2',
+        new_str='New Line 2',
+    )
+    print(result)
+    assert (
+        result
+        == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of {random_file}:
+     1\tLine 1
+     2\tNew Line 2
+     3\tLine 3
+Review the changes and make sure they are as expected. Edit the file again if necessary."""
+    )
+
+    # View the file after str_replace
+    result = file_editor(command='view', path=str(random_file))
+    print(result)
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tNew Line 2
+     3\tLine 3
+""".strip().split('\n')
+    )
+
+
+def test_file_editor_str_replace_non_existent(setup_file):
+    random_file = setup_file
+    file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+
+    # Test str_replace with non-existent string
+    result = file_editor(
+        command='str_replace',
+        path=str(random_file),
+        old_str='Non-existent Line',
+        new_str='New Line',
+    )
+    print(result)
+    assert (
+        result
+        == f'ERROR:\nNo replacement was performed, old_str `Non-existent Line` did not appear verbatim in {random_file}.'
+    )
+
+
+def test_file_editor_insert(setup_file):
+    random_file = setup_file
+    file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+
+    # Test insert command
+    result = file_editor(
+        command='insert', path=str(random_file), insert_line=2, new_str='Inserted Line'
+    )
+    print(result)
+    assert (
+        result
+        == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file:
+     1\tLine 1
+     2\tLine 2
+     3\tInserted Line
+     4\tLine 3
+Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary."""
+    )
+
+    # View the file after insert
+    result = file_editor(command='view', path=str(random_file))
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tLine 2
+     3\tInserted Line
+     4\tLine 3
+""".strip().split('\n')
+    )
+
+
+def test_file_editor_insert_invalid_line(setup_file):
+    random_file = setup_file
+    file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+
+    # Test insert with invalid line number
+    result = file_editor(
+        command='insert',
+        path=str(random_file),
+        insert_line=10,
+        new_str='Invalid Insert',
+    )
+    assert (
+        result
+        == 'ERROR:\nInvalid `insert_line` parameter: 10. It should be within the range of lines of the file: [0, 3]'
+    )
+
+
+def test_file_editor_undo_edit(setup_file):
+    random_file = setup_file
+    result = file_editor(
+        command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3'
+    )
+    print(result)
+    assert result == f"""File created successfully at: {random_file}"""
+
+    # Make an edit
+    result = file_editor(
+        command='str_replace',
+        path=str(random_file),
+        old_str='Line 2',
+        new_str='New Line 2',
+    )
+    print(result)
+    assert (
+        result
+        == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of {random_file}:
+     1\tLine 1
+     2\tNew Line 2
+     3\tLine 3
+Review the changes and make sure they are as expected. Edit the file again if necessary."""
+    )
+
+    # Test undo_edit command
+    result = file_editor(command='undo_edit', path=str(random_file))
+    print(result)
+    assert (
+        result
+        == f"""Last edit to {random_file} undone successfully. Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tLine 2
+     3\tLine 3
+"""
+    )
+
+    # View the file after undo_edit
+    result = file_editor(command='view', path=str(random_file))
+    assert (
+        result.strip().split('\n')
+        == f"""Here's the result of running `cat -n` on {random_file}:
+     1\tLine 1
+     2\tLine 2
+     3\tLine 3
+""".strip().split('\n')
+    )
+
+
+def test_file_editor_undo_edit_no_edits(tmp_path):
+    random_file = tmp_path / 'a.txt'
+    random_file.touch()
+
+    # Test undo_edit when no edits have been made
+    result = file_editor(command='undo_edit', path=str(random_file))
+    print(result)
+    assert result == f'ERROR:\nNo edit history found for {random_file}.'
@@ -24,29 +24,35 @@ def agent() -> CodeActAgent:


 def test_cmd_output_observation_message(agent: CodeActAgent):
+    agent.config.function_calling = False
     obs = CmdOutputObservation(
         command='echo hello', content='Command output', command_id=1, exit_code=0
     )

-    result = agent.get_observation_message(obs)
+    results = agent.get_observation_message(obs, tool_call_id_to_message={})
+    assert len(results) == 1
+
+    result = results[0]
     assert result is not None
     assert result.role == 'user'
     assert len(result.content) == 1
     assert isinstance(result.content[0], TextContent)
     assert 'OBSERVATION:' in result.content[0].text
     assert 'Command output' in result.content[0].text
-    assert 'Command 1 finished with exit code 0' in result.content[0].text
+    assert 'Command finished with exit code 0' in result.content[0].text


 def test_ipython_run_cell_observation_message(agent: CodeActAgent):
+    agent.config.function_calling = False
     obs = IPythonRunCellObservation(
         code='plt.plot()',
         content='IPython output\n',
     )

-    result = agent.get_observation_message(obs)
+    results = agent.get_observation_message(obs, tool_call_id_to_message={})
+    assert len(results) == 1
+
+    result = results[0]
     assert result is not None
     assert result.role == 'user'
     assert len(result.content) == 1
@@ -61,12 +67,15 @@ def test_ipython_run_cell_observation_message(agent: CodeActAgent):


 def test_agent_delegate_observation_message(agent: CodeActAgent):
+    agent.config.function_calling = False
     obs = AgentDelegateObservation(
         content='Content', outputs={'content': 'Delegated agent output'}
     )

-    result = agent.get_observation_message(obs)
+    results = agent.get_observation_message(obs, tool_call_id_to_message={})
+    assert len(results) == 1
+
+    result = results[0]
     assert result is not None
     assert result.role == 'user'
     assert len(result.content) == 1
@@ -76,10 +85,13 @@ def test_agent_delegate_observation_message(agent: CodeActAgent):


 def test_error_observation_message(agent: CodeActAgent):
+    agent.config.function_calling = False
     obs = ErrorObservation('Error message')

-    result = agent.get_observation_message(obs)
+    results = agent.get_observation_message(obs, tool_call_id_to_message={})
+    assert len(results) == 1
+
+    result = results[0]
     assert result is not None
     assert result.role == 'user'
     assert len(result.content) == 1
@@ -93,4 +105,4 @@ def test_unknown_observation_message(agent: CodeActAgent):
     obs = Mock()

     with pytest.raises(ValueError, match='Unknown observation type:'):
-        agent.get_observation_message(obs)
+        agent.get_observation_message(obs, tool_call_id_to_message={})
@@ -78,7 +78,10 @@ def test_message_with_only_text_content_and_vision_disabled():

     expected_serialized_message = {
         'role': 'user',
-        'content': 'This is a text message\nThis is another text message',
+        'content': [
+            {'type': 'text', 'text': 'This is a text message'},
+            {'type': 'text', 'text': 'This is another text message'},
+        ],
     }

     assert serialized_message == expected_serialized_message
@@ -107,7 +110,10 @@ def test_message_with_mixed_content_and_vision_disabled():
     # Expected serialization ignores images and concatenates text
     expected_serialized_message = {
         'role': 'user',
-        'content': 'This is a text message\nThis is another text message',
+        'content': [
+            {'type': 'text', 'text': 'This is a text message'},
+            {'type': 'text', 'text': 'This is another text message'},
+        ],
     }

     # Assert serialized message matches expectation
@@ -25,12 +25,38 @@ def mock_event_stream(tmp_path):
     return EventStream('test_session', file_store)


-@pytest.fixture
-def codeact_agent(mock_llm):
+@pytest.fixture(params=[False, True])
+def codeact_agent(mock_llm, request):
     config = AgentConfig()
+    config.function_calling = request.param
     return CodeActAgent(mock_llm, config)


+def response_mock(content: str):
+    class MockModelResponse:
+        def __init__(self, content):
+            self.choices = [
+                {
+                    'message': {
+                        'content': content,
+                        'tool_calls': [
+                            {
+                                'function': {
+                                    'name': 'execute_bash',
+                                    'arguments': '{}',
+                                }
+                            }
+                        ],
+                    }
+                }
+            ]
+
+        def model_dump(self):
+            return {'choices': self.choices}
+
+    return MockModelResponse(content)
+
+
 def test_get_messages_with_reminder(codeact_agent, mock_event_stream):
     # Add some events to the stream
     mock_event_stream.add_event(MessageAction('Initial user message'), EventSource.USER)
@@ -49,9 +75,13 @@ def test_get_messages_with_reminder(codeact_agent, mock_event_stream):
     )  # System, initial user + user message, agent message, last user message
     assert messages[0].content[0].cache_prompt  # system message
     assert messages[1].role == 'user'
-    assert messages[1].content[0].text.endswith("LET'S START!")
-    assert messages[1].content[1].text.endswith('Initial user message')
-    assert messages[1].content[0].cache_prompt
+    if not codeact_agent.config.function_calling:
+        assert messages[1].content[0].text.endswith("LET'S START!")
+        assert messages[1].content[1].text.endswith('Initial user message')
+    else:
+        assert messages[1].content[0].text.endswith('Initial user message')
+    # we add cache breakpoint to the last 3 user messages
+    assert messages[1].content[-1].cache_prompt

     assert messages[3].role == 'user'
     assert messages[3].content[0].text == ('Hello, agent!')
@@ -62,13 +92,14 @@ def test_get_messages_with_reminder(codeact_agent, mock_event_stream):
     assert messages[5].role == 'user'
     assert messages[5].content[0].text.startswith('Laaaaaaaast!')
     assert messages[5].content[0].cache_prompt
-    assert (
-        messages[5]
-        .content[1]
-        .text.endswith(
-            'ENVIRONMENT REMINDER: You have 5 turns left to complete the task. When finished reply with <finish></finish>.'
-        )
-    )
+    if not codeact_agent.config.function_calling:
+        assert (
+            messages[5]
+            .content[1]
+            .text.endswith(
+                'ENVIRONMENT REMINDER: You have 5 turns left to complete the task. When finished reply with <finish></finish>.'
+            )
+        )


 def test_get_messages_prompt_caching(codeact_agent, mock_event_stream):
@@ -97,9 +128,10 @@ def test_get_messages_prompt_caching(codeact_agent, mock_event_stream):
     )  # Including the initial system+user + 2 last user message

     # Verify that these are indeed the last two user messages (from start)
-    assert (
-        cached_user_messages[0].content[0].text.startswith('A chat between')
-    )  # system message
+    if not codeact_agent.config.function_calling:
+        assert (
+            cached_user_messages[0].content[0].text.startswith('A chat between')
+        )  # system message
     assert cached_user_messages[2].content[0].text.startswith('User message 1')
     assert cached_user_messages[3].content[0].text.startswith('User message 1')
@@ -144,14 +176,15 @@ def test_get_messages_with_cmd_action(codeact_agent, mock_event_stream):
     # Assert the presence of key elements in the messages
     assert (
         messages[1]
-        .content[1]
+        .content[-1]
         .text.startswith("Let's list the contents of the current directory.")
     )  # user, included in the initial message
-    assert any(
-        'List files in current directory\n<execute_bash>\nls -l\n</execute_bash>'
-        in msg.content[0].text
-        for msg in messages
-    )  # agent
+    if not codeact_agent.config.function_calling:
+        assert any(
+            'List files in current directory\n<execute_bash>\nls -l\n</execute_bash>'
+            in msg.content[0].text
+            for msg in messages
+        )  # agent
     assert any(
         'total 0\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file1.txt\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file2.txt'
         in msg.content[0].text
@@ -160,7 +193,8 @@ def test_get_messages_with_cmd_action(codeact_agent, mock_event_stream):
     assert any(
         "Now, let's create a new directory." in msg.content[0].text for msg in messages
     )  # agent
-    assert messages[4].content[1].text.startswith('Create a new directory')  # agent
+    if not codeact_agent.config.function_calling:
+        assert messages[4].content[1].text.startswith('Create a new directory')  # agent
     assert any(
         'finished with exit code 0' in msg.content[0].text for msg in messages
     )  # user, observation
@@ -171,16 +205,20 @@ def test_get_messages_with_cmd_action(codeact_agent, mock_event_stream):
     # prompt cache is added to the system message
     assert messages[0].content[0].cache_prompt
     # and the first initial user message
-    assert messages[1].content[0].cache_prompt
+    assert messages[1].content[-1].cache_prompt
     # and to the last two user messages
     assert messages[3].content[0].cache_prompt
     assert messages[5].content[0].cache_prompt

     # reminder is added to the last user message
-    assert 'ENVIRONMENT REMINDER: You have 5 turns' in messages[5].content[1].text
+    if not codeact_agent.config.function_calling:
+        assert 'ENVIRONMENT REMINDER: You have 5 turns' in messages[5].content[1].text


 def test_prompt_caching_headers(codeact_agent, mock_event_stream):
+    if codeact_agent.config.function_calling:
+        pytest.skip('Skipping this test for function calling')
+
     # Setup
     mock_event_stream.add_event(MessageAction('Hello, agent!'), EventSource.USER)
     mock_event_stream.add_event(MessageAction('Hello, user!'), EventSource.AGENT)
@@ -221,8 +221,9 @@ def test_unsafe_bash_command(temp_dir: str):
             name=ActionType.RUN_IPYTHON,
             arguments={
                 'code': "print('hello')",
-                'kernel_init_code': '',
+                'include_extra': True,
                 'confirmation_state': ActionConfirmationStatus.CONFIRMED,
+                'kernel_init_code': '',
             },
         ),
     ),