From e5eadeace4c02e3d6c210eb06f3def41ca6ead68 Mon Sep 17 00:00:00 2001
From: Zamil Majdy
Date: Thu, 6 Mar 2025 19:07:41 +0700
Subject: [PATCH] feat(backend): Improve SmartDecisionMaker Agent-loop capability & add Anthropics support (#9585)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Changes 🏗️

This PR addresses a few agent-loop issues:

* The agent loop is not supported when using Anthropic models.
* The system & user prompts are re-appended as the main objective prompt on every loop iteration, duplicating them in the conversation history.
* The SmartDecisionMakerBlock agent loop renders an overly long conversation-history text in the UI.
* The execution input is not rendered in the execution list, which makes debugging harder.

https://github.com/user-attachments/assets/be430000-bde0-40c6-8f2e-c97ce45b5ed1

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Create from scratch and execute an agent with at least 3 blocks using SmartDecisionMaker Block.
---
 .../backend/backend/blocks/llm.py             |  14 +-
 .../backend/blocks/smart_decision_maker.py    | 132 +++++++++++++++---
 .../src/components/OutputModalComponent.tsx   |   7 +-
 .../frontend/src/hooks/useAgentGraph.tsx      |   5 +-
 4 files changed, 135 insertions(+), 23 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py
index 2ec54b6d6e..1ffb0f24ce 100644
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -371,12 +371,16 @@ def llm_call(
         last_role = None
         for p in prompt:
             if p["role"] in ["user", "assistant"]:
-                if p["role"] != last_role:
+                if (
+                    p["role"] == last_role
+                    and isinstance(messages[-1]["content"], str)
+                    and isinstance(p["content"], str)
+                ):
+                    # If the role is the same as the last one, combine the content
+                    messages[-1]["content"] += p["content"]
+                else:
                     messages.append({"role": p["role"], "content": p["content"]})
                     last_role = p["role"]
-                else:
-                    # If the role is the same as the last one, combine the content
-                    messages[-1]["content"] += "\n" + p["content"]
 
         client = anthropic.Anthropic(api_key=credentials.api_key.get_secret_value())
         try:
@@ -415,7 +419,7 @@ def llm_call(
             )
 
             return LLMResponse(
-                raw_response=resp.content[0],
+                raw_response=resp,
                 prompt=prompt,
                 response=(
                     resp.content[0].name
diff --git a/autogpt_platform/backend/backend/blocks/smart_decision_maker.py b/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
index 72988b426b..b41f5506f4 100644
--- a/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
+++ b/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
@@ -33,6 +33,81 @@ def get_database_manager_client():
     return get_service_client(DatabaseManager)
 
 
+def _get_tool_requests(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool request.
+    Supports both OpenAI and Anthropics formats.
+    """
+    tool_call_ids = []
+    if entry.get("role") != "assistant":
+        return tool_call_ids
+
+    # OpenAI: check for tool_calls in the entry.
+    calls = entry.get("tool_calls")
+    if isinstance(calls, list):
+        for call in calls:
+            if tool_id := call.get("id"):
+                tool_call_ids.append(tool_id)
+
+    # Anthropics: check content items for tool_use type.
+    content = entry.get("content")
+    if isinstance(content, list):
+        for item in content:
+            if item.get("type") != "tool_use":
+                continue
+            if tool_id := item.get("id"):
+                tool_call_ids.append(tool_id)
+
+    return tool_call_ids
+
+
+def _get_tool_responses(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool response.
+    Supports both OpenAI and Anthropics formats.
+    """
+    tool_call_ids: list[str] = []
+
+    # OpenAI: a tool response message with role "tool" and key "tool_call_id".
+    if entry.get("role") == "tool":
+        if tool_call_id := entry.get("tool_call_id"):
+            tool_call_ids.append(str(tool_call_id))
+
+    # Anthropics: check content items for tool_result type.
+    if entry.get("role") == "user":
+        content = entry.get("content")
+        if isinstance(content, list):
+            for item in content:
+                if item.get("type") != "tool_result":
+                    continue
+                if tool_call_id := item.get("tool_use_id"):
+                    tool_call_ids.append(tool_call_id)
+
+    return tool_call_ids
+
+
+def _create_tool_response(call_id: str, output: dict[str, Any]) -> dict[str, Any]:
+    """
+    Create a tool response message for either OpenAI or Anthropics,
+    based on the tool_id format.
+    """
+    content = output if isinstance(output, str) else json.dumps(output)
+
+    # Anthropics format: tool IDs typically start with "toolu_"
+    if call_id.startswith("toolu_"):
+        return {
+            "role": "user",
+            "type": "message",
+            "content": [
+                {"tool_use_id": call_id, "type": "tool_result", "content": content}
+            ],
+        }
+
+    # OpenAI format: tool IDs typically start with "call_".
+    # Or default fallback (if the tool_id doesn't match any known prefix)
+    return {"role": "tool", "tool_call_id": call_id, "content": content}
+
+
 def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     All the tool calls entry in the conversation history requires a response.
@@ -42,10 +117,10 @@ def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     pending_calls = Counter()
     for history in conversation_history:
-        for call in history.get("tool_calls") or []:
-            pending_calls[call.get("id")] += 1
+        for call_id in _get_tool_requests(history):
+            pending_calls[call_id] += 1
 
-        if call_id := history.get("tool_call_id"):
+        for call_id in _get_tool_responses(history):
             pending_calls[call_id] -= 1
 
     return {call_id: count for call_id, count in pending_calls.items() if count > 0}
@@ -70,7 +145,13 @@ class SmartDecisionMakerBlock(Block):
         credentials: llm.AICredentials = llm.AICredentialsField()
         sys_prompt: str = SchemaField(
             title="System Prompt",
-            default="Thinking carefully step by step decide which function to call. Always choose a function call from the list of function signatures.",
+            default="Thinking carefully step by step decide which function to call. "
+            "Always choose a function call from the list of function signatures, "
+            "and always provide the complete argument provided with the type "
+            "matching the required jsonschema signature, no missing argument is allowed. "
+            "If you have already completed the task objective, you can end the task "
+            "by providing the end result of your work as a finish message. "
" + "Only provide EXACTLY one function call, multiple tool calls is strictly prohibited.", description="The system prompt to provide additional context to the model.", ) conversation_history: list[dict] = SchemaField( @@ -122,7 +203,6 @@ class SmartDecisionMakerBlock(Block): conversation_history = data.get("conversation_history", []) pending_tool_calls = get_pending_tool_calls(conversation_history) - last_tool_output = data.get("last_tool_output") if not last_tool_output and pending_tool_calls: return {"last_tool_output"} @@ -347,17 +427,31 @@ class SmartDecisionMakerBlock(Block): # Prefill all missing tool calls with the last tool output/ # TODO: we need a better way to handle this. tool_output = [ - { - "role": "tool", - "content": input_data.last_tool_output, - "tool_call_id": pending_call_id, - } + _create_tool_response(pending_call_id, input_data.last_tool_output) for pending_call_id, count in pending_tool_calls.items() for _ in range(count) ] + + # If the SDM block only calls 1 tool at a time, this should not happen. if len(tool_output) > 1: logger.warning( - f"[node_exec_id={node_exec_id}] Multiple pending tool calls are prefilled using a single output. Execution may not be accurate." + f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] " + f"Multiple pending tool calls are prefilled using a single output. " + f"Execution may not be accurate." + ) + + # Fallback on adding tool output in the conversation history as user prompt. + if len(tool_output) == 0: + logger.warning( + f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] " + f"No pending tool calls found. This may indicate an issue with the " + f"conversation history, or an LLM calling two tools at the same time." + ) + tool_output.append( + { + "role": "user", + "content": f"Last tool output: {json.dumps(input_data.last_tool_output)}", + } ) prompt.extend(tool_output) @@ -367,11 +461,17 @@ class SmartDecisionMakerBlock(Block): input_data.prompt = llm.fmt.format_string(input_data.prompt, values) input_data.sys_prompt = llm.fmt.format_string(input_data.sys_prompt, values) - if input_data.sys_prompt: - prompt.append({"role": "system", "content": input_data.sys_prompt}) + prefix = "[Main Objective Prompt]: " - if input_data.prompt: - prompt.append({"role": "user", "content": input_data.prompt}) + if input_data.sys_prompt and not any( + p["role"] == "system" and p["content"].startswith(prefix) for p in prompt + ): + prompt.append({"role": "system", "content": prefix + input_data.sys_prompt}) + + if input_data.prompt and not any( + p["role"] == "user" and p["content"].startswith(prefix) for p in prompt + ): + prompt.append({"role": "user", "content": prefix + input_data.prompt}) response = llm.llm_call( credentials=credentials, @@ -384,7 +484,7 @@ class SmartDecisionMakerBlock(Block): ) if not response.tool_calls: - yield "finished", f"No Decision Made finishing task: {response.response}" + yield "finished", response.response return for tool_call in response.tool_calls: diff --git a/autogpt_platform/frontend/src/components/OutputModalComponent.tsx b/autogpt_platform/frontend/src/components/OutputModalComponent.tsx index 082bdb022d..8cd7efa451 100644 --- a/autogpt_platform/frontend/src/components/OutputModalComponent.tsx +++ b/autogpt_platform/frontend/src/components/OutputModalComponent.tsx @@ -29,7 +29,12 @@ const OutputModalComponent: FC = ({
         {executionResults.map((data, i) => ( <> - + ))}
diff --git a/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx b/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
index 1ce8aa389f..84cc3b15f4 100644
--- a/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
+++ b/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
@@ -325,7 +325,10 @@ export default function useAgentGraph(
                   ...(node.data.executionResults || []),
                   {
                     execId: executionData.node_exec_id,
-                    data: executionData.output_data,
+                    data: {
+                      "[Input]": [executionData.input_data],
+                      ...executionData.output_data,
+                    },
                   },
                 ]
               : node.data.executionResults,
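
For reviewers skimming the `llm.py` hunk above, the sketch below is an illustrative re-statement of the new message-merging rule and is not part of the patch: consecutive same-role messages are concatenated only when both contents are plain strings, so list-shaped contents (e.g. `tool_use` / `tool_result` items) are never mangled by string concatenation. The helper name `merge_consecutive` and the sample messages are invented for illustration.

```python
def merge_consecutive(prompt: list[dict]) -> list[dict]:
    """Collapse consecutive same-role messages, mirroring the rule in the diff above."""
    messages: list[dict] = []
    last_role = None
    for p in prompt:
        if p["role"] not in ("user", "assistant"):
            continue  # system prompts are passed to Anthropic separately
        if (
            p["role"] == last_role
            and messages
            and isinstance(messages[-1]["content"], str)
            and isinstance(p["content"], str)
        ):
            # Same role and both contents are plain strings: safe to concatenate.
            messages[-1]["content"] += p["content"]
        else:
            # Different role, or either content is a list (e.g. a tool_result item):
            # keep it as its own message.
            messages.append({"role": p["role"], "content": p["content"]})
            last_role = p["role"]
    return messages


merged = merge_consecutive(
    [
        {"role": "user", "content": "Objective: plan a trip. "},
        {"role": "user", "content": "The budget is $500."},
        {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}]},
    ]
)
assert len(merged) == 2  # the two string messages merge; the tool_result message stays separate
```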
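Likewise, here is a minimal, self-contained sketch of the two tool-calling message shapes that `_get_tool_requests` / `_get_tool_responses` are written to recognize, and of the request/response bookkeeping that `get_pending_tool_calls` performs. The IDs, the `search` tool name, and the toy `count_pending` helper are invented for illustration and are not part of the patch.

```python
import json
from collections import Counter

# OpenAI shape: the assistant message carries "tool_calls"; the reply uses role "tool".
openai_request = {
    "role": "assistant",
    "tool_calls": [
        {
            "id": "call_abc123",  # made-up ID
            "type": "function",
            "function": {"name": "search", "arguments": json.dumps({"query": "weather"})},
        }
    ],
}
openai_response = {"role": "tool", "tool_call_id": "call_abc123", "content": '{"result": "sunny"}'}

# Anthropic shape: the assistant message carries a "tool_use" content item; the reply is
# a *user* message containing a "tool_result" item that references the same ID.
anthropic_request = {
    "role": "assistant",
    "content": [{"type": "tool_use", "id": "toolu_xyz789", "name": "search", "input": {"query": "weather"}}],
}
anthropic_response = {
    "role": "user",
    "content": [{"type": "tool_result", "tool_use_id": "toolu_xyz789", "content": '{"result": "sunny"}'}],
}


def count_pending(history: list[dict]) -> dict[str, int]:
    """Toy version of the bookkeeping: +1 per request ID, -1 per matching response ID."""
    pending: Counter[str] = Counter()
    for entry in history:
        content = entry.get("content")
        items = content if isinstance(content, list) else []
        if entry.get("role") == "assistant":
            for call in entry.get("tool_calls") or []:  # OpenAI requests
                pending[call["id"]] += 1
            for item in items:  # Anthropic requests
                if item.get("type") == "tool_use":
                    pending[item["id"]] += 1
        if entry.get("role") == "tool" and entry.get("tool_call_id"):  # OpenAI responses
            pending[entry["tool_call_id"]] -= 1
        if entry.get("role") == "user":
            for item in items:  # Anthropic responses
                if item.get("type") == "tool_result":
                    pending[item["tool_use_id"]] -= 1
    return {call_id: count for call_id, count in pending.items() if count > 0}


assert count_pending([openai_request, openai_response]) == {}
assert count_pending([anthropic_request]) == {"toolu_xyz789": 1}
assert count_pending([anthropic_request, anthropic_response]) == {}
```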