feat(backend): Improve SmartDecisionMaker agent-loop capability & add Anthropic support (#9585)
### Changes 🏗️

This PR addresses a few agent-loop issues:

* The agent loop had no Anthropic support.
* The system & user prompts were duplicated as the main objective prompt on each agent-loop iteration.
* The SmartDecisionMakerBlock agent loop rendered the conversation history as one long block of text in the UI.
* Execution inputs were not rendered in the execution list, making debugging harder.

https://github.com/user-attachments/assets/be430000-bde0-40c6-8f2e-c97ce45b5ed1

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] Create from scratch and execute an agent with at least 3 blocks using the SmartDecisionMaker block.
In `llm_call`'s Anthropic path, consecutive same-role messages are now merged only when both contents are plain strings; structured (list-shaped) content gets its own message entry:

```diff
@@ -371,12 +371,16 @@ def llm_call(
     last_role = None
     for p in prompt:
         if p["role"] in ["user", "assistant"]:
-            if p["role"] != last_role:
+            if (
+                p["role"] == last_role
+                and isinstance(messages[-1]["content"], str)
+                and isinstance(p["content"], str)
+            ):
+                # If the role is the same as the last one, combine the content
+                messages[-1]["content"] += p["content"]
+            else:
                 messages.append({"role": p["role"], "content": p["content"]})
                 last_role = p["role"]
-            else:
-                # If the role is the same as the last one, combine the content
-                messages[-1]["content"] += "\n" + p["content"]
 
     client = anthropic.Anthropic(api_key=credentials.api_key.get_secret_value())
     try:
```
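For intuition, here is a minimal standalone sketch of the new merge rule. The `prompt` entries below are hypothetical examples; in the real code, `messages` is the Anthropic-bound list assembled inside `llm_call`:

```python
# Same-role messages are combined only when both contents are plain strings;
# list-shaped (tool-use/tool-result) content stays a separate message.
prompt = [
    {"role": "user", "content": "Hello. "},
    {"role": "user", "content": "Please book a flight."},  # merged into the above
    {
        "role": "user",
        "content": [{"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}],
    },  # list content: kept separate
]

messages: list[dict] = []
last_role = None
for p in prompt:
    if p["role"] in ["user", "assistant"]:
        if (
            p["role"] == last_role
            and isinstance(messages[-1]["content"], str)
            and isinstance(p["content"], str)
        ):
            messages[-1]["content"] += p["content"]
        else:
            messages.append({"role": p["role"], "content": p["content"]})
            last_role = p["role"]

assert len(messages) == 2  # two string messages merged; tool_result untouched
```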
The raw response now preserves the full Anthropic response object instead of only its first content block:

```diff
@@ -415,7 +419,7 @@ def llm_call(
         )
 
         return LLMResponse(
-            raw_response=resp.content[0],
+            raw_response=resp,
             prompt=prompt,
             response=(
                 resp.content[0].name
```
Three new helpers detect tool requests and tool responses in both OpenAI and Anthropic message formats, and construct a provider-appropriate tool-response message:

```diff
@@ -33,6 +33,81 @@ def get_database_manager_client():
     return get_service_client(DatabaseManager)
 
 
+def _get_tool_requests(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool request.
+    Supports both OpenAI and Anthropic formats.
+    """
+    tool_call_ids = []
+    if entry.get("role") != "assistant":
+        return tool_call_ids
+
+    # OpenAI: check for tool_calls in the entry.
+    calls = entry.get("tool_calls")
+    if isinstance(calls, list):
+        for call in calls:
+            if tool_id := call.get("id"):
+                tool_call_ids.append(tool_id)
+
+    # Anthropic: check content items for tool_use type.
+    content = entry.get("content")
+    if isinstance(content, list):
+        for item in content:
+            if item.get("type") != "tool_use":
+                continue
+            if tool_id := item.get("id"):
+                tool_call_ids.append(tool_id)
+
+    return tool_call_ids
+
+
+def _get_tool_responses(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool response.
+    Supports both OpenAI and Anthropic formats.
+    """
+    tool_call_ids: list[str] = []
+
+    # OpenAI: a tool response message with role "tool" and key "tool_call_id".
+    if entry.get("role") == "tool":
+        if tool_call_id := entry.get("tool_call_id"):
+            tool_call_ids.append(str(tool_call_id))
+
+    # Anthropic: check content items for tool_result type.
+    if entry.get("role") == "user":
+        content = entry.get("content")
+        if isinstance(content, list):
+            for item in content:
+                if item.get("type") != "tool_result":
+                    continue
+                if tool_call_id := item.get("tool_use_id"):
+                    tool_call_ids.append(tool_call_id)
+
+    return tool_call_ids
+
+
+def _create_tool_response(call_id: str, output: dict[str, Any]) -> dict[str, Any]:
+    """
+    Create a tool response message for either OpenAI or Anthropic,
+    based on the tool_id format.
+    """
+    content = output if isinstance(output, str) else json.dumps(output)
+
+    # Anthropic format: tool IDs typically start with "toolu_"
+    if call_id.startswith("toolu_"):
+        return {
+            "role": "user",
+            "type": "message",
+            "content": [
+                {"tool_use_id": call_id, "type": "tool_result", "content": content}
+            ],
+        }
+
+    # OpenAI format: tool IDs typically start with "call_".
+    # Or default fallback (if the tool_id doesn't match any known prefix)
+    return {"role": "tool", "tool_call_id": call_id, "content": content}
+
+
 def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     All the tool call entries in the conversation history require a response.
```
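A quick sanity sketch of how these helpers classify entries; the IDs and payloads below are made up for illustration:

```python
# Hypothetical tool-request entries in each provider's format.
openai_request = {
    "role": "assistant",
    "tool_calls": [{"id": "call_abc123", "type": "function"}],
}
anthropic_request = {
    "role": "assistant",
    "content": [{"type": "tool_use", "id": "toolu_xyz789", "name": "search"}],
}

assert _get_tool_requests(openai_request) == ["call_abc123"]
assert _get_tool_requests(anthropic_request) == ["toolu_xyz789"]

# _create_tool_response infers the reply shape from the ID prefix:
assert _create_tool_response("call_abc123", {"ok": True})["role"] == "tool"
assert _create_tool_response("toolu_xyz789", {"ok": True})["role"] == "user"
```

The `toolu_`/`call_` prefix check is heuristic by design: IDs minted by Anthropic and OpenAI conventionally carry those prefixes, and anything unrecognized falls back to the OpenAI shape.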
`get_pending_tool_calls` now counts requests and responses through those format-aware helpers:

```diff
@@ -42,10 +117,10 @@ def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     pending_calls = Counter()
     for history in conversation_history:
-        for call in history.get("tool_calls") or []:
-            pending_calls[call.get("id")] += 1
+        for call_id in _get_tool_requests(history):
+            pending_calls[call_id] += 1
 
-        if call_id := history.get("tool_call_id"):
+        for call_id in _get_tool_responses(history):
             pending_calls[call_id] -= 1
 
     return {call_id: count for call_id, count in pending_calls.items() if count > 0}
```
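Assuming the helpers above are in scope, a hypothetical mixed-format history shows the counting:

```python
# The OpenAI-style call was answered; the Anthropic-style call is still
# awaiting its tool_result, so it is the only pending entry.
history = [
    {"role": "assistant", "tool_calls": [{"id": "call_1"}]},
    {"role": "tool", "tool_call_id": "call_1", "content": "done"},
    {"role": "assistant", "content": [{"type": "tool_use", "id": "toolu_2"}]},
]
assert get_pending_tool_calls(history) == {"toolu_2": 1}
```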
The default system prompt is expanded to demand complete, schema-matching arguments and exactly one tool call per turn:

```diff
@@ -70,7 +145,13 @@ class SmartDecisionMakerBlock(Block):
     credentials: llm.AICredentials = llm.AICredentialsField()
     sys_prompt: str = SchemaField(
         title="System Prompt",
-        default="Thinking carefully step by step decide which function to call. Always choose a function call from the list of function signatures.",
+        default="Thinking carefully step by step decide which function to call. "
+        "Always choose a function call from the list of function signatures, "
+        "and always provide the complete argument provided with the type "
+        "matching the required jsonschema signature, no missing argument is allowed. "
+        "If you have already completed the task objective, you can end the task "
+        "by providing the end result of your work as a finish message. "
+        "Only provide EXACTLY one function call, multiple tool calls is strictly prohibited.",
         description="The system prompt to provide additional context to the model.",
     )
     conversation_history: list[dict] = SchemaField(
```
A stray blank line is dropped where pending tool calls are computed:

```diff
@@ -122,7 +203,6 @@ class SmartDecisionMakerBlock(Block):
 
         conversation_history = data.get("conversation_history", [])
         pending_tool_calls = get_pending_tool_calls(conversation_history)
-
         last_tool_output = data.get("last_tool_output")
         if not last_tool_output and pending_tool_calls:
             return {"last_tool_output"}
```
Pending tool calls are prefilled via `_create_tool_response`, and when no pending call is found, the block now falls back to appending the tool output as a user message:

```diff
@@ -347,17 +427,31 @@ class SmartDecisionMakerBlock(Block):
             # Prefill all missing tool calls with the last tool output.
             # TODO: we need a better way to handle this.
             tool_output = [
-                {
-                    "role": "tool",
-                    "content": input_data.last_tool_output,
-                    "tool_call_id": pending_call_id,
-                }
+                _create_tool_response(pending_call_id, input_data.last_tool_output)
                 for pending_call_id, count in pending_tool_calls.items()
                 for _ in range(count)
             ]
 
             # If the SDM block only calls 1 tool at a time, this should not happen.
             if len(tool_output) > 1:
                 logger.warning(
-                    f"[node_exec_id={node_exec_id}] Multiple pending tool calls are prefilled using a single output. Execution may not be accurate."
+                    f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] "
+                    f"Multiple pending tool calls are prefilled using a single output. "
+                    f"Execution may not be accurate."
                 )
 
+            # Fallback on adding tool output in the conversation history as user prompt.
+            if len(tool_output) == 0:
+                logger.warning(
+                    f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] "
+                    f"No pending tool calls found. This may indicate an issue with the "
+                    f"conversation history, or an LLM calling two tools at the same time."
+                )
+                tool_output.append(
+                    {
+                        "role": "user",
+                        "content": f"Last tool output: {json.dumps(input_data.last_tool_output)}",
+                    }
+                )
+
             prompt.extend(tool_output)
```
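A small sketch of the prefill expansion, with hypothetical pending counts (assuming `_create_tool_response` from earlier in this diff):

```python
# Each pending call gets one prefilled response; counts > 1 expand accordingly.
pending_tool_calls = {"toolu_a": 2, "call_b": 1}
last_tool_output = {"result": 42}

tool_output = [
    _create_tool_response(pending_call_id, last_tool_output)
    for pending_call_id, count in pending_tool_calls.items()
    for _ in range(count)
]
assert len(tool_output) == 3  # > 1, so the "multiple pending calls" warning fires
```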
The main objective prompts are tagged with a `[Main Objective Prompt]: ` prefix and appended only when not already present, preventing duplication across agent-loop iterations:

```diff
@@ -367,11 +461,17 @@ class SmartDecisionMakerBlock(Block):
         input_data.prompt = llm.fmt.format_string(input_data.prompt, values)
         input_data.sys_prompt = llm.fmt.format_string(input_data.sys_prompt, values)
 
-        if input_data.sys_prompt:
-            prompt.append({"role": "system", "content": input_data.sys_prompt})
+        prefix = "[Main Objective Prompt]: "
 
-        if input_data.prompt:
-            prompt.append({"role": "user", "content": input_data.prompt})
+        if input_data.sys_prompt and not any(
+            p["role"] == "system" and p["content"].startswith(prefix) for p in prompt
+        ):
+            prompt.append({"role": "system", "content": prefix + input_data.sys_prompt})
+
+        if input_data.prompt and not any(
+            p["role"] == "user" and p["content"].startswith(prefix) for p in prompt
+        ):
+            prompt.append({"role": "user", "content": prefix + input_data.prompt})
 
         response = llm.llm_call(
             credentials=credentials,
```
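A sketch of the dedup guard, assuming a conversation history that already carries the tagged objective from a previous loop iteration:

```python
prefix = "[Main Objective Prompt]: "
# The objective was already appended (and prefixed) on an earlier iteration.
prompt = [{"role": "system", "content": prefix + "Book a flight."}]
sys_prompt = "Book a flight."

if sys_prompt and not any(
    p["role"] == "system" and p["content"].startswith(prefix) for p in prompt
):
    prompt.append({"role": "system", "content": prefix + sys_prompt})

assert len(prompt) == 1  # the objective is not appended twice
```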
When the model makes no tool call, the block now finishes with the model's response text alone:

```diff
@@ -384,7 +484,7 @@ class SmartDecisionMakerBlock(Block):
         )
 
         if not response.tool_calls:
-            yield "finished", f"No Decision Made finishing task: {response.response}"
+            yield "finished", response.response
             return
 
         for tool_call in response.tool_calls:
```
On the frontend, the output modal now renders each result with `truncateLongData`, so long conversation histories no longer flood the UI:

```diff
@@ -29,7 +29,12 @@ const OutputModalComponent: FC<OutputModalProps> = ({
       <div className="my-2 max-h-[384px] flex-grow overflow-y-auto rounded-md p-2">
         {executionResults.map((data, i) => (
           <>
-            <DataTable key={i} title={data.execId} data={data.data} />
+            <DataTable
+              key={i}
+              title={data.execId}
+              data={data.data}
+              truncateLongData={true}
+            />
             <Separator />
           </>
         ))}
```
||||
@@ -325,7 +325,10 @@ export default function useAgentGraph(
|
||||
...(node.data.executionResults || []),
|
||||
{
|
||||
execId: executionData.node_exec_id,
|
||||
data: executionData.output_data,
|
||||
data: {
|
||||
"[Input]": [executionData.input_data],
|
||||
...executionData.output_data,
|
||||
},
|
||||
},
|
||||
]
|
||||
: node.data.executionResults,
|
||||
|
||||