From e5eadeace4c02e3d6c210eb06f3def41ca6ead68 Mon Sep 17 00:00:00 2001
From: Zamil Majdy
Date: Thu, 6 Mar 2025 19:07:41 +0700
Subject: [PATCH] feat(backend): Improve SmartDecisionMaker Agent-loop capability & add Anthropics support (#9585)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Changes 🏗️

This PR addresses a few agent-loop issues:

* The agent loop is not supported when using Anthropic models.
* The system & user prompts are re-appended as the main objective prompt on every loop iteration, duplicating them in the conversation history.
* The SmartDecisionMakerBlock agent loop renders an overly long conversation-history text in the UI.
* The execution input is not rendered in the execution list, which makes debugging harder.

https://github.com/user-attachments/assets/be430000-bde0-40c6-8f2e-c97ce45b5ed1

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Create from scratch and execute an agent with at least 3 blocks using SmartDecisionMaker Block.
---
 .../backend/backend/blocks/llm.py             |  14 +-
 .../backend/blocks/smart_decision_maker.py    | 132 +++++++++++++++---
 .../src/components/OutputModalComponent.tsx   |   7 +-
 .../frontend/src/hooks/useAgentGraph.tsx      |   5 +-
 4 files changed, 135 insertions(+), 23 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py
index 2ec54b6d6e..1ffb0f24ce 100644
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -371,12 +371,16 @@ def llm_call(
         last_role = None
         for p in prompt:
             if p["role"] in ["user", "assistant"]:
-                if p["role"] != last_role:
+                if (
+                    p["role"] == last_role
+                    and isinstance(messages[-1]["content"], str)
+                    and isinstance(p["content"], str)
+                ):
+                    # If the role is the same as the last one, combine the content
+                    messages[-1]["content"] += p["content"]
+                else:
                     messages.append({"role": p["role"], "content": p["content"]})
                     last_role = p["role"]
-                else:
-                    # If the role is the same as the last one, combine the content
-                    messages[-1]["content"] += "\n" + p["content"]
 
         client = anthropic.Anthropic(api_key=credentials.api_key.get_secret_value())
         try:
@@ -415,7 +419,7 @@ def llm_call(
             )
 
             return LLMResponse(
-                raw_response=resp.content[0],
+                raw_response=resp,
                 prompt=prompt,
                 response=(
                     resp.content[0].name
diff --git a/autogpt_platform/backend/backend/blocks/smart_decision_maker.py b/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
index 72988b426b..b41f5506f4 100644
--- a/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
+++ b/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
@@ -33,6 +33,81 @@ def get_database_manager_client():
     return get_service_client(DatabaseManager)
 
 
+def _get_tool_requests(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool request.
+    Supports both OpenAI and Anthropics formats.
+    """
+    tool_call_ids = []
+    if entry.get("role") != "assistant":
+        return tool_call_ids
+
+    # OpenAI: check for tool_calls in the entry.
+    calls = entry.get("tool_calls")
+    if isinstance(calls, list):
+        for call in calls:
+            if tool_id := call.get("id"):
+                tool_call_ids.append(tool_id)
+
+    # Anthropics: check content items for tool_use type.
+    content = entry.get("content")
+    if isinstance(content, list):
+        for item in content:
+            if item.get("type") != "tool_use":
+                continue
+            if tool_id := item.get("id"):
+                tool_call_ids.append(tool_id)
+
+    return tool_call_ids
+
+
+def _get_tool_responses(entry: dict[str, Any]) -> list[str]:
+    """
+    Return a list of tool_call_ids if the entry is a tool response.
+    Supports both OpenAI and Anthropics formats.
+    """
+    tool_call_ids: list[str] = []
+
+    # OpenAI: a tool response message with role "tool" and key "tool_call_id".
+    if entry.get("role") == "tool":
+        if tool_call_id := entry.get("tool_call_id"):
+            tool_call_ids.append(str(tool_call_id))
+
+    # Anthropics: check content items for tool_result type.
+    if entry.get("role") == "user":
+        content = entry.get("content")
+        if isinstance(content, list):
+            for item in content:
+                if item.get("type") != "tool_result":
+                    continue
+                if tool_call_id := item.get("tool_use_id"):
+                    tool_call_ids.append(tool_call_id)
+
+    return tool_call_ids
+
+
+def _create_tool_response(call_id: str, output: dict[str, Any]) -> dict[str, Any]:
+    """
+    Create a tool response message for either OpenAI or Anthropics,
+    based on the tool_id format.
+    """
+    content = output if isinstance(output, str) else json.dumps(output)
+
+    # Anthropics format: tool IDs typically start with "toolu_"
+    if call_id.startswith("toolu_"):
+        return {
+            "role": "user",
+            "type": "message",
+            "content": [
+                {"tool_use_id": call_id, "type": "tool_result", "content": content}
+            ],
+        }
+
+    # OpenAI format: tool IDs typically start with "call_".
+    # Or default fallback (if the tool_id doesn't match any known prefix)
+    return {"role": "tool", "tool_call_id": call_id, "content": content}
+
+
 def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     All the tool calls entry in the conversation history requires a response.
@@ -42,10 +117,10 @@ def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
     """
     pending_calls = Counter()
     for history in conversation_history:
-        for call in history.get("tool_calls") or []:
-            pending_calls[call.get("id")] += 1
+        for call_id in _get_tool_requests(history):
+            pending_calls[call_id] += 1
 
-        if call_id := history.get("tool_call_id"):
+        for call_id in _get_tool_responses(history):
             pending_calls[call_id] -= 1
 
     return {call_id: count for call_id, count in pending_calls.items() if count > 0}
@@ -70,7 +145,13 @@ class SmartDecisionMakerBlock(Block):
         credentials: llm.AICredentials = llm.AICredentialsField()
         sys_prompt: str = SchemaField(
             title="System Prompt",
-            default="Thinking carefully step by step decide which function to call. Always choose a function call from the list of function signatures.",
+            default="Thinking carefully step by step decide which function to call. "
+            "Always choose a function call from the list of function signatures, "
+            "and always provide the complete argument provided with the type "
+            "matching the required jsonschema signature, no missing argument is allowed. "
+            "If you have already completed the task objective, you can end the task "
+            "by providing the end result of your work as a finish message. "
" + "Only provide EXACTLY one function call, multiple tool calls is strictly prohibited.", description="The system prompt to provide additional context to the model.", ) conversation_history: list[dict] = SchemaField( @@ -122,7 +203,6 @@ class SmartDecisionMakerBlock(Block): conversation_history = data.get("conversation_history", []) pending_tool_calls = get_pending_tool_calls(conversation_history) - last_tool_output = data.get("last_tool_output") if not last_tool_output and pending_tool_calls: return {"last_tool_output"} @@ -347,17 +427,31 @@ class SmartDecisionMakerBlock(Block): # Prefill all missing tool calls with the last tool output/ # TODO: we need a better way to handle this. tool_output = [ - { - "role": "tool", - "content": input_data.last_tool_output, - "tool_call_id": pending_call_id, - } + _create_tool_response(pending_call_id, input_data.last_tool_output) for pending_call_id, count in pending_tool_calls.items() for _ in range(count) ] + + # If the SDM block only calls 1 tool at a time, this should not happen. if len(tool_output) > 1: logger.warning( - f"[node_exec_id={node_exec_id}] Multiple pending tool calls are prefilled using a single output. Execution may not be accurate." + f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] " + f"Multiple pending tool calls are prefilled using a single output. " + f"Execution may not be accurate." + ) + + # Fallback on adding tool output in the conversation history as user prompt. + if len(tool_output) == 0: + logger.warning( + f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] " + f"No pending tool calls found. This may indicate an issue with the " + f"conversation history, or an LLM calling two tools at the same time." + ) + tool_output.append( + { + "role": "user", + "content": f"Last tool output: {json.dumps(input_data.last_tool_output)}", + } ) prompt.extend(tool_output) @@ -367,11 +461,17 @@ class SmartDecisionMakerBlock(Block): input_data.prompt = llm.fmt.format_string(input_data.prompt, values) input_data.sys_prompt = llm.fmt.format_string(input_data.sys_prompt, values) - if input_data.sys_prompt: - prompt.append({"role": "system", "content": input_data.sys_prompt}) + prefix = "[Main Objective Prompt]: " - if input_data.prompt: - prompt.append({"role": "user", "content": input_data.prompt}) + if input_data.sys_prompt and not any( + p["role"] == "system" and p["content"].startswith(prefix) for p in prompt + ): + prompt.append({"role": "system", "content": prefix + input_data.sys_prompt}) + + if input_data.prompt and not any( + p["role"] == "user" and p["content"].startswith(prefix) for p in prompt + ): + prompt.append({"role": "user", "content": prefix + input_data.prompt}) response = llm.llm_call( credentials=credentials, @@ -384,7 +484,7 @@ class SmartDecisionMakerBlock(Block): ) if not response.tool_calls: - yield "finished", f"No Decision Made finishing task: {response.response}" + yield "finished", response.response return for tool_call in response.tool_calls: diff --git a/autogpt_platform/frontend/src/components/OutputModalComponent.tsx b/autogpt_platform/frontend/src/components/OutputModalComponent.tsx index 082bdb022d..8cd7efa451 100644 --- a/autogpt_platform/frontend/src/components/OutputModalComponent.tsx +++ b/autogpt_platform/frontend/src/components/OutputModalComponent.tsx @@ -29,7 +29,12 @@ const OutputModalComponent: FC = ({
         {executionResults.map((data, i) => ( <> - + ))}
diff --git a/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx b/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
index 1ce8aa389f..84cc3b15f4 100644
--- a/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
+++ b/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
@@ -325,7 +325,10 @@ export default function useAgentGraph(
                   ...(node.data.executionResults || []),
                   {
                     execId: executionData.node_exec_id,
-                    data: executionData.output_data,
+                    data: {
+                      "[Input]": [executionData.input_data],
+                      ...executionData.output_data,
+                    },
                   },
                 ]
               : node.data.executionResults,
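
For reviewers skimming the `llm.py` hunk above, the sketch below is an illustrative re-statement of the new message-merging rule and is not part of the patch: consecutive same-role messages are concatenated only when both contents are plain strings, so list-shaped contents (e.g. `tool_use` / `tool_result` items) are never mangled by string concatenation. The helper name `merge_consecutive` and the sample messages are invented for illustration.

```python
def merge_consecutive(prompt: list[dict]) -> list[dict]:
    """Collapse consecutive same-role messages, mirroring the rule in the diff above."""
    messages: list[dict] = []
    last_role = None
    for p in prompt:
        if p["role"] not in ("user", "assistant"):
            continue  # system prompts are passed to Anthropic separately
        if (
            p["role"] == last_role
            and messages
            and isinstance(messages[-1]["content"], str)
            and isinstance(p["content"], str)
        ):
            # Same role and both contents are plain strings: safe to concatenate.
            messages[-1]["content"] += p["content"]
        else:
            # Different role, or either content is a list (e.g. a tool_result item):
            # keep it as its own message.
            messages.append({"role": p["role"], "content": p["content"]})
            last_role = p["role"]
    return messages


merged = merge_consecutive(
    [
        {"role": "user", "content": "Objective: plan a trip. "},
        {"role": "user", "content": "The budget is $500."},
        {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}]},
    ]
)
assert len(merged) == 2  # the two string messages merge; the tool_result message stays separate
```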
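Likewise, here is a minimal, self-contained sketch of the two tool-calling message shapes that `_get_tool_requests` / `_get_tool_responses` are written to recognize, and of the request/response bookkeeping that `get_pending_tool_calls` performs. The IDs, the `search` tool name, and the toy `count_pending` helper are invented for illustration and are not part of the patch.

```python
import json
from collections import Counter

# OpenAI shape: the assistant message carries "tool_calls"; the reply uses role "tool".
openai_request = {
    "role": "assistant",
    "tool_calls": [
        {
            "id": "call_abc123",  # made-up ID
            "type": "function",
            "function": {"name": "search", "arguments": json.dumps({"query": "weather"})},
        }
    ],
}
openai_response = {"role": "tool", "tool_call_id": "call_abc123", "content": '{"result": "sunny"}'}

# Anthropic shape: the assistant message carries a "tool_use" content item; the reply is
# a *user* message containing a "tool_result" item that references the same ID.
anthropic_request = {
    "role": "assistant",
    "content": [{"type": "tool_use", "id": "toolu_xyz789", "name": "search", "input": {"query": "weather"}}],
}
anthropic_response = {
    "role": "user",
    "content": [{"type": "tool_result", "tool_use_id": "toolu_xyz789", "content": '{"result": "sunny"}'}],
}


def count_pending(history: list[dict]) -> dict[str, int]:
    """Toy version of the bookkeeping: +1 per request ID, -1 per matching response ID."""
    pending: Counter[str] = Counter()
    for entry in history:
        content = entry.get("content")
        items = content if isinstance(content, list) else []
        if entry.get("role") == "assistant":
            for call in entry.get("tool_calls") or []:  # OpenAI requests
                pending[call["id"]] += 1
            for item in items:  # Anthropic requests
                if item.get("type") == "tool_use":
                    pending[item["id"]] += 1
        if entry.get("role") == "tool" and entry.get("tool_call_id"):  # OpenAI responses
            pending[entry["tool_call_id"]] -= 1
        if entry.get("role") == "user":
            for item in items:  # Anthropic responses
                if item.get("type") == "tool_result":
                    pending[item["tool_use_id"]] -= 1
    return {call_id: count for call_id, count in pending.items() if count > 0}


assert count_pending([openai_request, openai_response]) == {}
assert count_pending([anthropic_request]) == {"toolu_xyz789": 1}
assert count_pending([anthropic_request, anthropic_response]) == {}
```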