Merge remote-tracking branch 'origin/dev' into lluisagusti/secrt-1926-agent-generation-completes-but-ui-does-not-update-refresh

2026-02-13 08:14:58 -05:00 · 2026-02-12 18:53:12 +08:00
parent 4eff3fd581 a78145505b
commit 660fc0540c
6 changed files with 336 additions and 33 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -2,7 +2,7 @@ import asyncio
 import logging
 import uuid
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, cast
 from weakref import WeakValueDictionary

 from openai.types.chat import (
@@ -104,6 +104,26 @@ class ChatSession(BaseModel):
    successful_agent_runs: dict[str, int] = {}
    successful_agent_schedules: dict[str, int] = {}

+    def add_tool_call_to_current_turn(self, tool_call: dict) -> None:
+        """Attach a tool_call to the current turn's assistant message.
+
+        Searches backwards for the most recent assistant message (stopping at
+        any user message boundary). If found, appends the tool_call to it.
+        Otherwise creates a new assistant message with the tool_call.
+        """
+        for msg in reversed(self.messages):
+            if msg.role == "user":
+                break
+            if msg.role == "assistant":
+                if not msg.tool_calls:
+                    msg.tool_calls = []
+                msg.tool_calls.append(tool_call)
+                return
+
+        self.messages.append(
+            ChatMessage(role="assistant", content="", tool_calls=[tool_call])
+        )
+
    @staticmethod
    def new(user_id: str) -> "ChatSession":
        return ChatSession(
@@ -172,6 +192,47 @@ class ChatSession(BaseModel):
            successful_agent_schedules=successful_agent_schedules,
        )

+    @staticmethod
+    def _merge_consecutive_assistant_messages(
+        messages: list[ChatCompletionMessageParam],
+    ) -> list[ChatCompletionMessageParam]:
+        """Merge consecutive assistant messages into single messages.
+
+        Long-running tool flows can create split assistant messages: one with
+        text content and another with tool_calls. Anthropic's API requires
+        tool_result blocks to reference a tool_use in the immediately preceding
+        assistant message, so these splits cause 400 errors via OpenRouter.
+        """
+        if len(messages) < 2:
+            return messages
+
+        result: list[ChatCompletionMessageParam] = [messages[0]]
+        for msg in messages[1:]:
+            prev = result[-1]
+            if prev.get("role") != "assistant" or msg.get("role") != "assistant":
+                result.append(msg)
+                continue
+
+            prev = cast(ChatCompletionAssistantMessageParam, prev)
+            curr = cast(ChatCompletionAssistantMessageParam, msg)
+
+            curr_content = curr.get("content") or ""
+            if curr_content:
+                prev_content = prev.get("content") or ""
+                prev["content"] = (
+                    f"{prev_content}\n{curr_content}" if prev_content else curr_content
+                )
+
+            curr_tool_calls = curr.get("tool_calls")
+            if curr_tool_calls:
+                prev_tool_calls = prev.get("tool_calls")
+                prev["tool_calls"] = (
+                    list(prev_tool_calls) + list(curr_tool_calls)
+                    if prev_tool_calls
+                    else list(curr_tool_calls)
+                )
+        return result
+
    def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
        messages = []
        for message in self.messages:
@@ -258,7 +319,7 @@ class ChatSession(BaseModel):
                        name=message.name or "",
                    )
                )
-        return messages
+        return self._merge_consecutive_assistant_messages(messages)


 async def _get_session_from_cache(session_id: str) -> ChatSession | None:
--- a/autogpt_platform/backend/backend/api/features/chat/model_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model_test.py
@@ -1,4 +1,16 @@
+from typing import cast
+
 import pytest
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+)
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    ChatCompletionMessageToolCallParam,
+    Function,
+)

 from .model import (
    ChatMessage,
@@ -117,3 +129,205 @@ async def test_chatsession_db_storage(setup_test_user, test_user_id):
                loaded.tool_calls is not None
            ), f"Tool calls missing for {orig.role} message"
            assert len(orig.tool_calls) == len(loaded.tool_calls)
+
+
+# --------------------------------------------------------------------------- #
+#  _merge_consecutive_assistant_messages                                       #
+# --------------------------------------------------------------------------- #
+
+_tc = ChatCompletionMessageToolCallParam(
+    id="tc1", type="function", function=Function(name="do_stuff", arguments="{}")
+)
+_tc2 = ChatCompletionMessageToolCallParam(
+    id="tc2", type="function", function=Function(name="other", arguments="{}")
+)
+
+
+def test_merge_noop_when_no_consecutive_assistants():
+    """Messages without consecutive assistants are returned unchanged."""
+    msgs = [
+        ChatCompletionUserMessageParam(role="user", content="hi"),
+        ChatCompletionAssistantMessageParam(role="assistant", content="hello"),
+        ChatCompletionUserMessageParam(role="user", content="bye"),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)
+    assert len(merged) == 3
+    assert [m["role"] for m in merged] == ["user", "assistant", "user"]
+
+
+def test_merge_splits_text_and_tool_calls():
+    """The exact bug scenario: text-only assistant followed by tool_calls-only assistant."""
+    msgs = [
+        ChatCompletionUserMessageParam(role="user", content="build agent"),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="Let me build that"
+        ),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc]
+        ),
+        ChatCompletionToolMessageParam(role="tool", content="ok", tool_call_id="tc1"),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)
+
+    assert len(merged) == 3
+    assert merged[0]["role"] == "user"
+    assert merged[2]["role"] == "tool"
+    a = cast(ChatCompletionAssistantMessageParam, merged[1])
+    assert a["role"] == "assistant"
+    assert a.get("content") == "Let me build that"
+    assert a.get("tool_calls") == [_tc]
+
+
+def test_merge_combines_tool_calls_from_both():
+    """Both consecutive assistants have tool_calls — they get merged."""
+    msgs: list[ChatCompletionAssistantMessageParam] = [
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="text", tool_calls=[_tc]
+        ),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc2]
+        ),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)  # type: ignore[arg-type]
+
+    assert len(merged) == 1
+    a = cast(ChatCompletionAssistantMessageParam, merged[0])
+    assert a.get("tool_calls") == [_tc, _tc2]
+    assert a.get("content") == "text"
+
+
+def test_merge_three_consecutive_assistants():
+    """Three consecutive assistants collapse into one."""
+    msgs: list[ChatCompletionAssistantMessageParam] = [
+        ChatCompletionAssistantMessageParam(role="assistant", content="a"),
+        ChatCompletionAssistantMessageParam(role="assistant", content="b"),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc]
+        ),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)  # type: ignore[arg-type]
+
+    assert len(merged) == 1
+    a = cast(ChatCompletionAssistantMessageParam, merged[0])
+    assert a.get("content") == "a\nb"
+    assert a.get("tool_calls") == [_tc]
+
+
+def test_merge_empty_and_single_message():
+    """Edge cases: empty list and single message."""
+    assert ChatSession._merge_consecutive_assistant_messages([]) == []
+
+    single: list[ChatCompletionMessageParam] = [
+        ChatCompletionUserMessageParam(role="user", content="hi")
+    ]
+    assert ChatSession._merge_consecutive_assistant_messages(single) == single
+
+
+# --------------------------------------------------------------------------- #
+#  add_tool_call_to_current_turn                                               #
+# --------------------------------------------------------------------------- #
+
+_raw_tc = {
+    "id": "tc1",
+    "type": "function",
+    "function": {"name": "f", "arguments": "{}"},
+}
+_raw_tc2 = {
+    "id": "tc2",
+    "type": "function",
+    "function": {"name": "g", "arguments": "{}"},
+}
+
+
+def test_add_tool_call_appends_to_existing_assistant():
+    """When the last assistant is from the current turn, tool_call is added to it."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+        ChatMessage(role="assistant", content="working on it"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 2  # no new message created
+    assert session.messages[1].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_creates_assistant_when_none_exists():
+    """When there's no current-turn assistant, a new one is created."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 2
+    assert session.messages[1].role == "assistant"
+    assert session.messages[1].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_does_not_cross_user_boundary():
+    """A user message acts as a boundary — previous assistant is not modified."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="assistant", content="old turn"),
+        ChatMessage(role="user", content="new message"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 3  # new assistant was created
+    assert session.messages[0].tool_calls is None  # old assistant untouched
+    assert session.messages[2].role == "assistant"
+    assert session.messages[2].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_multiple_times():
+    """Multiple long-running tool calls accumulate on the same assistant."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+        ChatMessage(role="assistant", content="doing stuff"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+    # Simulate a pending tool result in between (like _yield_tool_call does)
+    session.messages.append(
+        ChatMessage(role="tool", content="pending", tool_call_id="tc1")
+    )
+    session.add_tool_call_to_current_turn(_raw_tc2)
+
+    assert len(session.messages) == 3  # user, assistant, tool — no extra assistant
+    assert session.messages[1].tool_calls == [_raw_tc, _raw_tc2]
+
+
+def test_to_openai_messages_merges_split_assistants():
+    """End-to-end: session with split assistants produces valid OpenAI messages."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="build agent"),
+        ChatMessage(role="assistant", content="Let me build that"),
+        ChatMessage(
+            role="assistant",
+            content="",
+            tool_calls=[
+                {
+                    "id": "tc1",
+                    "type": "function",
+                    "function": {"name": "create_agent", "arguments": "{}"},
+                }
+            ],
+        ),
+        ChatMessage(role="tool", content="done", tool_call_id="tc1"),
+        ChatMessage(role="assistant", content="Saved!"),
+        ChatMessage(role="user", content="show me an example run"),
+    ]
+    openai_msgs = session.to_openai_messages()
+
+    # The two consecutive assistants at index 1,2 should be merged
+    roles = [m["role"] for m in openai_msgs]
+    assert roles == ["user", "assistant", "tool", "assistant", "user"]
+
+    # The merged assistant should have both content and tool_calls
+    merged = cast(ChatCompletionAssistantMessageParam, openai_msgs[1])
+    assert merged.get("content") == "Let me build that"
+    tc_list = merged.get("tool_calls")
+    assert tc_list is not None and len(list(tc_list)) == 1
+    assert list(tc_list)[0]["id"] == "tc1"
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -800,9 +800,13 @@ async def stream_chat_completion(
        # Build the messages list in the correct order
        messages_to_save: list[ChatMessage] = []

-        # Add assistant message with tool_calls if any
+        # Add assistant message with tool_calls if any.
+        # Use extend (not assign) to preserve tool_calls already added by
+        # _yield_tool_call for long-running tools.
        if accumulated_tool_calls:
-            assistant_response.tool_calls = accumulated_tool_calls
+            if not assistant_response.tool_calls:
+                assistant_response.tool_calls = []
+            assistant_response.tool_calls.extend(accumulated_tool_calls)
            logger.info(
                f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
            )
@@ -1404,13 +1408,9 @@ async def _yield_tool_call(
                operation_id=operation_id,
            )

-            # Save assistant message with tool_call FIRST (required by LLM)
-            assistant_message = ChatMessage(
-                role="assistant",
-                content="",
-                tool_calls=[tool_calls[yield_idx]],
-            )
-            session.messages.append(assistant_message)
+            # Attach the tool_call to the current turn's assistant message
+            # (or create one if this is a tool-only response with no text).
+            session.add_tool_call_to_current_turn(tool_calls[yield_idx])

            # Then save pending tool result
            pending_message = ChatMessage(
--- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
+++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
@@ -21,43 +21,71 @@ logger = logging.getLogger(__name__)

 class HumanInTheLoopBlock(Block):
    """
-    This block pauses execution and waits for human approval or modification of the data.
+    Pauses execution and waits for human approval or rejection of the data.

-    When executed, it creates a pending review entry and sets the node execution status
-    to REVIEW. The execution will remain paused until a human user either:
-    - Approves the data (with or without modifications)
-    - Rejects the data
+    When executed, this block creates a pending review entry and sets the node execution
+    status to REVIEW. The execution remains paused until a human user either approves
+    or rejects the data.

-    This is useful for workflows that require human validation or intervention before
-    proceeding to the next steps.
+    **How it works:**
+    - The input data is presented to a human reviewer
+    - The reviewer can approve or reject (and optionally modify the data if editable)
+    - On approval: the data flows out through the `approved_data` output pin
+    - On rejection: the data flows out through the `rejected_data` output pin
+
+    **Important:** The output pins yield the actual data itself, NOT status strings.
+    The approval/rejection decision determines WHICH output pin fires, not the value.
+    You do NOT need to compare the output to "APPROVED" or "REJECTED" - simply connect
+    downstream blocks to the appropriate output pin for each case.
+
+    **Example usage:**
+    - Connect `approved_data` → next step in your workflow (data was approved)
+    - Connect `rejected_data` → error handling or notification (data was rejected)
    """

    class Input(BlockSchemaInput):
-        data: Any = SchemaField(description="The data to be reviewed by a human user")
+        data: Any = SchemaField(
+            description="The data to be reviewed by a human user. "
+            "This exact data will be passed through to either approved_data or "
+            "rejected_data output based on the reviewer's decision."
+        )
        name: str = SchemaField(
-            description="A descriptive name for what this data represents",
+            description="A descriptive name for what this data represents. "
+            "This helps the reviewer understand what they are reviewing.",
        )
        editable: bool = SchemaField(
-            description="Whether the human reviewer can edit the data",
+            description="Whether the human reviewer can edit the data before "
+            "approving or rejecting it",
            default=True,
            advanced=True,
        )

    class Output(BlockSchemaOutput):
        approved_data: Any = SchemaField(
-            description="The data when approved (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer APPROVES it. "
+            "The value is the actual data itself (not a status string like 'APPROVED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'approved' workflow path."
        )
        rejected_data: Any = SchemaField(
-            description="The data when rejected (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer REJECTS it. "
+            "The value is the actual data itself (not a status string like 'REJECTED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'rejected' workflow path."
        )
        review_message: str = SchemaField(
-            description="Any message provided by the reviewer", default=""
+            description="Optional message provided by the reviewer explaining their "
+            "decision. Only outputs when the reviewer provides a message; "
+            "this pin does not fire if no message was given.",
+            default="",
        )

    def __init__(self):
        super().__init__(
            id="8b2a7b3c-6e9d-4a5f-8c1b-2e3f4a5b6c7d",
-            description="Pause execution and wait for human approval or modification of data",
+            description="Pause execution for human review. Data flows through "
+            "approved_data or rejected_data output based on the reviewer's decision. "
+            "Outputs contain the actual data, not status strings.",
            categories={BlockCategory.BASIC},
            input_schema=HumanInTheLoopBlock.Input,
            output_schema=HumanInTheLoopBlock.Output,
--- a/docs/integrations/README.md
+++ b/docs/integrations/README.md
@@ -61,7 +61,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
 | [Get List Item](block-integrations/basic.md#get-list-item) | Returns the element at the given index |
 | [Get Store Agent Details](block-integrations/system/store_operations.md#get-store-agent-details) | Get detailed information about an agent from the store |
 | [Get Weather Information](block-integrations/basic.md#get-weather-information) | Retrieves weather information for a specified location using OpenWeatherMap API |
-| [Human In The Loop](block-integrations/basic.md#human-in-the-loop) | Pause execution and wait for human approval or modification of data |
+| [Human In The Loop](block-integrations/basic.md#human-in-the-loop) | Pause execution for human review |
 | [List Is Empty](block-integrations/basic.md#list-is-empty) | Checks if a list is empty |
 | [List Library Agents](block-integrations/system/library_operations.md#list-library-agents) | List all agents in your personal library |
 | [Note](block-integrations/basic.md#note) | A visual annotation block that displays a sticky note in the workflow editor for documentation and organization purposes |
--- a/docs/integrations/block-integrations/basic.md
+++ b/docs/integrations/block-integrations/basic.md
@@ -975,7 +975,7 @@ A travel planning application could use this block to provide users with current
 ## Human In The Loop

 ### What it is
-Pause execution and wait for human approval or modification of data
+Pause execution for human review. Data flows through approved_data or rejected_data output based on the reviewer's decision. Outputs contain the actual data, not status strings.

 ### How it works
 <!-- MANUAL: how_it_works -->
@@ -988,18 +988,18 @@ This enables human oversight at critical points in automated workflows, ensuring

 | Input | Description | Type | Required |
 |-------|-------------|------|----------|
-| data | The data to be reviewed by a human user | Data | Yes |
-| name | A descriptive name for what this data represents | str | Yes |
-| editable | Whether the human reviewer can edit the data | bool | No |
+| data | The data to be reviewed by a human user. This exact data will be passed through to either approved_data or rejected_data output based on the reviewer's decision. | Data | Yes |
+| name | A descriptive name for what this data represents. This helps the reviewer understand what they are reviewing. | str | Yes |
+| editable | Whether the human reviewer can edit the data before approving or rejecting it | bool | No |

 ### Outputs

 | Output | Description | Type |
 |--------|-------------|------|
 | error | Error message if the operation failed | str |
-| approved_data | The data when approved (may be modified by reviewer) | Approved Data |
-| rejected_data | The data when rejected (may be modified by reviewer) | Rejected Data |
-| review_message | Any message provided by the reviewer | str |
+| approved_data | Outputs the input data when the reviewer APPROVES it. The value is the actual data itself (not a status string like 'APPROVED'). If the reviewer edited the data, this contains the modified version. Connect downstream blocks here for the 'approved' workflow path. | Approved Data |
+| rejected_data | Outputs the input data when the reviewer REJECTS it. The value is the actual data itself (not a status string like 'REJECTED'). If the reviewer edited the data, this contains the modified version. Connect downstream blocks here for the 'rejected' workflow path. | Rejected Data |
+| review_message | Optional message provided by the reviewer explaining their decision. Only outputs when the reviewer provides a message; this pin does not fire if no message was given. | str |

 ### Possible use case
 <!-- MANUAL: use_case -->