diff --git a/autogpt_platform/backend/backend/api/features/chat/model.py b/autogpt_platform/backend/backend/api/features/chat/model.py index 7318ef88d7..35418f174f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/model.py +++ b/autogpt_platform/backend/backend/api/features/chat/model.py @@ -2,7 +2,7 @@ import asyncio import logging import uuid from datetime import UTC, datetime -from typing import Any +from typing import Any, cast from weakref import WeakValueDictionary from openai.types.chat import ( @@ -104,6 +104,26 @@ class ChatSession(BaseModel): successful_agent_runs: dict[str, int] = {} successful_agent_schedules: dict[str, int] = {} + def add_tool_call_to_current_turn(self, tool_call: dict) -> None: + """Attach a tool_call to the current turn's assistant message. + + Searches backwards for the most recent assistant message (stopping at + any user message boundary). If found, appends the tool_call to it. + Otherwise creates a new assistant message with the tool_call. + """ + for msg in reversed(self.messages): + if msg.role == "user": + break + if msg.role == "assistant": + if not msg.tool_calls: + msg.tool_calls = [] + msg.tool_calls.append(tool_call) + return + + self.messages.append( + ChatMessage(role="assistant", content="", tool_calls=[tool_call]) + ) + @staticmethod def new(user_id: str) -> "ChatSession": return ChatSession( @@ -172,6 +192,47 @@ class ChatSession(BaseModel): successful_agent_schedules=successful_agent_schedules, ) + @staticmethod + def _merge_consecutive_assistant_messages( + messages: list[ChatCompletionMessageParam], + ) -> list[ChatCompletionMessageParam]: + """Merge consecutive assistant messages into single messages. + + Long-running tool flows can create split assistant messages: one with + text content and another with tool_calls. Anthropic's API requires + tool_result blocks to reference a tool_use in the immediately preceding + assistant message, so these splits cause 400 errors via OpenRouter. + """ + if len(messages) < 2: + return messages + + result: list[ChatCompletionMessageParam] = [messages[0]] + for msg in messages[1:]: + prev = result[-1] + if prev.get("role") != "assistant" or msg.get("role") != "assistant": + result.append(msg) + continue + + prev = cast(ChatCompletionAssistantMessageParam, prev) + curr = cast(ChatCompletionAssistantMessageParam, msg) + + curr_content = curr.get("content") or "" + if curr_content: + prev_content = prev.get("content") or "" + prev["content"] = ( + f"{prev_content}\n{curr_content}" if prev_content else curr_content + ) + + curr_tool_calls = curr.get("tool_calls") + if curr_tool_calls: + prev_tool_calls = prev.get("tool_calls") + prev["tool_calls"] = ( + list(prev_tool_calls) + list(curr_tool_calls) + if prev_tool_calls + else list(curr_tool_calls) + ) + return result + def to_openai_messages(self) -> list[ChatCompletionMessageParam]: messages = [] for message in self.messages: @@ -258,7 +319,7 @@ class ChatSession(BaseModel): name=message.name or "", ) ) - return messages + return self._merge_consecutive_assistant_messages(messages) async def _get_session_from_cache(session_id: str) -> ChatSession | None: diff --git a/autogpt_platform/backend/backend/api/features/chat/model_test.py b/autogpt_platform/backend/backend/api/features/chat/model_test.py index c230b00f9c..239137844d 100644 --- a/autogpt_platform/backend/backend/api/features/chat/model_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/model_test.py @@ -1,4 +1,16 @@ +from typing import cast + import pytest +from openai.types.chat import ( + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +) +from openai.types.chat.chat_completion_message_tool_call_param import ( + ChatCompletionMessageToolCallParam, + Function, +) from .model import ( ChatMessage, @@ -117,3 +129,205 @@ async def test_chatsession_db_storage(setup_test_user, test_user_id): loaded.tool_calls is not None ), f"Tool calls missing for {orig.role} message" assert len(orig.tool_calls) == len(loaded.tool_calls) + + +# --------------------------------------------------------------------------- # +# _merge_consecutive_assistant_messages # +# --------------------------------------------------------------------------- # + +_tc = ChatCompletionMessageToolCallParam( + id="tc1", type="function", function=Function(name="do_stuff", arguments="{}") +) +_tc2 = ChatCompletionMessageToolCallParam( + id="tc2", type="function", function=Function(name="other", arguments="{}") +) + + +def test_merge_noop_when_no_consecutive_assistants(): + """Messages without consecutive assistants are returned unchanged.""" + msgs = [ + ChatCompletionUserMessageParam(role="user", content="hi"), + ChatCompletionAssistantMessageParam(role="assistant", content="hello"), + ChatCompletionUserMessageParam(role="user", content="bye"), + ] + merged = ChatSession._merge_consecutive_assistant_messages(msgs) + assert len(merged) == 3 + assert [m["role"] for m in merged] == ["user", "assistant", "user"] + + +def test_merge_splits_text_and_tool_calls(): + """The exact bug scenario: text-only assistant followed by tool_calls-only assistant.""" + msgs = [ + ChatCompletionUserMessageParam(role="user", content="build agent"), + ChatCompletionAssistantMessageParam( + role="assistant", content="Let me build that" + ), + ChatCompletionAssistantMessageParam( + role="assistant", content="", tool_calls=[_tc] + ), + ChatCompletionToolMessageParam(role="tool", content="ok", tool_call_id="tc1"), + ] + merged = ChatSession._merge_consecutive_assistant_messages(msgs) + + assert len(merged) == 3 + assert merged[0]["role"] == "user" + assert merged[2]["role"] == "tool" + a = cast(ChatCompletionAssistantMessageParam, merged[1]) + assert a["role"] == "assistant" + assert a.get("content") == "Let me build that" + assert a.get("tool_calls") == [_tc] + + +def test_merge_combines_tool_calls_from_both(): + """Both consecutive assistants have tool_calls — they get merged.""" + msgs: list[ChatCompletionAssistantMessageParam] = [ + ChatCompletionAssistantMessageParam( + role="assistant", content="text", tool_calls=[_tc] + ), + ChatCompletionAssistantMessageParam( + role="assistant", content="", tool_calls=[_tc2] + ), + ] + merged = ChatSession._merge_consecutive_assistant_messages(msgs) # type: ignore[arg-type] + + assert len(merged) == 1 + a = cast(ChatCompletionAssistantMessageParam, merged[0]) + assert a.get("tool_calls") == [_tc, _tc2] + assert a.get("content") == "text" + + +def test_merge_three_consecutive_assistants(): + """Three consecutive assistants collapse into one.""" + msgs: list[ChatCompletionAssistantMessageParam] = [ + ChatCompletionAssistantMessageParam(role="assistant", content="a"), + ChatCompletionAssistantMessageParam(role="assistant", content="b"), + ChatCompletionAssistantMessageParam( + role="assistant", content="", tool_calls=[_tc] + ), + ] + merged = ChatSession._merge_consecutive_assistant_messages(msgs) # type: ignore[arg-type] + + assert len(merged) == 1 + a = cast(ChatCompletionAssistantMessageParam, merged[0]) + assert a.get("content") == "a\nb" + assert a.get("tool_calls") == [_tc] + + +def test_merge_empty_and_single_message(): + """Edge cases: empty list and single message.""" + assert ChatSession._merge_consecutive_assistant_messages([]) == [] + + single: list[ChatCompletionMessageParam] = [ + ChatCompletionUserMessageParam(role="user", content="hi") + ] + assert ChatSession._merge_consecutive_assistant_messages(single) == single + + +# --------------------------------------------------------------------------- # +# add_tool_call_to_current_turn # +# --------------------------------------------------------------------------- # + +_raw_tc = { + "id": "tc1", + "type": "function", + "function": {"name": "f", "arguments": "{}"}, +} +_raw_tc2 = { + "id": "tc2", + "type": "function", + "function": {"name": "g", "arguments": "{}"}, +} + + +def test_add_tool_call_appends_to_existing_assistant(): + """When the last assistant is from the current turn, tool_call is added to it.""" + session = ChatSession.new(user_id="u") + session.messages = [ + ChatMessage(role="user", content="hi"), + ChatMessage(role="assistant", content="working on it"), + ] + session.add_tool_call_to_current_turn(_raw_tc) + + assert len(session.messages) == 2 # no new message created + assert session.messages[1].tool_calls == [_raw_tc] + + +def test_add_tool_call_creates_assistant_when_none_exists(): + """When there's no current-turn assistant, a new one is created.""" + session = ChatSession.new(user_id="u") + session.messages = [ + ChatMessage(role="user", content="hi"), + ] + session.add_tool_call_to_current_turn(_raw_tc) + + assert len(session.messages) == 2 + assert session.messages[1].role == "assistant" + assert session.messages[1].tool_calls == [_raw_tc] + + +def test_add_tool_call_does_not_cross_user_boundary(): + """A user message acts as a boundary — previous assistant is not modified.""" + session = ChatSession.new(user_id="u") + session.messages = [ + ChatMessage(role="assistant", content="old turn"), + ChatMessage(role="user", content="new message"), + ] + session.add_tool_call_to_current_turn(_raw_tc) + + assert len(session.messages) == 3 # new assistant was created + assert session.messages[0].tool_calls is None # old assistant untouched + assert session.messages[2].role == "assistant" + assert session.messages[2].tool_calls == [_raw_tc] + + +def test_add_tool_call_multiple_times(): + """Multiple long-running tool calls accumulate on the same assistant.""" + session = ChatSession.new(user_id="u") + session.messages = [ + ChatMessage(role="user", content="hi"), + ChatMessage(role="assistant", content="doing stuff"), + ] + session.add_tool_call_to_current_turn(_raw_tc) + # Simulate a pending tool result in between (like _yield_tool_call does) + session.messages.append( + ChatMessage(role="tool", content="pending", tool_call_id="tc1") + ) + session.add_tool_call_to_current_turn(_raw_tc2) + + assert len(session.messages) == 3 # user, assistant, tool — no extra assistant + assert session.messages[1].tool_calls == [_raw_tc, _raw_tc2] + + +def test_to_openai_messages_merges_split_assistants(): + """End-to-end: session with split assistants produces valid OpenAI messages.""" + session = ChatSession.new(user_id="u") + session.messages = [ + ChatMessage(role="user", content="build agent"), + ChatMessage(role="assistant", content="Let me build that"), + ChatMessage( + role="assistant", + content="", + tool_calls=[ + { + "id": "tc1", + "type": "function", + "function": {"name": "create_agent", "arguments": "{}"}, + } + ], + ), + ChatMessage(role="tool", content="done", tool_call_id="tc1"), + ChatMessage(role="assistant", content="Saved!"), + ChatMessage(role="user", content="show me an example run"), + ] + openai_msgs = session.to_openai_messages() + + # The two consecutive assistants at index 1,2 should be merged + roles = [m["role"] for m in openai_msgs] + assert roles == ["user", "assistant", "tool", "assistant", "user"] + + # The merged assistant should have both content and tool_calls + merged = cast(ChatCompletionAssistantMessageParam, openai_msgs[1]) + assert merged.get("content") == "Let me build that" + tc_list = merged.get("tool_calls") + assert tc_list is not None and len(list(tc_list)) == 1 + assert list(tc_list)[0]["id"] == "tc1" diff --git a/autogpt_platform/backend/backend/api/features/chat/response_model.py b/autogpt_platform/backend/backend/api/features/chat/response_model.py index 1ae836f7d1..8ea0c1f97a 100644 --- a/autogpt_platform/backend/backend/api/features/chat/response_model.py +++ b/autogpt_platform/backend/backend/api/features/chat/response_model.py @@ -10,6 +10,8 @@ from typing import Any from pydantic import BaseModel, Field +from backend.util.json import dumps as json_dumps + class ResponseType(str, Enum): """Types of streaming responses following AI SDK protocol.""" @@ -193,6 +195,18 @@ class StreamError(StreamBaseResponse): default=None, description="Additional error details" ) + def to_sse(self) -> str: + """Convert to SSE format, only emitting fields required by AI SDK protocol. + + The AI SDK uses z.strictObject({type, errorText}) which rejects + any extra fields like `code` or `details`. + """ + data = { + "type": self.type.value, + "errorText": self.errorText, + } + return f"data: {json_dumps(data)}\n\n" + class StreamHeartbeat(StreamBaseResponse): """Heartbeat to keep SSE connection alive during long-running operations. diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 072ea88fd5..193566ea01 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -800,9 +800,13 @@ async def stream_chat_completion( # Build the messages list in the correct order messages_to_save: list[ChatMessage] = [] - # Add assistant message with tool_calls if any + # Add assistant message with tool_calls if any. + # Use extend (not assign) to preserve tool_calls already added by + # _yield_tool_call for long-running tools. if accumulated_tool_calls: - assistant_response.tool_calls = accumulated_tool_calls + if not assistant_response.tool_calls: + assistant_response.tool_calls = [] + assistant_response.tool_calls.extend(accumulated_tool_calls) logger.info( f"Added {len(accumulated_tool_calls)} tool calls to assistant message" ) @@ -1404,13 +1408,9 @@ async def _yield_tool_call( operation_id=operation_id, ) - # Save assistant message with tool_call FIRST (required by LLM) - assistant_message = ChatMessage( - role="assistant", - content="", - tool_calls=[tool_calls[yield_idx]], - ) - session.messages.append(assistant_message) + # Attach the tool_call to the current turn's assistant message + # (or create one if this is a tool-only response with no text). + session.add_tool_call_to_current_turn(tool_calls[yield_idx]) # Then save pending tool result pending_message = ChatMessage( diff --git a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py index 568ac4b33f..d31f90ec81 100644 --- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py +++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py @@ -21,43 +21,71 @@ logger = logging.getLogger(__name__) class HumanInTheLoopBlock(Block): """ - This block pauses execution and waits for human approval or modification of the data. + Pauses execution and waits for human approval or rejection of the data. - When executed, it creates a pending review entry and sets the node execution status - to REVIEW. The execution will remain paused until a human user either: - - Approves the data (with or without modifications) - - Rejects the data + When executed, this block creates a pending review entry and sets the node execution + status to REVIEW. The execution remains paused until a human user either approves + or rejects the data. - This is useful for workflows that require human validation or intervention before - proceeding to the next steps. + **How it works:** + - The input data is presented to a human reviewer + - The reviewer can approve or reject (and optionally modify the data if editable) + - On approval: the data flows out through the `approved_data` output pin + - On rejection: the data flows out through the `rejected_data` output pin + + **Important:** The output pins yield the actual data itself, NOT status strings. + The approval/rejection decision determines WHICH output pin fires, not the value. + You do NOT need to compare the output to "APPROVED" or "REJECTED" - simply connect + downstream blocks to the appropriate output pin for each case. + + **Example usage:** + - Connect `approved_data` → next step in your workflow (data was approved) + - Connect `rejected_data` → error handling or notification (data was rejected) """ class Input(BlockSchemaInput): - data: Any = SchemaField(description="The data to be reviewed by a human user") + data: Any = SchemaField( + description="The data to be reviewed by a human user. " + "This exact data will be passed through to either approved_data or " + "rejected_data output based on the reviewer's decision." + ) name: str = SchemaField( - description="A descriptive name for what this data represents", + description="A descriptive name for what this data represents. " + "This helps the reviewer understand what they are reviewing.", ) editable: bool = SchemaField( - description="Whether the human reviewer can edit the data", + description="Whether the human reviewer can edit the data before " + "approving or rejecting it", default=True, advanced=True, ) class Output(BlockSchemaOutput): approved_data: Any = SchemaField( - description="The data when approved (may be modified by reviewer)" + description="Outputs the input data when the reviewer APPROVES it. " + "The value is the actual data itself (not a status string like 'APPROVED'). " + "If the reviewer edited the data, this contains the modified version. " + "Connect downstream blocks here for the 'approved' workflow path." ) rejected_data: Any = SchemaField( - description="The data when rejected (may be modified by reviewer)" + description="Outputs the input data when the reviewer REJECTS it. " + "The value is the actual data itself (not a status string like 'REJECTED'). " + "If the reviewer edited the data, this contains the modified version. " + "Connect downstream blocks here for the 'rejected' workflow path." ) review_message: str = SchemaField( - description="Any message provided by the reviewer", default="" + description="Optional message provided by the reviewer explaining their " + "decision. Only outputs when the reviewer provides a message; " + "this pin does not fire if no message was given.", + default="", ) def __init__(self): super().__init__( id="8b2a7b3c-6e9d-4a5f-8c1b-2e3f4a5b6c7d", - description="Pause execution and wait for human approval or modification of data", + description="Pause execution for human review. Data flows through " + "approved_data or rejected_data output based on the reviewer's decision. " + "Outputs contain the actual data, not status strings.", categories={BlockCategory.BASIC}, input_schema=HumanInTheLoopBlock.Input, output_schema=HumanInTheLoopBlock.Output, diff --git a/autogpt_platform/backend/backend/util/prompt.py b/autogpt_platform/backend/backend/util/prompt.py index 5f904bbc8a..3ec25dd61b 100644 --- a/autogpt_platform/backend/backend/util/prompt.py +++ b/autogpt_platform/backend/backend/util/prompt.py @@ -364,6 +364,44 @@ def _remove_orphan_tool_responses( return result +def validate_and_remove_orphan_tool_responses( + messages: list[dict], + log_warning: bool = True, +) -> list[dict]: + """ + Validate tool_call/tool_response pairs and remove orphaned responses. + + Scans messages in order, tracking all tool_call IDs. Any tool response + referencing an ID not seen in a preceding message is considered orphaned + and removed. This prevents API errors like Anthropic's "unexpected tool_use_id". + + Args: + messages: List of messages to validate (OpenAI or Anthropic format) + log_warning: Whether to log a warning when orphans are found + + Returns: + A new list with orphaned tool responses removed + """ + available_ids: set[str] = set() + orphan_ids: set[str] = set() + + for msg in messages: + available_ids |= _extract_tool_call_ids_from_message(msg) + for resp_id in _extract_tool_response_ids_from_message(msg): + if resp_id not in available_ids: + orphan_ids.add(resp_id) + + if not orphan_ids: + return messages + + if log_warning: + logger.warning( + f"Removing {len(orphan_ids)} orphan tool response(s): {orphan_ids}" + ) + + return _remove_orphan_tool_responses(messages, orphan_ids) + + def _ensure_tool_pairs_intact( recent_messages: list[dict], all_messages: list[dict], @@ -723,6 +761,13 @@ async def compress_context( # Filter out any None values that may have been introduced final_msgs: list[dict] = [m for m in msgs if m is not None] + + # ---- STEP 6: Final tool-pair validation --------------------------------- + # After all compression steps, verify that every tool response has a + # matching tool_call in a preceding assistant message. Remove orphans + # to prevent API errors (e.g., Anthropic's "unexpected tool_use_id"). + final_msgs = validate_and_remove_orphan_tool_responses(final_msgs) + final_count = sum(_msg_tokens(m, enc) for m in final_msgs) error = None if final_count + reserve > target_tokens: diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx index 4578b268e3..fbe1c03d1d 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx @@ -10,8 +10,9 @@ import { MessageResponse, } from "@/components/ai-elements/message"; import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner"; +import { toast } from "@/components/molecules/Toast/use-toast"; import { ToolUIPart, UIDataTypes, UIMessage, UITools } from "ai"; -import { useEffect, useState } from "react"; +import { useEffect, useRef, useState } from "react"; import { CreateAgentTool } from "../../tools/CreateAgent/CreateAgent"; import { EditAgentTool } from "../../tools/EditAgent/EditAgent"; import { FindAgentsTool } from "../../tools/FindAgents/FindAgents"; @@ -121,6 +122,7 @@ export const ChatMessagesContainer = ({ isLoading, }: ChatMessagesContainerProps) => { const [thinkingPhrase, setThinkingPhrase] = useState(getRandomPhrase); + const lastToastTimeRef = useRef(0); useEffect(() => { if (status === "submitted") { @@ -128,6 +130,20 @@ export const ChatMessagesContainer = ({ } }, [status]); + // Show a toast when a new error occurs, debounced to avoid spam + useEffect(() => { + if (!error) return; + const now = Date.now(); + if (now - lastToastTimeRef.current < 3_000) return; + lastToastTimeRef.current = now; + toast({ + variant: "destructive", + title: "Something went wrong", + description: + "The assistant encountered an error. Please try sending your message again.", + }); + }, [error]); + const lastMessage = messages[messages.length - 1]; const lastAssistantHasVisibleContent = lastMessage?.role === "assistant" && @@ -263,8 +279,12 @@ export const ChatMessagesContainer = ({ )} {error && ( -
Something went wrong
++ The assistant encountered an error. Please try sending your + message again. +