fix: skip binary file if stat fails to prevent OOM

If the stat command fails (file deleted, permissions issue, etc.), we now skip the file rather than proceeding to read it with an unknown size. This prevents potential OOM crashes from large files where size verification failed.
docs: Fix llm.md to match exact schema description
2026-02-12 07:45:14 -05:00 · 2026-02-12 12:32:13 +00:00 · 2026-02-12 12:25:29 +00:00 · 2026-02-12 12:02:45 +00:00 · 2026-02-12 11:58:35 +00:00 · 2026-02-12 11:53:57 +00:00
12 changed files with 486 additions and 198 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -2,7 +2,7 @@ import asyncio
 import logging
 import uuid
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, cast
 from weakref import WeakValueDictionary

 from openai.types.chat import (
@@ -104,6 +104,26 @@ class ChatSession(BaseModel):
    successful_agent_runs: dict[str, int] = {}
    successful_agent_schedules: dict[str, int] = {}

+    def add_tool_call_to_current_turn(self, tool_call: dict) -> None:
+        """Attach a tool_call to the current turn's assistant message.
+
+        Walks self.messages newest-first, skipping roles other than user and
+        assistant, and stops at the first user message. The first assistant
+        found gets tool_call appended in place; else a new one is appended.
+        """
+        for msg in reversed(self.messages):
+            if msg.role == "user":
+                break  # turn boundary: leave earlier turns' assistants untouched
+            if msg.role == "assistant":
+                if not msg.tool_calls:
+                    msg.tool_calls = []  # covers both None and an empty list
+                msg.tool_calls.append(tool_call)
+                return
+
+        self.messages.append(
+            ChatMessage(role="assistant", content="", tool_calls=[tool_call])
+        )
+
    @staticmethod
    def new(user_id: str) -> "ChatSession":
        return ChatSession(
@@ -172,6 +192,47 @@ class ChatSession(BaseModel):
            successful_agent_schedules=successful_agent_schedules,
        )

+    @staticmethod
+    def _merge_consecutive_assistant_messages(
+        messages: list[ChatCompletionMessageParam],
+    ) -> list[ChatCompletionMessageParam]:
+        """Merge consecutive assistant messages into single messages.
+
+        Long-running tool flows can create split assistant messages: one with
+        text content and another with tool_calls. Anthropic's API requires
+        tool_result blocks to reference a tool_use in the immediately preceding
+        assistant message, so these splits cause 400 errors via OpenRouter.
+        Merging happens on shallow copies, so input dicts are never mutated.
+        """
+        if len(messages) < 2:
+            return messages
+
+        result: list[ChatCompletionMessageParam] = [messages[0]]
+        for msg in messages[1:]:
+            prev = result[-1]
+            if prev.get("role") != "assistant" or msg.get("role") != "assistant":
+                result.append(msg)
+                continue
+            # Merge into a shallow copy so the caller's dicts stay untouched.
+            result[-1] = prev = cast(ChatCompletionAssistantMessageParam, dict(prev))
+            curr = cast(ChatCompletionAssistantMessageParam, msg)
+            curr_content = curr.get("content") or ""
+            if curr_content:
+                prev_content = prev.get("content") or ""
+                prev["content"] = (
+                    f"{prev_content}\n{curr_content}" if prev_content else curr_content
+                )
+
+            curr_tool_calls = curr.get("tool_calls")
+            if curr_tool_calls:
+                prev_tool_calls = prev.get("tool_calls")
+                prev["tool_calls"] = (
+                    list(prev_tool_calls) + list(curr_tool_calls)
+                    if prev_tool_calls
+                    else list(curr_tool_calls)
+                )
+        return result
+
    def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
        messages = []
        for message in self.messages:
@@ -258,7 +319,7 @@ class ChatSession(BaseModel):
                        name=message.name or "",
                    )
                )
-        return messages
+        return self._merge_consecutive_assistant_messages(messages)


 async def _get_session_from_cache(session_id: str) -> ChatSession | None:
--- a/autogpt_platform/backend/backend/api/features/chat/model_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model_test.py
@@ -1,4 +1,16 @@
+from typing import cast
+
 import pytest
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+)
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    ChatCompletionMessageToolCallParam,
+    Function,
+)

 from .model import (
    ChatMessage,
@@ -117,3 +129,205 @@ async def test_chatsession_db_storage(setup_test_user, test_user_id):
                loaded.tool_calls is not None
            ), f"Tool calls missing for {orig.role} message"
            assert len(orig.tool_calls) == len(loaded.tool_calls)
+
+
+# --------------------------------------------------------------------------- #
+#  _merge_consecutive_assistant_messages                                       #
+# --------------------------------------------------------------------------- #
+
+_tc = ChatCompletionMessageToolCallParam(  # typed tool-call fixture, id "tc1"
+    id="tc1", type="function", function=Function(name="do_stuff", arguments="{}")
+)
+_tc2 = ChatCompletionMessageToolCallParam(  # second fixture with a distinct id
+    id="tc2", type="function", function=Function(name="other", arguments="{}")
+)
+
+
+def test_merge_noop_when_no_consecutive_assistants():
+    """Alternating user/assistant/user input keeps its length and role order."""
+    msgs = [
+        ChatCompletionUserMessageParam(role="user", content="hi"),
+        ChatCompletionAssistantMessageParam(role="assistant", content="hello"),
+        ChatCompletionUserMessageParam(role="user", content="bye"),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)
+    assert len(merged) == 3  # nothing was collapsed
+    assert [m["role"] for m in merged] == ["user", "assistant", "user"]
+
+
+def test_merge_splits_text_and_tool_calls():
+    """Text-only assistant followed by a tool_calls-only assistant becomes one."""
+    msgs = [
+        ChatCompletionUserMessageParam(role="user", content="build agent"),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="Let me build that"
+        ),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc]
+        ),
+        ChatCompletionToolMessageParam(role="tool", content="ok", tool_call_id="tc1"),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)
+
+    assert len(merged) == 3  # user, merged assistant, tool
+    assert merged[0]["role"] == "user"
+    assert merged[2]["role"] == "tool"
+    a = cast(ChatCompletionAssistantMessageParam, merged[1])
+    assert a["role"] == "assistant"
+    assert a.get("content") == "Let me build that"  # empty second content ignored
+    assert a.get("tool_calls") == [_tc]
+
+
+def test_merge_combines_tool_calls_from_both():
+    """When both consecutive assistants carry tool_calls, the lists concatenate."""
+    msgs: list[ChatCompletionAssistantMessageParam] = [
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="text", tool_calls=[_tc]
+        ),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc2]
+        ),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)  # type: ignore[arg-type]
+
+    assert len(merged) == 1
+    a = cast(ChatCompletionAssistantMessageParam, merged[0])
+    assert a.get("tool_calls") == [_tc, _tc2]  # original order is preserved
+    assert a.get("content") == "text"
+
+
+def test_merge_three_consecutive_assistants():
+    """Three consecutive assistants collapse into a single message."""
+    msgs: list[ChatCompletionAssistantMessageParam] = [
+        ChatCompletionAssistantMessageParam(role="assistant", content="a"),
+        ChatCompletionAssistantMessageParam(role="assistant", content="b"),
+        ChatCompletionAssistantMessageParam(
+            role="assistant", content="", tool_calls=[_tc]
+        ),
+    ]
+    merged = ChatSession._merge_consecutive_assistant_messages(msgs)  # type: ignore[arg-type]
+
+    assert len(merged) == 1
+    a = cast(ChatCompletionAssistantMessageParam, merged[0])
+    assert a.get("content") == "a\nb"  # non-empty contents are newline-joined
+    assert a.get("tool_calls") == [_tc]
+
+
+def test_merge_empty_and_single_message():
+    """Inputs with fewer than two messages come back equal to the input."""
+    assert ChatSession._merge_consecutive_assistant_messages([]) == []
+
+    single: list[ChatCompletionMessageParam] = [
+        ChatCompletionUserMessageParam(role="user", content="hi")
+    ]
+    assert ChatSession._merge_consecutive_assistant_messages(single) == single
+
+
+# --------------------------------------------------------------------------- #
+#  add_tool_call_to_current_turn                                               #
+# --------------------------------------------------------------------------- #
+
+_raw_tc = {  # plain-dict tool call, as passed to add_tool_call_to_current_turn
+    "id": "tc1",
+    "type": "function",
+    "function": {"name": "f", "arguments": "{}"},
+}
+_raw_tc2 = {  # second plain-dict tool call with a distinct id
+    "id": "tc2",
+    "type": "function",
+    "function": {"name": "g", "arguments": "{}"},
+}
+
+
+def test_add_tool_call_appends_to_existing_assistant():
+    """An assistant message after the last user message receives the tool_call."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+        ChatMessage(role="assistant", content="working on it"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 2  # no new message created
+    assert session.messages[1].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_creates_assistant_when_none_exists():
+    """With only a user message present, a new assistant message is appended."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 2  # user + newly created assistant
+    assert session.messages[1].role == "assistant"
+    assert session.messages[1].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_does_not_cross_user_boundary():
+    """An assistant that precedes the latest user message is never modified."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="assistant", content="old turn"),
+        ChatMessage(role="user", content="new message"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+
+    assert len(session.messages) == 3  # new assistant was created
+    assert session.messages[0].tool_calls is None  # old assistant untouched
+    assert session.messages[2].role == "assistant"
+    assert session.messages[2].tool_calls == [_raw_tc]
+
+
+def test_add_tool_call_multiple_times():
+    """A second tool_call skips the tool message and lands on the same assistant."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="hi"),
+        ChatMessage(role="assistant", content="doing stuff"),
+    ]
+    session.add_tool_call_to_current_turn(_raw_tc)
+    # Simulate a pending tool result in between (like _yield_tool_call does)
+    session.messages.append(
+        ChatMessage(role="tool", content="pending", tool_call_id="tc1")
+    )
+    session.add_tool_call_to_current_turn(_raw_tc2)
+
+    assert len(session.messages) == 3  # user, assistant, tool — no extra assistant
+    assert session.messages[1].tool_calls == [_raw_tc, _raw_tc2]
+
+
+def test_to_openai_messages_merges_split_assistants():
+    """End-to-end: to_openai_messages() merges adjacent assistant messages."""
+    session = ChatSession.new(user_id="u")
+    session.messages = [
+        ChatMessage(role="user", content="build agent"),
+        ChatMessage(role="assistant", content="Let me build that"),
+        ChatMessage(
+            role="assistant",
+            content="",
+            tool_calls=[
+                {
+                    "id": "tc1",
+                    "type": "function",
+                    "function": {"name": "create_agent", "arguments": "{}"},
+                }
+            ],
+        ),
+        ChatMessage(role="tool", content="done", tool_call_id="tc1"),
+        ChatMessage(role="assistant", content="Saved!"),
+        ChatMessage(role="user", content="show me an example run"),
+    ]
+    openai_msgs = session.to_openai_messages()
+
+    # The two consecutive assistants at index 1,2 should be merged
+    roles = [m["role"] for m in openai_msgs]
+    assert roles == ["user", "assistant", "tool", "assistant", "user"]
+
+    # The merged assistant should have both content and tool_calls
+    merged = cast(ChatCompletionAssistantMessageParam, openai_msgs[1])
+    assert merged.get("content") == "Let me build that"
+    tc_list = merged.get("tool_calls")
+    assert tc_list is not None and len(list(tc_list)) == 1
+    assert list(tc_list)[0]["id"] == "tc1"
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -800,9 +800,13 @@ async def stream_chat_completion(
        # Build the messages list in the correct order
        messages_to_save: list[ChatMessage] = []

-        # Add assistant message with tool_calls if any
+        # Add assistant message with tool_calls if any.
+        # Use extend (not assign) to preserve tool_calls already added by
+        # _yield_tool_call for long-running tools.
        if accumulated_tool_calls:
-            assistant_response.tool_calls = accumulated_tool_calls
+            if not assistant_response.tool_calls:
+                assistant_response.tool_calls = []
+            assistant_response.tool_calls.extend(accumulated_tool_calls)
            logger.info(
                f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
            )
@@ -1404,13 +1408,9 @@ async def _yield_tool_call(
                operation_id=operation_id,
            )

-            # Save assistant message with tool_call FIRST (required by LLM)
-            assistant_message = ChatMessage(
-                role="assistant",
-                content="",
-                tool_calls=[tool_calls[yield_idx]],
-            )
-            session.messages.append(assistant_message)
+            # Attach the tool_call to the current turn's assistant message
+            # (or create one if this is a tool-only response with no text).
+            session.add_tool_call_to_current_turn(tool_calls[yield_idx])

            # Then save pending tool result
            pending_message = ChatMessage(
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -1,4 +1,6 @@
+import base64
 import json
+import logging
 import shlex
 import uuid
 from typing import Literal, Optional
@@ -21,6 +23,11 @@ from backend.data.model import (
 )
 from backend.integrations.providers import ProviderName

+logger = logging.getLogger(__name__)
+
+# Maximum size for binary files to extract (50MB)
+MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
+

 class ClaudeCodeExecutionError(Exception):
    """Exception raised when Claude Code execution fails.
@@ -180,7 +187,9 @@ class ClaudeCodeBlock(Block):
        path: str
        relative_path: str  # Path relative to working directory (for GitHub, etc.)
        name: str
-        content: str
+        content: str  # Text content for text files, empty string for binary files
+        is_binary: bool = False  # True if this is a binary file
+        content_base64: Optional[str] = None  # Base64-encoded content for binary files

    class Output(BlockSchemaOutput):
        response: str = SchemaField(
@@ -188,8 +197,11 @@ class ClaudeCodeBlock(Block):
        )
        files: list["ClaudeCodeBlock.FileOutput"] = SchemaField(
            description=(
-                "List of text files created/modified by Claude Code during this execution. "
-                "Each file has 'path', 'relative_path', 'name', and 'content' fields."
+                "List of files created/modified by Claude Code during this execution. "
+                "Each file has 'path', 'relative_path', 'name', 'content', 'is_binary', "
+                "and 'content_base64' fields. For text files, 'content' contains the text "
+                "and 'is_binary' is False. For binary files (PDFs, images, etc.), "
+                "'is_binary' is True and 'content_base64' contains the base64-encoded data."
            )
        )
        conversation_history: str = SchemaField(
@@ -252,6 +264,8 @@ class ClaudeCodeBlock(Block):
                            "relative_path": "index.html",
                            "name": "index.html",
                            "content": "<html>Hello World</html>",
+                            "is_binary": False,
+                            "content_base64": None,
                        }
                    ],
                ),
@@ -272,6 +286,8 @@ class ClaudeCodeBlock(Block):
                            relative_path="index.html",
                            name="index.html",
                            content="<html>Hello World</html>",
+                            is_binary=False,
+                            content_base64=None,
                        )
                    ],  # files
                    "User: Create a hello world HTML file\n"
@@ -531,7 +547,6 @@ class ClaudeCodeBlock(Block):
            ".env",
            ".gitignore",
            ".dockerfile",
-            "Dockerfile",
            ".vue",
            ".svelte",
            ".astro",
@@ -540,6 +555,44 @@ class ClaudeCodeBlock(Block):
            ".tex",
            ".csv",
            ".log",
+            ".svg",  # SVG is XML-based text
+        }
+
+        # Binary file extensions we can read and base64-encode
+        binary_extensions = {
+            # Images
+            ".png",
+            ".jpg",
+            ".jpeg",
+            ".gif",
+            ".webp",
+            ".ico",
+            ".bmp",
+            ".tiff",
+            ".tif",
+            # Documents
+            ".pdf",
+            # Archives (useful for downloads)
+            ".zip",
+            ".tar",
+            ".gz",
+            ".7z",
+            # Audio/Video (if small enough)
+            ".mp3",
+            ".wav",
+            ".mp4",
+            ".webm",
+            # Other binary formats
+            ".woff",
+            ".woff2",
+            ".ttf",
+            ".otf",
+            ".eot",
+            ".bin",
+            ".exe",
+            ".dll",
+            ".so",
+            ".dylib",
        }

        try:
@@ -564,10 +617,26 @@ class ClaudeCodeBlock(Block):
                    if not file_path:
                        continue

-                    # Check if it's a text file we can read
+                    # Check if it's a text file we can read (case-insensitive)
+                    file_path_lower = file_path.lower()
                    is_text = any(
-                        file_path.endswith(ext) for ext in text_extensions
-                    ) or file_path.endswith("Dockerfile")
+                        file_path_lower.endswith(ext) for ext in text_extensions
+                    ) or file_path_lower.endswith("dockerfile")
+
+                    # Check if it's a binary file we should extract
+                    is_binary = any(
+                        file_path_lower.endswith(ext) for ext in binary_extensions
+                    )
+
+                    # Helper to extract filename and relative path
+                    def get_file_info(path: str, work_dir: str) -> tuple[str, str]:
+                        """Return (file name, path relative to work_dir)."""
+                        name = path.rsplit("/", 1)[-1]
+                        root = work_dir.rstrip("/")
+                        # Whole-component match: "/w/app" must not strip "/w/app2/x".
+                        if path == root or path.startswith(root + "/"):
+                            return name, path[len(root) + 1 :]
+                        return name, path

                    if is_text:
                        try:
@@ -576,32 +645,75 @@ class ClaudeCodeBlock(Block):
                            if isinstance(content, bytes):
                                content = content.decode("utf-8", errors="replace")

-                            # Extract filename from path
-                            file_name = file_path.split("/")[-1]
-
-                            # Calculate relative path by stripping working directory
-                            relative_path = file_path
-                            if file_path.startswith(working_directory):
-                                relative_path = file_path[len(working_directory) :]
-                                # Remove leading slash if present
-                                if relative_path.startswith("/"):
-                                    relative_path = relative_path[1:]
-
+                            file_name, relative_path = get_file_info(
+                                file_path, working_directory
+                            )
                            files.append(
                                ClaudeCodeBlock.FileOutput(
                                    path=file_path,
                                    relative_path=relative_path,
                                    name=file_name,
                                    content=content,
+                                    is_binary=False,
+                                    content_base64=None,
                                )
                            )
-                        except Exception:
-                            # Skip files that can't be read
-                            pass
+                        except Exception as e:
+                            logger.warning(f"Failed to read text file {file_path}: {e}")
+                    elif is_binary:
+                        try:
+                            # Check file size before reading to avoid OOM
+                            stat_result = await sandbox.commands.run(
+                                f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
+                            )
+                            if (
+                                stat_result.exit_code != 0
+                                or not stat_result.stdout
+                            ):
+                                logger.warning(
+                                    f"Skipping binary file {file_path}: "
+                                    f"could not determine file size"
+                                )
+                                continue
+                            file_size = int(stat_result.stdout.strip())
+                            if file_size > MAX_BINARY_FILE_SIZE:
+                                logger.warning(
+                                    f"Skipping binary file {file_path}: "
+                                    f"size {file_size} exceeds limit "
+                                    f"{MAX_BINARY_FILE_SIZE}"
+                                )
+                                continue

-        except Exception:
-            # If file extraction fails, return empty results
-            pass
+                            # Read binary file as bytes using format="bytes"
+                            content_bytes = await sandbox.files.read(
+                                file_path, format="bytes"
+                            )
+
+                            # Base64 encode the binary content
+                            content_b64 = base64.b64encode(content_bytes).decode(
+                                "ascii"
+                            )
+
+                            file_name, relative_path = get_file_info(
+                                file_path, working_directory
+                            )
+                            files.append(
+                                ClaudeCodeBlock.FileOutput(
+                                    path=file_path,
+                                    relative_path=relative_path,
+                                    name=file_name,
+                                    content="",  # Empty for binary files
+                                    is_binary=True,
+                                    content_base64=content_b64,
+                                )
+                            )
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to read binary file {file_path}: {e}"
+                            )
+
+        except Exception as e:
+            logger.warning(f"File extraction failed: {e}")

        return files

--- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
+++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
@@ -21,43 +21,71 @@ logger = logging.getLogger(__name__)

 class HumanInTheLoopBlock(Block):
    """
-    This block pauses execution and waits for human approval or modification of the data.
+    Pauses execution and waits for human approval or rejection of the data.

-    When executed, it creates a pending review entry and sets the node execution status
-    to REVIEW. The execution will remain paused until a human user either:
-    - Approves the data (with or without modifications)
-    - Rejects the data
+    When executed, this block creates a pending review entry and sets the node execution
+    status to REVIEW. The execution remains paused until a human user either approves
+    or rejects the data.

-    This is useful for workflows that require human validation or intervention before
-    proceeding to the next steps.
+    **How it works:**
+    - The input data is presented to a human reviewer
+    - The reviewer can approve or reject (and optionally modify the data if editable)
+    - On approval: the data flows out through the `approved_data` output pin
+    - On rejection: the data flows out through the `rejected_data` output pin
+
+    **Important:** The output pins yield the actual data itself, NOT status strings.
+    The approval/rejection decision determines WHICH output pin fires, not the value.
+    You do NOT need to compare the output to "APPROVED" or "REJECTED" - simply connect
+    downstream blocks to the appropriate output pin for each case.
+
+    **Example usage:**
+    - Connect `approved_data` → next step in your workflow (data was approved)
+    - Connect `rejected_data` → error handling or notification (data was rejected)
    """

    class Input(BlockSchemaInput):
-        data: Any = SchemaField(description="The data to be reviewed by a human user")
+        data: Any = SchemaField(
+            description="The data to be reviewed by a human user. "
+            "This exact data will be passed through to either approved_data or "
+            "rejected_data output based on the reviewer's decision."
+        )
        name: str = SchemaField(
-            description="A descriptive name for what this data represents",
+            description="A descriptive name for what this data represents. "
+            "This helps the reviewer understand what they are reviewing.",
        )
        editable: bool = SchemaField(
-            description="Whether the human reviewer can edit the data",
+            description="Whether the human reviewer can edit the data before "
+            "approving or rejecting it",
            default=True,
            advanced=True,
        )

    class Output(BlockSchemaOutput):
        approved_data: Any = SchemaField(
-            description="The data when approved (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer APPROVES it. "
+            "The value is the actual data itself (not a status string like 'APPROVED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'approved' workflow path."
        )
        rejected_data: Any = SchemaField(
-            description="The data when rejected (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer REJECTS it. "
+            "The value is the actual data itself (not a status string like 'REJECTED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'rejected' workflow path."
        )
        review_message: str = SchemaField(
-            description="Any message provided by the reviewer", default=""
+            description="Optional message provided by the reviewer explaining their "
+            "decision. Only outputs when the reviewer provides a message; "
+            "this pin does not fire if no message was given.",
+            default="",
        )

    def __init__(self):
        super().__init__(
            id="8b2a7b3c-6e9d-4a5f-8c1b-2e3f4a5b6c7d",
-            description="Pause execution and wait for human approval or modification of data",
+            description="Pause execution for human review. Data flows through "
+            "approved_data or rejected_data output based on the reviewer's decision. "
+            "Outputs contain the actual data, not status strings.",
            categories={BlockCategory.BASIC},
            input_schema=HumanInTheLoopBlock.Input,
            output_schema=HumanInTheLoopBlock.Output,
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/hooks/Untitled
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/hooks/Untitled
@@ -0,0 +1,10 @@
+import { parseAsString, useQueryState } from "nuqs";
+
+export function useCopilotSessionId() {
+  const [urlSessionId, setUrlSessionId] = useQueryState(
+    "sessionId", // query-state key; presumably the URL ?sessionId= param (nuqs)
+    parseAsString,
+  );
+  // Hand the value/setter pair back unchanged.
+  return { urlSessionId, setUrlSessionId };
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/hooks/useLongRunningToolPolling.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/hooks/useLongRunningToolPolling.ts
@@ -1,131 +0,0 @@
-import { getGetV2GetSessionQueryKey } from "@/app/api/__generated__/endpoints/chat/chat";
-import { useQueryClient } from "@tanstack/react-query";
-import type { UIDataTypes, UIMessage, UITools } from "ai";
-import { useCallback, useEffect, useRef } from "react";
-import { convertChatSessionMessagesToUiMessages } from "../helpers/convertChatSessionToUiMessages";
-
-const OPERATING_TYPES = new Set([
-  "operation_started",
-  "operation_pending",
-  "operation_in_progress",
-]);
-
-const POLL_INTERVAL_MS = 5_000;
-
-/**
- * Detects whether any message contains a tool part whose output indicates
- * a long-running operation is still in progress.
- */
-function hasOperatingTool(
-  messages: UIMessage<unknown, UIDataTypes, UITools>[],
-): boolean {
-  for (const msg of messages) {
-    for (const part of msg.parts) {
-      if (!part.type.startsWith("tool-")) continue;
-      const toolPart = part as { output?: unknown };
-      if (!toolPart.output) continue;
-      const output =
-        typeof toolPart.output === "string"
-          ? safeParse(toolPart.output)
-          : toolPart.output;
-      if (
-        output &&
-        typeof output === "object" &&
-        "type" in output &&
-        OPERATING_TYPES.has((output as { type: string }).type)
-      ) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-function safeParse(value: string): unknown {
-  try {
-    return JSON.parse(value);
-  } catch {
-    return null;
-  }
-}
-
-/**
- * Polls the session endpoint while any tool is in an "operating" state
- * (operation_started / operation_pending / operation_in_progress).
- *
- * When the session data shows the tool output has changed (e.g. to
- * agent_saved), it calls `setMessages` with the updated messages.
- */
-export function useLongRunningToolPolling(
-  sessionId: string | null,
-  messages: UIMessage<unknown, UIDataTypes, UITools>[],
-  setMessages: (
-    updater: (
-      prev: UIMessage<unknown, UIDataTypes, UITools>[],
-    ) => UIMessage<unknown, UIDataTypes, UITools>[],
-  ) => void,
-) {
-  const queryClient = useQueryClient();
-  const isPollingRef = useRef(false);
-  const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
-
-  const stopPolling = useCallback(() => {
-    if (intervalRef.current) {
-      clearInterval(intervalRef.current);
-      intervalRef.current = null;
-    }
-    isPollingRef.current = false;
-  }, []);
-
-  const poll = useCallback(async () => {
-    if (!sessionId) return;
-
-    // Invalidate the query cache so the next fetch gets fresh data
-    await queryClient.invalidateQueries({
-      queryKey: getGetV2GetSessionQueryKey(sessionId),
-    });
-
-    // Fetch fresh session data
-    const data = queryClient.getQueryData<{
-      status: number;
-      data: { messages?: unknown[] };
-    }>(getGetV2GetSessionQueryKey(sessionId));
-
-    if (data?.status !== 200 || !data.data.messages) return;
-
-    const freshMessages = convertChatSessionMessagesToUiMessages(
-      sessionId,
-      data.data.messages,
-    );
-
-    if (!freshMessages || freshMessages.length === 0) return;
-
-    // Only update if the fresh data no longer has operating tools
-    // (meaning the long-running tool completed)
-    if (!hasOperatingTool(freshMessages)) {
-      setMessages(() => freshMessages);
-      stopPolling();
-    }
-  }, [sessionId, queryClient, setMessages, stopPolling]);
-
-  useEffect(() => {
-    const shouldPoll = hasOperatingTool(messages);
-
-    if (shouldPoll && !isPollingRef.current && sessionId) {
-      isPollingRef.current = true;
-      intervalRef.current = setInterval(() => {
-        poll();
-      }, POLL_INTERVAL_MS);
-    } else if (!shouldPoll && isPollingRef.current) {
-      stopPolling();
-    }
-
-    return () => {
-      // Cleanup on unmount or dependency change
-      if (intervalRef.current) {
-        clearInterval(intervalRef.current);
-        intervalRef.current = null;
-      }
-    };
-  }, [messages, sessionId, poll, stopPolling]);
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts
@@ -5,7 +5,6 @@ import { useChat } from "@ai-sdk/react";
 import { DefaultChatTransport } from "ai";
 import { useEffect, useMemo, useState } from "react";
 import { useChatSession } from "./useChatSession";
-import { useLongRunningToolPolling } from "./hooks/useLongRunningToolPolling";

 export function useCopilotPage() {
  const { isUserLoading, isLoggedIn } = useSupabase();
@@ -61,11 +60,6 @@ export function useCopilotPage() {
    });
  }, [hydratedMessages, setMessages]);

-  // Poll session endpoint when a long-running tool (create_agent, edit_agent)
-  // is in progress. When the backend completes, the session data will contain
-  // the final tool output — this hook detects the change and updates messages.
-  useLongRunningToolPolling(sessionId, messages, setMessages);
-
  // Clear messages when session is null
  useEffect(() => {
    if (!sessionId) setMessages([]);
--- a/docs/integrations/README.md
+++ b/docs/integrations/README.md
@@ -61,7 +61,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
 | [Get List Item](block-integrations/basic.md#get-list-item) | Returns the element at the given index |
 | [Get Store Agent Details](block-integrations/system/store_operations.md#get-store-agent-details) | Get detailed information about an agent from the store |
 | [Get Weather Information](block-integrations/basic.md#get-weather-information) | Retrieves weather information for a specified location using OpenWeatherMap API |
-| [Human In The Loop](block-integrations/basic.md#human-in-the-loop) | Pause execution and wait for human approval or modification of data |
+| [Human In The Loop](block-integrations/basic.md#human-in-the-loop) | Pause execution for human review |
 | [List Is Empty](block-integrations/basic.md#list-is-empty) | Checks if a list is empty |
 | [List Library Agents](block-integrations/system/library_operations.md#list-library-agents) | List all agents in your personal library |
 | [Note](block-integrations/basic.md#note) | A visual annotation block that displays a sticky note in the workflow editor for documentation and organization purposes |
--- a/docs/integrations/block-integrations/basic.md
+++ b/docs/integrations/block-integrations/basic.md
@@ -975,7 +975,7 @@ A travel planning application could use this block to provide users with current
 ## Human In The Loop

 ### What it is
-Pause execution and wait for human approval or modification of data
+Pause execution for human review. Data flows through approved_data or rejected_data output based on the reviewer's decision. Outputs contain the actual data, not status strings.

 ### How it works
 <!-- MANUAL: how_it_works -->
@@ -988,18 +988,18 @@ This enables human oversight at critical points in automated workflows, ensuring

 | Input | Description | Type | Required |
 |-------|-------------|------|----------|
-| data | The data to be reviewed by a human user | Data | Yes |
-| name | A descriptive name for what this data represents | str | Yes |
-| editable | Whether the human reviewer can edit the data | bool | No |
+| data | The data to be reviewed by a human user. This exact data will be passed through to either approved_data or rejected_data output based on the reviewer's decision. | Data | Yes |
+| name | A descriptive name for what this data represents. This helps the reviewer understand what they are reviewing. | str | Yes |
+| editable | Whether the human reviewer can edit the data before approving or rejecting it | bool | No |

 ### Outputs

 | Output | Description | Type |
 |--------|-------------|------|
 | error | Error message if the operation failed | str |
-| approved_data | The data when approved (may be modified by reviewer) | Approved Data |
-| rejected_data | The data when rejected (may be modified by reviewer) | Rejected Data |
-| review_message | Any message provided by the reviewer | str |
+| approved_data | Outputs the input data when the reviewer APPROVES it. The value is the actual data itself (not a status string like 'APPROVED'). If the reviewer edited the data, this contains the modified version. Connect downstream blocks here for the 'approved' workflow path. | Approved Data |
+| rejected_data | Outputs the input data when the reviewer REJECTS it. The value is the actual data itself (not a status string like 'REJECTED'). If the reviewer edited the data, this contains the modified version. Connect downstream blocks here for the 'rejected' workflow path. | Rejected Data |
+| review_message | Optional message provided by the reviewer explaining their decision. Only outputs when the reviewer provides a message; this pin does not fire if no message was given. | str |

 ### Possible use case
 <!-- MANUAL: use_case -->
--- a/docs/integrations/block-integrations/claude_code.md
+++ b/docs/integrations/block-integrations/claude_code.md
@@ -16,7 +16,7 @@ When activated, the block:
   - Install dependencies (npm, pip, etc.)
   - Run terminal commands
   - Build and test applications
-5. Extracts all text files created/modified during execution
+5. Extracts all text and binary files created/modified during execution
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
 | Output | Description |
 |--------|-------------|
 | Response | The output/response from Claude Code execution |
-| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields |
+| Files | List of files created/modified during execution. Each file includes path, relative_path, name, content, is_binary, and content_base64 fields. For text files, content contains the text and is_binary is False. For binary files (PDFs, images, etc.), is_binary is True and content_base64 contains the base64-encoded data |
 | Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
 | Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
 | Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
--- a/docs/integrations/block-integrations/llm.md
+++ b/docs/integrations/block-integrations/llm.md
@@ -535,7 +535,7 @@ When activated, the block:
 2. Installs the latest version of Claude Code in the sandbox
 3. Optionally runs setup commands to prepare the environment
 4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
-5. Extracts all text files created/modified during execution
+5. Extracts all text and binary files created/modified during execution
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
 |--------|-------------|------|
 | error | Error message if execution failed | str |
 | response | The output/response from Claude Code execution | str |
-| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', and 'content' fields. | List[FileOutput] |
+| files | List of files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', 'is_binary', and 'content_base64' fields. For text files, 'content' contains the text and 'is_binary' is False. For binary files (PDFs, images, etc.), 'is_binary' is True and 'content_base64' contains the base64-encoded data. | List[FileOutput] |
 | conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
 | session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
 | sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
Author	SHA1	Message	Date
Bentlybro	99f8bf5f0c	fix: skip binary file if stat fails to prevent OOM — If the stat command fails (file deleted, permissions issue, etc.), we now skip the file rather than proceeding to read it with an unknown size. This prevents potential OOM crashes from large files where size verification failed.	2026-02-12 12:32:13 +00:00
Bentlybro	3f76f1318b	docs: Fix llm.md to match exact schema description	2026-02-12 12:25:29 +00:00
Bentlybro	b011289dd2	fix: Address code review feedback - Add 50MB size guard for binary files to prevent OOM - Extract helper function for path resolution (DRY) - Add logging for file extraction errors - Remove dead 'Dockerfile' entry from text_extensions	2026-02-12 12:02:45 +00:00
Bentlybro	49c2f578b4	docs: Update llm.md for binary file support in Claude Code block	2026-02-12 11:58:35 +00:00
Bentlybro	7150b7768d	fix: Make Dockerfile check case-insensitive	2026-02-12 11:53:57 +00:00
Bentlybro	8c95b03636	fix: Update tests and address code review feedback - Update test fixtures with is_binary and content_base64 fields - Move .svg to text_extensions (it's XML-based) - Make extension matching case-insensitive for both text and binary	2026-02-12 11:45:52 +00:00
Bentlybro	4a8368887f	fix: Use format='bytes' for reading binary files from E2B sandbox — Fixes the critical bug where binary files would fail to read because files.read() defaults to text mode (UTF-8 decoding). Now explicitly uses format='bytes' which returns a bytearray.	2026-02-12 11:29:43 +00:00
Bentlybro	d46e5e6b6a	docs: Update claude_code.md for binary file support	2026-02-12 11:26:58 +00:00
Bentlybro	4e632bbd60	fix(backend): Extract binary files from ClaudeCodeBlock sandbox — Add support for extracting binary files (PDFs, images, etc.) from the E2B sandbox in ClaudeCodeBlock. Changes: - Add binary_extensions set for common binary file types (.pdf, .png, .jpg, etc.) - Update FileOutput schema with is_binary and content_base64 fields - Binary files are read as bytes and base64-encoded before returning - Text files continue to work as before with is_binary=False — Closes SECRT-1897	2026-02-12 11:23:05 +00:00
Zamil Majdy	a78145505b	fix(copilot): merge split assistant messages to prevent Anthropic API errors (#12062 ) ## Summary - When the copilot model responds with both text content AND a long-running tool call (e.g., `create_agent`), the streaming code created two separate consecutive assistant messages — one with text, one with `tool_calls`. This caused Anthropic's API to reject with `"unexpected tool_use_id found in tool_result blocks"` because the `tool_result` couldn't find a matching `tool_use` in the immediately preceding assistant message. - Added a defensive merge of consecutive assistant messages in `to_openai_messages()` (fixes existing corrupt sessions too) - Fixed `_yield_tool_call` to add tool_calls to the existing current-turn assistant message instead of creating a new one - Changed `accumulated_tool_calls` assignment to use `extend` to prevent overwriting tool_calls added by long-running tool flow ## Test plan - [x] All 23 chat feature tests pass (`backend/api/features/chat/`) - [x] All 44 prompt utility tests pass (`backend/util/prompt_test.py`) - [x] All pre-commit hooks pass (ruff, isort, black, pyright) - [ ] Manual test: create an agent via copilot, then ask a follow-up question — should no longer get 400 error <!-- greptile_comment --> <h2>Greptile Overview</h2> <details><summary><h3>Greptile Summary</h3></summary> Fixes a critical bug where long-running tool calls (like `create_agent`) caused Anthropic API 400 errors due to split assistant messages. The fix ensures tool calls are added to the existing assistant message instead of creating new ones, and adds a defensive merge function to repair any existing corrupt sessions. 
Key changes: - Added `_merge_consecutive_assistant_messages()` to defensively merge split assistant messages in `to_openai_messages()` - Modified `_yield_tool_call()` to append tool calls to the current-turn assistant message instead of creating a new one - Changed `accumulated_tool_calls` from assignment to `extend` to preserve tool calls already added by long-running tool flow Impact: Resolves the issue where users received 400 errors after creating agents via copilot and asking follow-up questions. </details> <details><summary><h3>Confidence Score: 4/5</h3></summary> - Safe to merge with minor verification recommended - The changes are well-targeted and solve a real API compatibility issue. The logic is sound: searching backwards for the current assistant message is correct, and using `extend` instead of assignment prevents overwriting. The defensive merge in `to_openai_messages()` also fixes existing corrupt sessions. All existing tests pass according to the PR description. - No files require special attention - changes are localized and defensive </details> <details><summary><h3>Sequence Diagram</h3></summary> ```mermaid sequenceDiagram participant User participant StreamAPI as stream_chat_completion participant Chunks as _stream_chat_chunks participant ToolCall as _yield_tool_call participant Session as ChatSession User->>StreamAPI: Send message StreamAPI->>Chunks: Stream chat chunks alt Text + Long-running tool call Chunks->>StreamAPI: Text delta (content) StreamAPI->>Session: Append assistant message with content Chunks->>ToolCall: Tool call detected Note over ToolCall: OLD: Created new assistant message<br/>NEW: Appends to existing assistant ToolCall->>Session: Search backwards for current assistant ToolCall->>Session: Append tool_call to existing message ToolCall->>Session: Add pending tool result end StreamAPI->>StreamAPI: Merge accumulated_tool_calls Note over StreamAPI: Use extend (not assign)<br/>to preserve existing tool_calls StreamAPI->>Session: 
to_openai_messages() Session->>Session: _merge_consecutive_assistant_messages() Note over Session: Defensive: Merges any split<br/>assistant messages Session-->>StreamAPI: Merged messages StreamAPI->>User: Return response ``` </details> <!-- greptile_other_comments_section --> <!-- /greptile_comment -->	2026-02-12 01:52:17 +00:00
Otto	36aeb0b2b3	docs(blocks): clarify HumanInTheLoop output descriptions for agent builder (#12069 ) ## Problem The agent builder (LLM) misinterprets the HumanInTheLoop block outputs. It thinks `approved_data` and `rejected_data` will yield status strings like "APPROVED" or "REJECTED" instead of understanding that the actual input data passes through. This leads to unnecessary complexity - the agent builder adds comparison blocks to check for status strings that don't exist. ## Solution Enriched the block docstring and all input/output field descriptions to make it explicit that: 1. The output is the actual data itself, not a status string 2. The routing is determined by which output pin fires 3. How to use the block correctly (connect downstream blocks to appropriate output pins) ## Changes - Updated block docstring with clear "How it works" and "Example usage" sections - Enhanced `data` input description to explain data flow - Enhanced `name` input description for reviewer context - Enhanced `approved_data` output to explicitly state it's NOT a status string - Enhanced `rejected_data` output to explicitly state it's NOT a status string - Enhanced `review_message` output for clarity ## Testing Documentation-only change to schema descriptions. No functional changes. Fixes SECRT-1930 <!-- greptile_comment --> <h2>Greptile Overview</h2> <details><summary><h3>Greptile Summary</h3></summary> Enhanced documentation for the `HumanInTheLoopBlock` to clarify how output pins work. The key improvement explicitly states that output pins (`approved_data` and `rejected_data`) yield the actual input data, not status strings like "APPROVED" or "REJECTED". This prevents the agent builder (LLM) from misinterpreting the block's behavior and adding unnecessary comparison blocks. 
Key changes: - Added "How it works" and "Example usage" sections to the block docstring - Clarified that routing is determined by which output pin fires, not by comparing output values - Enhanced all input/output field descriptions with explicit data flow explanations - Emphasized that downstream blocks should be connected to the appropriate output pin based on desired workflow path This is a documentation-only change with no functional modifications to the code logic. </details> <details><summary><h3>Confidence Score: 5/5</h3></summary> - This PR is safe to merge with no risk - Documentation-only change that accurately reflects the existing code behavior. No functional changes, no runtime impact, and the enhanced descriptions correctly explain how the block outputs work based on verification of the implementation code. - No files require special attention </details> <!-- greptile_other_comments_section --> <!-- /greptile_comment --> Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>	2026-02-11 15:43:58 +00:00