Merge branch 'dev' into fix/claude-code-binary-files-v2

fix: apply size check to text files too (OOM protection)
fix: address review comments
2026-02-24 03:00:28 -05:00 · 2026-02-19 17:48:58 +00:00 · 2026-02-17 14:11:44 +00:00 · 2026-02-17 14:03:55 +00:00 · 2026-02-16 14:46:06 +00:00 · 2026-02-16 14:40:53 +00:00
10 changed files with 142 additions and 590 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -18,7 +18,7 @@ from backend.copilot.completion_handler import (
    process_operation_success,
 )
 from backend.copilot.config import ChatConfig
-from backend.copilot.executor.utils import enqueue_cancel_task, enqueue_copilot_task
+from backend.copilot.executor.utils import enqueue_copilot_task
 from backend.copilot.model import (
    ChatMessage,
    ChatSession,
@@ -132,14 +132,6 @@ class ListSessionsResponse(BaseModel):
    total: int


-class CancelTaskResponse(BaseModel):
-    """Response model for the cancel task endpoint."""
-
-    cancelled: bool
-    task_id: str | None = None
-    reason: str | None = None
-
-
 class OperationCompleteRequest(BaseModel):
    """Request model for external completion webhook."""

@@ -322,57 +314,6 @@ async def get_session(
    )


-@router.post(
-    "/sessions/{session_id}/cancel",
-    status_code=200,
-)
-async def cancel_session_task(
-    session_id: str,
-    user_id: Annotated[str | None, Depends(auth.get_user_id)],
-) -> CancelTaskResponse:
-    """Cancel the active streaming task for a session.
-
-    Publishes a cancel event to the executor via RabbitMQ FANOUT, then
-    polls Redis until the task status flips from ``running`` or a timeout
-    (5 s) is reached.  Returns only after the cancellation is confirmed.
-    """
-    await _validate_and_get_session(session_id, user_id)
-
-    active_task, _ = await stream_registry.get_active_task_for_session(
-        session_id, user_id
-    )
-    if not active_task:
-        return CancelTaskResponse(cancelled=False, reason="no_active_task")
-
-    task_id = active_task.task_id
-    await enqueue_cancel_task(task_id)
-    logger.info(
-        f"[CANCEL] Published cancel for task ...{task_id[-8:]} "
-        f"session ...{session_id[-8:]}"
-    )
-
-    # Poll until the executor confirms the task is no longer running.
-    # Keep max_wait below typical reverse-proxy read timeouts.
-    poll_interval = 0.5
-    max_wait = 5.0
-    waited = 0.0
-    while waited < max_wait:
-        await asyncio.sleep(poll_interval)
-        waited += poll_interval
-        task = await stream_registry.get_task(task_id)
-        if task is None or task.status != "running":
-            logger.info(
-                f"[CANCEL] Task ...{task_id[-8:]} confirmed stopped "
-                f"(status={task.status if task else 'gone'}) after {waited:.1f}s"
-            )
-            return CancelTaskResponse(cancelled=True, task_id=task_id)
-
-    logger.warning(f"[CANCEL] Task ...{task_id[-8:]} not confirmed after {max_wait}s")
-    return CancelTaskResponse(
-        cancelled=True, task_id=task_id, reason="cancel_published_not_confirmed"
-    )
-
-
@router.post(
    "/sessions/{session_id}/stream",
 )
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -187,9 +187,11 @@ class ClaudeCodeBlock(Block):
        )
        files: list[SandboxFileOutput] = SchemaField(
            description=(
-                "List of text files created/modified by Claude Code during this execution. "
+                "List of files created/modified by Claude Code during this execution. "
+                "Includes text files and binary files (images, PDFs, etc.). "
                "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
-                "workspace_ref contains a workspace:// URI if the file was stored to workspace."
+                "workspace_ref contains a workspace:// URI for workspace storage. "
+                "For binary files, content contains a placeholder; use workspace_ref to access the file."
            )
        )
        conversation_history: str = SchemaField(
@@ -452,13 +454,15 @@ class ClaudeCodeBlock(Block):
                else:
                    new_conversation_history = turn_entry

-            # Extract files created/modified during this run and store to workspace
+            # Extract files created/modified during this run and store to workspace.
+            # Binary files (images, PDFs, etc.) are stored via store_media_file
+            # which handles virus scanning and workspace storage.
            sandbox_files = await extract_and_store_sandbox_files(
                sandbox=sandbox,
                working_directory=working_directory,
                execution_context=execution_context,
                since_timestamp=start_timestamp,
-                text_only=True,
+                text_only=False,
            )

            return (
--- a/autogpt_platform/backend/backend/copilot/executor/utils.py
+++ b/autogpt_platform/backend/backend/copilot/executor/utils.py
@@ -205,20 +205,3 @@ async def enqueue_copilot_task(
        message=entry.model_dump_json(),
        exchange=COPILOT_EXECUTION_EXCHANGE,
    )
-
-
-async def enqueue_cancel_task(task_id: str) -> None:
-    """Publish a cancel request for a running CoPilot task.
-
-    Sends a ``CancelCoPilotEvent`` to the FANOUT exchange so all executor
-    pods receive the cancellation signal.
-    """
-    from backend.util.clients import get_async_copilot_queue
-
-    event = CancelCoPilotEvent(task_id=task_id)
-    queue_client = await get_async_copilot_queue()
-    await queue_client.publish_message(
-        routing_key="",  # FANOUT ignores routing key
-        message=event.model_dump_json(),
-        exchange=COPILOT_CANCEL_EXCHANGE,
-    )
--- a/autogpt_platform/backend/backend/copilot/parallel_tool_calls_test.py
+++ b/autogpt_platform/backend/backend/copilot/parallel_tool_calls_test.py
@@ -1,272 +0,0 @@
-"""Tests for parallel tool call execution in CoPilot.
-
-These tests mock _yield_tool_call to avoid importing the full copilot stack
-which requires Prisma, DB connections, etc.
-"""
-
-import asyncio
-import time
-from typing import Any, cast
-
-import pytest
-
-
-@pytest.mark.asyncio
-async def test_parallel_tool_calls_run_concurrently():
-    """Multiple tool calls should complete in ~max(delays), not sum(delays)."""
-    # Import here to allow module-level mocking if needed
-    from backend.copilot.response_model import (
-        StreamToolInputAvailable,
-        StreamToolOutputAvailable,
-    )
-    from backend.copilot.service import _execute_tool_calls_parallel
-
-    n_tools = 3
-    delay_per_tool = 0.2
-    tool_calls = [
-        {
-            "id": f"call_{i}",
-            "type": "function",
-            "function": {"name": f"tool_{i}", "arguments": "{}"},
-        }
-        for i in range(n_tools)
-    ]
-
-    # Minimal session mock
-    class FakeSession:
-        session_id = "test"
-        user_id = "test"
-
-        def __init__(self):
-            self.messages = []
-
-    original_yield = None
-
-    async def fake_yield(tc_list, idx, sess, lock=None):
-        yield StreamToolInputAvailable(
-            toolCallId=tc_list[idx]["id"],
-            toolName=tc_list[idx]["function"]["name"],
-            input={},
-        )
-        await asyncio.sleep(delay_per_tool)
-        yield StreamToolOutputAvailable(
-            toolCallId=tc_list[idx]["id"],
-            toolName=tc_list[idx]["function"]["name"],
-            output="{}",
-        )
-
-    import backend.copilot.service as svc
-
-    original_yield = svc._yield_tool_call
-    svc._yield_tool_call = fake_yield
-    try:
-        start = time.monotonic()
-        events = []
-        async for event in _execute_tool_calls_parallel(
-            tool_calls, cast(Any, FakeSession())
-        ):
-            events.append(event)
-        elapsed = time.monotonic() - start
-    finally:
-        svc._yield_tool_call = original_yield
-
-    assert len(events) == n_tools * 2
-    # Parallel: should take ~delay, not ~n*delay
-    assert elapsed < delay_per_tool * (
-        n_tools - 0.5
-    ), f"Took {elapsed:.2f}s, expected parallel (~{delay_per_tool}s)"
-
-
-@pytest.mark.asyncio
-async def test_single_tool_call_works():
-    """Single tool call should work identically."""
-    from backend.copilot.response_model import (
-        StreamToolInputAvailable,
-        StreamToolOutputAvailable,
-    )
-    from backend.copilot.service import _execute_tool_calls_parallel
-
-    tool_calls = [
-        {
-            "id": "call_0",
-            "type": "function",
-            "function": {"name": "t", "arguments": "{}"},
-        }
-    ]
-
-    class FakeSession:
-        session_id = "test"
-        user_id = "test"
-
-        def __init__(self):
-            self.messages = []
-
-    async def fake_yield(tc_list, idx, sess, lock=None):
-        yield StreamToolInputAvailable(toolCallId="call_0", toolName="t", input={})
-        yield StreamToolOutputAvailable(toolCallId="call_0", toolName="t", output="{}")
-
-    import backend.copilot.service as svc
-
-    orig = svc._yield_tool_call
-    svc._yield_tool_call = fake_yield
-    try:
-        events = [
-            e
-            async for e in _execute_tool_calls_parallel(
-                tool_calls, cast(Any, FakeSession())
-            )
-        ]
-    finally:
-        svc._yield_tool_call = orig
-
-    assert len(events) == 2
-
-
-@pytest.mark.asyncio
-async def test_retryable_error_propagates():
-    """Retryable errors should be raised after all tools finish."""
-    from backend.copilot.response_model import StreamToolOutputAvailable
-    from backend.copilot.service import _execute_tool_calls_parallel
-
-    tool_calls = [
-        {
-            "id": f"call_{i}",
-            "type": "function",
-            "function": {"name": f"t_{i}", "arguments": "{}"},
-        }
-        for i in range(2)
-    ]
-
-    class FakeSession:
-        session_id = "test"
-        user_id = "test"
-
-        def __init__(self):
-            self.messages = []
-
-    async def fake_yield(tc_list, idx, sess, lock=None):
-        if idx == 1:
-            raise KeyError("bad")
-        from backend.copilot.response_model import StreamToolInputAvailable
-
-        yield StreamToolInputAvailable(
-            toolCallId=tc_list[idx]["id"], toolName="t_0", input={}
-        )
-        await asyncio.sleep(0.05)
-        yield StreamToolOutputAvailable(
-            toolCallId=tc_list[idx]["id"], toolName="t_0", output="{}"
-        )
-
-    import backend.copilot.service as svc
-
-    orig = svc._yield_tool_call
-    svc._yield_tool_call = fake_yield
-    try:
-        events = []
-        with pytest.raises(KeyError):
-            async for event in _execute_tool_calls_parallel(
-                tool_calls, cast(Any, FakeSession())
-            ):
-                events.append(event)
-        # First tool's events should still be yielded
-        assert any(isinstance(e, StreamToolOutputAvailable) for e in events)
-    finally:
-        svc._yield_tool_call = orig
-
-
-@pytest.mark.asyncio
-async def test_session_lock_shared():
-    """All parallel tools should receive the same lock instance."""
-    from backend.copilot.response_model import (
-        StreamToolInputAvailable,
-        StreamToolOutputAvailable,
-    )
-    from backend.copilot.service import _execute_tool_calls_parallel
-
-    tool_calls = [
-        {
-            "id": f"call_{i}",
-            "type": "function",
-            "function": {"name": f"t_{i}", "arguments": "{}"},
-        }
-        for i in range(3)
-    ]
-
-    class FakeSession:
-        session_id = "test"
-        user_id = "test"
-
-        def __init__(self):
-            self.messages = []
-
-    observed_locks = []
-
-    async def fake_yield(tc_list, idx, sess, lock=None):
-        observed_locks.append(lock)
-        yield StreamToolInputAvailable(
-            toolCallId=tc_list[idx]["id"], toolName=f"t_{idx}", input={}
-        )
-        yield StreamToolOutputAvailable(
-            toolCallId=tc_list[idx]["id"], toolName=f"t_{idx}", output="{}"
-        )
-
-    import backend.copilot.service as svc
-
-    orig = svc._yield_tool_call
-    svc._yield_tool_call = fake_yield
-    try:
-        async for _ in _execute_tool_calls_parallel(
-            tool_calls, cast(Any, FakeSession())
-        ):
-            pass
-    finally:
-        svc._yield_tool_call = orig
-
-    assert len(observed_locks) == 3
-    assert observed_locks[0] is observed_locks[1] is observed_locks[2]
-    assert isinstance(observed_locks[0], asyncio.Lock)
-
-
-@pytest.mark.asyncio
-async def test_cancellation_cleans_up():
-    """Generator close should cancel in-flight tasks."""
-    from backend.copilot.response_model import StreamToolInputAvailable
-    from backend.copilot.service import _execute_tool_calls_parallel
-
-    tool_calls = [
-        {
-            "id": f"call_{i}",
-            "type": "function",
-            "function": {"name": f"t_{i}", "arguments": "{}"},
-        }
-        for i in range(2)
-    ]
-
-    class FakeSession:
-        session_id = "test"
-        user_id = "test"
-
-        def __init__(self):
-            self.messages = []
-
-    started = asyncio.Event()
-
-    async def fake_yield(tc_list, idx, sess, lock=None):
-        yield StreamToolInputAvailable(
-            toolCallId=tc_list[idx]["id"], toolName=f"t_{idx}", input={}
-        )
-        started.set()
-        await asyncio.sleep(10)  # simulate long-running
-
-    import backend.copilot.service as svc
-
-    orig = svc._yield_tool_call
-    svc._yield_tool_call = fake_yield
-    try:
-        gen = _execute_tool_calls_parallel(tool_calls, cast(Any, FakeSession()))
-        await gen.__anext__()  # get first event
-        await started.wait()
-        await gen.aclose()  # close generator
-    finally:
-        svc._yield_tool_call = orig
-    # If we get here without hanging, cleanup worked
--- a/autogpt_platform/backend/backend/copilot/service.py
+++ b/autogpt_platform/backend/backend/copilot/service.py
@@ -1232,10 +1232,23 @@ async def _stream_chat_chunks(
                    },
                )

-                # Execute all accumulated tool calls in parallel
-                # Events are yielded as they arrive from each concurrent tool
-                async for event in _execute_tool_calls_parallel(tool_calls, session):
-                    yield event
+                # Yield all accumulated tool calls after the stream is complete
+                # This ensures all tool call arguments have been fully received
+                for idx, tool_call in enumerate(tool_calls):
+                    try:
+                        async for tc in _yield_tool_call(tool_calls, idx, session):
+                            yield tc
+                    except (orjson.JSONDecodeError, KeyError, TypeError) as e:
+                        logger.error(
+                            f"Failed to parse tool call {idx}: {e}",
+                            exc_info=True,
+                            extra={"tool_call": tool_call},
+                        )
+                        yield StreamError(
+                            errorText=f"Invalid tool call arguments for tool {tool_call.get('function', {}).get('name', 'unknown')}: {e}",
+                        )
+                        # Re-raise to trigger retry logic in the parent function
+                        raise

                total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
                logger.info(
@@ -1313,91 +1326,10 @@ async def _stream_chat_chunks(
            return


-async def _with_optional_lock(
-    lock: asyncio.Lock | None,
-    coro_fn: Any,
-) -> Any:
-    """Run *coro_fn()* under *lock* when provided, otherwise run directly."""
-    if lock:
-        async with lock:
-            return await coro_fn()
-    return await coro_fn()
-
-
-async def _execute_tool_calls_parallel(
-    tool_calls: list[dict[str, Any]],
-    session: ChatSession,
-) -> AsyncGenerator[StreamBaseResponse, None]:
-    """Execute all tool calls concurrently, yielding stream events as they arrive.
-
-    Each tool runs as an ``asyncio.Task``, pushing events into a shared queue.
-    A ``session_lock`` serialises session-state mutations (long-running tool
-    bookkeeping, ``run_agent`` counters).
-    """
-    queue: asyncio.Queue[StreamBaseResponse | None] = asyncio.Queue()
-    session_lock = asyncio.Lock()
-    n_tools = len(tool_calls)
-    retryable_errors: list[Exception] = []
-
-    async def _run_tool(idx: int) -> None:
-        tool_name = tool_calls[idx].get("function", {}).get("name", "unknown")
-        tool_call_id = tool_calls[idx].get("id", f"unknown_{idx}")
-        try:
-            async for event in _yield_tool_call(tool_calls, idx, session, session_lock):
-                await queue.put(event)
-        except (orjson.JSONDecodeError, KeyError, TypeError) as e:
-            logger.error(
-                f"Failed to parse tool call {idx} ({tool_name}): {e}",
-                exc_info=True,
-            )
-            retryable_errors.append(e)
-        except Exception as e:
-            # Infrastructure / setup errors — emit an error output so the
-            # client always sees a terminal event and doesn't hang.
-            logger.error(f"Tool call {idx} ({tool_name}) failed: {e}", exc_info=True)
-            await queue.put(
-                StreamToolOutputAvailable(
-                    toolCallId=tool_call_id,
-                    toolName=tool_name,
-                    output=ErrorResponse(
-                        message=f"Tool execution failed: {e!s}",
-                        error=type(e).__name__,
-                        session_id=session.session_id,
-                    ).model_dump_json(),
-                    success=False,
-                )
-            )
-        finally:
-            await queue.put(None)  # sentinel
-
-    tasks = [asyncio.create_task(_run_tool(idx)) for idx in range(n_tools)]
-    try:
-        finished = 0
-        while finished < n_tools:
-            event = await queue.get()
-            if event is None:
-                finished += 1
-            else:
-                yield event
-        if retryable_errors:
-            if len(retryable_errors) > 1:
-                logger.warning(
-                    f"{len(retryable_errors)} tool calls had retryable errors; "
-                    f"re-raising first to trigger retry"
-                )
-            raise retryable_errors[0]
-    finally:
-        for t in tasks:
-            if not t.done():
-                t.cancel()
-        await asyncio.gather(*tasks, return_exceptions=True)
-
-
 async def _yield_tool_call(
    tool_calls: list[dict[str, Any]],
    yield_idx: int,
    session: ChatSession,
-    session_lock: asyncio.Lock | None = None,
 ) -> AsyncGenerator[StreamBaseResponse, None]:
    """
    Yield a tool call and its execution result.
@@ -1495,7 +1427,8 @@ async def _yield_tool_call(
                "check back in a few minutes."
            )

-        # Track appended message for rollback on failure
+        # Track appended messages for rollback on failure
+        assistant_message: ChatMessage | None = None
        pending_message: ChatMessage | None = None

        # Wrap session save and task creation in try-except to release lock on failure
@@ -1510,24 +1443,22 @@ async def _yield_tool_call(
                operation_id=operation_id,
            )

-            # Attach tool_call and save pending result — lock serialises
-            # concurrent session mutations during parallel execution.
-            async def _save_pending() -> None:
-                nonlocal pending_message
-                session.add_tool_call_to_current_turn(tool_calls[yield_idx])
-                pending_message = ChatMessage(
-                    role="tool",
-                    content=OperationPendingResponse(
-                        message=pending_msg,
-                        operation_id=operation_id,
-                        tool_name=tool_name,
-                    ).model_dump_json(),
-                    tool_call_id=tool_call_id,
-                )
-                session.messages.append(pending_message)
-                await upsert_chat_session(session)
+            # Attach the tool_call to the current turn's assistant message
+            # (or create one if this is a tool-only response with no text).
+            session.add_tool_call_to_current_turn(tool_calls[yield_idx])

-            await _with_optional_lock(session_lock, _save_pending)
+            # Then save pending tool result
+            pending_message = ChatMessage(
+                role="tool",
+                content=OperationPendingResponse(
+                    message=pending_msg,
+                    operation_id=operation_id,
+                    tool_name=tool_name,
+                ).model_dump_json(),
+                tool_call_id=tool_call_id,
+            )
+            session.messages.append(pending_message)
+            await upsert_chat_session(session)
            logger.info(
                f"Saved pending operation {operation_id} (task_id={task_id}) "
                f"for tool {tool_name} in session {session.session_id}"
@@ -1551,13 +1482,19 @@ async def _yield_tool_call(
            # Associate the asyncio task with the stream registry task
            await stream_registry.set_task_asyncio_task(task_id, bg_task)
        except Exception as e:
-            # Roll back appended messages — use identity-based removal so
-            # it works even when other parallel tools have appended after us.
-            async def _rollback() -> None:
-                if pending_message and pending_message in session.messages:
-                    session.messages.remove(pending_message)
-
-            await _with_optional_lock(session_lock, _rollback)
+            # Roll back appended messages to prevent data corruption on subsequent saves
+            if (
+                pending_message
+                and session.messages
+                and session.messages[-1] == pending_message
+            ):
+                session.messages.pop()
+            if (
+                assistant_message
+                and session.messages
+                and session.messages[-1] == assistant_message
+            ):
+                session.messages.pop()

            # Release the Redis lock since the background task won't be spawned
            await _mark_operation_completed(tool_call_id)
--- a/autogpt_platform/backend/backend/util/sandbox_files.py
+++ b/autogpt_platform/backend/backend/util/sandbox_files.py
@@ -74,8 +74,50 @@ TEXT_EXTENSIONS = {
    ".tex",
    ".csv",
    ".log",
+    ".svg",  # SVG is XML-based text
 }

+# Binary file extensions we explicitly support extracting
+BINARY_EXTENSIONS = {
+    # Images
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".webp",
+    ".ico",
+    ".bmp",
+    ".tiff",
+    ".tif",
+    # Documents
+    ".pdf",
+    # Archives
+    ".zip",
+    ".tar",
+    ".gz",
+    ".7z",
+    # Audio
+    ".mp3",
+    ".wav",
+    ".ogg",
+    ".flac",
+    # Video
+    ".mp4",
+    ".webm",
+    ".mov",
+    ".avi",
+    # Fonts
+    ".woff",
+    ".woff2",
+    ".ttf",
+    ".otf",
+    ".eot",
+}
+
+# Maximum size of any extracted file, text or binary (50MB)
+# Prevents OOM from accidentally extracting huge files
+MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
+

 class SandboxFileOutput(BaseModel):
    """A file extracted from a sandbox and optionally stored in workspace."""
@@ -120,7 +162,8 @@ async def extract_sandbox_files(
        sandbox: The E2B sandbox instance
        working_directory: Directory to search for files
        since_timestamp: ISO timestamp - only return files modified after this time
-        text_only: If True, only extract text files (default). If False, extract all files.
+        text_only: If True, only extract text files. If False, also extract
+                   supported binary files (images, PDFs, etc.).

    Returns:
        List of ExtractedFile objects with path, content, and metadata
@@ -149,15 +192,48 @@ async def extract_sandbox_files(
            if not file_path:
                continue

-            # Check if it's a text file
-            is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)
+            # Check file type (case-insensitive for extensions)
+            file_path_lower = file_path.lower()
+            is_text = any(
+                file_path_lower.endswith(ext.lower()) for ext in TEXT_EXTENSIONS
+            )
+            is_binary = any(
+                file_path_lower.endswith(ext.lower()) for ext in BINARY_EXTENSIONS
+            )

-            # Skip non-text files if text_only mode
+            # Skip files with unrecognized extensions
+            if not is_text and not is_binary:
+                continue
+
+            # In text_only mode, skip binary files
            if text_only and not is_text:
                continue

            try:
-                # Read file content as bytes
+                # Check file size before reading to prevent OOM
+                stat_result = await sandbox.commands.run(
+                    f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
+                )
+                if stat_result.exit_code != 0 or not stat_result.stdout:
+                    logger.debug(f"Skipping {file_path}: could not determine file size")
+                    continue
+
+                try:
+                    file_size = int(stat_result.stdout.strip())
+                except ValueError:
+                    logger.debug(
+                        f"Skipping {file_path}: unexpected stat output "
+                        f"{stat_result.stdout.strip()!r}"
+                    )
+                    continue
+
+                if file_size > MAX_BINARY_FILE_SIZE:
+                    logger.info(
+                        f"Skipping {file_path}: size {file_size} bytes "
+                        f"exceeds limit {MAX_BINARY_FILE_SIZE}"
+                    )
+                    continue
+
                content = await sandbox.files.read(file_path, format="bytes")
                if isinstance(content, str):
                    content = content.encode("utf-8")
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts
@@ -1,6 +1,5 @@
 import {
  getGetV2ListSessionsQueryKey,
-  postV2CancelSessionTask,
  useDeleteV2DeleteSession,
  useGetV2ListSessions,
 } from "@/app/api/__generated__/endpoints/chat/chat";
@@ -9,7 +8,6 @@ import { useBreakpoint } from "@/lib/hooks/useBreakpoint";
 import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
 import { useChat } from "@ai-sdk/react";
 import { useQueryClient } from "@tanstack/react-query";
-import type { UIMessage } from "ai";
 import { DefaultChatTransport } from "ai";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { useChatSession } from "./useChatSession";
@@ -17,24 +15,6 @@ import { useLongRunningToolPolling } from "./hooks/useLongRunningToolPolling";

 const STREAM_START_TIMEOUT_MS = 12_000;

-/** Mark any in-progress tool parts as completed/errored so spinners stop. */
-function resolveInProgressTools(
-  messages: UIMessage[],
-  outcome: "completed" | "cancelled",
-): UIMessage[] {
-  return messages.map((msg) => ({
-    ...msg,
-    parts: msg.parts.map((part) =>
-      "state" in part &&
-      (part.state === "input-streaming" || part.state === "input-available")
-        ? outcome === "cancelled"
-          ? { ...part, state: "output-error" as const, errorText: "Cancelled" }
-          : { ...part, state: "output-available" as const, output: "" }
-        : part,
-    ),
-  }));
-}
-
 export function useCopilotPage() {
  const { isUserLoading, isLoggedIn } = useSupabase();
  const [isDrawerOpen, setIsDrawerOpen] = useState(false);
@@ -115,7 +95,7 @@ export function useCopilotPage() {
  const {
    messages,
    sendMessage,
-    stop: sdkStop,
+    stop,
    status,
    error,
    setMessages,
@@ -128,36 +108,6 @@ export function useCopilotPage() {
    // call resumeStream() manually after hydration + active_stream detection.
  });

-  // Wrap AI SDK's stop() to also cancel the backend executor task.
-  // sdkStop() aborts the SSE fetch instantly (UI feedback), then we fire
-  // the cancel API to actually stop the executor and wait for confirmation.
-  async function stop() {
-    sdkStop();
-    setMessages((prev) => resolveInProgressTools(prev, "cancelled"));
-
-    if (!sessionId) return;
-    try {
-      const res = await postV2CancelSessionTask(sessionId);
-      if (
-        res.status === 200 &&
-        "reason" in res.data &&
-        res.data.reason === "cancel_published_not_confirmed"
-      ) {
-        toast({
-          title: "Stop may take a moment",
-          description:
-            "The cancel was sent but not yet confirmed. The task should stop shortly.",
-        });
-      }
-    } catch {
-      toast({
-        title: "Could not stop the task",
-        description: "The task may still be running in the background.",
-        variant: "destructive",
-      });
-    }
-  }
-
  // Abort the stream if the backend doesn't start sending data within 12s.
  const stopRef = useRef(stop);
  stopRef.current = stop;
@@ -202,18 +152,6 @@ export function useCopilotPage() {
    resumeStream();
  }, [hasActiveStream, sessionId, hydratedMessages, status, resumeStream]);

-  // When the stream finishes, resolve any tool parts still showing spinners.
-  // This can happen if the backend didn't emit StreamToolOutputAvailable for
-  // a tool call before sending StreamFinish (e.g. SDK built-in tools).
-  const prevStatusRef = useRef(status);
-  useEffect(() => {
-    const prev = prevStatusRef.current;
-    prevStatusRef.current = status;
-    if (prev === "streaming" && status === "ready") {
-      setMessages((msgs) => resolveInProgressTools(msgs, "completed"));
-    }
-  }, [status, setMessages]);
-
  // Poll session endpoint when a long-running tool (create_agent, edit_agent)
  // is in progress. When the backend completes, the session data will contain
  // the final tool output — this hook detects the change and updates messages.
--- a/autogpt_platform/frontend/src/app/api/openapi.json
+++ b/autogpt_platform/frontend/src/app/api/openapi.json
@@ -1263,44 +1263,6 @@
        }
      }
    },
-    "/api/chat/sessions/{session_id}/cancel": {
-      "post": {
-        "tags": ["v2", "chat", "chat"],
-        "summary": "Cancel Session Task",
-        "description": "Cancel the active streaming task for a session.\n\nPublishes a cancel event to the executor via RabbitMQ FANOUT, then\npolls Redis until the task status flips from ``running`` or a timeout\n(5 s) is reached.  Returns only after the cancellation is confirmed.",
-        "operationId": "postV2CancelSessionTask",
-        "security": [{ "HTTPBearerJWT": [] }],
-        "parameters": [
-          {
-            "name": "session_id",
-            "in": "path",
-            "required": true,
-            "schema": { "type": "string", "title": "Session Id" }
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "Successful Response",
-            "content": {
-              "application/json": {
-                "schema": { "$ref": "#/components/schemas/CancelTaskResponse" }
-              }
-            }
-          },
-          "401": {
-            "$ref": "#/components/responses/HTTP401NotAuthenticatedError"
-          },
-          "422": {
-            "description": "Validation Error",
-            "content": {
-              "application/json": {
-                "schema": { "$ref": "#/components/schemas/HTTPValidationError" }
-              }
-            }
-          }
-        }
-      }
-    },
    "/api/chat/sessions/{session_id}/stream": {
      "get": {
        "tags": ["v2", "chat", "chat"],
@@ -7575,23 +7537,6 @@
        "required": ["file"],
        "title": "Body_postV2Upload submission media"
      },
-      "CancelTaskResponse": {
-        "properties": {
-          "cancelled": { "type": "boolean", "title": "Cancelled" },
-          "task_id": {
-            "anyOf": [{ "type": "string" }, { "type": "null" }],
-            "title": "Task Id"
-          },
-          "reason": {
-            "anyOf": [{ "type": "string" }, { "type": "null" }],
-            "title": "Reason"
-          }
-        },
-        "type": "object",
-        "required": ["cancelled"],
-        "title": "CancelTaskResponse",
-        "description": "Response model for the cancel task endpoint."
-      },
      "ChangelogEntry": {
        "properties": {
          "version": { "type": "string", "title": "Version" },
--- a/docs/integrations/block-integrations/claude_code.md
+++ b/docs/integrations/block-integrations/claude_code.md
@@ -16,7 +16,7 @@ When activated, the block:
   - Install dependencies (npm, pip, etc.)
   - Run terminal commands
   - Build and test applications
-5. Extracts all text files created/modified during execution
+5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
 | Output | Description |
 |--------|-------------|
 | Response | The output/response from Claude Code execution |
-| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields |
+| Files | List of files (text and binary) created/modified during execution. Includes images, PDFs, and other supported formats. Each file has path, relative_path, name, content, and workspace_ref fields. Binary files are stored in the workspace and are accessible via workspace_ref |
 | Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
 | Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
 | Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
--- a/docs/integrations/block-integrations/llm.md
+++ b/docs/integrations/block-integrations/llm.md
@@ -535,7 +535,7 @@ When activated, the block:
 2. Installs the latest version of Claude Code in the sandbox
 3. Optionally runs setup commands to prepare the environment
 4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
-5. Extracts all text files created/modified during execution
+5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
 |--------|-------------|------|
 | error | Error message if execution failed | str |
 | response | The output/response from Claude Code execution | str |
-| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
+| files | List of files created/modified by Claude Code during this execution. Includes text files and binary files (images, PDFs, etc.). Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI for workspace storage. For binary files, content contains a placeholder; use workspace_ref to access the file. | List[SandboxFileOutput] |
 | conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
 | session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
 | sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
Author	SHA1	Message	Date
Bently	7bc08672fa	Merge branch 'dev' into fix/claude-code-binary-files-v2	2026-02-19 17:48:58 +00:00
Bentlybro	e8b8cad97a	fix: apply size check to text files too (OOM protection)	2026-02-17 14:11:44 +00:00
Bentlybro	be35c626ad	fix: address review comments - Remove redundant inline comment on text_only param - Simplify file filtering logic per review suggestion	2026-02-17 14:03:55 +00:00
Bentlybro	719c4ee1d1	fix: add explicit ValueError guard for stat output parsing	2026-02-16 14:46:06 +00:00
Bentlybro	411c399e03	style: fix formatting and sync docs - Fix Black formatting for is_text/is_binary checks - Update llm.md to reflect binary file support in Claude Code block	2026-02-16 14:40:53 +00:00
Bentlybro	6ac011e36c	fix: normalize extension case in sandbox file extraction Fixes bug where 'Dockerfile' in TEXT_EXTENSIONS wouldn't match after lowercasing file_path because the extension itself wasn't lowercased.	2026-02-16 14:18:25 +00:00
Bentlybro	5e554526e2	fix(backend): Extract binary files from ClaudeCodeBlock sandbox Enables binary file extraction (images, PDFs, etc.) for the Claude Code block by setting text_only=False in extract_and_store_sandbox_files. Changes: - sandbox_files.py: Add BINARY_EXTENSIONS set with supported formats - sandbox_files.py: Add MAX_BINARY_FILE_SIZE (50MB) limit to prevent OOM - sandbox_files.py: Add size check before reading binary files - sandbox_files.py: Add .svg to TEXT_EXTENSIONS (XML-based) - sandbox_files.py: Make extension matching case-insensitive - claude_code.py: Enable binary file extraction (text_only=False) - claude_code.py: Update output description to mention binary support - claude_code.md: Update docs to reflect binary file support Binary files are stored via store_media_file which handles: - Virus scanning via scan_content_safe() - Workspace storage (returns workspace:// URI in CoPilot) - Data URI fallback for graph execution Closes SECRT-1897	2026-02-16 14:10:05 +00:00