fix(copilot): remove redundant success-path transcript upload

The success path always uploaded the resume file (old downloaded data), and then the finally block overwrote it with the stop hook content (new turn data). With always-upload, this caused the smaller stop hook content to overwrite the larger (but stale) data from the resume file. Remove the success-path upload — the finally block handles it correctly by preferring the stop hook content and falling back to the resume file when the stop hook content is empty.
fix(copilot): always upload transcript instead of size-based skip
2026-03-17 03:00:27 -04:00 · 2026-03-06 12:45:12 +07:00 · 2026-03-06 02:16:36 +07:00
6 changed files with 19 additions and 146 deletions
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -187,11 +187,9 @@ class ClaudeCodeBlock(Block):
        )
        files: list[SandboxFileOutput] = SchemaField(
            description=(
-                "List of files created/modified by Claude Code during this execution. "
-                "Includes text files and binary files (images, PDFs, etc.). "
+                "List of text files created/modified by Claude Code during this execution. "
                "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
-                "workspace_ref contains a workspace:// URI for workspace storage. "
-                "For binary files, content contains a placeholder; use workspace_ref to access the file."
+                "workspace_ref contains a workspace:// URI if the file was stored to workspace."
            )
        )
        conversation_history: str = SchemaField(
@@ -454,15 +452,13 @@ class ClaudeCodeBlock(Block):
                else:
                    new_conversation_history = turn_entry

-            # Extract files created/modified during this run and store to workspace.
-            # Binary files (images, PDFs, etc.) are stored via store_media_file
-            # which handles virus scanning and workspace storage.
+            # Extract files created/modified during this run and store to workspace
            sandbox_files = await extract_and_store_sandbox_files(
                sandbox=sandbox,
                working_directory=working_directory,
                execution_context=execution_context,
                since_timestamp=start_timestamp,
-                text_only=False,
+                text_only=True,
            )

            return (
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -1408,46 +1408,11 @@ async def stream_chat_completion_sdk(
            ) and not has_appended_assistant:
                session.messages.append(assistant_response)

-        # --- Upload transcript for next-turn --resume ---
-        # After async with the SDK task group has exited, so the Stop
-        # hook has already fired and the CLI has been SIGTERMed.  The
-        # CLI uses appendFileSync, so all writes are safely on disk.
-        if config.claude_agent_use_resume and user_id:
-            # With --resume the CLI appends to the resume file (most
-            # complete).  Otherwise use the Stop hook path.
-            if use_resume and resume_file:
-                raw_transcript = read_transcript_file(resume_file)
-                logger.debug("[SDK] Transcript source: resume file")
-            elif captured_transcript.path:
-                raw_transcript = read_transcript_file(captured_transcript.path)
-                logger.debug(
-                    "[SDK] Transcript source: stop hook (%s), read result: %s",
-                    captured_transcript.path,
-                    f"{len(raw_transcript)}B" if raw_transcript else "None",
-                )
-            else:
-                raw_transcript = None
-
-            if not raw_transcript:
-                logger.debug(
-                    "[SDK] No usable transcript — CLI file had no "
-                    "conversation entries (expected for first turn "
-                    "without --resume)"
-                )
-
-            if raw_transcript:
-                # Shield the upload from generator cancellation so a
-                # client disconnect / page refresh doesn't lose the
-                # transcript.  The upload must finish even if the SSE
-                # connection is torn down.
-                await asyncio.shield(
-                    _try_upload_transcript(
-                        user_id,
-                        session_id,
-                        raw_transcript,
-                        message_count=len(session.messages),
-                    )
-                )
+        # Transcript upload is handled in the finally block below — it
+        # correctly prefers the stop hook content (new turn data) over the
+        # resume file (old downloaded data).  Uploading here would write
+        # stale data that the finally block then overwrites with potentially
+        # smaller (but newer) stop hook content.

        logger.info(
            "[SDK] [%s] Stream completed successfully with %d messages",
--- a/autogpt_platform/backend/backend/util/sandbox_files.py
+++ b/autogpt_platform/backend/backend/util/sandbox_files.py
@@ -74,50 +74,8 @@ TEXT_EXTENSIONS = {
    ".tex",
    ".csv",
    ".log",
-    ".svg",  # SVG is XML-based text
 }

-# Binary file extensions we explicitly support extracting
-BINARY_EXTENSIONS = {
-    # Images
-    ".png",
-    ".jpg",
-    ".jpeg",
-    ".gif",
-    ".webp",
-    ".ico",
-    ".bmp",
-    ".tiff",
-    ".tif",
-    # Documents
-    ".pdf",
-    # Archives
-    ".zip",
-    ".tar",
-    ".gz",
-    ".7z",
-    # Audio
-    ".mp3",
-    ".wav",
-    ".ogg",
-    ".flac",
-    # Video
-    ".mp4",
-    ".webm",
-    ".mov",
-    ".avi",
-    # Fonts
-    ".woff",
-    ".woff2",
-    ".ttf",
-    ".otf",
-    ".eot",
-}
-
-# Maximum file size for binary extraction (50MB)
-# Prevents OOM from accidentally extracting huge files
-MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
-

 class SandboxFileOutput(BaseModel):
    """A file extracted from a sandbox and optionally stored in workspace."""
@@ -162,8 +120,7 @@ async def extract_sandbox_files(
        sandbox: The E2B sandbox instance
        working_directory: Directory to search for files
        since_timestamp: ISO timestamp - only return files modified after this time
-        text_only: If True, only extract text files. If False, also extract
-                   supported binary files (images, PDFs, etc.).
+        text_only: If True, only extract text files (default). If False, extract all files.

    Returns:
        List of ExtractedFile objects with path, content, and metadata
@@ -192,48 +149,15 @@ async def extract_sandbox_files(
            if not file_path:
                continue

-            # Check file type (case-insensitive for extensions)
-            file_path_lower = file_path.lower()
-            is_text = any(
-                file_path_lower.endswith(ext.lower()) for ext in TEXT_EXTENSIONS
-            )
-            is_binary = any(
-                file_path_lower.endswith(ext.lower()) for ext in BINARY_EXTENSIONS
-            )
+            # Check if it's a text file
+            is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)

-            # Skip files with unrecognized extensions
-            if not is_text and not is_binary:
-                continue
-
-            # In text_only mode, skip binary files
+            # Skip non-text files if text_only mode
            if text_only and not is_text:
                continue

            try:
-                # Check file size before reading to prevent OOM
-                stat_result = await sandbox.commands.run(
-                    f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
-                )
-                if stat_result.exit_code != 0 or not stat_result.stdout:
-                    logger.debug(f"Skipping {file_path}: could not determine file size")
-                    continue
-
-                try:
-                    file_size = int(stat_result.stdout.strip())
-                except ValueError:
-                    logger.debug(
-                        f"Skipping {file_path}: unexpected stat output "
-                        f"{stat_result.stdout.strip()!r}"
-                    )
-                    continue
-
-                if file_size > MAX_BINARY_FILE_SIZE:
-                    logger.info(
-                        f"Skipping {file_path}: size {file_size} bytes "
-                        f"exceeds limit {MAX_BINARY_FILE_SIZE}"
-                    )
-                    continue
-
+                # Read file content as bytes
                content = await sandbox.files.read(file_path, format="bytes")
                if isinstance(content, str):
                    content = content.encode("utf-8")
--- a/autogpt_platform/frontend/src/app/(platform)/signup/actions.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/signup/actions.ts
@@ -1,11 +1,10 @@
 "use server";

-import { postV1GetOrCreateUser } from "@/app/api/__generated__/endpoints/auth/auth";
-import { getOnboardingStatus, resolveResponse } from "@/app/api/helpers";
 import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
 import { signupFormSchema } from "@/types/auth";
 import * as Sentry from "@sentry/nextjs";
 import { isWaitlistError, logWaitlistError } from "../../api/auth/utils";
+import { getOnboardingStatus } from "../../api/helpers";

 export async function signup(
  email: string,
@@ -58,17 +57,6 @@ export async function signup(
      await supabase.auth.setSession(data.session);
    }

-    try {
-      await resolveResponse(postV1GetOrCreateUser());
-    } catch (createUserError) {
-      console.error("Error creating user during signup:", createUserError);
-      Sentry.captureException(createUserError);
-      return {
-        success: false,
-        error: "Failed to complete account setup. Please try again.",
-      };
-    }
-
    // Get onboarding status from backend (includes chat flag evaluated for this user)
    const { shouldShowOnboarding } = await getOnboardingStatus();
    const next = shouldShowOnboarding ? "/onboarding" : "/";
--- a/docs/integrations/block-integrations/claude_code.md
+++ b/docs/integrations/block-integrations/claude_code.md
@@ -16,7 +16,7 @@ When activated, the block:
   - Install dependencies (npm, pip, etc.)
   - Run terminal commands
   - Build and test applications
-5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
+5. Extracts all text files created/modified during execution
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
 | Output | Description |
 |--------|-------------|
 | Response | The output/response from Claude Code execution |
-| Files | List of files (text and binary) created/modified during execution. Includes images, PDFs, and other supported formats. Each file has path, relative_path, name, content, and workspace_ref fields. Binary files are stored in workspace and accessible via workspace_ref |
+| Files | List of text files created/modified during execution. Each file has path, relative_path, name, content, and workspace_ref fields |
 | Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
 | Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
 | Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
--- a/docs/integrations/block-integrations/llm.md
+++ b/docs/integrations/block-integrations/llm.md
@@ -535,7 +535,7 @@ When activated, the block:
 2. Installs the latest version of Claude Code in the sandbox
 3. Optionally runs setup commands to prepare the environment
 4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
-5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
+5. Extracts all text files created/modified during execution
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
 |--------|-------------|------|
 | error | Error message if execution failed | str |
 | response | The output/response from Claude Code execution | str |
-| files | List of files created/modified by Claude Code during this execution. Includes text files and binary files (images, PDFs, etc.). Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI for workspace storage. For binary files, content contains a placeholder; use workspace_ref to access the file. | List[SandboxFileOutput] |
+| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
 | conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
 | session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
 | sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |