fix(copilot): remove redundant success-path transcript upload

The success path always uploaded the resume file (old downloaded data), then the finally block overwrote with the stop hook (new turn data). With always-upload, this caused the smaller stop hook to overwrite larger (but stale) data from the resume file. Remove the success path upload — the finally block handles it correctly by preferring stop hook content and falling back to the resume file when empty.
fix(copilot): always upload transcript instead of size-based skip
2026-03-17 03:00:27 -04:00 · 2026-03-06 12:45:12 +07:00 · 2026-03-06 02:16:36 +07:00
2 changed files with 19 additions and 72 deletions
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -1408,46 +1408,11 @@ async def stream_chat_completion_sdk(
            ) and not has_appended_assistant:
                session.messages.append(assistant_response)
-        # --- Upload transcript for next-turn --resume ---
+        # Transcript upload is handled in the finally block below — it
-        # After async with the SDK task group has exited, so the Stop
+        # correctly prefers the stop hook content (new turn data) over the
-        # hook has already fired and the CLI has been SIGTERMed.  The
+        # resume file (old downloaded data).  Uploading here would write
-        # CLI uses appendFileSync, so all writes are safely on disk.
+        # stale data that the finally block then overwrites with potentially
-        if config.claude_agent_use_resume and user_id:
+        # smaller (but newer) stop hook content.
            # With --resume the CLI appends to the resume file (most
            # complete).  Otherwise use the Stop hook path.
            if use_resume and resume_file:
                raw_transcript = read_transcript_file(resume_file)
                logger.debug("[SDK] Transcript source: resume file")
            elif captured_transcript.path:
                raw_transcript = read_transcript_file(captured_transcript.path)
                logger.debug(
                    "[SDK] Transcript source: stop hook (%s), read result: %s",
                    captured_transcript.path,
                    f"{len(raw_transcript)}B" if raw_transcript else "None",
                )
            else:
                raw_transcript = None
            if not raw_transcript:
                logger.debug(
                    "[SDK] No usable transcript — CLI file had no "
                    "conversation entries (expected for first turn "
                    "without --resume)"
                )
            if raw_transcript:
                # Shield the upload from generator cancellation so a
                # client disconnect / page refresh doesn't lose the
                # transcript.  The upload must finish even if the SSE
                # connection is torn down.
                await asyncio.shield(
                    _try_upload_transcript(
                        user_id,
                        session_id,
                        raw_transcript,
                        message_count=len(session.messages),
                    )
                )
        logger.info(
            "[SDK] [%s] Stream completed successfully with %d messages",
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript.py
@@ -331,10 +331,10 @@ async def upload_transcript(
 ) -> None:
    """Strip progress entries and upload transcript to bucket storage.
-    Safety: only overwrites when the new (stripped) transcript is larger than
+    The executor holds a cluster lock per session, so concurrent uploads for
-    what is already stored.  Since JSONL is append-only, the latest transcript
+    the same session cannot happen.  We always overwrite — with ``--resume``
-    is always the longest.  This prevents a slow/stale background task from
+    the CLI may compact old tool results, so neither byte size nor line count
-    clobbering a newer upload from a concurrent turn.
+    is a reliable proxy for "newer".
    Args:
        message_count: ``len(session.messages)`` at upload time — used by
@@ -353,33 +353,16 @@ async def upload_transcript(
    storage = await get_workspace_storage()
    wid, fid, fname = _storage_path_parts(user_id, session_id)
    encoded = stripped.encode("utf-8")
    new_size = len(encoded)
-    # Check existing transcript size to avoid overwriting newer with older
+    await storage.store(
-    path = _build_storage_path(user_id, session_id, storage)
+        workspace_id=wid,
-    content_skipped = False
+        file_id=fid,
-    try:
+        filename=fname,
-        existing = await storage.retrieve(path)
+        content=encoded,
-        if len(existing) >= new_size:
+    )
            logger.info(
                f"[Transcript] Skipping content upload — existing ({len(existing)}B) "
                f">= new ({new_size}B) for session {session_id}"
            )
            content_skipped = True
    except (FileNotFoundError, Exception):
        pass  # No existing transcript or retrieval error — proceed with upload
-    if not content_skipped:
+    # Update metadata so message_count stays current.  The gap-fill logic
-        await storage.store(
+    # in _build_query_message relies on it to avoid re-compressing messages.
            workspace_id=wid,
            file_id=fid,
            filename=fname,
            content=encoded,
        )
    # Always update metadata (even when content is skipped) so message_count
    # stays current.  The gap-fill logic in _build_query_message relies on
    # message_count to avoid re-compressing the same messages every turn.
    try:
        meta = {"message_count": message_count, "uploaded_at": time.time()}
        mwid, mfid, mfname = _meta_storage_path_parts(user_id, session_id)
@@ -393,9 +376,8 @@ async def upload_transcript(
        logger.warning(f"[Transcript] Failed to write metadata for {session_id}: {e}")
    logger.info(
-        f"[Transcript] Uploaded {new_size}B "
+        f"[Transcript] Uploaded {len(encoded)}B "
-        f"(stripped from {len(content)}B, msg_count={message_count}, "
+        f"(stripped from {len(content)}B, msg_count={message_count}) "
        f"content_skipped={content_skipped}) "
        f"for session {session_id}"
    )