fix(backend/copilot): strip CLI session file to prevent auto-compaction context loss

The Claude Code CLI auto-compacts its native session JSONL when the context
approaches the model's token limit (~200K for Sonnet).  After compaction the
detailed conversation history is replaced by a ~27K-token summary, causing
the silent context loss users see as memory failures in long sessions.

Root cause identified from production logs for session 93ecf7c9:
- T6 CLI session: 233KB / ~207K tokens (near Sonnet limit)
- T7 CLI compacted session -> ~167KB / ~47K tokens (PreCompact hook missed)
- T12 second compaction -> ~176KB / ~27K tokens (just system prompt + summary)
- T14-T21: cache_read=26714 constantly -- only system prompt visible to Claude

The same stripping we already apply to our transcript (stale thinking blocks,
progress/metadata entries) now also runs on the CLI native session file.
Because an unstripped session is roughly 2x the size of the stripped transcript,
it routinely hits the compaction threshold within 6-10 turns of a heavy
Opus/thinking session.
After stripping:
- same-pod turns reuse the stripped local file (no compaction trigger)
- cross-pod turns restore the stripped GCS file (same benefit)
This commit is contained in:
Zamil Majdy
2026-04-15 23:18:59 +07:00
parent 4efa1c4310
commit df205b5444
2 changed files with 223 additions and 1 deletions

View File

@@ -716,7 +716,7 @@ async def upload_cli_session(
return
try:
content = Path(real_path).read_bytes()
raw_bytes = Path(real_path).read_bytes()
except FileNotFoundError:
logger.debug(
"%s CLI session file not found, skipping upload: %s",
@@ -728,6 +728,32 @@ async def upload_cli_session(
logger.warning("%s Failed to read CLI session file: %s", log_prefix, e)
return
# Strip stale thinking blocks and metadata entries (progress, file-history-snapshot,
# queue-operation) from the CLI session before writing it back locally and uploading
# to GCS. Thinking blocks from non-last assistant turns are not needed for --resume
# but can be massive (tens of thousands of tokens each), causing the CLI to auto-compact
# its session when the context window fills up. Stripping keeps the session well below
# the ~200K-token compaction threshold and prevents silent context loss.
try:
raw_text = raw_bytes.decode("utf-8")
stripped_text = strip_for_upload(raw_text)
stripped_bytes = stripped_text.encode("utf-8")
if len(stripped_bytes) < len(raw_bytes):
# Write the stripped version back locally so same-pod turns also benefit.
Path(real_path).write_bytes(stripped_bytes)
logger.info(
"%s Stripped CLI session file: %dB → %dB",
log_prefix,
len(raw_bytes),
len(stripped_bytes),
)
content = stripped_bytes
except Exception as e:
logger.warning(
"%s Failed to strip CLI session file, uploading raw: %s", log_prefix, e
)
content = raw_bytes
storage = await get_workspace_storage()
wid, fid, fname = _cli_session_storage_path_parts(user_id, session_id)
try:

View File

@@ -918,6 +918,202 @@ class TestUploadCliSession:
mock_storage.store.assert_not_called()
def test_strips_session_before_upload_and_writes_back(self, tmp_path):
    """Strippable entries (progress, thinking blocks) are removed before upload.

    The stripped content is written back to disk (so same-pod turns benefit)
    and the smaller bytes are uploaded to GCS.
    """
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    session_id = "12345678-0000-0000-0000-000000000010"
    sdk_cwd = str(tmp_path)
    projects_base = str(tmp_path)

    # The CLI encodes the session cwd into the directory name by replacing
    # every non-alphanumeric character with "-"; mirror that here.
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # One strippable progress entry followed by a real user/assistant
    # exchange that must survive stripping.
    entries = [
        {
            "type": "progress",
            "uuid": "p1",
            "parentUuid": "u1",
            "data": {"type": "bash_progress", "stdout": "running..."},
        },
        {
            "type": "user",
            "uuid": "u1",
            "message": {"role": "user", "content": "hello"},
        },
        {
            "type": "assistant",
            "uuid": "a1",
            "parentUuid": "u1",
            "message": {"role": "assistant", "content": "world"},
        },
    ]
    raw_bytes = "".join(json.dumps(e) + "\n" for e in entries).encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()
    with (
        patch(
            "backend.copilot.transcript._projects_base",
            return_value=projects_base,
        ),
        patch(
            "backend.copilot.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ),
    ):
        asyncio.run(
            upload_cli_session(
                user_id="user-1",
                session_id=session_id,
                sdk_cwd=sdk_cwd,
            )
        )

    # The uploaded payload must no longer contain the progress entry,
    # while the real conversation entries remain.
    mock_storage.store.assert_called_once()
    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    types_seen = [
        json.loads(line).get("type")
        for line in stored_content.decode("utf-8").strip().split("\n")
    ]
    assert "progress" not in types_seen
    assert "user" in types_seen
    assert "assistant" in types_seen
    # Stripping must actually shrink the payload.
    assert len(stored_content) < len(raw_bytes)
    # And the identical stripped bytes must have been written back to disk.
    assert session_file.read_bytes() == stored_content
def test_strips_stale_thinking_blocks_before_upload(self, tmp_path):
    """Thinking blocks in non-last assistant turns are stripped to reduce size."""
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    session_id = "12345678-0000-0000-0000-000000000011"
    sdk_cwd = str(tmp_path)
    projects_base = str(tmp_path)

    # Mirror the CLI's cwd-to-directory-name encoding.
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # Two turns: the first assistant reply carries a large (stale) thinking
    # block; the second — the last turn — has none and must pass through
    # untouched.
    turns = [
        {
            "type": "user",
            "uuid": "u1",
            "message": {"role": "user", "content": "q1"},
        },
        {
            "type": "assistant",
            "uuid": "a1",
            "parentUuid": "u1",
            "message": {
                "id": "msg_a1",
                "role": "assistant",
                "content": [
                    {"type": "thinking", "thinking": "A" * 5000},
                    {"type": "text", "text": "answer1"},
                ],
            },
        },
        {
            "type": "user",
            "uuid": "u2",
            "parentUuid": "a1",
            "message": {"role": "user", "content": "q2"},
        },
        {
            "type": "assistant",
            "uuid": "a2",
            "parentUuid": "u2",
            "message": {
                "id": "msg_a2",
                "role": "assistant",
                "content": [{"type": "text", "text": "answer2"}],
            },
        },
    ]
    raw_bytes = "".join(json.dumps(t) + "\n" for t in turns).encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()
    with (
        patch(
            "backend.copilot.transcript._projects_base",
            return_value=projects_base,
        ),
        patch(
            "backend.copilot.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ),
    ):
        asyncio.run(
            upload_cli_session(
                user_id="user-1",
                session_id=session_id,
                sdk_cwd=sdk_cwd,
            )
        )

    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    stored_lines = stored_content.decode("utf-8").strip().split("\n")

    # a1 is not the last assistant turn: it loses its thinking block but
    # keeps the text block.
    a1_content = json.loads(stored_lines[1])["message"]["content"]
    assert all(
        block["type"] != "thinking" for block in a1_content
    ), "stale thinking block should be stripped from a1"
    assert any(
        block["type"] == "text" for block in a1_content
    ), "text block should be kept in a1"

    # a2 is the last assistant turn and must be unchanged.
    a2_stored = json.loads(stored_lines[3])
    assert a2_stored["message"]["content"] == [{"type": "text", "text": "answer2"}]

    # Overall payload shrinks.
    assert len(stored_content) < len(raw_bytes)
class TestRestoreCliSession:
def test_returns_false_when_file_not_found_in_storage(self):