mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
fix(backend/copilot): strip CLI session file to prevent auto-compaction context loss
The Claude Code CLI auto-compacts its native session JSONL when the context approaches the model's token limit (~200K for Sonnet). After compaction the detailed conversation history is replaced by a ~27K-token summary, causing the silent context loss users see as memory failures in long sessions. Root cause identified from production logs for session 93ecf7c9: - T6 CLI session: 233KB / ~207K tokens (near Sonnet limit) - T7 CLI compacted session -> ~167KB / ~47K tokens (PreCompact hook missed) - T12 second compaction -> ~176KB / ~27K tokens (just system prompt + summary) - T14-T21: cache_read=26714 constantly -- only system prompt visible to Claude The same stripping we already apply to our transcript (stale thinking blocks, progress/metadata entries) now also runs on the CLI native session file. At ~2x the size of the stripped transcript, unstripped sessions routinely hit the compaction threshold within 6-10 turns of a heavy Opus/thinking session. After stripping: - same-pod turns reuse the stripped local file (no compaction trigger) - cross-pod turns restore the stripped GCS file (same benefit)
This commit is contained in:
@@ -716,7 +716,7 @@ async def upload_cli_session(
|
||||
return
|
||||
|
||||
try:
|
||||
content = Path(real_path).read_bytes()
|
||||
raw_bytes = Path(real_path).read_bytes()
|
||||
except FileNotFoundError:
|
||||
logger.debug(
|
||||
"%s CLI session file not found, skipping upload: %s",
|
||||
@@ -728,6 +728,32 @@ async def upload_cli_session(
|
||||
logger.warning("%s Failed to read CLI session file: %s", log_prefix, e)
|
||||
return
|
||||
|
||||
# Strip stale thinking blocks and metadata entries (progress, file-history-snapshot,
|
||||
# queue-operation) from the CLI session before writing it back locally and uploading
|
||||
# to GCS. Thinking blocks from non-last assistant turns are not needed for --resume
|
||||
# but can be massive (tens of thousands of tokens each), causing the CLI to auto-compact
|
||||
# its session when the context window fills up. Stripping keeps the session well below
|
||||
# the ~200K-token compaction threshold and prevents silent context loss.
|
||||
try:
|
||||
raw_text = raw_bytes.decode("utf-8")
|
||||
stripped_text = strip_for_upload(raw_text)
|
||||
stripped_bytes = stripped_text.encode("utf-8")
|
||||
if len(stripped_bytes) < len(raw_bytes):
|
||||
# Write the stripped version back locally so same-pod turns also benefit.
|
||||
Path(real_path).write_bytes(stripped_bytes)
|
||||
logger.info(
|
||||
"%s Stripped CLI session file: %dB → %dB",
|
||||
log_prefix,
|
||||
len(raw_bytes),
|
||||
len(stripped_bytes),
|
||||
)
|
||||
content = stripped_bytes
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"%s Failed to strip CLI session file, uploading raw: %s", log_prefix, e
|
||||
)
|
||||
content = raw_bytes
|
||||
|
||||
storage = await get_workspace_storage()
|
||||
wid, fid, fname = _cli_session_storage_path_parts(user_id, session_id)
|
||||
try:
|
||||
|
||||
@@ -918,6 +918,202 @@ class TestUploadCliSession:
|
||||
|
||||
mock_storage.store.assert_not_called()
|
||||
|
||||
def test_strips_session_before_upload_and_writes_back(self, tmp_path):
    """Strippable entries (progress, thinking blocks) are removed before upload.

    The stripped content is written back to disk (so same-pod turns benefit)
    and the smaller bytes are uploaded to GCS.
    """
    # Keep all imports together at the top of the test (the original had a
    # stray `import json` in the middle of the body).
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    projects_base = str(tmp_path)
    session_id = "12345678-0000-0000-0000-000000000010"
    sdk_cwd = str(tmp_path)

    # The CLI encodes the session's cwd into the projects directory name by
    # replacing every non-alphanumeric character with '-'.
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # A CLI session with a progress entry (strippable) and a real assistant message.
    progress_entry = {
        "type": "progress",
        "uuid": "p1",
        "parentUuid": "u1",
        "data": {"type": "bash_progress", "stdout": "running..."},
    }
    user_entry = {
        "type": "user",
        "uuid": "u1",
        "message": {"role": "user", "content": "hello"},
    }
    asst_entry = {
        "type": "assistant",
        "uuid": "a1",
        "parentUuid": "u1",
        "message": {"role": "assistant", "content": "world"},
    }
    raw_content = (
        json.dumps(progress_entry)
        + "\n"
        + json.dumps(user_entry)
        + "\n"
        + json.dumps(asst_entry)
        + "\n"
    )
    raw_bytes = raw_content.encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()

    with (
        patch(
            "backend.copilot.transcript._projects_base",
            return_value=projects_base,
        ),
        patch(
            "backend.copilot.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ),
    ):
        asyncio.run(
            upload_cli_session(
                user_id="user-1",
                session_id=session_id,
                sdk_cwd=sdk_cwd,
            )
        )

    # Upload should have been called with stripped bytes (no progress entry).
    mock_storage.store.assert_called_once()
    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    stored_lines = stored_content.decode("utf-8").strip().split("\n")
    stored_types = [json.loads(line).get("type") for line in stored_lines]
    assert "progress" not in stored_types
    assert "user" in stored_types
    assert "assistant" in stored_types
    # Stripped bytes should be smaller than raw.
    assert len(stored_content) < len(raw_bytes)
    # File on disk should also be the stripped version (same-pod reuse).
    disk_content = session_file.read_bytes()
    assert disk_content == stored_content
|
||||
|
||||
def test_strips_stale_thinking_blocks_before_upload(self, tmp_path):
    """Thinking blocks in non-last assistant turns are stripped to reduce size."""
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    projects_base = str(tmp_path)
    session_id = "12345678-0000-0000-0000-000000000011"
    sdk_cwd = str(tmp_path)

    # Mirror the CLI's cwd-encoding scheme to locate the session directory.
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # Two turns: first assistant has thinking block (stale), second doesn't.
    entries = [
        {
            "type": "user",
            "uuid": "u1",
            "message": {"role": "user", "content": "q1"},
        },
        {
            "type": "assistant",
            "uuid": "a1",
            "parentUuid": "u1",
            "message": {
                "id": "msg_a1",
                "role": "assistant",
                "content": [
                    {"type": "thinking", "thinking": "A" * 5000},
                    {"type": "text", "text": "answer1"},
                ],
            },
        },
        {
            "type": "user",
            "uuid": "u2",
            "parentUuid": "a1",
            "message": {"role": "user", "content": "q2"},
        },
        {
            "type": "assistant",
            "uuid": "a2",
            "parentUuid": "u2",
            "message": {
                "id": "msg_a2",
                "role": "assistant",
                "content": [{"type": "text", "text": "answer2"}],
            },
        },
    ]
    raw_bytes = ("\n".join(json.dumps(e) for e in entries) + "\n").encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()

    with (
        patch(
            "backend.copilot.transcript._projects_base",
            return_value=projects_base,
        ),
        patch(
            "backend.copilot.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ),
    ):
        asyncio.run(
            upload_cli_session(
                user_id="user-1",
                session_id=session_id,
                sdk_cwd=sdk_cwd,
            )
        )

    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    stored_lines = stored_content.decode("utf-8").strip().split("\n")

    # a1 should have its thinking block stripped (it's not the last assistant turn).
    first_assistant_content = json.loads(stored_lines[1])["message"]["content"]
    block_types = [block["type"] for block in first_assistant_content]
    assert "thinking" not in block_types, "stale thinking block should be stripped from a1"
    assert "text" in block_types, "text block should be kept in a1"

    # a2 (last turn) should be unchanged.
    last_assistant = json.loads(stored_lines[3])
    assert last_assistant["message"]["content"] == [{"type": "text", "text": "answer2"}]

    # Stripped bytes smaller than raw.
    assert len(stored_content) < len(raw_bytes)
|
||||
|
||||
|
||||
class TestRestoreCliSession:
|
||||
def test_returns_false_when_file_not_found_in_storage(self):
|
||||
|
||||
Reference in New Issue
Block a user