mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
fix(backend/copilot): strip CLI session file to prevent auto-compaction context loss
The Claude Code CLI auto-compacts its native session JSONL when the context approaches the model's token limit (~200K for Sonnet). After compaction, the detailed conversation history is replaced by a ~27K-token summary, causing the silent context loss that users see as memory failures in long sessions.

Root cause identified from production logs for session 93ecf7c9:
- T6: CLI session at 233KB / ~207K tokens (near the Sonnet limit)
- T7: CLI compacted the session -> ~167KB / ~47K tokens (PreCompact hook missed)
- T12: second compaction -> ~176KB / ~27K tokens (just system prompt + summary)
- T14-T21: cache_read=26714 constantly -- only the system prompt is visible to Claude

The same stripping we already apply to our transcript (stale thinking blocks, progress/metadata entries) now also runs on the CLI native session file. At ~2x the size of the stripped transcript, unstripped sessions routinely hit the compaction threshold within 6-10 turns of a heavy Opus/thinking session.

After stripping:
- same-pod turns reuse the stripped local file (no compaction trigger)
- cross-pod turns restore the stripped GCS file (same benefit)
This commit is contained in:
@@ -716,7 +716,7 @@ async def upload_cli_session(
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content = Path(real_path).read_bytes()
|
raw_bytes = Path(real_path).read_bytes()
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"%s CLI session file not found, skipping upload: %s",
|
"%s CLI session file not found, skipping upload: %s",
|
||||||
@@ -728,6 +728,32 @@ async def upload_cli_session(
|
|||||||
logger.warning("%s Failed to read CLI session file: %s", log_prefix, e)
|
logger.warning("%s Failed to read CLI session file: %s", log_prefix, e)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Strip stale thinking blocks and metadata entries (progress, file-history-snapshot,
|
||||||
|
# queue-operation) from the CLI session before writing it back locally and uploading
|
||||||
|
# to GCS. Thinking blocks from non-last assistant turns are not needed for --resume
|
||||||
|
# but can be massive (tens of thousands of tokens each), causing the CLI to auto-compact
|
||||||
|
# its session when the context window fills up. Stripping keeps the session well below
|
||||||
|
# the ~200K-token compaction threshold and prevents silent context loss.
|
||||||
|
try:
|
||||||
|
raw_text = raw_bytes.decode("utf-8")
|
||||||
|
stripped_text = strip_for_upload(raw_text)
|
||||||
|
stripped_bytes = stripped_text.encode("utf-8")
|
||||||
|
if len(stripped_bytes) < len(raw_bytes):
|
||||||
|
# Write the stripped version back locally so same-pod turns also benefit.
|
||||||
|
Path(real_path).write_bytes(stripped_bytes)
|
||||||
|
logger.info(
|
||||||
|
"%s Stripped CLI session file: %dB → %dB",
|
||||||
|
log_prefix,
|
||||||
|
len(raw_bytes),
|
||||||
|
len(stripped_bytes),
|
||||||
|
)
|
||||||
|
content = stripped_bytes
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"%s Failed to strip CLI session file, uploading raw: %s", log_prefix, e
|
||||||
|
)
|
||||||
|
content = raw_bytes
|
||||||
|
|
||||||
storage = await get_workspace_storage()
|
storage = await get_workspace_storage()
|
||||||
wid, fid, fname = _cli_session_storage_path_parts(user_id, session_id)
|
wid, fid, fname = _cli_session_storage_path_parts(user_id, session_id)
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -918,6 +918,202 @@ class TestUploadCliSession:
|
|||||||
|
|
||||||
mock_storage.store.assert_not_called()
|
mock_storage.store.assert_not_called()
|
||||||
|
|
||||||
|
def test_strips_session_before_upload_and_writes_back(self, tmp_path):
    """Upload sends a stripped session and rewrites the local file to match.

    A three-line session (one ``progress`` entry, one user message, one
    assistant message) is written to disk. After ``upload_cli_session``
    runs, the bytes passed to ``storage.store`` must contain no
    ``progress`` entry, must be smaller than the original bytes, and must
    equal what is now stored in the local session file.
    """
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    session_id = "12345678-0000-0000-0000-000000000010"
    sdk_cwd = str(tmp_path)
    projects_base = str(tmp_path)

    # Session file location: <projects_base>/<cwd with non-alphanumerics
    # replaced by "-">/<sanitized session id>.jsonl
    # NOTE(review): presumably mirrors how upload_cli_session resolves the
    # path -- confirm against the implementation.
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # One strippable progress entry followed by a real user/assistant exchange.
    session_entries = [
        {
            "type": "progress",
            "uuid": "p1",
            "parentUuid": "u1",
            "data": {"type": "bash_progress", "stdout": "running..."},
        },
        {
            "type": "user",
            "uuid": "u1",
            "message": {"role": "user", "content": "hello"},
        },
        {
            "type": "assistant",
            "uuid": "a1",
            "parentUuid": "u1",
            "message": {"role": "assistant", "content": "world"},
        },
    ]
    raw_content = "".join(json.dumps(entry) + "\n" for entry in session_entries)
    raw_bytes = raw_content.encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()
    projects_base_patch = patch(
        "backend.copilot.transcript._projects_base",
        return_value=projects_base,
    )
    storage_patch = patch(
        "backend.copilot.transcript.get_workspace_storage",
        new_callable=AsyncMock,
        return_value=mock_storage,
    )

    with projects_base_patch, storage_patch:
        upload = upload_cli_session(
            user_id="user-1",
            session_id=session_id,
            sdk_cwd=sdk_cwd,
        )
        asyncio.run(upload)

    # Exactly one upload, carrying the stripped bytes (no progress entry).
    mock_storage.store.assert_called_once()
    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    stored_text = stored_content.decode("utf-8")
    stored_types = [
        json.loads(line).get("type") for line in stored_text.strip().split("\n")
    ]
    assert "progress" not in stored_types
    assert "user" in stored_types
    assert "assistant" in stored_types

    # The uploaded payload shrank relative to the raw session.
    assert len(stored_content) < len(raw_bytes)

    # The local file was overwritten with the same stripped payload.
    disk_content = session_file.read_bytes()
    assert disk_content == stored_content
|
||||||
|
|
||||||
|
def test_strips_stale_thinking_blocks_before_upload(self, tmp_path):
    """Thinking blocks in non-last assistant turns are stripped to reduce size.

    Writes a two-turn session in which only the first assistant message
    carries a (large) ``thinking`` block, runs ``upload_cli_session``, and
    checks the content handed to ``storage.store``:

    * the first assistant entry keeps its ``text`` block but has no
      ``thinking`` block,
    * the last assistant entry is unchanged,
    * the uploaded payload is smaller than the original bytes.
    """
    import asyncio
    import json
    import os
    import re
    from unittest.mock import AsyncMock, patch

    from .transcript import _sanitize_id, upload_cli_session

    projects_base = str(tmp_path)
    session_id = "12345678-0000-0000-0000-000000000011"
    sdk_cwd = str(tmp_path)

    # Session file location: <projects_base>/<cwd with non-alphanumerics
    # replaced by "-">/<sanitized session id>.jsonl
    encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    session_dir = tmp_path / encoded_cwd
    session_dir.mkdir(parents=True, exist_ok=True)
    session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"

    # Two turns: first assistant has thinking block (stale), second doesn't.
    u1 = {
        "type": "user",
        "uuid": "u1",
        "message": {"role": "user", "content": "q1"},
    }
    a1_with_thinking = {
        "type": "assistant",
        "uuid": "a1",
        "parentUuid": "u1",
        "message": {
            "id": "msg_a1",
            "role": "assistant",
            "content": [
                # 5000 chars so that stripping visibly shrinks the payload.
                {"type": "thinking", "thinking": "A" * 5000},
                {"type": "text", "text": "answer1"},
            ],
        },
    }
    u2 = {
        "type": "user",
        "uuid": "u2",
        "parentUuid": "a1",
        "message": {"role": "user", "content": "q2"},
    }
    a2_no_thinking = {
        "type": "assistant",
        "uuid": "a2",
        "parentUuid": "u2",
        "message": {
            "id": "msg_a2",
            "role": "assistant",
            "content": [{"type": "text", "text": "answer2"}],
        },
    }
    raw_content = (
        json.dumps(u1)
        + "\n"
        + json.dumps(a1_with_thinking)
        + "\n"
        + json.dumps(u2)
        + "\n"
        + json.dumps(a2_no_thinking)
        + "\n"
    )
    raw_bytes = raw_content.encode("utf-8")
    session_file.write_bytes(raw_bytes)

    mock_storage = AsyncMock()

    with (
        patch(
            "backend.copilot.transcript._projects_base",
            return_value=projects_base,
        ),
        patch(
            "backend.copilot.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ),
    ):
        asyncio.run(
            upload_cli_session(
                user_id="user-1",
                session_id=session_id,
                sdk_cwd=sdk_cwd,
            )
        )

    # Fail with a clear message if the upload was skipped: without this,
    # ``call_args`` would be None and the next line would raise an opaque
    # AttributeError instead of a meaningful assertion failure.
    mock_storage.store.assert_called_once()
    stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
    stored_lines = stored_content.decode("utf-8").strip().split("\n")

    # a1 should have its thinking block stripped (it's not the last assistant turn).
    a1_stored = json.loads(stored_lines[1])
    a1_content = a1_stored["message"]["content"]
    assert all(
        b["type"] != "thinking" for b in a1_content
    ), "stale thinking block should be stripped from a1"
    assert any(
        b["type"] == "text" for b in a1_content
    ), "text block should be kept in a1"

    # a2 (last turn) should be unchanged.
    a2_stored = json.loads(stored_lines[3])
    assert a2_stored["message"]["content"] == [{"type": "text", "text": "answer2"}]

    # Stripped bytes smaller than raw.
    assert len(stored_content) < len(raw_bytes)
|
||||||
|
|
||||||
|
|
||||||
class TestRestoreCliSession:
|
class TestRestoreCliSession:
|
||||||
def test_returns_false_when_file_not_found_in_storage(self):
|
def test_returns_false_when_file_not_found_in_storage(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user