Compare commits

...

3 Commits

Author SHA1 Message Date
Zamil Majdy
d99b613b1d fix(backend/copilot): always download CLI session from GCS to prevent cross-pod context loss
Removes the same-pod optimisation in restore_cli_session that returned early
when a local session file existed.  In a multi-pod environment the local file
may be from a previous turn that ran on this same pod; using it silently hides
the most-recent turn (which ran on a different pod and uploaded to GCS) from
the model.

Root cause confirmed via GCloud logs on session 93ecf7c9: T14 uploaded 181,805 B
to GCS, but T15 – which ran back on Pod A's stale file – uploaded only 176,635 B,
proving it never saw T14's content.

The fix always overwrites the local file from GCS so --resume always starts from
the authoritative, up-to-date session regardless of which pod serves the request.
2026-04-16 00:57:23 +07:00
Zamil Majdy
429ef36568 fix(backend/copilot): split compaction no-op log messages and add identical-content test
Split the combined `not compacted or compacted == content` guard into two
separate conditions with distinct log messages so logs clearly distinguish
between compact_transcript returning None (failure) and returning the
identical string (no-op). Add test_keeps_original_when_compaction_returns_identical_content
to cover the new branch.
2026-04-16 00:21:30 +07:00
Zamil Majdy
6dc24b3766 fix(backend/copilot): proactively compact CLI session before each turn to prevent silent auto-compaction
Sonnet sessions (no thinking blocks) were losing context when the CLI silently
auto-compacted mid-turn at ~200K tokens, bypassing the PreCompact hook. Add
maybe_compact_cli_session() that runs LLM summarization before the turn starts
whenever the CLI session file exceeds 120KB (~105K tokens). Wires the call
into service.py after restore_cli_session() succeeds.
2026-04-16 00:09:24 +07:00
3 changed files with 348 additions and 20 deletions

View File

@@ -95,6 +95,7 @@ from ..transcript import (
cleanup_stale_project_dirs,
compact_transcript,
download_transcript,
maybe_compact_cli_session,
read_compacted_entries,
restore_cli_session,
upload_cli_session,
@@ -2501,6 +2502,14 @@ async def stream_chat_completion_sdk(
user_id, session_id, sdk_cwd, log_prefix=log_prefix
)
if cli_restored:
# Proactively compact the CLI session if it's large enough
# to risk triggering auto-compaction mid-turn. The CLI's
# silent auto-compact bypasses our PreCompact hook and
# loses context uncontrollably; compacting proactively here
# keeps the session well below the ~200K-token threshold.
await maybe_compact_cli_session(
sdk_cwd, session_id, config.model, log_prefix
)
use_resume = True
resume_file = session_id # CLI --resume expects UUID, not file path
transcript_msg_count = dl.message_count

View File

@@ -689,6 +689,15 @@ def _cli_session_storage_path_parts(
)
# Byte-size threshold for proactive pre-query CLI session compaction.
# Measured from production session 93ecf7c9: 233KB file → 204K tokens
# (≈0.88 bytes/token). 120KB ≈ 105K tokens — roughly half the CLI's
# ~200K auto-compaction threshold. Compacting proactively here prevents
# the CLI from silently auto-compacting mid-turn, which bypasses our
# PreCompact hook and causes uncontrolled context loss.
_PROACTIVE_COMPACT_THRESHOLD_BYTES = 120_000
async def upload_cli_session(
user_id: str,
session_id: str,
@@ -793,17 +802,10 @@ async def restore_cli_session(
)
return False
# If the session file already exists locally (same-pod reuse), use it directly.
# Downloading from storage could overwrite a newer local version when a previous
# turn's upload failed: stored content is stale while the local file already
# contains extended history from that turn.
if Path(real_path).exists():
logger.debug(
"%s CLI session file already exists locally — using it for --resume",
log_prefix,
)
return True
# Always download from GCS, even if a local file exists.
# In a multi-pod load-balanced environment the local file may belong to a
# different (older) turn that ran on this same pod — using it would silently
# restore a stale context and hide the most-recent turn from the model.
storage = await get_workspace_storage()
path = _build_path_from_parts(
_cli_session_storage_path_parts(user_id, session_id), storage
@@ -832,6 +834,103 @@ async def restore_cli_session(
return False
async def maybe_compact_cli_session(
    sdk_cwd: str,
    session_id: str,
    model: str,
    log_prefix: str = "[Transcript]",
) -> bool:
    """Proactively compact the CLI native session if it risks triggering auto-compaction.

    Called after ``restore_cli_session()`` succeeds and before the SDK turn starts.
    When the session file is at least ``_PROACTIVE_COMPACT_THRESHOLD_BYTES``, this
    runs LLM summarization via ``compact_transcript()`` and writes the smaller
    result back to disk so ``--resume`` picks up the compacted context.

    Doing this up front keeps the CLI from silently auto-compacting mid-turn — a
    path that bypasses our PreCompact hook and causes uncontrolled context loss
    for long sessions, including pure-Sonnet sessions where thinking-block
    stripping provides no relief.

    Returns ``True`` only when compaction ran and the file was rewritten.
    """
    resolved = os.path.realpath(_cli_session_path(sdk_cwd, session_id))
    # Defence in depth: never touch a path that escapes the projects sandbox.
    if not resolved.startswith(_projects_base() + os.sep):
        logger.warning(
            "%s CLI session path outside projects base, skipping proactive compaction",
            log_prefix,
        )
        return False

    session_file = Path(resolved)
    try:
        original_bytes = session_file.read_bytes()
    except FileNotFoundError:
        logger.debug(
            "%s CLI session not found, skipping proactive compaction", log_prefix
        )
        return False
    except OSError as e:
        logger.warning(
            "%s Failed to read CLI session for compaction: %s", log_prefix, e
        )
        return False

    size = len(original_bytes)
    if size < _PROACTIVE_COMPACT_THRESHOLD_BYTES:
        logger.debug(
            "%s CLI session %dB < threshold %dB — no proactive compaction",
            log_prefix,
            size,
            _PROACTIVE_COMPACT_THRESHOLD_BYTES,
        )
        return False

    logger.info(
        "%s CLI session %dB >= threshold %dB — running proactive compaction",
        log_prefix,
        size,
        _PROACTIVE_COMPACT_THRESHOLD_BYTES,
    )

    try:
        text = original_bytes.decode("utf-8")
    except UnicodeDecodeError as e:
        logger.warning(
            "%s CLI session is not valid UTF-8, skipping compaction: %s", log_prefix, e
        )
        return False

    summary = await compact_transcript(text, model=model, log_prefix=log_prefix)
    # Two distinct no-op outcomes, logged separately so failures and
    # identical-content results are distinguishable in production logs.
    if not summary:
        logger.warning(
            "%s Proactive compaction failed or returned None — keeping original",
            log_prefix,
        )
        return False
    if summary == text:
        logger.warning(
            "%s Proactive compaction returned identical content — keeping original",
            log_prefix,
        )
        return False

    summary_bytes = summary.encode("utf-8")
    try:
        session_file.write_bytes(summary_bytes)
    except OSError as e:
        logger.warning("%s Failed to write compacted CLI session: %s", log_prefix, e)
        return False

    logger.info(
        "%s Proactively compacted CLI session: %dB → %dB",
        log_prefix,
        size,
        len(summary_bytes),
    )
    return True
async def upload_transcript(
user_id: str,
session_id: str,

View File

@@ -1177,8 +1177,14 @@ class TestRestoreCliSession:
assert result is False
def test_returns_true_when_local_file_already_exists(self, tmp_path):
"""Same-pod reuse: if local file exists, skip storage download and return True."""
def test_gcs_overwrites_stale_local_file(self, tmp_path):
"""Cross-pod staleness fix: GCS content always overwrites any local file.
Previously a same-pod optimisation returned early when a local file existed,
which caused Pod A to silently reuse a stale file from an earlier turn while
the canonical (newer) session lived in GCS — hiding the most-recent turn from
the model. The fix removes that early-return so GCS is always consulted.
"""
import asyncio
import os
import re
@@ -1190,15 +1196,18 @@ class TestRestoreCliSession:
session_id = "12345678-0000-0000-0000-000000000099"
sdk_cwd = str(tmp_path)
# Pre-create the local session file (simulates previous turn on same pod)
# Pre-create a STALE local session file (simulates a previous turn on this pod)
projects_base = os.path.realpath(str(tmp_path))
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", projects_base)
session_dir = Path(projects_base) / encoded_cwd
session_dir.mkdir(parents=True, exist_ok=True)
existing_content = b'{"type":"user"}\n{"type":"assistant"}\n'
(session_dir / f"{session_id}.jsonl").write_bytes(existing_content)
stale_content = b'{"type":"user"}\n{"type":"assistant","stale":true}\n'
(session_dir / f"{session_id}.jsonl").write_bytes(stale_content)
# GCS has the FRESH content from the turn that ran on a different pod
fresh_gcs_content = b'{"type":"user"}\n{"type":"assistant","fresh":true}\n'
mock_storage = AsyncMock()
mock_storage.retrieve.return_value = fresh_gcs_content
with (
patch(
@@ -1220,10 +1229,12 @@ class TestRestoreCliSession:
)
assert result is True
# Storage should NOT have been accessed (local file was used as-is)
mock_storage.retrieve.assert_not_called()
# Local file should be unchanged
assert (session_dir / f"{session_id}.jsonl").read_bytes() == existing_content
# GCS MUST have been consulted — the local file must not be trusted blindly
mock_storage.retrieve.assert_called_once()
# Local file must now contain the fresh GCS content, not the stale version
written = (session_dir / f"{session_id}.jsonl").read_bytes()
assert written == fresh_gcs_content
assert written != stale_content
def test_returns_true_on_success(self, tmp_path):
"""Happy path: storage has the session → file written → returns True."""
@@ -1285,3 +1296,212 @@ class TestRestoreCliSession:
)
assert result is False
class TestMaybeCompactCliSession:
    """Tests for maybe_compact_cli_session() proactive pre-turn compaction."""

    def _make_session_file(self, tmp_path, session_id: str, sdk_cwd: str, content: str):
        """Create the CLI session JSONL file where the transcript code expects it."""
        import os
        import re

        from .transcript import _sanitize_id

        # Mirrors the CLI's project-dir encoding: non-alphanumerics become dashes.
        encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
        session_dir = tmp_path / encoded_cwd
        session_dir.mkdir(parents=True, exist_ok=True)
        session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"
        session_file.write_text(content, encoding="utf-8")
        return session_file

    def _make_large_content(self, threshold: int) -> str:
        """Build a valid JSONL user/assistant transcript just above *threshold* bytes.

        Shared by the three large-file tests so the fixture shape is defined in
        exactly one place instead of being duplicated (and slightly diverging)
        per test.
        """
        user_line = (
            '{"type":"user","uuid":"u1","parentUuid":"","message":{"role":"user","content":"'
            + ("x" * 1000)
            + '"}}'
        )
        assistant_line = (
            '{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"id":"msg_a1","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"'
            + ("y" * 1000)
            + '"}],"stop_reason":"end_turn","stop_sequence":null}}'
        )
        single_pair = user_line + "\n" + assistant_line + "\n"
        repeat = (threshold // len(single_pair.encode())) + 2
        return single_pair * repeat

    def test_skips_small_file(self, tmp_path):
        """Files below the threshold are not compacted."""
        import asyncio
        from unittest.mock import patch

        from .transcript import maybe_compact_cli_session

        session_id = "12345678-0000-0000-0000-000000000020"
        sdk_cwd = str(tmp_path)
        small_content = (
            '{"type":"user","uuid":"u1","message":{"role":"user","content":"hi"}}\n'
        )
        session_file = self._make_session_file(
            tmp_path, session_id, sdk_cwd, small_content
        )
        with patch(
            "backend.copilot.transcript._projects_base",
            return_value=str(tmp_path),
        ):
            result = asyncio.run(
                maybe_compact_cli_session(
                    sdk_cwd=sdk_cwd,
                    session_id=session_id,
                    model="claude-sonnet-4",
                )
            )
        assert result is False
        # File should be untouched.
        assert session_file.read_text(encoding="utf-8") == small_content

    def test_compacts_large_file_and_writes_back(self, tmp_path):
        """Files above the threshold are compacted and the result written to disk."""
        import asyncio
        from unittest.mock import AsyncMock, patch

        from .transcript import (
            _PROACTIVE_COMPACT_THRESHOLD_BYTES,
            maybe_compact_cli_session,
        )

        session_id = "12345678-0000-0000-0000-000000000021"
        sdk_cwd = str(tmp_path)
        large_content = self._make_large_content(_PROACTIVE_COMPACT_THRESHOLD_BYTES)
        session_file = self._make_session_file(
            tmp_path, session_id, sdk_cwd, large_content
        )
        compacted_content = (
            '{"type":"user","uuid":"c1","parentUuid":"","message":{"role":"user","content":"summary"}}\n'
            '{"type":"assistant","uuid":"c2","parentUuid":"c1","message":{"id":"msg_c2","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"compacted"}],"stop_reason":"end_turn","stop_sequence":null}}\n'
        )
        with (
            patch(
                "backend.copilot.transcript._projects_base",
                return_value=str(tmp_path),
            ),
            patch(
                "backend.copilot.transcript.compact_transcript",
                new_callable=AsyncMock,
                return_value=compacted_content,
            ),
        ):
            result = asyncio.run(
                maybe_compact_cli_session(
                    sdk_cwd=sdk_cwd,
                    session_id=session_id,
                    model="claude-sonnet-4",
                )
            )
        assert result is True
        # File on disk should now contain the compacted content.
        assert session_file.read_text(encoding="utf-8") == compacted_content

    def test_keeps_original_when_compaction_fails(self, tmp_path):
        """If compact_transcript returns None, the original file is left intact."""
        import asyncio
        from unittest.mock import AsyncMock, patch

        from .transcript import (
            _PROACTIVE_COMPACT_THRESHOLD_BYTES,
            maybe_compact_cli_session,
        )

        session_id = "12345678-0000-0000-0000-000000000022"
        sdk_cwd = str(tmp_path)
        large_content = self._make_large_content(_PROACTIVE_COMPACT_THRESHOLD_BYTES)
        session_file = self._make_session_file(
            tmp_path, session_id, sdk_cwd, large_content
        )
        with (
            patch(
                "backend.copilot.transcript._projects_base",
                return_value=str(tmp_path),
            ),
            patch(
                "backend.copilot.transcript.compact_transcript",
                new_callable=AsyncMock,
                return_value=None,
            ),
        ):
            result = asyncio.run(
                maybe_compact_cli_session(
                    sdk_cwd=sdk_cwd,
                    session_id=session_id,
                    model="claude-sonnet-4",
                )
            )
        assert result is False
        # File should be unchanged.
        assert session_file.read_text(encoding="utf-8") == large_content

    def test_keeps_original_when_compaction_returns_identical_content(self, tmp_path):
        """If compact_transcript returns the same string as the input, the file is left intact."""
        import asyncio
        from unittest.mock import AsyncMock, patch

        from .transcript import (
            _PROACTIVE_COMPACT_THRESHOLD_BYTES,
            maybe_compact_cli_session,
        )

        session_id = "12345678-0000-0000-0000-000000000023"
        sdk_cwd = str(tmp_path)
        large_content = self._make_large_content(_PROACTIVE_COMPACT_THRESHOLD_BYTES)
        session_file = self._make_session_file(
            tmp_path, session_id, sdk_cwd, large_content
        )
        with (
            patch(
                "backend.copilot.transcript._projects_base",
                return_value=str(tmp_path),
            ),
            patch(
                "backend.copilot.transcript.compact_transcript",
                new_callable=AsyncMock,
                return_value=large_content,
            ),
        ):
            result = asyncio.run(
                maybe_compact_cli_session(
                    sdk_cwd=sdk_cwd,
                    session_id=session_id,
                    model="claude-sonnet-4",
                )
            )
        assert result is False
        # File should be unchanged.
        assert session_file.read_text(encoding="utf-8") == large_content