mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
Compare commits
7 Commits
hotfix/aut
...
fix/copilo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d99b613b1d | ||
|
|
429ef36568 | ||
|
|
6dc24b3766 | ||
|
|
d27d22159d | ||
|
|
fffbe0aad8 | ||
|
|
df205b5444 | ||
|
|
4efa1c4310 |
@@ -156,10 +156,9 @@ class ChatConfig(BaseSettings):
|
||||
"history compression. Falls back to compression when unavailable.",
|
||||
)
|
||||
claude_agent_fallback_model: str = Field(
|
||||
default="claude-sonnet-4-20250514",
|
||||
default="claude-sonnet-4-6",
|
||||
description="Fallback model when the primary model is unavailable (e.g. 529 "
|
||||
"overloaded). The SDK automatically retries with this alternate model. "
|
||||
"It must differ from the primary model.",
|
||||
"overloaded). The SDK automatically retries with this cheaper model.",
|
||||
)
|
||||
claude_agent_max_turns: int = Field(
|
||||
default=50,
|
||||
|
||||
@@ -96,39 +96,6 @@ class TestResolveFallbackModel:
|
||||
assert result is not None
|
||||
assert "sonnet" in result.lower() or "claude" in result.lower()
|
||||
|
||||
def test_distinct_helper_drops_same_model(self):
|
||||
"""CLI fallback is omitted when it matches the resolved primary model."""
|
||||
cfg = _make_config(
|
||||
model="anthropic/claude-sonnet-4-6",
|
||||
claude_agent_fallback_model="claude-sonnet-4-6",
|
||||
use_openrouter=False,
|
||||
)
|
||||
with patch(f"{_SVC}.config", cfg):
|
||||
from backend.copilot.sdk.service import (
|
||||
_resolve_distinct_fallback_model,
|
||||
_resolve_sdk_model,
|
||||
)
|
||||
|
||||
assert _resolve_distinct_fallback_model(_resolve_sdk_model()) is None
|
||||
|
||||
def test_distinct_helper_keeps_different_model(self):
|
||||
"""CLI fallback is preserved when it differs from the primary model."""
|
||||
cfg = _make_config(
|
||||
model="anthropic/claude-sonnet-4-6",
|
||||
claude_agent_fallback_model="claude-sonnet-4-20250514",
|
||||
use_openrouter=False,
|
||||
)
|
||||
with patch(f"{_SVC}.config", cfg):
|
||||
from backend.copilot.sdk.service import (
|
||||
_resolve_distinct_fallback_model,
|
||||
_resolve_sdk_model,
|
||||
)
|
||||
|
||||
assert (
|
||||
_resolve_distinct_fallback_model(_resolve_sdk_model())
|
||||
== "claude-sonnet-4-20250514"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Security & isolation env vars
|
||||
|
||||
@@ -95,6 +95,7 @@ from ..transcript import (
|
||||
cleanup_stale_project_dirs,
|
||||
compact_transcript,
|
||||
download_transcript,
|
||||
maybe_compact_cli_session,
|
||||
read_compacted_entries,
|
||||
restore_cli_session,
|
||||
upload_cli_session,
|
||||
@@ -686,21 +687,6 @@ def _resolve_fallback_model() -> str | None:
|
||||
return _normalize_model_name(raw)
|
||||
|
||||
|
||||
def _resolve_distinct_fallback_model(primary_model: str | None) -> str | None:
|
||||
"""Resolve a fallback model that does not collide with *primary_model*."""
|
||||
fallback_model = _resolve_fallback_model()
|
||||
if not fallback_model or not primary_model:
|
||||
return fallback_model
|
||||
if fallback_model == primary_model:
|
||||
logger.warning(
|
||||
"[SDK] Fallback model %s matches primary model %s; disabling fallback",
|
||||
fallback_model,
|
||||
primary_model,
|
||||
)
|
||||
return None
|
||||
return fallback_model
|
||||
|
||||
|
||||
async def _resolve_model_and_multiplier(
|
||||
model: "CopilotLlmModel | None",
|
||||
session_id: str,
|
||||
@@ -2516,6 +2502,14 @@ async def stream_chat_completion_sdk(
|
||||
user_id, session_id, sdk_cwd, log_prefix=log_prefix
|
||||
)
|
||||
if cli_restored:
|
||||
# Proactively compact the CLI session if it's large enough
|
||||
# to risk triggering auto-compaction mid-turn. The CLI's
|
||||
# silent auto-compact bypasses our PreCompact hook and
|
||||
# loses context uncontrollably; compacting proactively here
|
||||
# keeps the session well below the ~200K-token threshold.
|
||||
await maybe_compact_cli_session(
|
||||
sdk_cwd, session_id, config.model, log_prefix
|
||||
)
|
||||
use_resume = True
|
||||
resume_file = session_id # CLI --resume expects UUID, not file path
|
||||
transcript_msg_count = dl.message_count
|
||||
@@ -2652,8 +2646,6 @@ async def stream_chat_completion_sdk(
|
||||
cross_user_cache=_cross_user,
|
||||
)
|
||||
|
||||
fallback_model = _resolve_distinct_fallback_model(sdk_model)
|
||||
|
||||
sdk_options_kwargs: dict[str, Any] = {
|
||||
"system_prompt": system_prompt_value,
|
||||
"mcp_servers": {"copilot": mcp_server},
|
||||
@@ -2663,6 +2655,10 @@ async def stream_chat_completion_sdk(
|
||||
"cwd": sdk_cwd,
|
||||
"max_buffer_size": config.claude_agent_max_buffer_size,
|
||||
"stderr": _on_stderr,
|
||||
# --- P0 guardrails ---
|
||||
# fallback_model: SDK auto-retries with this cheaper model on
|
||||
# 529 (overloaded) errors, avoiding user-visible failures.
|
||||
"fallback_model": _resolve_fallback_model(),
|
||||
# max_turns: hard cap on agentic tool-use loops per query to
|
||||
# prevent runaway execution from burning budget.
|
||||
"max_turns": config.claude_agent_max_turns,
|
||||
@@ -2676,11 +2672,6 @@ async def stream_chat_completion_sdk(
|
||||
# native extended thinking), so it is safe to pass unconditionally.
|
||||
"max_thinking_tokens": config.claude_agent_max_thinking_tokens,
|
||||
}
|
||||
if fallback_model:
|
||||
# fallback_model: SDK auto-retries with this alternate model on
|
||||
# 529 (overloaded) errors. Omit it entirely when it resolves to
|
||||
# the same value as the primary model because the CLI rejects that.
|
||||
sdk_options_kwargs["fallback_model"] = fallback_model
|
||||
# effort: only set for models with extended thinking (Opus).
|
||||
# Setting effort on Sonnet causes <internal_reasoning> tag leaks.
|
||||
if config.claude_agent_thinking_effort:
|
||||
@@ -2933,9 +2924,10 @@ async def stream_chat_completion_sdk(
|
||||
sdk_options_kwargs_retry.pop("resume", None)
|
||||
sdk_options_kwargs_retry["session_id"] = session_id
|
||||
else:
|
||||
# T2+ retry without --resume: do not pass --session-id.
|
||||
# The T1 session file already exists at that path; re-using
|
||||
# the same ID would fail with "Session ID already in use".
|
||||
# T2+ retry without --resume: initial invocation used
|
||||
# --resume, which restored the T1 session file to local
|
||||
# storage. Re-using session_id without --resume would
|
||||
# fail with "Session ID already in use".
|
||||
sdk_options_kwargs_retry.pop("resume", None)
|
||||
sdk_options_kwargs_retry.pop("session_id", None)
|
||||
# Recompute system_prompt for retry — ctx.use_resume may have
|
||||
|
||||
@@ -689,6 +689,15 @@ def _cli_session_storage_path_parts(
|
||||
)
|
||||
|
||||
|
||||
# Byte-size threshold for proactive pre-query CLI session compaction.
|
||||
# Measured from production session 93ecf7c9: 233KB file → 204K tokens
|
||||
# (≈0.88 bytes/token). 120KB ≈ 105K tokens — roughly half the CLI's
|
||||
# ~200K auto-compaction threshold. Compacting proactively here prevents
|
||||
# the CLI from silently auto-compacting mid-turn, which bypasses our
|
||||
# PreCompact hook and causes uncontrolled context loss.
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES = 120_000
|
||||
|
||||
|
||||
async def upload_cli_session(
|
||||
user_id: str,
|
||||
session_id: str,
|
||||
@@ -716,7 +725,7 @@ async def upload_cli_session(
|
||||
return
|
||||
|
||||
try:
|
||||
content = Path(real_path).read_bytes()
|
||||
raw_bytes = Path(real_path).read_bytes()
|
||||
except FileNotFoundError:
|
||||
logger.debug(
|
||||
"%s CLI session file not found, skipping upload: %s",
|
||||
@@ -728,6 +737,32 @@ async def upload_cli_session(
|
||||
logger.warning("%s Failed to read CLI session file: %s", log_prefix, e)
|
||||
return
|
||||
|
||||
# Strip stale thinking blocks and metadata entries (progress, file-history-snapshot,
|
||||
# queue-operation) from the CLI session before writing it back locally and uploading
|
||||
# to GCS. Thinking blocks from non-last assistant turns are not needed for --resume
|
||||
# but can be massive (tens of thousands of tokens each), causing the CLI to auto-compact
|
||||
# its session when the context window fills up. Stripping keeps the session well below
|
||||
# the ~200K-token compaction threshold and prevents silent context loss.
|
||||
try:
|
||||
raw_text = raw_bytes.decode("utf-8")
|
||||
stripped_text = strip_for_upload(raw_text)
|
||||
stripped_bytes = stripped_text.encode("utf-8")
|
||||
if len(stripped_bytes) < len(raw_bytes):
|
||||
# Write the stripped version back locally so same-pod turns also benefit.
|
||||
Path(real_path).write_bytes(stripped_bytes)
|
||||
logger.info(
|
||||
"%s Stripped CLI session file: %dB → %dB",
|
||||
log_prefix,
|
||||
len(raw_bytes),
|
||||
len(stripped_bytes),
|
||||
)
|
||||
content = stripped_bytes
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"%s Failed to strip CLI session file, uploading raw: %s", log_prefix, e
|
||||
)
|
||||
content = raw_bytes
|
||||
|
||||
storage = await get_workspace_storage()
|
||||
wid, fid, fname = _cli_session_storage_path_parts(user_id, session_id)
|
||||
try:
|
||||
@@ -767,17 +802,10 @@ async def restore_cli_session(
|
||||
)
|
||||
return False
|
||||
|
||||
# If the session file already exists locally (same-pod reuse), use it directly.
|
||||
# Downloading from storage could overwrite a newer local version when a previous
|
||||
# turn's upload failed: stored content is stale while the local file already
|
||||
# contains extended history from that turn.
|
||||
if Path(real_path).exists():
|
||||
logger.debug(
|
||||
"%s CLI session file already exists locally — using it for --resume",
|
||||
log_prefix,
|
||||
)
|
||||
return True
|
||||
|
||||
# Always download from GCS, even if a local file exists.
|
||||
# In a multi-pod load-balanced environment the local file may belong to a
|
||||
# different (older) turn that ran on this same pod — using it would silently
|
||||
# restore a stale context and hide the most-recent turn from the model.
|
||||
storage = await get_workspace_storage()
|
||||
path = _build_path_from_parts(
|
||||
_cli_session_storage_path_parts(user_id, session_id), storage
|
||||
@@ -806,6 +834,103 @@ async def restore_cli_session(
|
||||
return False
|
||||
|
||||
|
||||
async def maybe_compact_cli_session(
|
||||
sdk_cwd: str,
|
||||
session_id: str,
|
||||
model: str,
|
||||
log_prefix: str = "[Transcript]",
|
||||
) -> bool:
|
||||
"""Proactively compact the CLI native session if it risks triggering auto-compaction.
|
||||
|
||||
Called after ``restore_cli_session()`` succeeds and before the SDK turn starts.
|
||||
If the session file exceeds ``_PROACTIVE_COMPACT_THRESHOLD_BYTES``, runs LLM
|
||||
summarization via ``compact_transcript()`` and writes the smaller result back
|
||||
to disk so ``--resume`` uses the compacted context.
|
||||
|
||||
This prevents the CLI from silently auto-compacting mid-turn — a path that
|
||||
bypasses our PreCompact hook and causes uncontrolled context loss for long
|
||||
sessions, including pure-Sonnet sessions where thinking-block stripping
|
||||
provides no relief.
|
||||
|
||||
Returns ``True`` if compaction was performed and the file was updated.
|
||||
"""
|
||||
session_file = _cli_session_path(sdk_cwd, session_id)
|
||||
real_path = os.path.realpath(session_file)
|
||||
projects_base = _projects_base()
|
||||
|
||||
if not real_path.startswith(projects_base + os.sep):
|
||||
logger.warning(
|
||||
"%s CLI session path outside projects base, skipping proactive compaction",
|
||||
log_prefix,
|
||||
)
|
||||
return False
|
||||
|
||||
try:
|
||||
raw_bytes = Path(real_path).read_bytes()
|
||||
except FileNotFoundError:
|
||||
logger.debug(
|
||||
"%s CLI session not found, skipping proactive compaction", log_prefix
|
||||
)
|
||||
return False
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
"%s Failed to read CLI session for compaction: %s", log_prefix, e
|
||||
)
|
||||
return False
|
||||
|
||||
if len(raw_bytes) < _PROACTIVE_COMPACT_THRESHOLD_BYTES:
|
||||
logger.debug(
|
||||
"%s CLI session %dB < threshold %dB — no proactive compaction",
|
||||
log_prefix,
|
||||
len(raw_bytes),
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES,
|
||||
)
|
||||
return False
|
||||
|
||||
logger.info(
|
||||
"%s CLI session %dB >= threshold %dB — running proactive compaction",
|
||||
log_prefix,
|
||||
len(raw_bytes),
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES,
|
||||
)
|
||||
|
||||
try:
|
||||
content = raw_bytes.decode("utf-8")
|
||||
except UnicodeDecodeError as e:
|
||||
logger.warning(
|
||||
"%s CLI session is not valid UTF-8, skipping compaction: %s", log_prefix, e
|
||||
)
|
||||
return False
|
||||
|
||||
compacted = await compact_transcript(content, model=model, log_prefix=log_prefix)
|
||||
if not compacted:
|
||||
logger.warning(
|
||||
"%s Proactive compaction failed or returned None — keeping original",
|
||||
log_prefix,
|
||||
)
|
||||
return False
|
||||
if compacted == content:
|
||||
logger.warning(
|
||||
"%s Proactive compaction returned identical content — keeping original",
|
||||
log_prefix,
|
||||
)
|
||||
return False
|
||||
|
||||
compacted_bytes = compacted.encode("utf-8")
|
||||
try:
|
||||
Path(real_path).write_bytes(compacted_bytes)
|
||||
logger.info(
|
||||
"%s Proactively compacted CLI session: %dB → %dB",
|
||||
log_prefix,
|
||||
len(raw_bytes),
|
||||
len(compacted_bytes),
|
||||
)
|
||||
return True
|
||||
except OSError as e:
|
||||
logger.warning("%s Failed to write compacted CLI session: %s", log_prefix, e)
|
||||
return False
|
||||
|
||||
|
||||
async def upload_transcript(
|
||||
user_id: str,
|
||||
session_id: str,
|
||||
|
||||
@@ -918,6 +918,202 @@ class TestUploadCliSession:
|
||||
|
||||
mock_storage.store.assert_not_called()
|
||||
|
||||
def test_strips_session_before_upload_and_writes_back(self, tmp_path):
|
||||
"""Strippable entries (progress, thinking blocks) are removed before upload.
|
||||
|
||||
The stripped content is written back to disk (so same-pod turns benefit)
|
||||
and the smaller bytes are uploaded to GCS.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from .transcript import _sanitize_id, upload_cli_session
|
||||
|
||||
projects_base = str(tmp_path)
|
||||
session_id = "12345678-0000-0000-0000-000000000010"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
session_dir = tmp_path / encoded_cwd
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"
|
||||
|
||||
# A CLI session with a progress entry (strippable) and a real assistant message.
|
||||
import json
|
||||
|
||||
progress_entry = {
|
||||
"type": "progress",
|
||||
"uuid": "p1",
|
||||
"parentUuid": "u1",
|
||||
"data": {"type": "bash_progress", "stdout": "running..."},
|
||||
}
|
||||
user_entry = {
|
||||
"type": "user",
|
||||
"uuid": "u1",
|
||||
"message": {"role": "user", "content": "hello"},
|
||||
}
|
||||
asst_entry = {
|
||||
"type": "assistant",
|
||||
"uuid": "a1",
|
||||
"parentUuid": "u1",
|
||||
"message": {"role": "assistant", "content": "world"},
|
||||
}
|
||||
raw_content = (
|
||||
json.dumps(progress_entry)
|
||||
+ "\n"
|
||||
+ json.dumps(user_entry)
|
||||
+ "\n"
|
||||
+ json.dumps(asst_entry)
|
||||
+ "\n"
|
||||
)
|
||||
raw_bytes = raw_content.encode("utf-8")
|
||||
session_file.write_bytes(raw_bytes)
|
||||
|
||||
mock_storage = AsyncMock()
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=projects_base,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.get_workspace_storage",
|
||||
new_callable=AsyncMock,
|
||||
return_value=mock_storage,
|
||||
),
|
||||
):
|
||||
asyncio.run(
|
||||
upload_cli_session(
|
||||
user_id="user-1",
|
||||
session_id=session_id,
|
||||
sdk_cwd=sdk_cwd,
|
||||
)
|
||||
)
|
||||
|
||||
# Upload should have been called with stripped bytes (no progress entry).
|
||||
mock_storage.store.assert_called_once()
|
||||
stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
|
||||
stored_lines = stored_content.decode("utf-8").strip().split("\n")
|
||||
stored_types = [json.loads(line).get("type") for line in stored_lines]
|
||||
assert "progress" not in stored_types
|
||||
assert "user" in stored_types
|
||||
assert "assistant" in stored_types
|
||||
# Stripped bytes should be smaller than raw.
|
||||
assert len(stored_content) < len(raw_bytes)
|
||||
# File on disk should also be the stripped version.
|
||||
disk_content = session_file.read_bytes()
|
||||
assert disk_content == stored_content
|
||||
|
||||
def test_strips_stale_thinking_blocks_before_upload(self, tmp_path):
|
||||
"""Thinking blocks in non-last assistant turns are stripped to reduce size."""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from .transcript import _sanitize_id, upload_cli_session
|
||||
|
||||
projects_base = str(tmp_path)
|
||||
session_id = "12345678-0000-0000-0000-000000000011"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
session_dir = tmp_path / encoded_cwd
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"
|
||||
|
||||
# Two turns: first assistant has thinking block (stale), second doesn't.
|
||||
u1 = {
|
||||
"type": "user",
|
||||
"uuid": "u1",
|
||||
"message": {"role": "user", "content": "q1"},
|
||||
}
|
||||
a1_with_thinking = {
|
||||
"type": "assistant",
|
||||
"uuid": "a1",
|
||||
"parentUuid": "u1",
|
||||
"message": {
|
||||
"id": "msg_a1",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "A" * 5000},
|
||||
{"type": "text", "text": "answer1"},
|
||||
],
|
||||
},
|
||||
}
|
||||
u2 = {
|
||||
"type": "user",
|
||||
"uuid": "u2",
|
||||
"parentUuid": "a1",
|
||||
"message": {"role": "user", "content": "q2"},
|
||||
}
|
||||
a2_no_thinking = {
|
||||
"type": "assistant",
|
||||
"uuid": "a2",
|
||||
"parentUuid": "u2",
|
||||
"message": {
|
||||
"id": "msg_a2",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "answer2"}],
|
||||
},
|
||||
}
|
||||
raw_content = (
|
||||
json.dumps(u1)
|
||||
+ "\n"
|
||||
+ json.dumps(a1_with_thinking)
|
||||
+ "\n"
|
||||
+ json.dumps(u2)
|
||||
+ "\n"
|
||||
+ json.dumps(a2_no_thinking)
|
||||
+ "\n"
|
||||
)
|
||||
raw_bytes = raw_content.encode("utf-8")
|
||||
session_file.write_bytes(raw_bytes)
|
||||
|
||||
mock_storage = AsyncMock()
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=projects_base,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.get_workspace_storage",
|
||||
new_callable=AsyncMock,
|
||||
return_value=mock_storage,
|
||||
),
|
||||
):
|
||||
asyncio.run(
|
||||
upload_cli_session(
|
||||
user_id="user-1",
|
||||
session_id=session_id,
|
||||
sdk_cwd=sdk_cwd,
|
||||
)
|
||||
)
|
||||
|
||||
stored_content: bytes = mock_storage.store.call_args.kwargs["content"]
|
||||
stored_lines = stored_content.decode("utf-8").strip().split("\n")
|
||||
|
||||
# a1 should have its thinking block stripped (it's not the last assistant turn).
|
||||
a1_stored = json.loads(stored_lines[1])
|
||||
a1_content = a1_stored["message"]["content"]
|
||||
assert all(
|
||||
b["type"] != "thinking" for b in a1_content
|
||||
), "stale thinking block should be stripped from a1"
|
||||
assert any(
|
||||
b["type"] == "text" for b in a1_content
|
||||
), "text block should be kept in a1"
|
||||
|
||||
# a2 (last turn) should be unchanged.
|
||||
a2_stored = json.loads(stored_lines[3])
|
||||
assert a2_stored["message"]["content"] == [{"type": "text", "text": "answer2"}]
|
||||
|
||||
# Stripped bytes smaller than raw.
|
||||
assert len(stored_content) < len(raw_bytes)
|
||||
|
||||
|
||||
class TestRestoreCliSession:
|
||||
def test_returns_false_when_file_not_found_in_storage(self):
|
||||
@@ -981,8 +1177,14 @@ class TestRestoreCliSession:
|
||||
|
||||
assert result is False
|
||||
|
||||
def test_returns_true_when_local_file_already_exists(self, tmp_path):
|
||||
"""Same-pod reuse: if local file exists, skip storage download and return True."""
|
||||
def test_gcs_overwrites_stale_local_file(self, tmp_path):
|
||||
"""Cross-pod staleness fix: GCS content always overwrites any local file.
|
||||
|
||||
Previously a same-pod optimisation returned early when a local file existed,
|
||||
which caused Pod A to silently reuse a stale file from an earlier turn while
|
||||
the canonical (newer) session lived in GCS — hiding the most-recent turn from
|
||||
the model. The fix removes that early-return so GCS is always consulted.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
@@ -994,15 +1196,18 @@ class TestRestoreCliSession:
|
||||
session_id = "12345678-0000-0000-0000-000000000099"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
# Pre-create the local session file (simulates previous turn on same pod)
|
||||
# Pre-create a STALE local session file (simulates a previous turn on this pod)
|
||||
projects_base = os.path.realpath(str(tmp_path))
|
||||
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", projects_base)
|
||||
session_dir = Path(projects_base) / encoded_cwd
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
existing_content = b'{"type":"user"}\n{"type":"assistant"}\n'
|
||||
(session_dir / f"{session_id}.jsonl").write_bytes(existing_content)
|
||||
stale_content = b'{"type":"user"}\n{"type":"assistant","stale":true}\n'
|
||||
(session_dir / f"{session_id}.jsonl").write_bytes(stale_content)
|
||||
|
||||
# GCS has the FRESH content from the turn that ran on a different pod
|
||||
fresh_gcs_content = b'{"type":"user"}\n{"type":"assistant","fresh":true}\n'
|
||||
mock_storage = AsyncMock()
|
||||
mock_storage.retrieve.return_value = fresh_gcs_content
|
||||
|
||||
with (
|
||||
patch(
|
||||
@@ -1024,10 +1229,12 @@ class TestRestoreCliSession:
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# Storage should NOT have been accessed (local file was used as-is)
|
||||
mock_storage.retrieve.assert_not_called()
|
||||
# Local file should be unchanged
|
||||
assert (session_dir / f"{session_id}.jsonl").read_bytes() == existing_content
|
||||
# GCS MUST have been consulted — the local file must not be trusted blindly
|
||||
mock_storage.retrieve.assert_called_once()
|
||||
# Local file must now contain the fresh GCS content, not the stale version
|
||||
written = (session_dir / f"{session_id}.jsonl").read_bytes()
|
||||
assert written == fresh_gcs_content
|
||||
assert written != stale_content
|
||||
|
||||
def test_returns_true_on_success(self, tmp_path):
|
||||
"""Happy path: storage has the session → file written → returns True."""
|
||||
@@ -1089,3 +1296,212 @@ class TestRestoreCliSession:
|
||||
)
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
class TestMaybeCompactCliSession:
|
||||
def _make_session_file(self, tmp_path, session_id: str, sdk_cwd: str, content: str):
|
||||
import os
|
||||
import re
|
||||
|
||||
from .transcript import _sanitize_id
|
||||
|
||||
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
session_dir = tmp_path / encoded_cwd
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
session_file = session_dir / f"{_sanitize_id(session_id)}.jsonl"
|
||||
session_file.write_text(content, encoding="utf-8")
|
||||
return session_file
|
||||
|
||||
def test_skips_small_file(self, tmp_path):
|
||||
"""Files below the threshold are not compacted."""
|
||||
import asyncio
|
||||
from unittest.mock import patch
|
||||
|
||||
from .transcript import maybe_compact_cli_session
|
||||
|
||||
session_id = "12345678-0000-0000-0000-000000000020"
|
||||
sdk_cwd = str(tmp_path)
|
||||
small_content = (
|
||||
'{"type":"user","uuid":"u1","message":{"role":"user","content":"hi"}}\n'
|
||||
)
|
||||
session_file = self._make_session_file(
|
||||
tmp_path, session_id, sdk_cwd, small_content
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=str(tmp_path),
|
||||
):
|
||||
result = asyncio.run(
|
||||
maybe_compact_cli_session(
|
||||
sdk_cwd=sdk_cwd,
|
||||
session_id=session_id,
|
||||
model="claude-sonnet-4",
|
||||
)
|
||||
)
|
||||
|
||||
assert result is False
|
||||
# File should be untouched.
|
||||
assert session_file.read_text(encoding="utf-8") == small_content
|
||||
|
||||
def test_compacts_large_file_and_writes_back(self, tmp_path):
|
||||
"""Files above the threshold are compacted and the result written to disk."""
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from .transcript import (
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES,
|
||||
maybe_compact_cli_session,
|
||||
)
|
||||
|
||||
session_id = "12345678-0000-0000-0000-000000000021"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
# Build a valid JSONL session large enough to exceed the threshold.
|
||||
u1 = (
|
||||
'{"type":"user","uuid":"u1","parentUuid":"","message":{"role":"user","content":"'
|
||||
+ ("x" * 1000)
|
||||
+ '"}}'
|
||||
)
|
||||
a1 = (
|
||||
'{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"id":"msg_a1","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"'
|
||||
+ ("y" * 1000)
|
||||
+ '"}],"stop_reason":"end_turn","stop_sequence":null}}'
|
||||
)
|
||||
single_pair = u1 + "\n" + a1 + "\n"
|
||||
repeat = (_PROACTIVE_COMPACT_THRESHOLD_BYTES // len(single_pair.encode())) + 2
|
||||
large_content = single_pair * repeat
|
||||
|
||||
session_file = self._make_session_file(
|
||||
tmp_path, session_id, sdk_cwd, large_content
|
||||
)
|
||||
|
||||
compacted_content = (
|
||||
'{"type":"user","uuid":"c1","parentUuid":"","message":{"role":"user","content":"summary"}}\n'
|
||||
'{"type":"assistant","uuid":"c2","parentUuid":"c1","message":{"id":"msg_c2","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"compacted"}],"stop_reason":"end_turn","stop_sequence":null}}\n'
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=str(tmp_path),
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.compact_transcript",
|
||||
new_callable=AsyncMock,
|
||||
return_value=compacted_content,
|
||||
),
|
||||
):
|
||||
result = asyncio.run(
|
||||
maybe_compact_cli_session(
|
||||
sdk_cwd=sdk_cwd,
|
||||
session_id=session_id,
|
||||
model="claude-sonnet-4",
|
||||
)
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# File on disk should now contain the compacted content.
|
||||
assert session_file.read_text(encoding="utf-8") == compacted_content
|
||||
|
||||
def test_keeps_original_when_compaction_fails(self, tmp_path):
|
||||
"""If compact_transcript returns None, the original file is left intact."""
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from .transcript import (
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES,
|
||||
maybe_compact_cli_session,
|
||||
)
|
||||
|
||||
session_id = "12345678-0000-0000-0000-000000000022"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
single_pair = (
|
||||
'{"type":"user","uuid":"u1","parentUuid":"","message":{"role":"user","content":"'
|
||||
+ ("x" * 1000)
|
||||
+ '"}}\n'
|
||||
'{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"id":"msg_a1","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"'
|
||||
+ ("y" * 1000)
|
||||
+ '"}],"stop_reason":"end_turn","stop_sequence":null}}\n'
|
||||
)
|
||||
repeat = (_PROACTIVE_COMPACT_THRESHOLD_BYTES // len(single_pair.encode())) + 2
|
||||
large_content = single_pair * repeat
|
||||
|
||||
session_file = self._make_session_file(
|
||||
tmp_path, session_id, sdk_cwd, large_content
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=str(tmp_path),
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.compact_transcript",
|
||||
new_callable=AsyncMock,
|
||||
return_value=None,
|
||||
),
|
||||
):
|
||||
result = asyncio.run(
|
||||
maybe_compact_cli_session(
|
||||
sdk_cwd=sdk_cwd,
|
||||
session_id=session_id,
|
||||
model="claude-sonnet-4",
|
||||
)
|
||||
)
|
||||
|
||||
assert result is False
|
||||
# File should be unchanged.
|
||||
assert session_file.read_text(encoding="utf-8") == large_content
|
||||
|
||||
def test_keeps_original_when_compaction_returns_identical_content(self, tmp_path):
|
||||
"""If compact_transcript returns the same string as the input, the file is left intact."""
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from .transcript import (
|
||||
_PROACTIVE_COMPACT_THRESHOLD_BYTES,
|
||||
maybe_compact_cli_session,
|
||||
)
|
||||
|
||||
session_id = "12345678-0000-0000-0000-000000000023"
|
||||
sdk_cwd = str(tmp_path)
|
||||
|
||||
single_pair = (
|
||||
'{"type":"user","uuid":"u1","parentUuid":"","message":{"role":"user","content":"'
|
||||
+ ("x" * 1000)
|
||||
+ '"}}\n'
|
||||
'{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"id":"msg_a1","role":"assistant","model":"","type":"message","content":[{"type":"text","text":"'
|
||||
+ ("y" * 1000)
|
||||
+ '"}],"stop_reason":"end_turn","stop_sequence":null}}\n'
|
||||
)
|
||||
repeat = (_PROACTIVE_COMPACT_THRESHOLD_BYTES // len(single_pair.encode())) + 2
|
||||
large_content = single_pair * repeat
|
||||
|
||||
session_file = self._make_session_file(
|
||||
tmp_path, session_id, sdk_cwd, large_content
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.transcript._projects_base",
|
||||
return_value=str(tmp_path),
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.compact_transcript",
|
||||
new_callable=AsyncMock,
|
||||
return_value=large_content,
|
||||
),
|
||||
):
|
||||
result = asyncio.run(
|
||||
maybe_compact_cli_session(
|
||||
sdk_cwd=sdk_cwd,
|
||||
session_id=session_id,
|
||||
model="claude-sonnet-4",
|
||||
)
|
||||
)
|
||||
|
||||
assert result is False
|
||||
# File should be unchanged.
|
||||
assert session_file.read_text(encoding="utf-8") == large_content
|
||||
|
||||
Reference in New Issue
Block a user