fix(backend/copilot): apply session_msg_ceiling to no-resume compression fallback

The no-resume fallback in _build_query_message used raw msg_count (> 1) to
detect multi-message history and session.messages[:-1] for the compression
slice. After a turn-start drain appends pending messages, msg_count is inflated
and the fallback fires on what should be a fresh first turn, placing the current
user message into the history context and delivering a confusing split prompt to
the model.

Apply session_msg_ceiling to both branches:
- elif condition: effective_count > 1 instead of msg_count > 1
- compression slice: session.messages[:effective_count - 1] instead of [:-1]

With _pre_drain_msg_count=1 on a first turn with drained pending messages,
effective_count=1 so the fallback is correctly skipped and current_message
(which already contains both the original and pending text) is returned as-is.

Adds regression test covering the spurious-fallback scenario.
This commit is contained in:
majdyz
2026-04-11 08:45:54 +07:00
parent 1d05b06e43
commit 6b390d6677
2 changed files with 38 additions and 3 deletions

View File

@@ -298,6 +298,39 @@ async def test_build_query_session_msg_ceiling_preserves_real_gap():
assert "pending2" not in result
@pytest.mark.asyncio
async def test_build_query_session_msg_ceiling_suppresses_spurious_no_resume_fallback():
    """A pre-drain ceiling keeps the no-resume fallback off on a fresh first turn.

    Setup: the session is built with two user messages — the original one
    and a second one standing in for a pending message drained at turn
    start — while the ``current_message`` argument already carries both
    texts joined by a blank line.

    Calling ``_build_query_message`` with ``use_resume=False``,
    ``transcript_msg_count=0`` and ``session_msg_ceiling=1`` (the pre-drain
    count) must hand back ``current_message`` unchanged, report no
    compaction, and emit no ``<conversation_history>`` section.
    """
    original_question = "What is 2 plus 2?"
    pending_question = "What is 7 plus 7?"
    combined_prompt = f"{original_question}\n\n{pending_question}"

    # Post-drain state: the original message followed by the drained one.
    post_drain_session = _make_session(
        [
            ChatMessage(role="user", content=original_question),
            ChatMessage(role="user", content=pending_question),
        ]
    )

    outcome = await _build_query_message(
        combined_prompt,
        post_drain_session,
        use_resume=False,
        transcript_msg_count=0,
        session_id="test-session",
        session_msg_ceiling=1,  # only one message existed before the drain
    )
    query_text, compacted_flag = outcome

    # The prompt must come back verbatim, with no history wrapper around it.
    assert query_text == combined_prompt
    assert compacted_flag is False
    # No history section may be emitted at all on this path.
    assert "<conversation_history>" not in query_text
@pytest.mark.asyncio
async def test_build_query_no_resume_multi_message_compacted(monkeypatch):
"""When compression actually compacts, was_compacted should be True."""

View File

@@ -1001,12 +1001,14 @@ async def _build_query_message(
f"{gap_context}\n\nNow, the user says:\n{current_message}",
was_compressed,
)
elif not use_resume and msg_count > 1:
elif not use_resume and effective_count > 1:
logger.warning(
f"[SDK] Using compression fallback for session "
f"{session_id} ({msg_count} messages) — no transcript for --resume"
f"{session_id} ({effective_count} messages) — no transcript for --resume"
)
compressed, was_compressed = await _compress_messages(
session.messages[: effective_count - 1]
)
compressed, was_compressed = await _compress_messages(session.messages[:-1])
history_context = _format_conversation_context(compressed)
if history_context:
return (