mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
fix(backend/copilot): apply session_msg_ceiling to no-resume compression fallback
The no-resume fallback in `_build_query_message` used the raw `msg_count` (`msg_count > 1`) to detect multi-message history and `session.messages[:-1]` for the compression slice. After a turn-start drain appends pending messages, `msg_count` is inflated, so the fallback fires on what should be a fresh first turn, placing the current user message into the history context and delivering a confusing split prompt to the model.

Apply `session_msg_ceiling` in both places:
- `elif` condition: `effective_count > 1` instead of `msg_count > 1`
- compression slice: `session.messages[: effective_count - 1]` instead of `session.messages[:-1]`

With `_pre_drain_msg_count=1` on a first turn with drained pending messages, `effective_count=1`, so the fallback is correctly skipped and `current_message` (which already contains both the original and the pending text) is returned as-is.

Adds a regression test covering the spurious-fallback scenario.
This commit is contained in:
@@ -298,6 +298,39 @@ async def test_build_query_session_msg_ceiling_preserves_real_gap():
|
||||
assert "pending2" not in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_build_query_session_msg_ceiling_suppresses_spurious_no_resume_fallback():
    """Regression: a drained pending message must not trigger the no-resume fallback.

    Setup: a fresh session holding one message, plus one pending message that
    was drained at turn start, so ``session.messages`` now has two entries.

    Without a ceiling, msg_count=2 > 1 would trigger the compression fallback
    and the pending message would leak into the history context.

    With ``session_msg_ceiling=1`` (the pre-drain count), effective_count=1,
    ``1 > 1`` is False, the fallback is skipped, and the current message is
    returned unchanged.
    """
    combined_prompt = "What is 2 plus 2?\n\nWhat is 7 plus 7?"

    # State of session.messages after the drain: [current_msg, pending_msg].
    drained_session = _make_session(
        [
            ChatMessage(role="user", content="What is 2 plus 2?"),
            ChatMessage(role="user", content="What is 7 plus 7?"),  # pending
        ]
    )

    query, compacted = await _build_query_message(
        combined_prompt,
        drained_session,
        use_resume=False,
        transcript_msg_count=0,
        session_id="test-session",
        session_msg_ceiling=1,  # pre-drain: only 1 message existed
    )

    # The prompt comes back verbatim, with no history wrapper around it.
    assert query == "What is 2 plus 2?\n\nWhat is 7 plus 7?"
    assert compacted is False
    # The pending question must NOT show up inside a spurious history section.
    assert "<conversation_history>" not in query
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_build_query_no_resume_multi_message_compacted(monkeypatch):
|
||||
"""When compression actually compacts, was_compacted should be True."""
|
||||
|
||||
@@ -1001,12 +1001,14 @@ async def _build_query_message(
|
||||
f"{gap_context}\n\nNow, the user says:\n{current_message}",
|
||||
was_compressed,
|
||||
)
|
||||
elif not use_resume and msg_count > 1:
|
||||
elif not use_resume and effective_count > 1:
|
||||
logger.warning(
|
||||
f"[SDK] Using compression fallback for session "
|
||||
f"{session_id} ({msg_count} messages) — no transcript for --resume"
|
||||
f"{session_id} ({effective_count} messages) — no transcript for --resume"
|
||||
)
|
||||
compressed, was_compressed = await _compress_messages(
|
||||
session.messages[: effective_count - 1]
|
||||
)
|
||||
compressed, was_compressed = await _compress_messages(session.messages[:-1])
|
||||
history_context = _format_conversation_context(compressed)
|
||||
if history_context:
|
||||
return (
|
||||
|
||||
Reference in New Issue
Block a user