fix(backend/copilot): apply session_msg_ceiling to no-resume compression fallback

The no-resume fallback in _build_query_message used raw msg_count (> 1) to detect multi-message history and session.messages[:-1] for the compression slice. After a turn-start drain appends pending messages, msg_count is inflated and the fallback fires on what should be a fresh first turn, placing the current user message into the history context and delivering a confusing split prompt to the model.

Apply session_msg_ceiling to both branches:
- elif condition: effective_count > 1 instead of msg_count > 1
- compression slice: session.messages[:effective_count - 1] instead of [:-1]

With _pre_drain_msg_count=1 on a first turn with drained pending messages, effective_count=1 so the fallback is correctly skipped and current_message (which already contains both the original and pending text) is returned as-is.

Adds regression test covering the spurious-fallback scenario.
2026-04-30 03:00:41 -04:00 · 2026-04-11 08:45:54 +07:00
parent 1d05b06e43
commit 6b390d6677
2 changed files with 38 additions and 3 deletions
--- a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py
@@ -298,6 +298,39 @@ async def test_build_query_session_msg_ceiling_preserves_real_gap():
    assert "pending2" not in result


+@pytest.mark.asyncio
+async def test_build_query_session_msg_ceiling_suppresses_spurious_no_resume_fallback():
+    """session_msg_ceiling prevents the no-resume compression fallback from
+    firing on the first turn of a session when pending messages inflate msg_count.
+
+    Scenario: fresh session (1 message) + 1 pending message drained at turn start.
+    Without the ceiling: msg_count=2 > 1 → fallback triggers → session.messages[:-1]
+    (the original user message) is placed into history → split prompt sent to model.
+    With session_msg_ceiling=1 (pre-drain count): effective_count=1, 1 > 1 is False
+    → fallback does not trigger → current_message returned as-is.
+    """
+    # session.messages after drain: [original user message, drained pending message]
+    session = _make_session(
+        [
+            ChatMessage(role="user", content="What is 2 plus 2?"),
+            ChatMessage(role="user", content="What is 7 plus 7?"),  # pending
+        ]
+    )
+    result, was_compacted = await _build_query_message(
+        "What is 2 plus 2?\n\nWhat is 7 plus 7?",
+        session,
+        use_resume=False,
+        transcript_msg_count=0,
+        session_id="test-session",
+        session_msg_ceiling=1,  # pre-drain: only 1 message existed
+    )
+    # Should return current_message directly without wrapping in history context
+    assert result == "What is 2 plus 2?\n\nWhat is 7 plus 7?"
+    assert was_compacted is False
+    # No <conversation_history> section may be present in the returned prompt
+    assert "<conversation_history>" not in result
+
+
@pytest.mark.asyncio
 async def test_build_query_no_resume_multi_message_compacted(monkeypatch):
    """When compression actually compacts, was_compacted should be True."""
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -1001,12 +1001,14 @@ async def _build_query_message(
                    f"{gap_context}\n\nNow, the user says:\n{current_message}",
                    was_compressed,
                )
-    elif not use_resume and msg_count > 1:
+    elif not use_resume and effective_count > 1:
        logger.warning(
            f"[SDK] Using compression fallback for session "
-            f"{session_id} ({msg_count} messages) — no transcript for --resume"
+            f"{session_id} ({effective_count} messages) — no transcript for --resume"
+        )
+        compressed, was_compressed = await _compress_messages(
+            session.messages[: effective_count - 1]
        )
-        compressed, was_compressed = await _compress_messages(session.messages[:-1])
        history_context = _format_conversation_context(compressed)
        if history_context:
            return (