From 6b390d667726d206138a125cd07bd2fd5a0d33f2 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sat, 11 Apr 2026 08:45:54 +0700
Subject: [PATCH] fix(backend/copilot): apply session_msg_ceiling to no-resume
 compression fallback

The no-resume fallback in _build_query_message used raw msg_count (> 1) to
detect multi-message history and session.messages[:-1] for the compression
slice. After a turn-start drain appends pending messages, msg_count is inflated
and the fallback fires on what should be a fresh first turn, placing the current
user message into the history context and delivering a confusing split prompt to
the model.

Apply session_msg_ceiling to both branches:
- elif condition: effective_count > 1 instead of msg_count > 1
- compression slice: session.messages[:effective_count - 1] instead of [:-1]

With _pre_drain_msg_count=1 on a first turn with drained pending messages,
effective_count=1 so the fallback is correctly skipped and current_message
(which already contains both the original and pending text) is returned as-is.

Adds regression test covering the spurious-fallback scenario.
---
 .../backend/copilot/sdk/query_builder_test.py | 33 +++++++++++++++++++
 .../backend/backend/copilot/sdk/service.py    |  8 +++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py
index 4042dae590..4a7bf01823 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py
@@ -298,6 +298,39 @@ async def test_build_query_session_msg_ceiling_preserves_real_gap():
     assert "pending2" not in result
 
 
+@pytest.mark.asyncio
+async def test_build_query_session_msg_ceiling_suppresses_spurious_no_resume_fallback():
+    """session_msg_ceiling prevents the no-resume compression fallback from
+    firing on the first turn of a session when pending messages inflate msg_count.
+
+    Scenario: fresh session (1 message) + 1 pending message drained at turn start.
+    Without the ceiling: msg_count=2 > 1 → fallback triggers → pending message
+    leaked into history → wrong context sent to model.
+    With session_msg_ceiling=1 (pre-drain count): effective_count=1, 1 > 1 is False
+    → fallback does not trigger → current_message returned as-is.
+    """
+    # session.messages after drain: [current_msg, pending_msg]
+    session = _make_session(
+        [
+            ChatMessage(role="user", content="What is 2 plus 2?"),
+            ChatMessage(role="user", content="What is 7 plus 7?"),  # pending
+        ]
+    )
+    result, was_compacted = await _build_query_message(
+        "What is 2 plus 2?\n\nWhat is 7 plus 7?",
+        session,
+        use_resume=False,
+        transcript_msg_count=0,
+        session_id="test-session",
+        session_msg_ceiling=1,  # pre-drain: only 1 message existed
+    )
+    # Should return current_message directly without wrapping in history context
+    assert result == "What is 2 plus 2?\n\nWhat is 7 plus 7?"
+    assert was_compacted is False
+    # Pending question must NOT appear in a spurious history section
+    assert "<conversation_history>" not in result
+
+
 @pytest.mark.asyncio
 async def test_build_query_no_resume_multi_message_compacted(monkeypatch):
     """When compression actually compacts, was_compacted should be True."""
diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py
index 4d53611021..88c41f4c51 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -1001,12 +1001,14 @@ async def _build_query_message(
                     f"{gap_context}\n\nNow, the user says:\n{current_message}",
                     was_compressed,
                 )
-    elif not use_resume and msg_count > 1:
+    elif not use_resume and effective_count > 1:
         logger.warning(
             f"[SDK] Using compression fallback for session "
-            f"{session_id} ({msg_count} messages) — no transcript for --resume"
+            f"{session_id} ({effective_count} messages) — no transcript for --resume"
+        )
+        compressed, was_compressed = await _compress_messages(
+            session.messages[: effective_count - 1]
         )
-        compressed, was_compressed = await _compress_messages(session.messages[:-1])
         history_context = _format_conversation_context(compressed)
         if history_context:
             return (