mirror of https://github.com/Significant-Gravitas/AutoGPT.git, synced 2026-04-30 03:00:41 -04:00
Resolve merge conflicts in copilot baseline service files
Keep HEAD's pre-drain count logic for transcript loading and drain error handling, and merge incoming cache token extraction tests from PR #12762.
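For context, the shape of the resolved logic as a minimal sketch only: the pre-drain message count gates the prior-transcript load while the system prompt builds concurrently. The helper names come from the diff below; `pre_drain_msg_count` and the exact signatures are assumptions, not the service's verbatim code.

import asyncio

async def _resolved_prefetch_sketch(
    user_id, session_id, pre_drain_msg_count, transcript_builder
):
    # Prompt build is independent I/O; start it unconditionally.
    prompt_task = _build_cacheable_system_prompt(None)

    # HEAD's rule: gate on the count captured BEFORE the turn-start drain,
    # so freshly drained pending messages can't make a first turn look
    # like a follow-up and trigger a spurious transcript load.
    if user_id and pre_drain_msg_count > 1:
        (
            transcript_covers_prefix,
            (base_system_prompt, understanding),
        ) = await asyncio.gather(
            _load_prior_transcript(
                user_id=user_id,
                session_id=session_id,
                session_msg_count=pre_drain_msg_count,
                transcript_builder=transcript_builder,
            ),
            prompt_task,
        )
    else:
        base_system_prompt, understanding = await prompt_task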
@@ -1113,7 +1113,6 @@ async def stream_chat_completion_baseline(
    prompt_task = _build_cacheable_system_prompt(None)

    # Run download + prompt build concurrently — both are independent I/O
<<<<<<< HEAD
    # on the request critical path. Use the pre-drain count so pending
    # messages drained at turn start don't spuriously trigger a transcript
    # load on an actual first turn.
@@ -1130,21 +1129,6 @@ async def stream_chat_completion_baseline(
            ),
            prompt_task,
        )
=======
    # on the request critical path.
    if user_id and len(session.messages) > 1:
        (
            transcript_covers_prefix,
            (base_system_prompt, understanding),
        ) = await asyncio.gather(
            _load_prior_transcript(
                user_id=user_id,
                session_id=session_id,
                session_msg_count=len(session.messages),
                transcript_builder=transcript_builder,
            ),
            prompt_task,
>>>>>>> c6af52033dc97f673af7a968564d14fbb2949707
        )
    else:
        base_system_prompt, understanding = await prompt_task

@@ -847,208 +847,6 @@ class TestBaselineCostExtraction:
        # response was never assigned so cost extraction must not raise
        assert state.cost_usd is None

<<<<<<< HEAD

class TestMidLoopPendingFlushOrdering:
    """Regression test for the mid-loop pending drain ordering invariant.

    ``_baseline_conversation_updater`` records assistant+tool entries from
    each tool-call round into ``state.session_messages``; the finally block
    of ``stream_chat_completion_baseline`` batch-flushes them into
    ``session.messages`` at the end of the turn.

    The mid-loop pending drain appends pending user messages directly to
    ``session.messages``. Without flushing ``state.session_messages`` first,
    the pending user message lands BEFORE the preceding round's assistant+
    tool entries in the final persisted ``session.messages`` — which
    produces a malformed tool-call/tool-result ordering on the next turn's
    replay.

    This test documents the invariant by replaying the production flush
    sequence against an in-memory state.
    """

    def test_flush_then_append_preserves_chronological_order(self):
        """Mid-loop drain must flush state.session_messages before appending
        the pending user message, so the final order matches the
        chronological execution order.
        """
        # Initial state: user turn already appended by maybe_append_user_message
        session_messages: list[ChatMessage] = [
            ChatMessage(role="user", content="original user turn"),
        ]
        state = _BaselineStreamState()

        # Round 1 completes: conversation_updater buffers assistant+tool
        # entries into state.session_messages (but does NOT write to
        # session.messages yet).
        builder = TranscriptBuilder()
        builder.append_user("original user turn")
        response = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        tool_results = [
            ToolCallResult(
                tool_call_id="tc_1", tool_name="search", content="search output"
            ),
        ]
        openai_messages: list = []
        _baseline_conversation_updater(
            openai_messages,
            response,
            tool_results=tool_results,
            transcript_builder=builder,
            state=state,
            model="test-model",
        )
        # state.session_messages should now hold the round-1 assistant + tool
        assert len(state.session_messages) == 2
        assert state.session_messages[0].role == "assistant"
        assert state.session_messages[1].role == "tool"

        # --- Mid-loop pending drain (production code pattern) ---
        # Flush first, THEN append pending. This is the ordering fix.
        for _buffered in state.session_messages:
            session_messages.append(_buffered)
        state.session_messages.clear()
        session_messages.append(
            ChatMessage(role="user", content="pending mid-loop message")
        )

        # Round 2 completes: new assistant+tool entries buffer again.
        response2 = LLMLoopResponse(
            response_text="another call",
            tool_calls=[LLMToolCall(id="tc_2", name="calc", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        tool_results2 = [
            ToolCallResult(
                tool_call_id="tc_2", tool_name="calc", content="calc output"
            ),
        ]
        _baseline_conversation_updater(
            openai_messages,
            response2,
            tool_results=tool_results2,
            transcript_builder=builder,
            state=state,
            model="test-model",
        )

        # --- Finally-block flush (end of turn) ---
        for msg in state.session_messages:
            session_messages.append(msg)

        # Assert chronological order: original user, round-1 assistant,
        # round-1 tool, pending user, round-2 assistant, round-2 tool.
        assert [m.role for m in session_messages] == [
            "user",
            "assistant",
            "tool",
            "user",
            "assistant",
            "tool",
        ]
        assert session_messages[0].content == "original user turn"
        assert session_messages[3].content == "pending mid-loop message"
        # The assistant message carrying tool_call tc_1 must be immediately
        # followed by its tool result — no user message interposed.
        assert session_messages[1].role == "assistant"
        assert session_messages[1].tool_calls is not None
        assert session_messages[1].tool_calls[0]["id"] == "tc_1"
        assert session_messages[2].role == "tool"
        assert session_messages[2].tool_call_id == "tc_1"
        # Same invariant for the round after the pending user.
        assert session_messages[4].tool_calls is not None
        assert session_messages[4].tool_calls[0]["id"] == "tc_2"
        assert session_messages[5].tool_call_id == "tc_2"

    def test_flushed_assistant_text_len_prevents_duplicate_final_text(self):
        """After mid-loop drain clears state.session_messages, the finally
        block must not re-append assistant text from rounds already flushed.

        ``state.assistant_text`` accumulates ALL rounds' text, but
        ``state.session_messages`` only holds entries from rounds AFTER the
        last mid-loop flush. Without ``_flushed_assistant_text_len``, the
        ``finally`` block's ``startswith(recorded)`` check fails because
        ``recorded`` only covers post-flush rounds, and the full
        ``assistant_text`` is appended — duplicating pre-flush rounds.
        """
        state = _BaselineStreamState()
        session_messages: list[ChatMessage] = [
            ChatMessage(role="user", content="user turn"),
        ]

        # Simulate round 1 text accumulation (as _bound_llm_caller does)
        state.assistant_text += "calling search"

        # Round 1 conversation_updater buffers structured entries
        builder = TranscriptBuilder()
        builder.append_user("user turn")
        response1 = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        _baseline_conversation_updater(
            [],
            response1,
            tool_results=[
                ToolCallResult(
                    tool_call_id="tc_1", tool_name="search", content="result"
                )
            ],
            transcript_builder=builder,
            state=state,
            model="test-model",
        )

        # Mid-loop drain: flush + clear + record flushed text length
        for _buffered in state.session_messages:
            session_messages.append(_buffered)
        state.session_messages.clear()
        state._flushed_assistant_text_len = len(state.assistant_text)
        session_messages.append(ChatMessage(role="user", content="pending message"))

        # Simulate round 2 text accumulation
        state.assistant_text += "final answer"

        # Round 2: natural finish (no tool calls → no session_messages entry)

        # --- Finally block logic (production code) ---
        for msg in state.session_messages:
            session_messages.append(msg)

        final_text = state.assistant_text[state._flushed_assistant_text_len :]
        if state.session_messages:
            recorded = "".join(
                m.content or "" for m in state.session_messages if m.role == "assistant"
            )
            if final_text.startswith(recorded):
                final_text = final_text[len(recorded) :]
        if final_text.strip():
            session_messages.append(ChatMessage(role="assistant", content=final_text))

        # The final assistant message should only contain round-2 text,
        # not the round-1 text that was already flushed mid-loop.
        assistant_msgs = [m for m in session_messages if m.role == "assistant"]
        # Round-1 structured assistant (from mid-loop flush)
        assert assistant_msgs[0].content == "calling search"
        assert assistant_msgs[0].tool_calls is not None
        # Round-2 final text (from finally block)
        assert assistant_msgs[1].content == "final answer"
        assert assistant_msgs[1].tool_calls is None
        # Crucially: only 2 assistant messages, not 3 (no duplicate)
        assert len(assistant_msgs) == 2
=======
    @pytest.mark.asyncio
    async def test_cost_estimated_from_tokens_when_header_missing(self):
        """cost_usd is estimated from token counts when x-total-cost is absent."""
@@ -1252,4 +1050,204 @@ class TestMidLoopPendingFlushOrdering:
        # Accumulators hold all tokens across both turns
        assert state.turn_prompt_tokens == 2100
        assert state.turn_completion_tokens == 500
>>>>>>> c6af52033dc97f673af7a968564d14fbb2949707

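The incoming side above merges cache/cost token-extraction tests (PR #12762). A minimal sketch of the fallback those tests describe; the per-token rates and the helper name are hypothetical, and only the `x-total-cost` header name comes from the test docstring.

# Hypothetical rates; the real service presumably reads model pricing config.
_PROMPT_USD_PER_TOKEN = 3e-06
_COMPLETION_USD_PER_TOKEN = 1.5e-05

def _estimate_cost_usd(headers, prompt_tokens, completion_tokens):
    # Prefer the authoritative x-total-cost response header when present.
    if "x-total-cost" in headers:
        return float(headers["x-total-cost"])
    # Header missing: estimate from the accumulated token counts.
    return (
        prompt_tokens * _PROMPT_USD_PER_TOKEN
        + completion_tokens * _COMPLETION_USD_PER_TOKEN
    )

The kept test class follows, re-added below the merged tests:
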
class TestMidLoopPendingFlushOrdering:
    """Regression test for the mid-loop pending drain ordering invariant.

    ``_baseline_conversation_updater`` records assistant+tool entries from
    each tool-call round into ``state.session_messages``; the finally block
    of ``stream_chat_completion_baseline`` batch-flushes them into
    ``session.messages`` at the end of the turn.

    The mid-loop pending drain appends pending user messages directly to
    ``session.messages``. Without flushing ``state.session_messages`` first,
    the pending user message lands BEFORE the preceding round's assistant+
    tool entries in the final persisted ``session.messages`` — which
    produces a malformed tool-call/tool-result ordering on the next turn's
    replay.

    This test documents the invariant by replaying the production flush
    sequence against an in-memory state.
    """

    def test_flush_then_append_preserves_chronological_order(self):
        """Mid-loop drain must flush state.session_messages before appending
        the pending user message, so the final order matches the
        chronological execution order.
        """
        # Initial state: user turn already appended by maybe_append_user_message
        session_messages: list[ChatMessage] = [
            ChatMessage(role="user", content="original user turn"),
        ]
        state = _BaselineStreamState()

        # Round 1 completes: conversation_updater buffers assistant+tool
        # entries into state.session_messages (but does NOT write to
        # session.messages yet).
        builder = TranscriptBuilder()
        builder.append_user("original user turn")
        response = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        tool_results = [
            ToolCallResult(
                tool_call_id="tc_1", tool_name="search", content="search output"
            ),
        ]
        openai_messages: list = []
        _baseline_conversation_updater(
            openai_messages,
            response,
            tool_results=tool_results,
            transcript_builder=builder,
            state=state,
            model="test-model",
        )
        # state.session_messages should now hold the round-1 assistant + tool
        assert len(state.session_messages) == 2
        assert state.session_messages[0].role == "assistant"
        assert state.session_messages[1].role == "tool"

        # --- Mid-loop pending drain (production code pattern) ---
        # Flush first, THEN append pending. This is the ordering fix.
        for _buffered in state.session_messages:
            session_messages.append(_buffered)
        state.session_messages.clear()
        session_messages.append(
            ChatMessage(role="user", content="pending mid-loop message")
        )

        # Round 2 completes: new assistant+tool entries buffer again.
        response2 = LLMLoopResponse(
            response_text="another call",
            tool_calls=[LLMToolCall(id="tc_2", name="calc", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        tool_results2 = [
            ToolCallResult(
                tool_call_id="tc_2", tool_name="calc", content="calc output"
            ),
        ]
        _baseline_conversation_updater(
            openai_messages,
            response2,
            tool_results=tool_results2,
            transcript_builder=builder,
            state=state,
            model="test-model",
        )

        # --- Finally-block flush (end of turn) ---
        for msg in state.session_messages:
            session_messages.append(msg)

        # Assert chronological order: original user, round-1 assistant,
        # round-1 tool, pending user, round-2 assistant, round-2 tool.
        assert [m.role for m in session_messages] == [
            "user",
            "assistant",
            "tool",
            "user",
            "assistant",
            "tool",
        ]
        assert session_messages[0].content == "original user turn"
        assert session_messages[3].content == "pending mid-loop message"
        # The assistant message carrying tool_call tc_1 must be immediately
        # followed by its tool result — no user message interposed.
        assert session_messages[1].role == "assistant"
        assert session_messages[1].tool_calls is not None
        assert session_messages[1].tool_calls[0]["id"] == "tc_1"
        assert session_messages[2].role == "tool"
        assert session_messages[2].tool_call_id == "tc_1"
        # Same invariant for the round after the pending user.
        assert session_messages[4].tool_calls is not None
        assert session_messages[4].tool_calls[0]["id"] == "tc_2"
        assert session_messages[5].tool_call_id == "tc_2"

    def test_flushed_assistant_text_len_prevents_duplicate_final_text(self):
        """After mid-loop drain clears state.session_messages, the finally
        block must not re-append assistant text from rounds already flushed.

        ``state.assistant_text`` accumulates ALL rounds' text, but
        ``state.session_messages`` only holds entries from rounds AFTER the
        last mid-loop flush. Without ``_flushed_assistant_text_len``, the
        ``finally`` block's ``startswith(recorded)`` check fails because
        ``recorded`` only covers post-flush rounds, and the full
        ``assistant_text`` is appended — duplicating pre-flush rounds.
        """
        state = _BaselineStreamState()
        session_messages: list[ChatMessage] = [
            ChatMessage(role="user", content="user turn"),
        ]

        # Simulate round 1 text accumulation (as _bound_llm_caller does)
        state.assistant_text += "calling search"

        # Round 1 conversation_updater buffers structured entries
        builder = TranscriptBuilder()
        builder.append_user("user turn")
        response1 = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        _baseline_conversation_updater(
            [],
            response1,
            tool_results=[
                ToolCallResult(
                    tool_call_id="tc_1", tool_name="search", content="result"
                )
            ],
            transcript_builder=builder,
            state=state,
            model="test-model",
        )

        # Mid-loop drain: flush + clear + record flushed text length
        for _buffered in state.session_messages:
            session_messages.append(_buffered)
        state.session_messages.clear()
        state._flushed_assistant_text_len = len(state.assistant_text)
        session_messages.append(ChatMessage(role="user", content="pending message"))

        # Simulate round 2 text accumulation
        state.assistant_text += "final answer"

        # Round 2: natural finish (no tool calls → no session_messages entry)

        # --- Finally block logic (production code) ---
        for msg in state.session_messages:
            session_messages.append(msg)

        final_text = state.assistant_text[state._flushed_assistant_text_len :]
        if state.session_messages:
            recorded = "".join(
                m.content or "" for m in state.session_messages if m.role == "assistant"
            )
            if final_text.startswith(recorded):
                final_text = final_text[len(recorded) :]
        if final_text.strip():
            session_messages.append(ChatMessage(role="assistant", content=final_text))

        # The final assistant message should only contain round-2 text,
        # not the round-1 text that was already flushed mid-loop.
        assistant_msgs = [m for m in session_messages if m.role == "assistant"]
        # Round-1 structured assistant (from mid-loop flush)
        assert assistant_msgs[0].content == "calling search"
        assert assistant_msgs[0].tool_calls is not None
        # Round-2 final text (from finally block)
        assert assistant_msgs[1].content == "final answer"
        assert assistant_msgs[1].tool_calls is None
        # Crucially: only 2 assistant messages, not 3 (no duplicate)
        assert len(assistant_msgs) == 2
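Taken together, the two tests pin down one production pattern. A condensed sketch of that mid-loop drain follows; `drain_pending_user_messages` is a hypothetical helper name, and the `state` fields are taken from the tests above rather than the service's exact API.

def drain_pending_user_messages(session_messages, state, pending):
    # 1. Flush buffered assistant/tool entries from finished rounds FIRST,
    #    so they land before the pending user message chronologically.
    session_messages.extend(state.session_messages)
    state.session_messages.clear()
    # 2. Record how much assistant text those rounds covered, so the
    #    finally block appends only post-flush text and never duplicates
    #    already-flushed rounds.
    state._flushed_assistant_text_len = len(state.assistant_text)
    # 3. Only now append the drained pending user messages.
    session_messages.extend(pending)

Reversing steps 1 and 3 reproduces the malformed tool-call/tool-result ordering the first test guards against; dropping step 2 reproduces the duplicated final text the second test guards against.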