Resolve merge conflicts in copilot baseline service files

Keep HEAD's pre-drain count logic for transcript loading and drain error
handling, and merge incoming cache token extraction tests from PR #12762.
This commit is contained in:
majdyz
2026-04-13 10:49:02 +00:00
parent ea0b5f70ad
commit f121dcd5c8
2 changed files with 201 additions and 219 deletions

View File

@@ -1113,7 +1113,6 @@ async def stream_chat_completion_baseline(
prompt_task = _build_cacheable_system_prompt(None)
# Run download + prompt build concurrently — both are independent I/O
<<<<<<< HEAD
# on the request critical path. Use the pre-drain count so pending
# messages drained at turn start don't spuriously trigger a transcript
# load on an actual first turn.
@@ -1130,21 +1129,6 @@ async def stream_chat_completion_baseline(
),
prompt_task,
)
=======
# on the request critical path.
if user_id and len(session.messages) > 1:
(
transcript_covers_prefix,
(base_system_prompt, understanding),
) = await asyncio.gather(
_load_prior_transcript(
user_id=user_id,
session_id=session_id,
session_msg_count=len(session.messages),
transcript_builder=transcript_builder,
),
prompt_task,
>>>>>>> c6af52033dc97f673af7a968564d14fbb2949707
)
else:
base_system_prompt, understanding = await prompt_task

View File

@@ -847,208 +847,6 @@ class TestBaselineCostExtraction:
# response was never assigned so cost extraction must not raise
assert state.cost_usd is None
<<<<<<< HEAD
class TestMidLoopPendingFlushOrdering:
"""Regression test for the mid-loop pending drain ordering invariant.
``_baseline_conversation_updater`` records assistant+tool entries from
each tool-call round into ``state.session_messages``; the finally block
of ``stream_chat_completion_baseline`` batch-flushes them into
``session.messages`` at the end of the turn.
The mid-loop pending drain appends pending user messages directly to
``session.messages``. Without flushing ``state.session_messages`` first,
the pending user message lands BEFORE the preceding round's assistant+
tool entries in the final persisted ``session.messages`` — which
produces a malformed tool-call/tool-result ordering on the next turn's
replay.
This test documents the invariant by replaying the production flush
sequence against an in-memory state.
"""
def test_flush_then_append_preserves_chronological_order(self):
"""Mid-loop drain must flush state.session_messages before appending
the pending user message, so the final order matches the
chronological execution order.
"""
# Initial state: user turn already appended by maybe_append_user_message
session_messages: list[ChatMessage] = [
ChatMessage(role="user", content="original user turn"),
]
state = _BaselineStreamState()
# Round 1 completes: conversation_updater buffers assistant+tool
# entries into state.session_messages (but does NOT write to
# session.messages yet).
builder = TranscriptBuilder()
builder.append_user("original user turn")
response = LLMLoopResponse(
response_text="calling search",
tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
raw_response=None,
prompt_tokens=0,
completion_tokens=0,
)
tool_results = [
ToolCallResult(
tool_call_id="tc_1", tool_name="search", content="search output"
),
]
openai_messages: list = []
_baseline_conversation_updater(
openai_messages,
response,
tool_results=tool_results,
transcript_builder=builder,
state=state,
model="test-model",
)
# state.session_messages should now hold the round-1 assistant + tool
assert len(state.session_messages) == 2
assert state.session_messages[0].role == "assistant"
assert state.session_messages[1].role == "tool"
# --- Mid-loop pending drain (production code pattern) ---
# Flush first, THEN append pending. This is the ordering fix.
for _buffered in state.session_messages:
session_messages.append(_buffered)
state.session_messages.clear()
session_messages.append(
ChatMessage(role="user", content="pending mid-loop message")
)
# Round 2 completes: new assistant+tool entries buffer again.
response2 = LLMLoopResponse(
response_text="another call",
tool_calls=[LLMToolCall(id="tc_2", name="calc", arguments="{}")],
raw_response=None,
prompt_tokens=0,
completion_tokens=0,
)
tool_results2 = [
ToolCallResult(
tool_call_id="tc_2", tool_name="calc", content="calc output"
),
]
_baseline_conversation_updater(
openai_messages,
response2,
tool_results=tool_results2,
transcript_builder=builder,
state=state,
model="test-model",
)
# --- Finally-block flush (end of turn) ---
for msg in state.session_messages:
session_messages.append(msg)
# Assert chronological order: original user, round-1 assistant,
# round-1 tool, pending user, round-2 assistant, round-2 tool.
assert [m.role for m in session_messages] == [
"user",
"assistant",
"tool",
"user",
"assistant",
"tool",
]
assert session_messages[0].content == "original user turn"
assert session_messages[3].content == "pending mid-loop message"
# The assistant message carrying tool_call tc_1 must be immediately
# followed by its tool result — no user message interposed.
assert session_messages[1].role == "assistant"
assert session_messages[1].tool_calls is not None
assert session_messages[1].tool_calls[0]["id"] == "tc_1"
assert session_messages[2].role == "tool"
assert session_messages[2].tool_call_id == "tc_1"
# Same invariant for the round after the pending user.
assert session_messages[4].tool_calls is not None
assert session_messages[4].tool_calls[0]["id"] == "tc_2"
assert session_messages[5].tool_call_id == "tc_2"
def test_flushed_assistant_text_len_prevents_duplicate_final_text(self):
"""After mid-loop drain clears state.session_messages, the finally
block must not re-append assistant text from rounds already flushed.
``state.assistant_text`` accumulates ALL rounds' text, but
``state.session_messages`` only holds entries from rounds AFTER the
last mid-loop flush. Without ``_flushed_assistant_text_len``, the
``finally`` block's ``startswith(recorded)`` check fails because
``recorded`` only covers post-flush rounds, and the full
``assistant_text`` is appended — duplicating pre-flush rounds.
"""
state = _BaselineStreamState()
session_messages: list[ChatMessage] = [
ChatMessage(role="user", content="user turn"),
]
# Simulate round 1 text accumulation (as _bound_llm_caller does)
state.assistant_text += "calling search"
# Round 1 conversation_updater buffers structured entries
builder = TranscriptBuilder()
builder.append_user("user turn")
response1 = LLMLoopResponse(
response_text="calling search",
tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
raw_response=None,
prompt_tokens=0,
completion_tokens=0,
)
_baseline_conversation_updater(
[],
response1,
tool_results=[
ToolCallResult(
tool_call_id="tc_1", tool_name="search", content="result"
)
],
transcript_builder=builder,
state=state,
model="test-model",
)
# Mid-loop drain: flush + clear + record flushed text length
for _buffered in state.session_messages:
session_messages.append(_buffered)
state.session_messages.clear()
state._flushed_assistant_text_len = len(state.assistant_text)
session_messages.append(ChatMessage(role="user", content="pending message"))
# Simulate round 2 text accumulation
state.assistant_text += "final answer"
# Round 2: natural finish (no tool calls → no session_messages entry)
# --- Finally block logic (production code) ---
for msg in state.session_messages:
session_messages.append(msg)
final_text = state.assistant_text[state._flushed_assistant_text_len :]
if state.session_messages:
recorded = "".join(
m.content or "" for m in state.session_messages if m.role == "assistant"
)
if final_text.startswith(recorded):
final_text = final_text[len(recorded) :]
if final_text.strip():
session_messages.append(ChatMessage(role="assistant", content=final_text))
# The final assistant message should only contain round-2 text,
# not the round-1 text that was already flushed mid-loop.
assistant_msgs = [m for m in session_messages if m.role == "assistant"]
# Round-1 structured assistant (from mid-loop flush)
assert assistant_msgs[0].content == "calling search"
assert assistant_msgs[0].tool_calls is not None
# Round-2 final text (from finally block)
assert assistant_msgs[1].content == "final answer"
assert assistant_msgs[1].tool_calls is None
# Crucially: only 2 assistant messages, not 3 (no duplicate)
assert len(assistant_msgs) == 2
=======
@pytest.mark.asyncio
async def test_cost_estimated_from_tokens_when_header_missing(self):
"""cost_usd is estimated from token counts when x-total-cost is absent."""
@@ -1252,4 +1050,204 @@ class TestMidLoopPendingFlushOrdering:
# Accumulators hold all tokens across both turns
assert state.turn_prompt_tokens == 2100
assert state.turn_completion_tokens == 500
>>>>>>> c6af52033dc97f673af7a968564d14fbb2949707
class TestMidLoopPendingFlushOrdering:
    """Regression tests for the mid-loop pending-drain ordering invariant.

    Tool-call rounds buffer their assistant/tool entries into
    ``state.session_messages`` via ``_baseline_conversation_updater``; the
    ``finally`` block of ``stream_chat_completion_baseline`` later batch-flushes
    that buffer into ``session.messages`` at end of turn.

    A mid-loop pending drain appends pending user messages straight to
    ``session.messages``.  If the buffer is not flushed first, the pending user
    message is persisted ahead of the previous round's assistant/tool pair,
    producing an invalid tool-call/tool-result ordering when the next turn
    replays the conversation.

    Both tests replay the production flush sequence against in-memory state to
    pin the invariant down.
    """

    def test_flush_then_append_preserves_chronological_order(self):
        """The drain must flush buffered round entries before appending the
        pending user message, so persisted order equals execution order.
        """
        # User turn already present (maybe_append_user_message ran earlier).
        persisted: list[ChatMessage] = [
            ChatMessage(role="user", content="original user turn"),
        ]
        stream_state = _BaselineStreamState()

        # Round 1: the updater buffers assistant+tool entries on the state
        # object and does NOT touch the persisted list yet.
        transcript = TranscriptBuilder()
        transcript.append_user("original user turn")
        round1 = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        round1_results = [
            ToolCallResult(
                tool_call_id="tc_1", tool_name="search", content="search output"
            ),
        ]
        wire_messages: list = []
        _baseline_conversation_updater(
            wire_messages,
            round1,
            tool_results=round1_results,
            transcript_builder=transcript,
            state=stream_state,
            model="test-model",
        )
        # Buffer now holds exactly the round-1 assistant/tool pair.
        assert [m.role for m in stream_state.session_messages] == [
            "assistant",
            "tool",
        ]

        # --- Mid-loop pending drain (mirrors production) ---
        # Flush the buffer FIRST, then append the pending user message.
        persisted.extend(stream_state.session_messages)
        stream_state.session_messages.clear()
        persisted.append(
            ChatMessage(role="user", content="pending mid-loop message")
        )

        # Round 2 buffers a fresh assistant+tool pair.
        round2 = LLMLoopResponse(
            response_text="another call",
            tool_calls=[LLMToolCall(id="tc_2", name="calc", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        round2_results = [
            ToolCallResult(
                tool_call_id="tc_2", tool_name="calc", content="calc output"
            ),
        ]
        _baseline_conversation_updater(
            wire_messages,
            round2,
            tool_results=round2_results,
            transcript_builder=transcript,
            state=stream_state,
            model="test-model",
        )

        # --- End-of-turn flush performed by the finally block ---
        persisted.extend(stream_state.session_messages)

        # Persisted order must match chronological execution order: original
        # user, round-1 assistant+tool, pending user, round-2 assistant+tool.
        assert [m.role for m in persisted] == [
            "user",
            "assistant",
            "tool",
            "user",
            "assistant",
            "tool",
        ]
        assert persisted[0].content == "original user turn"
        assert persisted[3].content == "pending mid-loop message"
        # Each assistant tool-call must be immediately followed by its tool
        # result — no user message interposed.
        assert persisted[1].role == "assistant"
        assert persisted[1].tool_calls is not None
        assert persisted[1].tool_calls[0]["id"] == "tc_1"
        assert persisted[2].role == "tool"
        assert persisted[2].tool_call_id == "tc_1"
        # Same invariant for the round that follows the pending user.
        assert persisted[4].tool_calls is not None
        assert persisted[4].tool_calls[0]["id"] == "tc_2"
        assert persisted[5].tool_call_id == "tc_2"

    def test_flushed_assistant_text_len_prevents_duplicate_final_text(self):
        """The finally block must not re-append assistant text from rounds a
        mid-loop drain already flushed.

        ``state.assistant_text`` accumulates text across ALL rounds, while
        ``state.session_messages`` only covers rounds since the last flush.
        ``_flushed_assistant_text_len`` records how much text was already
        persisted; without it the ``startswith(recorded)`` dedup check misses
        (``recorded`` covers only post-flush rounds) and the pre-flush text
        would be appended a second time.
        """
        stream_state = _BaselineStreamState()
        persisted: list[ChatMessage] = [
            ChatMessage(role="user", content="user turn"),
        ]

        # Round 1 text lands in the accumulator (as _bound_llm_caller does).
        stream_state.assistant_text += "calling search"

        # Round 1 updater buffers the structured assistant/tool entries.
        transcript = TranscriptBuilder()
        transcript.append_user("user turn")
        round1 = LLMLoopResponse(
            response_text="calling search",
            tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")],
            raw_response=None,
            prompt_tokens=0,
            completion_tokens=0,
        )
        _baseline_conversation_updater(
            [],
            round1,
            tool_results=[
                ToolCallResult(
                    tool_call_id="tc_1", tool_name="search", content="result"
                )
            ],
            transcript_builder=transcript,
            state=stream_state,
            model="test-model",
        )

        # Mid-loop drain: flush, clear, and record the flushed text length.
        persisted.extend(stream_state.session_messages)
        stream_state.session_messages.clear()
        stream_state._flushed_assistant_text_len = len(stream_state.assistant_text)
        persisted.append(ChatMessage(role="user", content="pending message"))

        # Round 2 adds more text and finishes naturally — no tool calls, so
        # nothing new is buffered in session_messages.
        stream_state.assistant_text += "final answer"

        # --- Replay of the production finally-block logic ---
        persisted.extend(stream_state.session_messages)
        final_text = stream_state.assistant_text[
            stream_state._flushed_assistant_text_len :
        ]
        if stream_state.session_messages:
            recorded = "".join(
                m.content or ""
                for m in stream_state.session_messages
                if m.role == "assistant"
            )
            if final_text.startswith(recorded):
                final_text = final_text[len(recorded) :]
        if final_text.strip():
            persisted.append(ChatMessage(role="assistant", content=final_text))

        # The final assistant message carries only round-2 text; round-1 text
        # was already flushed mid-loop and must not reappear.
        assistant_msgs = [m for m in persisted if m.role == "assistant"]
        # Exactly 2 assistant messages, not 3 — no duplicate.
        assert len(assistant_msgs) == 2
        # Round-1 structured assistant (from the mid-loop flush).
        assert assistant_msgs[0].content == "calling search"
        assert assistant_msgs[0].tool_calls is not None
        # Round-2 final text (from the finally block).
        assert assistant_msgs[1].content == "final answer"
        assert assistant_msgs[1].tool_calls is None