refactor(backend/copilot): rename has_tool_been_called and sample monotonic once per chunk

Addresses three self-review nits on #12871:

1. Rename `has_tool_been_called_this_turn` → `has_tool_been_called`.
   The method is misleadingly named: its durable-messages branch
   scans the full ``session.messages`` list (not just the current
   turn), which matches the guide-read contract
   (``test_guide_earlier_in_history_still_passes``) but actively
   invites the wrong reading at every call site.  Only the
   in-flight buffer is genuinely turn-scoped.  Update the lone
   caller (``require_guide_read``) and the agent_guide_gate_test
   docstring reference.

2. Clarify `announce_inflight_tool_call` docstring to state that
   the announcement fires *before* ``execute_tool`` runs and isn't
   rolled back if the tool raises.  That matches the guide-read
   gate's "was it called?" semantics, but a future gate wanting
   *successful* dispatches would need its own tracking — flagging
   this in the docstring so the next reader sees it.

3. Sample ``time.monotonic()`` once per reasoning chunk instead of
   twice (once inside ``_should_flush_pending``, again on flush).
   At ~4,700 chunks per Kimi turn that's ~4,700 redundant
   monotonic() syscalls off the hot path.  ``_should_flush_pending``
   now takes ``now`` as a parameter so the caller supplies the
   already-sampled value, and the flush branch reuses the same
   value for ``_last_flush_monotonic``.  Existing coalescing tests
   (``test_time_based_flush_when_chars_stay_below_threshold``) pass
   unchanged via the same ``monkeypatch`` on ``time.monotonic``.
This commit is contained in:
majdyz
2026-04-21 23:11:10 +07:00
parent 1848810b32
commit 7dc3b880a6
4 changed files with 43 additions and 22 deletions

View File

@@ -265,11 +265,15 @@ class BaselineReasoningEmitter:
# rather than waiting for the coalesce window to elapse. Subsequent
# chunks buffer into ``_pending_delta`` and only flush when the
# char/time thresholds trip.
# Sample the monotonic clock exactly once per chunk — at ~4,700
# chunks per turn, folding the two calls into one cuts ~4,700
# syscalls off the hot path without changing semantics.
now = time.monotonic()
if not self._open:
events.append(StreamReasoningStart(id=self._block_id))
events.append(StreamReasoningDelta(id=self._block_id, delta=text))
self._open = True
self._last_flush_monotonic = time.monotonic()
self._last_flush_monotonic = now
if self._session_messages is not None:
self._current_row = ChatMessage(role="reasoning", content=text)
self._session_messages.append(self._current_row)
@@ -282,21 +286,26 @@ class BaselineReasoningEmitter:
self._current_row.content = (self._current_row.content or "") + text
self._pending_delta += text
if self._should_flush_pending():
if self._should_flush_pending(now):
events.append(
StreamReasoningDelta(id=self._block_id, delta=self._pending_delta)
)
self._pending_delta = ""
self._last_flush_monotonic = time.monotonic()
self._last_flush_monotonic = now
return events
def _should_flush_pending(self) -> bool:
"""Return True when the accumulated delta should be emitted now."""
def _should_flush_pending(self, now: float) -> bool:
"""Return True when the accumulated delta should be emitted now.
*now* is the monotonic timestamp sampled by the caller so the
clock is read at most once per chunk (the flush-timestamp update
reuses the same value).
"""
if not self._pending_delta:
return False
if len(self._pending_delta) >= self._coalesce_min_chars:
return True
elapsed_ms = (time.monotonic() - self._last_flush_monotonic) * 1000.0
elapsed_ms = (now - self._last_flush_monotonic) * 1000.0
return elapsed_ms >= self._coalesce_max_interval_ms
def close(self) -> list[StreamBaseResponse]:

View File

@@ -254,6 +254,13 @@ class ChatSession(ChatSessionInfo):
def announce_inflight_tool_call(self, tool_name: str) -> None:
"""Record that *tool_name* is being dispatched in the current turn.
Called by the baseline tool executor **before** the tool actually
runs (the announcement is about dispatch, not success). If the
tool raises, the name stays in the buffer for the rest of the
turn — that matches the guide-read gate's contract ("was the tool
called?") but means any future gate wanting *successful*
dispatches would need its own tracking.
Lets in-turn guards (see
``copilot/tools/helpers.py::require_guide_read``) see a tool
call the moment it's issued, instead of waiting for the
@@ -270,13 +277,18 @@ class ChatSession(ChatSessionInfo):
"""Reset the in-flight tool-call announcement buffer."""
self._inflight_tool_calls.clear()
def has_tool_been_called_this_turn(self, tool_name: str) -> bool:
"""True when *tool_name* has been called in the current turn.
def has_tool_been_called(self, tool_name: str) -> bool:
"""True when *tool_name* has been called in this session.
Checks the in-flight announcement buffer first (for calls
dispatched in *this* turn but not yet persisted) and then the
durable ``messages`` history (for past turns + prior rounds
within this turn whose writes already landed).
Checks the in-flight announcement buffer (for calls dispatched
in the *current* turn but not yet flushed into ``messages``) and
the durable ``messages`` history (for past turns + prior rounds
within this turn whose writes already landed). The durable
scan is session-wide, not turn-scoped: a matching tool call
anywhere in ``messages`` counts. This matches the guide-read
contract — once the guide has been read in the session, the
agent doesn't need to re-read it for later create/edit/fix
tools.
"""
if tool_name in self._inflight_tool_calls:
return True

View File

@@ -23,7 +23,7 @@ def _session_with_messages(
Uses ``ChatSession.new`` + attribute reassignment rather than
``MagicMock(spec=...)`` because the gate now calls
``session.has_tool_been_called_this_turn(...)`` and a ``spec`` mock
``session.has_tool_been_called(...)`` and a ``spec`` mock
returns a truthy ``MagicMock`` from that call, hiding real gate
behaviour. A live ``ChatSession`` also correctly initialises the
``_inflight_tool_calls`` PrivateAttr scratch buffer used by the

View File

@@ -791,14 +791,14 @@ def require_guide_read(session: ChatSession, tool_name: str):
"""Return an ErrorResponse if the guide hasn't been loaded this session.
Import inline to keep ``helpers.py`` free of tool-response imports.
Uses :meth:`ChatSession.has_tool_been_called_this_turn` which checks
both the persisted ``messages`` list and the in-flight announcement
buffer — so a guide call dispatched earlier in the *current* turn
(before ``session.messages`` flushes at turn end) is recognised too.
Otherwise a second tool in the same turn would re-fire this guard
despite the guide having been called — seen on Kimi K2.6 in
particular because its aggressive tool-call chaining exercises this
path far more than Sonnet does.
Uses :meth:`ChatSession.has_tool_been_called` which checks both the
persisted ``messages`` list (session-wide) and the in-flight
announcement buffer — so a guide call dispatched earlier in the
*current* turn (before ``session.messages`` flushes at turn end) is
recognised too. Otherwise a second tool in the same turn would
re-fire this guard despite the guide having been called — seen on
Kimi K2.6 in particular because its aggressive tool-call chaining
exercises this path far more than Sonnet does.
"""
from .models import ErrorResponse # noqa: PLC0415 — avoid circular import
@@ -808,7 +808,7 @@ def require_guide_read(session: ChatSession, tool_name: str):
# requiring one would waste a round-trip every turn.
if session.metadata.builder_graph_id:
return None
if session.has_tool_been_called_this_turn(_AGENT_GUIDE_TOOL_NAME):
if session.has_tool_been_called(_AGENT_GUIDE_TOOL_NAME):
return None
return ErrorResponse(
message=(