refactor(backend/copilot): rename has_tool_been_called and sample monotonic once per chunk

Addresses three self-review nits on #12871: 1. Rename `has_tool_been_called_this_turn` → `has_tool_been_called`. The method is misleadingly named: its durable-messages branch scans the full ``session.messages`` list (not just the current turn), which matches the guide-read contract (``test_guide_earlier_in_history_still_passes``) but actively invites the wrong reading at every call site. Only the in-flight buffer is genuinely turn-scoped. Update the lone caller (``require_guide_read``) and the agent_guide_gate_test docstring reference. 2. Clarify `announce_inflight_tool_call` docstring to state that the announcement fires *before* ``execute_tool`` runs and isn't rolled back if the tool raises. That matches the guide-read gate's "was it called?" semantics, but a future gate wanting *successful* dispatches would need its own tracking — flagging this in the docstring so the next reader sees it. 3. Sample ``time.monotonic()`` once per reasoning chunk instead of twice (once inside ``_should_flush_pending``, again on flush). At ~4,700 chunks per Kimi turn that's ~4,700 redundant monotonic() syscalls off the hot path. ``_should_flush_pending`` now takes ``now`` as a parameter so the caller supplies the already-sampled value, and the flush branch reuses the same value for ``_last_flush_monotonic``. Existing coalescing tests (``test_time_based_flush_when_chars_stay_below_threshold``) pass unchanged via the same ``monkeypatch`` on ``time.monotonic``.
2026-04-30 03:00:41 -04:00 · 2026-04-21 23:11:10 +07:00
parent 1848810b32
commit 7dc3b880a6
4 changed files with 43 additions and 22 deletions
--- a/autogpt_platform/backend/backend/copilot/baseline/reasoning.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/reasoning.py
@@ -265,11 +265,15 @@ class BaselineReasoningEmitter:
        # rather than waiting for the coalesce window to elapse.  Subsequent
        # chunks buffer into ``_pending_delta`` and only flush when the
        # char/time thresholds trip.
+        # Sample the monotonic clock exactly once per chunk — at ~4,700
+        # chunks per turn, folding the two calls into one cuts ~4,700
+        # syscalls off the hot path without changing semantics.
+        now = time.monotonic()
        if not self._open:
            events.append(StreamReasoningStart(id=self._block_id))
            events.append(StreamReasoningDelta(id=self._block_id, delta=text))
            self._open = True
-            self._last_flush_monotonic = time.monotonic()
+            self._last_flush_monotonic = now
            if self._session_messages is not None:
                self._current_row = ChatMessage(role="reasoning", content=text)
                self._session_messages.append(self._current_row)
@@ -282,21 +286,26 @@ class BaselineReasoningEmitter:
            self._current_row.content = (self._current_row.content or "") + text

        self._pending_delta += text
-        if self._should_flush_pending():
+        if self._should_flush_pending(now):
            events.append(
                StreamReasoningDelta(id=self._block_id, delta=self._pending_delta)
            )
            self._pending_delta = ""
-            self._last_flush_monotonic = time.monotonic()
+            self._last_flush_monotonic = now
        return events

-    def _should_flush_pending(self) -> bool:
-        """Return True when the accumulated delta should be emitted now."""
+    def _should_flush_pending(self, now: float) -> bool:
+        """Return True when the accumulated delta should be emitted now.
+
+        *now* is the monotonic timestamp sampled by the caller so the
+        clock is read at most once per chunk (the flush-timestamp update
+        reuses the same value).
+        """
        if not self._pending_delta:
            return False
        if len(self._pending_delta) >= self._coalesce_min_chars:
            return True
-        elapsed_ms = (time.monotonic() - self._last_flush_monotonic) * 1000.0
+        elapsed_ms = (now - self._last_flush_monotonic) * 1000.0
        return elapsed_ms >= self._coalesce_max_interval_ms

    def close(self) -> list[StreamBaseResponse]:
--- a/autogpt_platform/backend/backend/copilot/model.py
+++ b/autogpt_platform/backend/backend/copilot/model.py
@@ -254,6 +254,13 @@ class ChatSession(ChatSessionInfo):
    def announce_inflight_tool_call(self, tool_name: str) -> None:
        """Record that *tool_name* is being dispatched in the current turn.

+        Called by the baseline tool executor **before** the tool actually
+        runs (the announcement is about dispatch, not success).  If the
+        tool raises, the name stays in the buffer for the rest of the
+        turn — that matches the guide-read gate's contract ("was the tool
+        called?") but means any future gate wanting *successful*
+        dispatches would need its own tracking.
+
        Lets in-turn guards (see
        ``copilot/tools/helpers.py::require_guide_read``) see a tool
        call the moment it's issued, instead of waiting for the
@@ -270,13 +277,18 @@ class ChatSession(ChatSessionInfo):
        """Reset the in-flight tool-call announcement buffer."""
        self._inflight_tool_calls.clear()

-    def has_tool_been_called_this_turn(self, tool_name: str) -> bool:
-        """True when *tool_name* has been called in the current turn.
+    def has_tool_been_called(self, tool_name: str) -> bool:
+        """True when *tool_name* has been called in this session.

-        Checks the in-flight announcement buffer first (for calls
-        dispatched in *this* turn but not yet persisted) and then the
-        durable ``messages`` history (for past turns + prior rounds
-        within this turn whose writes already landed).
+        Checks the in-flight announcement buffer (for calls dispatched
+        in the *current* turn but not yet flushed into ``messages``) and
+        the durable ``messages`` history (for past turns + prior rounds
+        within this turn whose writes already landed).  The durable
+        scan is session-wide, not turn-scoped: a matching tool call
+        anywhere in ``messages`` counts.  This matches the guide-read
+        contract — once the guide has been read in the session, the
+        agent doesn't need to re-read it for later create/edit/fix
+        tools.
        """
        if tool_name in self._inflight_tool_calls:
            return True
--- a/autogpt_platform/backend/backend/copilot/tools/agent_guide_gate_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_guide_gate_test.py
@@ -23,7 +23,7 @@ def _session_with_messages(

    Uses ``ChatSession.new`` + attribute reassignment rather than
    ``MagicMock(spec=...)`` because the gate now calls
-    ``session.has_tool_been_called_this_turn(...)`` and a ``spec`` mock
+    ``session.has_tool_been_called(...)`` and a ``spec`` mock
    returns a truthy ``MagicMock`` from that call, hiding real gate
    behaviour.  A live ``ChatSession`` also correctly initialises the
    ``_inflight_tool_calls`` PrivateAttr scratch buffer used by the
--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -791,14 +791,14 @@ def require_guide_read(session: ChatSession, tool_name: str):
    """Return an ErrorResponse if the guide hasn't been loaded this session.

    Import inline to keep ``helpers.py`` free of tool-response imports.
-    Uses :meth:`ChatSession.has_tool_been_called_this_turn` which checks
-    both the persisted ``messages`` list and the in-flight announcement
-    buffer — so a guide call dispatched earlier in the *current* turn
-    (before ``session.messages`` flushes at turn end) is recognised too.
-    Otherwise a second tool in the same turn would re-fire this guard
-    despite the guide having been called — seen on Kimi K2.6 in
-    particular because its aggressive tool-call chaining exercises this
-    path far more than Sonnet does.
+    Uses :meth:`ChatSession.has_tool_been_called` which checks both the
+    persisted ``messages`` list (session-wide) and the in-flight
+    announcement buffer — so a guide call dispatched earlier in the
+    *current* turn (before ``session.messages`` flushes at turn end) is
+    recognised too.  Otherwise a second tool in the same turn would
+    re-fire this guard despite the guide having been called — seen on
+    Kimi K2.6 in particular because its aggressive tool-call chaining
+    exercises this path far more than Sonnet does.
    """
    from .models import ErrorResponse  # noqa: PLC0415 — avoid circular import

@@ -808,7 +808,7 @@ def require_guide_read(session: ChatSession, tool_name: str):
    # requiring one would waste a round-trip every turn.
    if session.metadata.builder_graph_id:
        return None
-    if session.has_tool_been_called_this_turn(_AGENT_GUIDE_TOOL_NAME):
+    if session.has_tool_been_called(_AGENT_GUIDE_TOOL_NAME):
        return None
    return ErrorResponse(
        message=(