mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
refactor(backend/copilot): rename has_tool_been_called and sample monotonic once per chunk
Addresses three self-review nits on #12871: 1. Rename `has_tool_been_called_this_turn` → `has_tool_been_called`. The method is misleadingly named: its durable-messages branch scans the full ``session.messages`` list (not just the current turn), which matches the guide-read contract (``test_guide_earlier_in_history_still_passes``) but actively invites the wrong reading at every call site. Only the in-flight buffer is genuinely turn-scoped. Update the lone caller (``require_guide_read``) and the agent_guide_gate_test docstring reference. 2. Clarify `announce_inflight_tool_call` docstring to state that the announcement fires *before* ``execute_tool`` runs and isn't rolled back if the tool raises. That matches the guide-read gate's "was it called?" semantics, but a future gate wanting *successful* dispatches would need its own tracking — flagging this in the docstring so the next reader sees it. 3. Sample ``time.monotonic()`` once per reasoning chunk instead of twice (once inside ``_should_flush_pending``, again on flush). At ~4,700 chunks per Kimi turn that's ~4,700 redundant monotonic() syscalls off the hot path. ``_should_flush_pending`` now takes ``now`` as a parameter so the caller supplies the already-sampled value, and the flush branch reuses the same value for ``_last_flush_monotonic``. Existing coalescing tests (``test_time_based_flush_when_chars_stay_below_threshold``) pass unchanged via the same ``monkeypatch`` on ``time.monotonic``.
This commit is contained in:
@@ -265,11 +265,15 @@ class BaselineReasoningEmitter:
|
||||
# rather than waiting for the coalesce window to elapse. Subsequent
|
||||
# chunks buffer into ``_pending_delta`` and only flush when the
|
||||
# char/time thresholds trip.
|
||||
# Sample the monotonic clock exactly once per chunk — at ~4,700
|
||||
# chunks per turn, folding the two calls into one cuts ~4,700
|
||||
# syscalls off the hot path without changing semantics.
|
||||
now = time.monotonic()
|
||||
if not self._open:
|
||||
events.append(StreamReasoningStart(id=self._block_id))
|
||||
events.append(StreamReasoningDelta(id=self._block_id, delta=text))
|
||||
self._open = True
|
||||
self._last_flush_monotonic = time.monotonic()
|
||||
self._last_flush_monotonic = now
|
||||
if self._session_messages is not None:
|
||||
self._current_row = ChatMessage(role="reasoning", content=text)
|
||||
self._session_messages.append(self._current_row)
|
||||
@@ -282,21 +286,26 @@ class BaselineReasoningEmitter:
|
||||
self._current_row.content = (self._current_row.content or "") + text
|
||||
|
||||
self._pending_delta += text
|
||||
if self._should_flush_pending():
|
||||
if self._should_flush_pending(now):
|
||||
events.append(
|
||||
StreamReasoningDelta(id=self._block_id, delta=self._pending_delta)
|
||||
)
|
||||
self._pending_delta = ""
|
||||
self._last_flush_monotonic = time.monotonic()
|
||||
self._last_flush_monotonic = now
|
||||
return events
|
||||
|
||||
def _should_flush_pending(self) -> bool:
|
||||
"""Return True when the accumulated delta should be emitted now."""
|
||||
def _should_flush_pending(self, now: float) -> bool:
|
||||
"""Return True when the accumulated delta should be emitted now.
|
||||
|
||||
*now* is the monotonic timestamp sampled by the caller so the
|
||||
clock is read at most once per chunk (the flush-timestamp update
|
||||
reuses the same value).
|
||||
"""
|
||||
if not self._pending_delta:
|
||||
return False
|
||||
if len(self._pending_delta) >= self._coalesce_min_chars:
|
||||
return True
|
||||
elapsed_ms = (time.monotonic() - self._last_flush_monotonic) * 1000.0
|
||||
elapsed_ms = (now - self._last_flush_monotonic) * 1000.0
|
||||
return elapsed_ms >= self._coalesce_max_interval_ms
|
||||
|
||||
def close(self) -> list[StreamBaseResponse]:
|
||||
|
||||
@@ -254,6 +254,13 @@ class ChatSession(ChatSessionInfo):
|
||||
def announce_inflight_tool_call(self, tool_name: str) -> None:
|
||||
"""Record that *tool_name* is being dispatched in the current turn.
|
||||
|
||||
Called by the baseline tool executor **before** the tool actually
|
||||
runs (the announcement is about dispatch, not success). If the
|
||||
tool raises, the name stays in the buffer for the rest of the
|
||||
turn — that matches the guide-read gate's contract ("was the tool
|
||||
called?") but means any future gate wanting *successful*
|
||||
dispatches would need its own tracking.
|
||||
|
||||
Lets in-turn guards (see
|
||||
``copilot/tools/helpers.py::require_guide_read``) see a tool
|
||||
call the moment it's issued, instead of waiting for the
|
||||
@@ -270,13 +277,18 @@ class ChatSession(ChatSessionInfo):
|
||||
"""Reset the in-flight tool-call announcement buffer."""
|
||||
self._inflight_tool_calls.clear()
|
||||
|
||||
def has_tool_been_called_this_turn(self, tool_name: str) -> bool:
|
||||
"""True when *tool_name* has been called in the current turn.
|
||||
def has_tool_been_called(self, tool_name: str) -> bool:
|
||||
"""True when *tool_name* has been called in this session.
|
||||
|
||||
Checks the in-flight announcement buffer first (for calls
|
||||
dispatched in *this* turn but not yet persisted) and then the
|
||||
durable ``messages`` history (for past turns + prior rounds
|
||||
within this turn whose writes already landed).
|
||||
Checks the in-flight announcement buffer (for calls dispatched
|
||||
in the *current* turn but not yet flushed into ``messages``) and
|
||||
the durable ``messages`` history (for past turns + prior rounds
|
||||
within this turn whose writes already landed). The durable
|
||||
scan is session-wide, not turn-scoped: a matching tool call
|
||||
anywhere in ``messages`` counts. This matches the guide-read
|
||||
contract — once the guide has been read in the session, the
|
||||
agent doesn't need to re-read it for later create/edit/fix
|
||||
tools.
|
||||
"""
|
||||
if tool_name in self._inflight_tool_calls:
|
||||
return True
|
||||
|
||||
@@ -23,7 +23,7 @@ def _session_with_messages(
|
||||
|
||||
Uses ``ChatSession.new`` + attribute reassignment rather than
|
||||
``MagicMock(spec=...)`` because the gate now calls
|
||||
``session.has_tool_been_called_this_turn(...)`` and a ``spec`` mock
|
||||
``session.has_tool_been_called(...)`` and a ``spec`` mock
|
||||
returns a truthy ``MagicMock`` from that call, hiding real gate
|
||||
behaviour. A live ``ChatSession`` also correctly initialises the
|
||||
``_inflight_tool_calls`` PrivateAttr scratch buffer used by the
|
||||
|
||||
@@ -791,14 +791,14 @@ def require_guide_read(session: ChatSession, tool_name: str):
|
||||
"""Return an ErrorResponse if the guide hasn't been loaded this session.
|
||||
|
||||
Import inline to keep ``helpers.py`` free of tool-response imports.
|
||||
Uses :meth:`ChatSession.has_tool_been_called_this_turn` which checks
|
||||
both the persisted ``messages`` list and the in-flight announcement
|
||||
buffer — so a guide call dispatched earlier in the *current* turn
|
||||
(before ``session.messages`` flushes at turn end) is recognised too.
|
||||
Otherwise a second tool in the same turn would re-fire this guard
|
||||
despite the guide having been called — seen on Kimi K2.6 in
|
||||
particular because its aggressive tool-call chaining exercises this
|
||||
path far more than Sonnet does.
|
||||
Uses :meth:`ChatSession.has_tool_been_called` which checks both the
|
||||
persisted ``messages`` list (session-wide) and the in-flight
|
||||
announcement buffer — so a guide call dispatched earlier in the
|
||||
*current* turn (before ``session.messages`` flushes at turn end) is
|
||||
recognised too. Otherwise a second tool in the same turn would
|
||||
re-fire this guard despite the guide having been called — seen on
|
||||
Kimi K2.6 in particular because its aggressive tool-call chaining
|
||||
exercises this path far more than Sonnet does.
|
||||
"""
|
||||
from .models import ErrorResponse # noqa: PLC0415 — avoid circular import
|
||||
|
||||
@@ -808,7 +808,7 @@ def require_guide_read(session: ChatSession, tool_name: str):
|
||||
# requiring one would waste a round-trip every turn.
|
||||
if session.metadata.builder_graph_id:
|
||||
return None
|
||||
if session.has_tool_been_called_this_turn(_AGENT_GUIDE_TOOL_NAME):
|
||||
if session.has_tool_been_called(_AGENT_GUIDE_TOOL_NAME):
|
||||
return None
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
|
||||
Reference in New Issue
Block a user