fix(copilot): scope fallback token estimation to current turn only

The fallback estimator was counting the entire openai_messages history (system prompt + all previous turns) instead of only the current turn's messages (the user message plus anything appended during the tool rounds). This caused overcounting and overly strict rate limiting when providers don't return streaming usage data.
2026-04-08 03:00:28 -04:00 · 2026-03-13 03:44:30 +07:00
parent 3096f94996
commit 4ceb15b3f1
1 changed files with 7 additions and 1 deletions
--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -227,6 +227,9 @@ async def stream_chat_completion_baseline(
    # Token usage accumulators — populated from streaming chunks
    turn_prompt_tokens = 0
    turn_completion_tokens = 0
+    # Track message count before the tool loop so the fallback estimator only
+    # counts *this* turn (the trailing user message onward), not the full history.
+    _msgs_before_turn = len(openai_messages)
    try:
        for _round in range(_MAX_TOOL_ROUNDS):
            # Open a new step for each LLM round
@@ -429,9 +432,12 @@ async def stream_chat_completion_baseline(

        # Fallback: estimate tokens from text length when the provider
        # does not honour stream_options={"include_usage": True}.
+        # Only count messages added during *this* turn (user message +
+        # tool rounds), not the full conversation history.
        # Rough estimate: 1 token ≈ 4 characters.
        if turn_prompt_tokens == 0 and turn_completion_tokens == 0:
-            prompt_chars = sum(len(m.get("content") or "") for m in openai_messages)
+            turn_messages = openai_messages[_msgs_before_turn - 1 :]
+            prompt_chars = sum(len(m.get("content") or "") for m in turn_messages)
            turn_prompt_tokens = max(prompt_chars // 4, 1)
            turn_completion_tokens = max(len(assistant_text) // 4, 1)
            logger.info(