fix(backend): address pushback on credit charging and token estimation

- Post-exec InsufficientBalanceError: return output + log warning instead of ErrorResponse (block already executed with side effects)
- Add token estimation fallback for providers that don't support stream_options include_usage (1 token ≈ 4 chars)
- Remove unnecessary # type: ignore on AsyncRedis param
2026-04-08 03:00:28 -04:00 · 2026-03-12 22:25:14 +07:00
parent 104d1f1bf4
commit c0e59f0a6b
4 changed files with 26 additions and 13 deletions
--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -426,6 +426,20 @@ async def stream_chat_completion_baseline(
            except Exception:
                logger.warning("[Baseline] Langfuse trace context teardown failed")

+        # Fallback: estimate tokens from text length when the provider
+        # does not honour stream_options={"include_usage": True}.
+        # Rough estimate: 1 token ≈ 4 characters.
+        if turn_prompt_tokens == 0 and turn_completion_tokens == 0:
+            prompt_chars = sum(len(m.get("content", "")) for m in openai_messages)
+            turn_prompt_tokens = max(prompt_chars // 4, 1)
+            turn_completion_tokens = max(len(assistant_text) // 4, 1)
+            logger.info(
+                "[Baseline] No streaming usage reported; estimated tokens: "
+                "prompt=%d, completion=%d",
+                turn_prompt_tokens,
+                turn_completion_tokens,
+            )
+
        # Emit token usage and update session for persistence
        if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
            total_tokens = turn_prompt_tokens + turn_completion_tokens
--- a/autogpt_platform/backend/backend/copilot/rate_limit.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit.py
@@ -83,7 +83,7 @@ def _weekly_reset_time() -> datetime:


 async def _session_reset_from_ttl(
-    redis: AsyncRedis, user_id: str, session_id: str  # type: ignore[type-arg]
+    redis: AsyncRedis, user_id: str, session_id: str
 ) -> datetime:
    """Derive session reset time from the Redis key's actual TTL.

--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -155,13 +155,12 @@ async def execute_block(
                )
            except InsufficientBalanceError:
                # Concurrent spend drained balance after our pre-check passed.
-                # Treat as fatal to avoid unpaid execution (matches executor behavior).
-                return ErrorResponse(
-                    message=(
-                        f"Insufficient credits to charge for '{block.name}'. "
-                        "Please top up your credits to continue."
-                    ),
-                    session_id=session_id,
+                # Block already executed (with possible side effects), so return
+                # its output. Log the billing leak amount for reconciliation.
+                logger.warning(
+                    "Post-exec credit charge failed for block %s (cost=%d)",
+                    block.name,
+                    cost,
                )

        return BlockOutputResponse(
--- a/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
@@ -139,8 +139,8 @@ class TestExecuteBlockCreditCharging:
        # get_user_credit_model should not be called at all for zero-cost blocks
        mock_get_credit.assert_not_awaited()

-    async def test_returns_error_on_post_exec_insufficient_balance(self):
-        """If charging fails after execution (concurrent spend race), return error."""
+    async def test_returns_output_on_post_exec_insufficient_balance(self):
+        """If charging fails after execution, output is still returned (block already ran)."""
        from backend.util.exceptions import InsufficientBalanceError

        block = _make_block()
@@ -171,6 +171,6 @@ class TestExecuteBlockCreditCharging:
                matched_credentials={},
            )

-        # Post-exec charge failure is treated as fatal (matches executor behavior)
-        assert isinstance(result, ErrorResponse)
-        assert "Insufficient credits" in result.message
+        # Block already executed (with side effects), so output is returned
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True