mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend): address pushback on credit charging and token estimation
- Post-exec InsufficientBalanceError: return the block output and log a warning instead of returning ErrorResponse (the block has already executed, possibly with side effects).
- Add a token-estimation fallback for providers that don't support stream_options include_usage (1 token ≈ 4 chars).
- Remove the unnecessary `# type: ignore` on the AsyncRedis param.
This commit is contained in:
@@ -426,6 +426,20 @@ async def stream_chat_completion_baseline(
|
||||
except Exception:
|
||||
logger.warning("[Baseline] Langfuse trace context teardown failed")
|
||||
|
||||
# Fallback: estimate tokens from text length when the provider
|
||||
# does not honour stream_options={"include_usage": True}.
|
||||
# Rough estimate: 1 token ≈ 4 characters.
|
||||
if turn_prompt_tokens == 0 and turn_completion_tokens == 0:
|
||||
prompt_chars = sum(len(m.get("content", "")) for m in openai_messages)
|
||||
turn_prompt_tokens = max(prompt_chars // 4, 1)
|
||||
turn_completion_tokens = max(len(assistant_text) // 4, 1)
|
||||
logger.info(
|
||||
"[Baseline] No streaming usage reported; estimated tokens: "
|
||||
"prompt=%d, completion=%d",
|
||||
turn_prompt_tokens,
|
||||
turn_completion_tokens,
|
||||
)
|
||||
|
||||
# Emit token usage and update session for persistence
|
||||
if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
|
||||
total_tokens = turn_prompt_tokens + turn_completion_tokens
|
||||
|
||||
@@ -83,7 +83,7 @@ def _weekly_reset_time() -> datetime:
|
||||
|
||||
|
||||
async def _session_reset_from_ttl(
|
||||
redis: AsyncRedis, user_id: str, session_id: str # type: ignore[type-arg]
|
||||
redis: AsyncRedis, user_id: str, session_id: str
|
||||
) -> datetime:
|
||||
"""Derive session reset time from the Redis key's actual TTL.
|
||||
|
||||
|
||||
@@ -155,13 +155,12 @@ async def execute_block(
|
||||
)
|
||||
except InsufficientBalanceError:
|
||||
# Concurrent spend drained balance after our pre-check passed.
|
||||
# Treat as fatal to avoid unpaid execution (matches executor behavior).
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
f"Insufficient credits to charge for '{block.name}'. "
|
||||
"Please top up your credits to continue."
|
||||
),
|
||||
session_id=session_id,
|
||||
# Block already executed (with possible side effects), so return
|
||||
# its output. Log the billing leak amount for reconciliation.
|
||||
logger.warning(
|
||||
"Post-exec credit charge failed for block %s (cost=%d)",
|
||||
block.name,
|
||||
cost,
|
||||
)
|
||||
|
||||
return BlockOutputResponse(
|
||||
|
||||
@@ -139,8 +139,8 @@ class TestExecuteBlockCreditCharging:
|
||||
# get_user_credit_model should not be called at all for zero-cost blocks
|
||||
mock_get_credit.assert_not_awaited()
|
||||
|
||||
async def test_returns_error_on_post_exec_insufficient_balance(self):
|
||||
"""If charging fails after execution (concurrent spend race), return error."""
|
||||
async def test_returns_output_on_post_exec_insufficient_balance(self):
|
||||
"""If charging fails after execution, output is still returned (block already ran)."""
|
||||
from backend.util.exceptions import InsufficientBalanceError
|
||||
|
||||
block = _make_block()
|
||||
@@ -171,6 +171,6 @@ class TestExecuteBlockCreditCharging:
|
||||
matched_credentials={},
|
||||
)
|
||||
|
||||
# Post-exec charge failure is treated as fatal (matches executor behavior)
|
||||
assert isinstance(result, ErrorResponse)
|
||||
assert "Insufficient credits" in result.message
|
||||
# Block already executed (with side effects), so output is returned
|
||||
assert isinstance(result, BlockOutputResponse)
|
||||
assert result.success is True
|
||||
|
||||
Reference in New Issue
Block a user