fix(backend): address pushback on credit charging and token estimation

- Post-exec InsufficientBalanceError: return the block output and log a
  warning instead of returning an ErrorResponse (the block has already
  executed, possibly with side effects)
- Add a token-estimation fallback for providers that do not honour
  stream_options={"include_usage": True} (rough estimate: 1 token ≈ 4 chars)
- Remove unnecessary # type: ignore on AsyncRedis param
This commit is contained in:
Zamil Majdy
2026-03-12 22:25:14 +07:00
parent 104d1f1bf4
commit c0e59f0a6b
4 changed files with 26 additions and 13 deletions

View File

@@ -426,6 +426,20 @@ async def stream_chat_completion_baseline(
except Exception:
logger.warning("[Baseline] Langfuse trace context teardown failed")
# Fallback: estimate tokens from text length when the provider
# does not honour stream_options={"include_usage": True}.
# Rough estimate: 1 token ≈ 4 characters.
if turn_prompt_tokens == 0 and turn_completion_tokens == 0:
prompt_chars = sum(len(m.get("content", "")) for m in openai_messages)
turn_prompt_tokens = max(prompt_chars // 4, 1)
turn_completion_tokens = max(len(assistant_text) // 4, 1)
logger.info(
"[Baseline] No streaming usage reported; estimated tokens: "
"prompt=%d, completion=%d",
turn_prompt_tokens,
turn_completion_tokens,
)
# Emit token usage and update session for persistence
if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
total_tokens = turn_prompt_tokens + turn_completion_tokens

View File

@@ -83,7 +83,7 @@ def _weekly_reset_time() -> datetime:
async def _session_reset_from_ttl(
redis: AsyncRedis, user_id: str, session_id: str # type: ignore[type-arg]
redis: AsyncRedis, user_id: str, session_id: str
) -> datetime:
"""Derive session reset time from the Redis key's actual TTL.

View File

@@ -155,13 +155,12 @@ async def execute_block(
)
except InsufficientBalanceError:
# Concurrent spend drained balance after our pre-check passed.
# Treat as fatal to avoid unpaid execution (matches executor behavior).
return ErrorResponse(
message=(
f"Insufficient credits to charge for '{block.name}'. "
"Please top up your credits to continue."
),
session_id=session_id,
# Block already executed (with possible side effects), so return
# its output. Log the billing leak amount for reconciliation.
logger.warning(
"Post-exec credit charge failed for block %s (cost=%d)",
block.name,
cost,
)
return BlockOutputResponse(

View File

@@ -139,8 +139,8 @@ class TestExecuteBlockCreditCharging:
# get_user_credit_model should not be called at all for zero-cost blocks
mock_get_credit.assert_not_awaited()
async def test_returns_error_on_post_exec_insufficient_balance(self):
"""If charging fails after execution (concurrent spend race), return error."""
async def test_returns_output_on_post_exec_insufficient_balance(self):
"""If charging fails after execution, output is still returned (block already ran)."""
from backend.util.exceptions import InsufficientBalanceError
block = _make_block()
@@ -171,6 +171,6 @@ class TestExecuteBlockCreditCharging:
matched_credentials={},
)
# Post-exec charge failure is treated as fatal (matches executor behavior)
assert isinstance(result, ErrorResponse)
assert "Insufficient credits" in result.message
# Block already executed (with side effects), so output is returned
assert isinstance(result, BlockOutputResponse)
assert result.success is True