fix(platform): move baseline cost extraction to finally + accumulate multi-round costs

- Move x-total-cost header extraction to the finally block so the cost is
  captured even when the stream errors midway (we have already paid by then)
- Accumulate cost across multi-round tool-calling turns instead of
  overwriting with last round only
- Handle the UnboundLocalError raised when the `response` variable was never assigned
This commit is contained in:
Zamil Majdy
2026-04-02 19:00:44 +02:00
parent 9a08011d7d
commit 45d3193727

View File

@@ -160,22 +160,24 @@ async def _baseline_llm_caller(
if tc.function and tc.function.arguments:
entry["arguments"] += tc.function.arguments
# Extract OpenRouter cost from response headers
try:
raw_resp = getattr(response, "response", None)
if raw_resp and hasattr(raw_resp, "headers"):
cost_header = raw_resp.headers.get("x-total-cost")
if cost_header:
state.cost_usd = float(cost_header)
except (ValueError, AttributeError):
pass
# Close text block
if state.text_started:
state.pending_events.append(StreamTextEnd(id=state.text_block_id))
state.text_started = False
state.text_block_id = str(uuid.uuid4())
finally:
# Extract OpenRouter cost from response headers (in finally so we
# capture cost even when the stream errors mid-way — we already paid).
# Accumulate across multi-round tool-calling turns.
try:
raw_resp = getattr(response, "response", None)
if raw_resp and hasattr(raw_resp, "headers"):
cost_header = raw_resp.headers.get("x-total-cost")
if cost_header:
state.cost_usd = (state.cost_usd or 0.0) + float(cost_header)
except (ValueError, AttributeError, UnboundLocalError):
pass
# Always persist partial text so the session history stays consistent,
# even when the stream is interrupted by an exception.
state.assistant_text += round_text