Ensure token usage and cost are included in emitted OTLP traces

This commit is contained in:
Swifty
2026-03-03 11:39:24 +01:00
parent e8b82cd268
commit 57f56c0caa
3 changed files with 46 additions and 50 deletions

View File

@@ -119,6 +119,10 @@ def build_otlp_payload(
prompt_tokens: int | None = None,
completion_tokens: int | None = None,
total_tokens: int | None = None,
total_cost_usd: float | None = None,
cache_creation_input_tokens: int | None = None,
cache_read_input_tokens: int | None = None,
reasoning_tokens: int | None = None,
user_id: str | None = None,
session_id: str | None = None,
tool_calls: list[dict[str, Any]] | None = None,
@@ -166,17 +170,14 @@ def build_otlp_payload(
_kv("gen_ai.usage.input_tokens", prompt_tokens),
_kv("gen_ai.usage.output_tokens", completion_tokens),
_kv("gen_ai.usage.total_tokens", total_tokens),
_kv(
"gen_ai.usage.input_tokens.cached", 0 if prompt_tokens is not None else None
),
_kv(
"gen_ai.usage.output_tokens.reasoning",
0 if completion_tokens is not None else None,
),
_kv("gen_ai.usage.input_tokens.cached", cache_read_input_tokens),
_kv("gen_ai.usage.input_tokens.cache_creation", cache_creation_input_tokens),
_kv("gen_ai.usage.output_tokens.reasoning", reasoning_tokens),
_kv("user.id", user_id),
_kv("session.id", session_id),
_kv("trace.metadata.openrouter.source", "openrouter"),
_kv("trace.metadata.openrouter.user_id", user_id),
_kv("gen_ai.usage.total_cost", total_cost_usd),
_kv("trace.metadata.openrouter.provider_name", provider_name),
_kv("trace.metadata.openrouter.provider_slug", provider_slug),
_kv("trace.metadata.openrouter.finish_reason", finish_reason),
@@ -184,40 +185,6 @@ def build_otlp_payload(
if kv is not None:
attrs.append(kv)
# Keep keyset aligned with historical OpenRouter traces even when values
# are unavailable from this path.
attrs.extend(
[
{"key": "gen_ai.usage.input_cost", "value": {"doubleValue": None}},
{"key": "gen_ai.usage.output_cost", "value": {"doubleValue": None}},
{"key": "gen_ai.usage.total_cost", "value": {"doubleValue": None}},
{
"key": "trace.metadata.openrouter.api_key_name",
"value": {"stringValue": None},
},
{
"key": "trace.metadata.openrouter.entity_id",
"value": {"stringValue": None},
},
{
"key": "trace.metadata.openrouter.creator_user_id",
"value": {"stringValue": None},
},
{
"key": "trace.metadata.openrouter.organization_id",
"value": {"stringValue": None},
},
{
"key": "trace.metadata.openrouter.input_unit_price",
"value": {"doubleValue": None},
},
{
"key": "trace.metadata.openrouter.output_unit_price",
"value": {"doubleValue": None},
},
]
)
if prompt_payload is not None:
attrs.append({"key": "trace.input", "value": {"stringValue": prompt_payload}})
attrs.append({"key": "span.input", "value": {"stringValue": prompt_payload}})
@@ -295,6 +262,10 @@ def emit_trace(
prompt_tokens: int | None = None,
completion_tokens: int | None = None,
total_tokens: int | None = None,
total_cost_usd: float | None = None,
cache_creation_input_tokens: int | None = None,
cache_read_input_tokens: int | None = None,
reasoning_tokens: int | None = None,
user_id: str | None = None,
session_id: str | None = None,
tool_calls: list[dict[str, Any]] | None = None,
@@ -319,6 +290,10 @@ def emit_trace(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
total_cost_usd=total_cost_usd,
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
reasoning_tokens=reasoning_tokens,
user_id=user_id,
session_id=session_id,
tool_calls=tool_calls,

View File

@@ -620,6 +620,8 @@ async def stream_chat_completion_sdk(
# are always bound when the OTLP trace section runs.
assistant_response = ChatMessage(role="assistant", content="")
trace_tool_calls: list[dict[str, Any]] = []
trace_usage: dict[str, Any] = {}
trace_cost_usd: float | None = None
sdk_options_kwargs: dict[str, Any] = {
"system_prompt": system_prompt,
@@ -785,8 +787,13 @@ async def stream_chat_completion_sdk(
- len(adapter.resolved_tool_calls),
)
# Log ResultMessage details for debugging
# Extract usage and cost from ResultMessage for OTLP trace
if isinstance(sdk_msg, ResultMessage):
if sdk_msg.usage:
trace_usage = sdk_msg.usage
if sdk_msg.total_cost_usd is not None:
trace_cost_usd = sdk_msg.total_cost_usd
logger.info(
"[SDK] [%s] Received: ResultMessage %s "
"(unresolved=%d, current=%d, resolved=%d)",
@@ -1041,11 +1048,21 @@ async def stream_chat_completion_sdk(
dict(m) for m in session.to_openai_messages()
]
_input = trace_usage.get("input_tokens")
_output = trace_usage.get("output_tokens")
_total = (_input or 0) + (_output or 0) if _input or _output else None
emit_trace(
model=sdk_model or config.model,
messages=trace_messages,
assistant_content=assistant_response.content or None,
finish_reason=trace_finish_reason,
prompt_tokens=_input,
completion_tokens=_output,
total_tokens=_total,
total_cost_usd=trace_cost_usd,
cache_creation_input_tokens=trace_usage.get("cache_creation_input_tokens"),
cache_read_input_tokens=trace_usage.get("cache_read_input_tokens"),
user_id=user_id,
session_id=session_id,
tool_calls=trace_tool_calls_payload,

View File

@@ -1043,11 +1043,7 @@ async def _stream_chat_chunks(
# Accumulators for OTLP trace export
_trace_text_parts: list[str] = []
_trace_usage: dict[str, int | None] = {
"prompt": None,
"completion": None,
"total": None,
}
_trace_usage: dict[str, Any] = {}
# Process the stream
chunk: ChatCompletionChunk
@@ -1057,6 +1053,12 @@ async def _stream_chat_chunks(
_trace_usage["prompt"] = chunk.usage.prompt_tokens
_trace_usage["completion"] = chunk.usage.completion_tokens
_trace_usage["total"] = chunk.usage.total_tokens
if chunk.usage.prompt_tokens_details:
d = chunk.usage.prompt_tokens_details
_trace_usage["cached"] = d.cached_tokens
if chunk.usage.completion_tokens_details:
d = chunk.usage.completion_tokens_details
_trace_usage["reasoning"] = d.reasoning_tokens
yield StreamUsage(
promptTokens=chunk.usage.prompt_tokens,
completionTokens=chunk.usage.completion_tokens,
@@ -1190,9 +1192,11 @@ async def _stream_chat_chunks(
finish_reason=(
"tool_calls" if tool_calls else (finish_reason or "stop")
),
prompt_tokens=_trace_usage["prompt"],
completion_tokens=_trace_usage["completion"],
total_tokens=_trace_usage["total"],
prompt_tokens=_trace_usage.get("prompt"),
completion_tokens=_trace_usage.get("completion"),
total_tokens=_trace_usage.get("total"),
cache_read_input_tokens=_trace_usage.get("cached"),
reasoning_tokens=_trace_usage.get("reasoning"),
user_id=session.user_id,
session_id=session.session_id,
tool_calls=tool_calls or None,