ensure usage is included
@@ -119,6 +119,10 @@ def build_otlp_payload(
     prompt_tokens: int | None = None,
     completion_tokens: int | None = None,
     total_tokens: int | None = None,
+    total_cost_usd: float | None = None,
+    cache_creation_input_tokens: int | None = None,
+    cache_read_input_tokens: int | None = None,
+    reasoning_tokens: int | None = None,
     user_id: str | None = None,
     session_id: str | None = None,
     tool_calls: list[dict[str, Any]] | None = None,
@@ -166,17 +170,14 @@ def build_otlp_payload(
         _kv("gen_ai.usage.input_tokens", prompt_tokens),
         _kv("gen_ai.usage.output_tokens", completion_tokens),
         _kv("gen_ai.usage.total_tokens", total_tokens),
-        _kv(
-            "gen_ai.usage.input_tokens.cached", 0 if prompt_tokens is not None else None
-        ),
-        _kv(
-            "gen_ai.usage.output_tokens.reasoning",
-            0 if completion_tokens is not None else None,
-        ),
+        _kv("gen_ai.usage.input_tokens.cached", cache_read_input_tokens),
+        _kv("gen_ai.usage.input_tokens.cache_creation", cache_creation_input_tokens),
+        _kv("gen_ai.usage.output_tokens.reasoning", reasoning_tokens),
         _kv("user.id", user_id),
         _kv("session.id", session_id),
         _kv("trace.metadata.openrouter.source", "openrouter"),
         _kv("trace.metadata.openrouter.user_id", user_id),
+        _kv("gen_ai.usage.total_cost", total_cost_usd),
         _kv("trace.metadata.openrouter.provider_name", provider_name),
         _kv("trace.metadata.openrouter.provider_slug", provider_slug),
         _kv("trace.metadata.openrouter.finish_reason", finish_reason),
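
Note: `_kv` itself is outside this diff. A minimal sketch consistent with how it is called here — drop absent values so the filter below can skip them, and pick the OTLP value type from the Python type — might look like:

    from typing import Any

    def _kv(key: str, value: Any) -> dict[str, Any] | None:
        # Return None for absent values; the caller filters these out,
        # which is why optional usage fields can be passed straight through.
        if value is None:
            return None
        if isinstance(value, bool):
            return {"key": key, "value": {"boolValue": value}}
        if isinstance(value, int):
            # OTLP/JSON represents 64-bit integers as strings.
            return {"key": key, "value": {"intValue": str(value)}}
        if isinstance(value, float):
            return {"key": key, "value": {"doubleValue": value}}
        return {"key": key, "value": {"stringValue": str(value)}}
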
@@ -184,40 +185,6 @@ def build_otlp_payload(
         if kv is not None:
             attrs.append(kv)

-    # Keep keyset aligned with historical OpenRouter traces even when values
-    # are unavailable from this path.
-    attrs.extend(
-        [
-            {"key": "gen_ai.usage.input_cost", "value": {"doubleValue": None}},
-            {"key": "gen_ai.usage.output_cost", "value": {"doubleValue": None}},
-            {"key": "gen_ai.usage.total_cost", "value": {"doubleValue": None}},
-            {
-                "key": "trace.metadata.openrouter.api_key_name",
-                "value": {"stringValue": None},
-            },
-            {
-                "key": "trace.metadata.openrouter.entity_id",
-                "value": {"stringValue": None},
-            },
-            {
-                "key": "trace.metadata.openrouter.creator_user_id",
-                "value": {"stringValue": None},
-            },
-            {
-                "key": "trace.metadata.openrouter.organization_id",
-                "value": {"stringValue": None},
-            },
-            {
-                "key": "trace.metadata.openrouter.input_unit_price",
-                "value": {"doubleValue": None},
-            },
-            {
-                "key": "trace.metadata.openrouter.output_unit_price",
-                "value": {"doubleValue": None},
-            },
-        ]
-    )
-
     if prompt_payload is not None:
         attrs.append({"key": "trace.input", "value": {"stringValue": prompt_payload}})
         attrs.append({"key": "span.input", "value": {"stringValue": prompt_payload}})
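
Dropping the placeholder block is more than cleanup: in OTLP/JSON an attribute's AnyValue is expected to carry a concrete value, so entries like {"doubleValue": None} serialize to nulls that a conforming backend may reject or silently discard. With real values now threaded through `_kv`, the same keys are emitted only when populated, e.g. (illustrative value):

    {"key": "gen_ai.usage.total_cost", "value": {"doubleValue": 0.0042}}
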
@@ -295,6 +262,10 @@ def emit_trace(
     prompt_tokens: int | None = None,
     completion_tokens: int | None = None,
     total_tokens: int | None = None,
+    total_cost_usd: float | None = None,
+    cache_creation_input_tokens: int | None = None,
+    cache_read_input_tokens: int | None = None,
+    reasoning_tokens: int | None = None,
     user_id: str | None = None,
     session_id: str | None = None,
     tool_calls: list[dict[str, Any]] | None = None,
@@ -319,6 +290,10 @@ def emit_trace(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
         total_tokens=total_tokens,
+        total_cost_usd=total_cost_usd,
+        cache_creation_input_tokens=cache_creation_input_tokens,
+        cache_read_input_tokens=cache_read_input_tokens,
+        reasoning_tokens=reasoning_tokens,
         user_id=user_id,
         session_id=session_id,
         tool_calls=tool_calls,
@@ -620,6 +620,8 @@ async def stream_chat_completion_sdk(
     # are always bound when the OTLP trace section runs.
     assistant_response = ChatMessage(role="assistant", content="")
     trace_tool_calls: list[dict[str, Any]] = []
+    trace_usage: dict[str, Any] = {}
+    trace_cost_usd: float | None = None

     sdk_options_kwargs: dict[str, Any] = {
         "system_prompt": system_prompt,
@@ -785,8 +787,13 @@ async def stream_chat_completion_sdk(
                     - len(adapter.resolved_tool_calls),
                 )

-            # Log ResultMessage details for debugging
+            # Extract usage and cost from ResultMessage for OTLP trace
            if isinstance(sdk_msg, ResultMessage):
+                if sdk_msg.usage:
+                    trace_usage = sdk_msg.usage
+                if sdk_msg.total_cost_usd is not None:
+                    trace_cost_usd = sdk_msg.total_cost_usd
+
                 logger.info(
                     "[SDK] [%s] Received: ResultMessage %s "
                     "(unresolved=%d, current=%d, resolved=%d)",
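
The `usage` mapping taken from ResultMessage is only ever read with `.get(...)` downstream, so the trace code assumes at most a shape like the following (an illustration inferred from those call sites, not a guaranteed schema):

    # Keys actually consumed by the next hunk; any of them may be absent.
    trace_usage = {
        "input_tokens": 1234,
        "output_tokens": 567,
        "cache_creation_input_tokens": 0,
        "cache_read_input_tokens": 1024,
    }
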
@@ -1041,11 +1048,21 @@ async def stream_chat_completion_sdk(
                     dict(m) for m in session.to_openai_messages()
                 ]

+                _input = trace_usage.get("input_tokens")
+                _output = trace_usage.get("output_tokens")
+                _total = (_input or 0) + (_output or 0) if _input or _output else None
+
                 emit_trace(
                     model=sdk_model or config.model,
                     messages=trace_messages,
                     assistant_content=assistant_response.content or None,
                     finish_reason=trace_finish_reason,
+                    prompt_tokens=_input,
+                    completion_tokens=_output,
+                    total_tokens=_total,
+                    total_cost_usd=trace_cost_usd,
+                    cache_creation_input_tokens=trace_usage.get("cache_creation_input_tokens"),
+                    cache_read_input_tokens=trace_usage.get("cache_read_input_tokens"),
                     user_id=user_id,
                     session_id=session_id,
                     tool_calls=trace_tool_calls_payload,
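
One subtlety in the `_total` line: the conditional expression binds last, so it parses as `((_input or 0) + (_output or 0)) if (_input or _output) else None`. The total is therefore None only when both counts are missing, and a lone value passes through unchanged:

    _input, _output = 1200, None
    _total = (_input or 0) + (_output or 0) if _input or _output else None
    assert _total == 1200

    _input = _output = None
    _total = (_input or 0) + (_output or 0) if _input or _output else None
    assert _total is None
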
@@ -1043,11 +1043,7 @@ async def _stream_chat_chunks(

     # Accumulators for OTLP trace export
     _trace_text_parts: list[str] = []
-    _trace_usage: dict[str, int | None] = {
-        "prompt": None,
-        "completion": None,
-        "total": None,
-    }
+    _trace_usage: dict[str, Any] = {}

     # Process the stream
     chunk: ChatCompletionChunk
@@ -1057,6 +1053,12 @@ async def _stream_chat_chunks(
             _trace_usage["prompt"] = chunk.usage.prompt_tokens
             _trace_usage["completion"] = chunk.usage.completion_tokens
             _trace_usage["total"] = chunk.usage.total_tokens
+            if chunk.usage.prompt_tokens_details:
+                d = chunk.usage.prompt_tokens_details
+                _trace_usage["cached"] = d.cached_tokens
+            if chunk.usage.completion_tokens_details:
+                d = chunk.usage.completion_tokens_details
+                _trace_usage["reasoning"] = d.reasoning_tokens
             yield StreamUsage(
                 promptTokens=chunk.usage.prompt_tokens,
                 completionTokens=chunk.usage.completion_tokens,
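
Worth remembering when exercising this path: with the OpenAI-style chat completions streaming API, `chunk.usage` is normally None on every chunk and populated only on the final one, and only if the request opts in. A minimal sketch of the opt-in (standard openai-python call shape; `client`, `model`, and `messages` are placeholders):

    stream = await client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        stream_options={"include_usage": True},  # final chunk carries usage
    )

Both `prompt_tokens_details` and `completion_tokens_details` are optional on that final usage object, which is why the new code guards each before reading `cached_tokens` and `reasoning_tokens`.
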
@@ -1190,9 +1192,11 @@ async def _stream_chat_chunks(
                     finish_reason=(
                         "tool_calls" if tool_calls else (finish_reason or "stop")
                     ),
-                    prompt_tokens=_trace_usage["prompt"],
-                    completion_tokens=_trace_usage["completion"],
-                    total_tokens=_trace_usage["total"],
+                    prompt_tokens=_trace_usage.get("prompt"),
+                    completion_tokens=_trace_usage.get("completion"),
+                    total_tokens=_trace_usage.get("total"),
+                    cache_read_input_tokens=_trace_usage.get("cached"),
+                    reasoning_tokens=_trace_usage.get("reasoning"),
                     user_id=session.user_id,
                     session_id=session.session_id,
                     tool_calls=tool_calls or None,
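
The switch from `_trace_usage["prompt"]` to `_trace_usage.get("prompt")` is required, not stylistic: `_trace_usage` now starts as an empty dict (see the -1043,11 hunk above), so the bracket lookups would raise KeyError whenever no usage chunk arrived, while `.get(...)` degrades to None and emit_trace simply omits the unknown counts:

    _trace_usage: dict[str, Any] = {}
    _trace_usage["prompt"]       # KeyError if the stream never carried usage
    _trace_usage.get("prompt")   # None, which _kv drops from the payload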