From 8c50cb5fbcde02d2ada8a99655d77ff494874954 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 01:40:02 +0000 Subject: [PATCH] fix(platform-cost): correct token avg denominator; add CostBucket type; update generated TS - avg_input/output_tokens_per_request now divide by token_bearing_requests (rows where trackingType='tokens') instead of cost_bearing_requests (rows where trackingType='cost_usd'). These are different DB rows: LLM calls log tokens under 'tokens' type, not 'cost_usd', so the old denominator was wrong and produced incorrect averages (inflated when there are fewer cost_usd rows than token rows, and 0.0 when there are no cost_usd rows at all). - Add CostBucket TypedDict to platform_cost.py; replace list[dict] with list[CostBucket] for type safety on cost_buckets field. - Update openapi.json cost_buckets item schema with explicit bucket/count properties so orval generates a typed interface instead of object. - Import CostBucket in PlatformCostContent.tsx and use it instead of inline anonymous type on the .map() callback. - Add test assertions for avg_input/output_tokens_per_request and avg_cost_microdollars_per_request in test_returns_dashboard_with_data to lock in the correct denominator behaviour. 
--- .../backend/backend/data/platform_cost.py | 22 ++++++++++++++----- .../backend/data/platform_cost_test.py | 5 +++++ .../components/PlatformCostContent.tsx | 3 ++- .../frontend/src/app/api/openapi.json | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/autogpt_platform/backend/backend/data/platform_cost.py b/autogpt_platform/backend/backend/data/platform_cost.py index 3baa0a58a7..95b953f030 100644 --- a/autogpt_platform/backend/backend/data/platform_cost.py +++ b/autogpt_platform/backend/backend/data/platform_cost.py @@ -1,7 +1,7 @@ import asyncio import logging from datetime import datetime, timedelta, timezone -from typing import Any +from typing import Any, TypedDict from prisma.models import PlatformCostLog as PrismaLog from prisma.models import User as PrismaUser @@ -162,6 +162,11 @@ class CostLogRow(BaseModel): cache_creation_tokens: int | None = None +class CostBucket(TypedDict): + bucket: str + count: int + + class PlatformCostDashboard(BaseModel): by_provider: list[ProviderCostSummary] by_user: list[UserCostSummary] @@ -177,7 +182,7 @@ class PlatformCostDashboard(BaseModel): cost_p75_microdollars: float = 0.0 cost_p95_microdollars: float = 0.0 cost_p99_microdollars: float = 0.0 - cost_buckets: list[dict] = [] + cost_buckets: list[CostBucket] = [] def _si(row: dict, field: str) -> int: @@ -435,6 +440,11 @@ async def get_platform_cost_dashboard( cost_bearing_requests = sum( _ca(r) for r in total_agg_groups if r.get("trackingType") == "cost_usd" ) + # Token-bearing request count: only rows where trackingType == "tokens". + # Token averages must use this denominator; cost_usd rows do not carry tokens. 
+ token_bearing_requests = sum( + _ca(r) for r in total_agg_groups if r.get("trackingType") == "tokens" + ) return PlatformCostDashboard( by_provider=[ @@ -470,13 +480,13 @@ async def get_platform_cost_dashboard( total_input_tokens=total_input_tokens, total_output_tokens=total_output_tokens, avg_input_tokens_per_request=( - total_input_tokens / cost_bearing_requests - if cost_bearing_requests > 0 + total_input_tokens / token_bearing_requests + if token_bearing_requests > 0 else 0.0 ), avg_output_tokens_per_request=( - total_output_tokens / cost_bearing_requests - if cost_bearing_requests > 0 + total_output_tokens / token_bearing_requests + if token_bearing_requests > 0 else 0.0 ), avg_cost_microdollars_per_request=( diff --git a/autogpt_platform/backend/backend/data/platform_cost_test.py b/autogpt_platform/backend/backend/data/platform_cost_test.py index 7649eff524..92470d0a64 100644 --- a/autogpt_platform/backend/backend/data/platform_cost_test.py +++ b/autogpt_platform/backend/backend/data/platform_cost_test.py @@ -324,6 +324,11 @@ class TestGetPlatformCostDashboard: assert dashboard.cost_p50_microdollars == 1000 assert dashboard.cost_p95_microdollars == 4000 assert len(dashboard.cost_buckets) == 1 + # Token averages must use token_bearing_requests (3) not cost_bearing (0) + assert dashboard.avg_input_tokens_per_request == pytest.approx(1000 / 3) + assert dashboard.avg_output_tokens_per_request == pytest.approx(500 / 3) + # No cost_usd rows in total_agg → avg_cost should be 0 + assert dashboard.avg_cost_microdollars_per_request == 0.0 @pytest.mark.asyncio async def test_cache_tokens_aggregated_not_hardcoded(self): diff --git a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx index 636f4f1dd8..d8fdded738 100644 --- 
a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx @@ -8,6 +8,7 @@ import { ProviderTable } from "./ProviderTable"; import { UserTable } from "./UserTable"; import { LogsTable } from "./LogsTable"; import { usePlatformCostContent } from "./usePlatformCostContent"; +import type { CostBucket } from "@/app/api/__generated__/models/platformCostDashboard"; interface Props { searchParams: { @@ -298,7 +299,7 @@ export function PlatformCostContent({ searchParams }: Props) {
{dashboard.cost_buckets.map( - (b: { bucket: string; count: number }) => ( + (b: CostBucket) => (