fix(platform-cost): correct token avg denominator; add CostBucket type; update generated TS

- avg_input/output_tokens_per_request now divide by token_bearing_requests
  (rows where trackingType='tokens') instead of cost_bearing_requests (rows
  where trackingType='cost_usd'). These are different DB rows: LLM calls log
  tokens under the 'tokens' type, not 'cost_usd', so the old denominator was
  wrong and produced inflated averages (or 0.0 when there were no 'cost_usd'
  rows at all).
- Add a CostBucket TypedDict to platform_cost.py; replace list[dict] with
  list[CostBucket] for type safety on the cost_buckets field.
- Update the openapi.json cost_buckets item schema with explicit bucket/count
  properties so orval generates a typed interface instead of a bare object.
- Import CostBucket in PlatformCostContent.tsx and use it instead of the
  inline anonymous type on the .map() callback.
- Add test assertions for avg_input/output_tokens_per_request and
  avg_cost_microdollars_per_request in test_returns_dashboard_with_data to
  lock in the correct denominator behaviour.
2026-04-30 03:00:41 -04:00 · 2026-04-13 01:40:02 +00:00
parent 0be1d7ddbc
commit 8c50cb5fbc
4 changed files with 24 additions and 8 deletions
--- a/autogpt_platform/backend/backend/data/platform_cost.py
+++ b/autogpt_platform/backend/backend/data/platform_cost.py
@@ -1,7 +1,7 @@
 import asyncio
 import logging
 from datetime import datetime, timedelta, timezone
-from typing import Any
+from typing import Any, TypedDict

 from prisma.models import PlatformCostLog as PrismaLog
 from prisma.models import User as PrismaUser
@@ -162,6 +162,11 @@ class CostLogRow(BaseModel):
    cache_creation_tokens: int | None = None


+class CostBucket(TypedDict):
+    bucket: str
+    count: int
+
+
 class PlatformCostDashboard(BaseModel):
    by_provider: list[ProviderCostSummary]
    by_user: list[UserCostSummary]
@@ -177,7 +182,7 @@ class PlatformCostDashboard(BaseModel):
    cost_p75_microdollars: float = 0.0
    cost_p95_microdollars: float = 0.0
    cost_p99_microdollars: float = 0.0
-    cost_buckets: list[dict] = []
+    cost_buckets: list[CostBucket] = []


 def _si(row: dict, field: str) -> int:
@@ -435,6 +440,11 @@ async def get_platform_cost_dashboard(
    cost_bearing_requests = sum(
        _ca(r) for r in total_agg_groups if r.get("trackingType") == "cost_usd"
    )
+    # Token-bearing request count: only rows where trackingType == "tokens".
+    # Token averages must use this denominator; cost_usd rows do not carry tokens.
+    token_bearing_requests = sum(
+        _ca(r) for r in total_agg_groups if r.get("trackingType") == "tokens"
+    )

    return PlatformCostDashboard(
        by_provider=[
@@ -470,13 +480,13 @@ async def get_platform_cost_dashboard(
        total_input_tokens=total_input_tokens,
        total_output_tokens=total_output_tokens,
        avg_input_tokens_per_request=(
-            total_input_tokens / cost_bearing_requests
-            if cost_bearing_requests > 0
+            total_input_tokens / token_bearing_requests
+            if token_bearing_requests > 0
            else 0.0
        ),
        avg_output_tokens_per_request=(
-            total_output_tokens / cost_bearing_requests
-            if cost_bearing_requests > 0
+            total_output_tokens / token_bearing_requests
+            if token_bearing_requests > 0
            else 0.0
        ),
        avg_cost_microdollars_per_request=(
--- a/autogpt_platform/backend/backend/data/platform_cost_test.py
+++ b/autogpt_platform/backend/backend/data/platform_cost_test.py
@@ -324,6 +324,11 @@ class TestGetPlatformCostDashboard:
        assert dashboard.cost_p50_microdollars == 1000
        assert dashboard.cost_p95_microdollars == 4000
        assert len(dashboard.cost_buckets) == 1
+        # Token averages must use token_bearing_requests (3) not cost_bearing (0)
+        assert dashboard.avg_input_tokens_per_request == pytest.approx(1000 / 3)
+        assert dashboard.avg_output_tokens_per_request == pytest.approx(500 / 3)
+        # No cost_usd rows in total_agg → avg_cost should be 0
+        assert dashboard.avg_cost_microdollars_per_request == 0.0

    @pytest.mark.asyncio
    async def test_cache_tokens_aggregated_not_hardcoded(self):
--- a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx
@@ -8,6 +8,7 @@ import { ProviderTable } from "./ProviderTable";
 import { UserTable } from "./UserTable";
 import { LogsTable } from "./LogsTable";
 import { usePlatformCostContent } from "./usePlatformCostContent";
+import type { CostBucket } from "@/app/api/__generated__/models/platformCostDashboard";

 interface Props {
  searchParams: {
@@ -298,7 +299,7 @@ export function PlatformCostContent({ searchParams }: Props) {
                  </h3>
                  <div className="grid grid-cols-2 gap-2 sm:grid-cols-3 md:grid-cols-6">
                    {dashboard.cost_buckets.map(
-                      (b: { bucket: string; count: number }) => (
+                      (b: CostBucket) => (
                        <div
                          key={b.bucket}
                          className="flex flex-col items-center rounded border p-2 text-center"
--- a/autogpt_platform/frontend/src/app/api/openapi.json
+++ b/autogpt_platform/frontend/src/app/api/openapi.json
@@ -12151,7 +12151,7 @@
          "cost_p75_microdollars": { "type": "number", "title": "Cost P75 Microdollars", "default": 0.0 },
          "cost_p95_microdollars": { "type": "number", "title": "Cost P95 Microdollars", "default": 0.0 },
          "cost_p99_microdollars": { "type": "number", "title": "Cost P99 Microdollars", "default": 0.0 },
-          "cost_buckets": { "type": "array", "items": { "type": "object" }, "title": "Cost Buckets", "default": [] }
+          "cost_buckets": { "type": "array", "items": { "type": "object", "properties": { "bucket": { "type": "string" }, "count": { "type": "integer" } }, "required": ["bucket", "count"] }, "title": "Cost Buckets", "default": [] }
        },
        "type": "object",
        "required": [