Merge branch 'dev' into feat/task-decomposition-copilot

This commit is contained in:
An Vy Le
2026-04-21 13:58:27 +02:00
committed by GitHub
55 changed files with 5029 additions and 1181 deletions

View File

@@ -32,10 +32,10 @@ router = APIRouter(
class UserRateLimitResponse(BaseModel):
user_id: str
user_email: Optional[str] = None
daily_token_limit: int
weekly_token_limit: int
daily_tokens_used: int
weekly_tokens_used: int
daily_cost_limit_microdollars: int
weekly_cost_limit_microdollars: int
daily_cost_used_microdollars: int
weekly_cost_used_microdollars: int
tier: SubscriptionTier
@@ -101,17 +101,19 @@ async def get_user_rate_limit(
logger.info("Admin %s checking rate limit for user %s", admin_user_id, resolved_id)
daily_limit, weekly_limit, tier = await get_global_rate_limits(
resolved_id, config.daily_token_limit, config.weekly_token_limit
resolved_id,
config.daily_cost_limit_microdollars,
config.weekly_cost_limit_microdollars,
)
usage = await get_usage_status(resolved_id, daily_limit, weekly_limit, tier=tier)
return UserRateLimitResponse(
user_id=resolved_id,
user_email=resolved_email,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_tokens_used=usage.daily.used,
weekly_tokens_used=usage.weekly.used,
daily_cost_limit_microdollars=daily_limit,
weekly_cost_limit_microdollars=weekly_limit,
daily_cost_used_microdollars=usage.daily.used,
weekly_cost_used_microdollars=usage.weekly.used,
tier=tier,
)
@@ -141,7 +143,9 @@ async def reset_user_rate_limit(
raise HTTPException(status_code=500, detail="Failed to reset usage") from e
daily_limit, weekly_limit, tier = await get_global_rate_limits(
user_id, config.daily_token_limit, config.weekly_token_limit
user_id,
config.daily_cost_limit_microdollars,
config.weekly_cost_limit_microdollars,
)
usage = await get_usage_status(user_id, daily_limit, weekly_limit, tier=tier)
@@ -154,10 +158,10 @@ async def reset_user_rate_limit(
return UserRateLimitResponse(
user_id=user_id,
user_email=resolved_email,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_tokens_used=usage.daily.used,
weekly_tokens_used=usage.weekly.used,
daily_cost_limit_microdollars=daily_limit,
weekly_cost_limit_microdollars=weekly_limit,
daily_cost_used_microdollars=usage.daily.used,
weekly_cost_used_microdollars=usage.weekly.used,
tier=tier,
)

View File

@@ -85,10 +85,10 @@ def test_get_rate_limit(
data = response.json()
assert data["user_id"] == target_user_id
assert data["user_email"] == _TARGET_EMAIL
assert data["daily_token_limit"] == 2_500_000
assert data["weekly_token_limit"] == 12_500_000
assert data["daily_tokens_used"] == 500_000
assert data["weekly_tokens_used"] == 3_000_000
assert data["daily_cost_limit_microdollars"] == 2_500_000
assert data["weekly_cost_limit_microdollars"] == 12_500_000
assert data["daily_cost_used_microdollars"] == 500_000
assert data["weekly_cost_used_microdollars"] == 3_000_000
assert data["tier"] == "FREE"
configured_snapshot.assert_match(
@@ -117,7 +117,7 @@ def test_get_rate_limit_by_email(
data = response.json()
assert data["user_id"] == target_user_id
assert data["user_email"] == _TARGET_EMAIL
assert data["daily_token_limit"] == 2_500_000
assert data["daily_cost_limit_microdollars"] == 2_500_000
def test_get_rate_limit_by_email_not_found(
@@ -160,9 +160,9 @@ def test_reset_user_usage_daily_only(
assert response.status_code == 200
data = response.json()
assert data["daily_tokens_used"] == 0
assert data["daily_cost_used_microdollars"] == 0
# Weekly is untouched
assert data["weekly_tokens_used"] == 3_000_000
assert data["weekly_cost_used_microdollars"] == 3_000_000
assert data["tier"] == "FREE"
mock_reset.assert_awaited_once_with(target_user_id, reset_weekly=False)
@@ -192,8 +192,8 @@ def test_reset_user_usage_daily_and_weekly(
assert response.status_code == 200
data = response.json()
assert data["daily_tokens_used"] == 0
assert data["weekly_tokens_used"] == 0
assert data["daily_cost_used_microdollars"] == 0
assert data["weekly_cost_used_microdollars"] == 0
assert data["tier"] == "FREE"
mock_reset.assert_awaited_once_with(target_user_id, reset_weekly=True)

View File

@@ -34,7 +34,7 @@ from backend.copilot.pending_message_helpers import (
)
from backend.copilot.pending_messages import peek_pending_messages
from backend.copilot.rate_limit import (
CoPilotUsageStatus,
CoPilotUsagePublic,
RateLimitExceeded,
acquire_reset_lock,
check_rate_limit,
@@ -537,23 +537,27 @@ async def get_session(
)
async def get_copilot_usage(
user_id: Annotated[str, Security(auth.get_user_id)],
) -> CoPilotUsageStatus:
) -> CoPilotUsagePublic:
"""Get CoPilot usage status for the authenticated user.
Returns current token usage vs limits for daily and weekly windows.
Global defaults sourced from LaunchDarkly (falling back to config).
Includes the user's rate-limit tier.
Returns the percentage of the daily/weekly allowance used — not the
raw spend or cap — so clients cannot derive per-turn cost or platform
margins. Global defaults sourced from LaunchDarkly (falling back to
config). Includes the user's rate-limit tier.
"""
daily_limit, weekly_limit, tier = await get_global_rate_limits(
user_id, config.daily_token_limit, config.weekly_token_limit
user_id,
config.daily_cost_limit_microdollars,
config.weekly_cost_limit_microdollars,
)
return await get_usage_status(
status = await get_usage_status(
user_id=user_id,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_cost_limit=daily_limit,
weekly_cost_limit=weekly_limit,
rate_limit_reset_cost=config.rate_limit_reset_cost,
tier=tier,
)
return CoPilotUsagePublic.from_status(status)
class RateLimitResetResponse(BaseModel):
@@ -562,7 +566,9 @@ class RateLimitResetResponse(BaseModel):
success: bool
credits_charged: int = Field(description="Credits charged (in cents)")
remaining_balance: int = Field(description="Credit balance after charge (in cents)")
usage: CoPilotUsageStatus = Field(description="Updated usage status after reset")
usage: CoPilotUsagePublic = Field(
description="Updated usage status after reset (percentages only)"
)
@router.post(
@@ -586,7 +592,7 @@ async def reset_copilot_usage(
) -> RateLimitResetResponse:
"""Reset the daily CoPilot rate limit by spending credits.
Allows users who have hit their daily token limit to spend credits
Allows users who have hit their daily cost limit to spend credits
to reset their daily usage counter and continue working.
Returns 400 if the feature is disabled or the user is not over the limit.
Returns 402 if the user has insufficient credits.
@@ -605,7 +611,9 @@ async def reset_copilot_usage(
)
daily_limit, weekly_limit, tier = await get_global_rate_limits(
user_id, config.daily_token_limit, config.weekly_token_limit
user_id,
config.daily_cost_limit_microdollars,
config.weekly_cost_limit_microdollars,
)
if daily_limit <= 0:
@@ -642,8 +650,8 @@ async def reset_copilot_usage(
# used for limit checks, not returned to the client.)
usage_status = await get_usage_status(
user_id=user_id,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_cost_limit=daily_limit,
weekly_cost_limit=weekly_limit,
tier=tier,
)
if daily_limit > 0 and usage_status.daily.used < daily_limit:
@@ -678,7 +686,7 @@ async def reset_copilot_usage(
# Reset daily usage in Redis. If this fails, refund the credits
# so the user is not charged for a service they did not receive.
if not await reset_daily_usage(user_id, daily_token_limit=daily_limit):
if not await reset_daily_usage(user_id, daily_cost_limit=daily_limit):
# Compensate: refund the charged credits.
refunded = False
try:
@@ -714,11 +722,11 @@ async def reset_copilot_usage(
finally:
await release_reset_lock(user_id)
# Return updated usage status.
# Return updated usage status (public schema — percentages only).
updated_usage = await get_usage_status(
user_id=user_id,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_cost_limit=daily_limit,
weekly_cost_limit=weekly_limit,
rate_limit_reset_cost=config.rate_limit_reset_cost,
tier=tier,
)
@@ -727,7 +735,7 @@ async def reset_copilot_usage(
success=True,
credits_charged=cost,
remaining_balance=remaining,
usage=updated_usage,
usage=CoPilotUsagePublic.from_status(updated_usage),
)
@@ -810,7 +818,7 @@ async def cancel_auto_approve_task(
),
},
404: {"description": "Session not found or access denied"},
429: {"description": "Token rate-limit or call-frequency cap exceeded"},
429: {"description": "Cost rate-limit or call-frequency cap exceeded"},
},
)
async def stream_chat_post(
@@ -884,18 +892,20 @@ async def stream_chat_post(
},
)
# Pre-turn rate limit check (token-based).
# Pre-turn rate limit check (cost-based, microdollars).
# check_rate_limit short-circuits internally when both limits are 0.
# Global defaults sourced from LaunchDarkly, falling back to config.
if user_id:
try:
daily_limit, weekly_limit, _ = await get_global_rate_limits(
user_id, config.daily_token_limit, config.weekly_token_limit
user_id,
config.daily_cost_limit_microdollars,
config.weekly_cost_limit_microdollars,
)
await check_rate_limit(
user_id=user_id,
daily_token_limit=daily_limit,
weekly_token_limit=weekly_limit,
daily_cost_limit=daily_limit,
weekly_cost_limit=weekly_limit,
)
except RateLimitExceeded as e:
raise HTTPException(status_code=429, detail=str(e)) from e

View File

@@ -296,8 +296,8 @@ def test_stream_chat_returns_429_on_daily_rate_limit(mocker: pytest_mock.MockerF
_mock_stream_internals(mocker)
# Ensure the rate-limit branch is entered by setting a non-zero limit.
mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
mocker.patch.object(chat_routes.config, "daily_cost_limit_microdollars", 10000)
mocker.patch.object(chat_routes.config, "weekly_cost_limit_microdollars", 50000)
mocker.patch(
"backend.api.features.chat.routes.check_rate_limit",
side_effect=RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1)),
@@ -318,8 +318,8 @@ def test_stream_chat_returns_429_on_weekly_rate_limit(
from backend.copilot.rate_limit import RateLimitExceeded
_mock_stream_internals(mocker)
mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
mocker.patch.object(chat_routes.config, "daily_cost_limit_microdollars", 10000)
mocker.patch.object(chat_routes.config, "weekly_cost_limit_microdollars", 50000)
resets_at = datetime.now(UTC) + timedelta(days=3)
mocker.patch(
"backend.api.features.chat.routes.check_rate_limit",
@@ -341,8 +341,8 @@ def test_stream_chat_429_includes_reset_time(mocker: pytest_mock.MockerFixture):
from backend.copilot.rate_limit import RateLimitExceeded
_mock_stream_internals(mocker)
mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
mocker.patch.object(chat_routes.config, "daily_cost_limit_microdollars", 10000)
mocker.patch.object(chat_routes.config, "weekly_cost_limit_microdollars", 50000)
mocker.patch(
"backend.api.features.chat.routes.check_rate_limit",
side_effect=RateLimitExceeded(
@@ -402,23 +402,33 @@ def test_usage_returns_daily_and_weekly(
mocker: pytest_mock.MockerFixture,
test_user_id: str,
) -> None:
"""GET /usage returns daily and weekly usage."""
"""GET /usage returns percentages for daily and weekly windows only.
The raw used/limit microdollar values MUST NOT leak — clients should not
be able to derive per-turn cost or platform margins from the public API.
"""
mock_get = _mock_usage(mocker, daily_used=500, weekly_used=2000)
mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
mocker.patch.object(chat_routes.config, "daily_cost_limit_microdollars", 10000)
mocker.patch.object(chat_routes.config, "weekly_cost_limit_microdollars", 50000)
response = client.get("/usage")
assert response.status_code == 200
data = response.json()
assert data["daily"]["used"] == 500
assert data["weekly"]["used"] == 2000
# 500 / 10000 = 5%, 2000 / 50000 = 4%
assert data["daily"]["percent_used"] == 5.0
assert data["weekly"]["percent_used"] == 4.0
# Raw spend/limit must not be exposed.
assert "used" not in data["daily"]
assert "limit" not in data["daily"]
assert "used" not in data["weekly"]
assert "limit" not in data["weekly"]
mock_get.assert_called_once_with(
user_id=test_user_id,
daily_token_limit=10000,
weekly_token_limit=50000,
daily_cost_limit=10000,
weekly_cost_limit=50000,
rate_limit_reset_cost=chat_routes.config.rate_limit_reset_cost,
tier=SubscriptionTier.FREE,
)
@@ -438,8 +448,8 @@ def test_usage_uses_config_limits(
assert response.status_code == 200
mock_get.assert_called_once_with(
user_id=test_user_id,
daily_token_limit=99999,
weekly_token_limit=77777,
daily_cost_limit=99999,
weekly_cost_limit=77777,
rate_limit_reset_cost=500,
tier=SubscriptionTier.FREE,
)

View File

@@ -47,6 +47,40 @@ def _configure_frontend_origin(mocker: pytest_mock.MockFixture) -> None:
)
@pytest.fixture(autouse=True)
def _stub_pending_subscription_change(mocker: pytest_mock.MockFixture) -> None:
"""Default pending-change lookup to None so tests don't hit Stripe/DB.
Individual tests can override via their own mocker.patch call.
"""
mocker.patch(
"backend.api.features.v1.get_pending_subscription_change",
new_callable=AsyncMock,
return_value=None,
)
@pytest.fixture(autouse=True)
def _stub_subscription_status_lookups(mocker: pytest_mock.MockFixture) -> None:
"""Stub Stripe price + proration lookups used by get_subscription_status.
The POST /credits/subscription handler now returns the full subscription
status payload from every branch (same-tier, FREE downgrade, paid→paid
modify, checkout creation), so every POST test implicitly hits these
helpers. Individual tests can override via their own mocker.patch call.
"""
mocker.patch(
"backend.api.features.v1.get_subscription_price_id",
new_callable=AsyncMock,
return_value=None,
)
mocker.patch(
"backend.api.features.v1.get_proration_credit_cents",
new_callable=AsyncMock,
return_value=0,
)
@pytest.mark.parametrize(
"url,expected",
[
@@ -407,30 +441,77 @@ def test_update_subscription_tier_enterprise_blocked(
set_tier_mock.assert_not_awaited()
def test_update_subscription_tier_same_tier_is_noop(
def test_update_subscription_tier_same_tier_releases_pending_change(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""POST /credits/subscription for the user's current paid tier returns 200 with empty URL.
"""POST /credits/subscription for the user's current tier releases any pending change.
Without this guard a duplicate POST (double-click, browser retry, stale page) would
create a second Stripe Checkout Session for the same price, potentially billing the
user twice until the webhook reconciliation fires.
"Stay on my current tier" — the collapsed replacement for the old
/credits/subscription/cancel-pending route. Always calls
release_pending_subscription_schedule (idempotent when nothing is pending)
and returns the refreshed status with url="". Never creates a Checkout
Session — that would double-charge a user who double-clicks their own tier.
"""
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.PRO
async def mock_feature_enabled(*args, **kwargs):
return True
mock_user.subscription_tier = SubscriptionTier.BUSINESS
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
mocker.patch(
release_mock = mocker.patch(
"backend.api.features.v1.release_pending_subscription_schedule",
new_callable=AsyncMock,
return_value=True,
)
checkout_mock = mocker.patch(
"backend.api.features.v1.create_subscription_checkout",
new_callable=AsyncMock,
)
feature_mock = mocker.patch(
"backend.api.features.v1.is_feature_enabled",
side_effect=mock_feature_enabled,
new_callable=AsyncMock,
return_value=True,
)
response = client.post(
"/credits/subscription",
json={
"tier": "BUSINESS",
"success_url": f"{TEST_FRONTEND_ORIGIN}/success",
"cancel_url": f"{TEST_FRONTEND_ORIGIN}/cancel",
},
)
assert response.status_code == 200
data = response.json()
assert data["tier"] == "BUSINESS"
assert data["url"] == ""
release_mock.assert_awaited_once_with(TEST_USER_ID)
checkout_mock.assert_not_awaited()
# Same-tier branch short-circuits before the payment-flag check.
feature_mock.assert_not_awaited()
def test_update_subscription_tier_same_tier_no_pending_change_returns_status(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""Same-tier request when nothing is pending still returns status with url=""."""
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.PRO
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
release_mock = mocker.patch(
"backend.api.features.v1.release_pending_subscription_schedule",
new_callable=AsyncMock,
return_value=False,
)
checkout_mock = mocker.patch(
"backend.api.features.v1.create_subscription_checkout",
@@ -447,10 +528,50 @@ def test_update_subscription_tier_same_tier_is_noop(
)
assert response.status_code == 200
assert response.json()["url"] == ""
data = response.json()
assert data["tier"] == "PRO"
assert data["url"] == ""
assert data["pending_tier"] is None
release_mock.assert_awaited_once_with(TEST_USER_ID)
checkout_mock.assert_not_awaited()
def test_update_subscription_tier_same_tier_stripe_error_returns_502(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""Same-tier request surfaces a 502 when Stripe release fails.
Carries forward the error contract from the removed
/credits/subscription/cancel-pending route so clients keep seeing 502 for
transient Stripe failures.
"""
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.BUSINESS
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
mocker.patch(
"backend.api.features.v1.release_pending_subscription_schedule",
side_effect=stripe.StripeError("network"),
)
response = client.post(
"/credits/subscription",
json={
"tier": "BUSINESS",
"success_url": f"{TEST_FRONTEND_ORIGIN}/success",
"cancel_url": f"{TEST_FRONTEND_ORIGIN}/cancel",
},
)
assert response.status_code == 502
assert "contact support" in response.json()["detail"].lower()
def test_update_subscription_tier_free_with_payment_schedules_cancel_and_does_not_update_db(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
@@ -803,3 +924,197 @@ def test_update_subscription_tier_free_no_stripe_subscription(
cancel_mock.assert_awaited_once_with(TEST_USER_ID)
# DB tier must be updated immediately — no webhook will fire for a missing sub
set_tier_mock.assert_awaited_once_with(TEST_USER_ID, SubscriptionTier.FREE)
def test_get_subscription_status_includes_pending_tier(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""GET /credits/subscription exposes pending_tier and pending_tier_effective_at."""
import datetime as dt
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.BUSINESS
effective_at = dt.datetime(2030, 1, 1, tzinfo=dt.timezone.utc)
async def mock_price_id(tier: SubscriptionTier) -> str | None:
return None
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
mocker.patch(
"backend.api.features.v1.get_subscription_price_id",
side_effect=mock_price_id,
)
mocker.patch(
"backend.api.features.v1.get_proration_credit_cents",
new_callable=AsyncMock,
return_value=0,
)
mocker.patch(
"backend.api.features.v1.get_pending_subscription_change",
new_callable=AsyncMock,
return_value=(SubscriptionTier.PRO, effective_at),
)
response = client.get("/credits/subscription")
assert response.status_code == 200
data = response.json()
assert data["pending_tier"] == "PRO"
assert data["pending_tier_effective_at"] is not None
def test_get_subscription_status_no_pending_tier(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""When no pending change exists the response omits pending_tier."""
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.PRO
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
mocker.patch(
"backend.api.features.v1.get_subscription_price_id",
new_callable=AsyncMock,
return_value=None,
)
mocker.patch(
"backend.api.features.v1.get_proration_credit_cents",
new_callable=AsyncMock,
return_value=0,
)
mocker.patch(
"backend.api.features.v1.get_pending_subscription_change",
new_callable=AsyncMock,
return_value=None,
)
response = client.get("/credits/subscription")
assert response.status_code == 200
data = response.json()
assert data["pending_tier"] is None
assert data["pending_tier_effective_at"] is None
def test_update_subscription_tier_downgrade_paid_to_paid_schedules(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""A BUSINESS→PRO downgrade request dispatches to modify_stripe_subscription_for_tier."""
mock_user = Mock()
mock_user.subscription_tier = SubscriptionTier.BUSINESS
mocker.patch(
"backend.api.features.v1.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
)
mocker.patch(
"backend.api.features.v1.is_feature_enabled",
new_callable=AsyncMock,
return_value=True,
)
modify_mock = mocker.patch(
"backend.api.features.v1.modify_stripe_subscription_for_tier",
new_callable=AsyncMock,
return_value=True,
)
checkout_mock = mocker.patch(
"backend.api.features.v1.create_subscription_checkout",
new_callable=AsyncMock,
)
response = client.post(
"/credits/subscription",
json={
"tier": "PRO",
"success_url": f"{TEST_FRONTEND_ORIGIN}/success",
"cancel_url": f"{TEST_FRONTEND_ORIGIN}/cancel",
},
)
assert response.status_code == 200
assert response.json()["url"] == ""
modify_mock.assert_awaited_once_with(TEST_USER_ID, SubscriptionTier.PRO)
checkout_mock.assert_not_awaited()
def test_stripe_webhook_dispatches_subscription_schedule_released(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""subscription_schedule.released routes to sync_subscription_schedule_from_stripe."""
schedule_obj = {"id": "sub_sched_1", "subscription": "sub_pro"}
event = {
"type": "subscription_schedule.released",
"data": {"object": schedule_obj},
}
mocker.patch(
"backend.api.features.v1.settings.secrets.stripe_webhook_secret",
new="whsec_test",
)
mocker.patch(
"backend.api.features.v1.stripe.Webhook.construct_event",
return_value=event,
)
sync_mock = mocker.patch(
"backend.api.features.v1.sync_subscription_schedule_from_stripe",
new_callable=AsyncMock,
)
response = client.post(
"/credits/stripe_webhook",
content=b"{}",
headers={"stripe-signature": "t=1,v1=abc"},
)
assert response.status_code == 200
sync_mock.assert_awaited_once_with(schedule_obj)
def test_stripe_webhook_ignores_subscription_schedule_updated(
client: fastapi.testclient.TestClient,
mocker: pytest_mock.MockFixture,
) -> None:
"""subscription_schedule.updated must NOT dispatch: our own
SubscriptionSchedule.create/.modify calls fire this event and would
otherwise loop redundant traffic through the sync handler. State
transitions we care about surface via .released/.completed, and phase
advance to a new price is already covered by customer.subscription.updated.
"""
schedule_obj = {"id": "sub_sched_1", "subscription": "sub_pro"}
event = {
"type": "subscription_schedule.updated",
"data": {"object": schedule_obj},
}
mocker.patch(
"backend.api.features.v1.settings.secrets.stripe_webhook_secret",
new="whsec_test",
)
mocker.patch(
"backend.api.features.v1.stripe.Webhook.construct_event",
return_value=event,
)
sync_mock = mocker.patch(
"backend.api.features.v1.sync_subscription_schedule_from_stripe",
new_callable=AsyncMock,
)
response = client.post(
"/credits/stripe_webhook",
content=b"{}",
headers={"stripe-signature": "t=1,v1=abc"},
)
assert response.status_code == 200
sync_mock.assert_not_awaited()

View File

@@ -26,7 +26,7 @@ from fastapi import (
)
from fastapi.concurrency import run_in_threadpool
from prisma.enums import SubscriptionTier
from pydantic import BaseModel
from pydantic import BaseModel, Field
from starlette.status import HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
from typing_extensions import Optional, TypedDict
@@ -49,20 +49,24 @@ from backend.data.auth import api_key as api_key_db
from backend.data.block import BlockInput, CompletedBlockOutput
from backend.data.credit import (
AutoTopUpConfig,
PendingChangeUnknown,
RefundRequest,
TransactionHistory,
UserCredit,
cancel_stripe_subscription,
create_subscription_checkout,
get_auto_top_up,
get_pending_subscription_change,
get_proration_credit_cents,
get_subscription_price_id,
get_user_credit_model,
handle_subscription_payment_failure,
modify_stripe_subscription_for_tier,
release_pending_subscription_schedule,
set_auto_top_up,
set_subscription_tier,
sync_subscription_from_stripe,
sync_subscription_schedule_from_stripe,
)
from backend.data.graph import GraphSettings
from backend.data.model import CredentialsMetaInput, UserOnboarding
@@ -698,15 +702,21 @@ class SubscriptionTierRequest(BaseModel):
cancel_url: str = ""
class SubscriptionCheckoutResponse(BaseModel):
url: str
class SubscriptionStatusResponse(BaseModel):
tier: Literal["FREE", "PRO", "BUSINESS", "ENTERPRISE"]
monthly_cost: int # amount in cents (Stripe convention)
tier_costs: dict[str, int] # tier name -> amount in cents
proration_credit_cents: int # unused portion of current sub to convert on upgrade
pending_tier: Optional[Literal["FREE", "PRO", "BUSINESS"]] = None
pending_tier_effective_at: Optional[datetime] = None
url: str = Field(
default="",
description=(
"Populated only when POST /credits/subscription starts a Stripe Checkout"
" Session (FREE → paid upgrade). Empty string in all other branches —"
" the client redirects to this URL when non-empty."
),
)
def _validate_checkout_redirect_url(url: str) -> bool:
@@ -804,17 +814,42 @@ async def get_subscription_status(
current_monthly_cost = tier_costs.get(tier.value, 0)
proration_credit = await get_proration_credit_cents(user_id, current_monthly_cost)
return SubscriptionStatusResponse(
try:
pending = await get_pending_subscription_change(user_id)
except (stripe.StripeError, PendingChangeUnknown):
# Swallow Stripe-side failures (rate limits, transient network) AND
# PendingChangeUnknown (LaunchDarkly price-id lookup failed). Both
# propagate past the cache so the next request retries fresh instead
# of serving a stale None for the TTL window. Let real bugs (KeyError,
# AttributeError, etc.) propagate so they surface in Sentry.
logger.exception(
"get_subscription_status: failed to resolve pending change for user %s",
user_id,
)
pending = None
response = SubscriptionStatusResponse(
tier=tier.value,
monthly_cost=current_monthly_cost,
tier_costs=tier_costs,
proration_credit_cents=proration_credit,
)
if pending is not None:
pending_tier_enum, pending_effective_at = pending
if pending_tier_enum == SubscriptionTier.FREE:
response.pending_tier = "FREE"
elif pending_tier_enum == SubscriptionTier.PRO:
response.pending_tier = "PRO"
elif pending_tier_enum == SubscriptionTier.BUSINESS:
response.pending_tier = "BUSINESS"
if response.pending_tier is not None:
response.pending_tier_effective_at = pending_effective_at
return response
@v1_router.post(
path="/credits/subscription",
summary="Start a Stripe Checkout session to upgrade subscription tier",
summary="Update subscription tier or start a Stripe Checkout session",
operation_id="updateSubscriptionTier",
tags=["credits"],
dependencies=[Security(requires_user)],
@@ -822,7 +857,7 @@ async def get_subscription_status(
async def update_subscription_tier(
request: SubscriptionTierRequest,
user_id: Annotated[str, Security(get_user_id)],
) -> SubscriptionCheckoutResponse:
) -> SubscriptionStatusResponse:
# Pydantic validates tier is one of FREE/PRO/BUSINESS via Literal type.
tier = SubscriptionTier(request.tier)
@@ -834,6 +869,29 @@ async def update_subscription_tier(
detail="ENTERPRISE subscription changes must be managed by an administrator",
)
# Same-tier request = "stay on my current tier" = cancel any pending
# scheduled change (paid→paid downgrade or paid→FREE cancel). This is the
# collapsed behaviour that replaces the old /credits/subscription/cancel-pending
# route. Safe when no pending change exists: release_pending_subscription_schedule
# returns False and we simply return the current status.
if (user.subscription_tier or SubscriptionTier.FREE) == tier:
try:
await release_pending_subscription_schedule(user_id)
except stripe.StripeError as e:
logger.exception(
"Stripe error releasing pending subscription change for user %s: %s",
user_id,
e,
)
raise HTTPException(
status_code=502,
detail=(
"Unable to cancel the pending subscription change right now. "
"Please try again or contact support."
),
)
return await get_subscription_status(user_id)
payment_enabled = await is_feature_enabled(
Flag.ENABLE_PLATFORM_PAYMENT, user_id, default=False
)
@@ -871,9 +929,9 @@ async def update_subscription_tier(
# admin-granted tier. Update DB immediately since the
# subscription.deleted webhook will never fire.
await set_subscription_tier(user_id, tier)
return SubscriptionCheckoutResponse(url="")
return await get_subscription_status(user_id)
await set_subscription_tier(user_id, tier)
return SubscriptionCheckoutResponse(url="")
return await get_subscription_status(user_id)
# Paid tier changes require payment to be enabled — block self-service upgrades
# when the flag is off. Admins use the /api/admin/ routes to set tiers directly.
@@ -883,15 +941,6 @@ async def update_subscription_tier(
detail=f"Subscription not available for tier {tier}",
)
# No-op short-circuit: if the user is already on the requested paid tier,
# do NOT create a new Checkout Session. Without this guard, a duplicate
# request (double-click, retried POST, stale page) creates a second
# subscription for the same price; the user would be charged for both
# until `_cleanup_stale_subscriptions` runs from the resulting webhook —
# which only fires after the second charge has cleared.
if (user.subscription_tier or SubscriptionTier.FREE) == tier:
return SubscriptionCheckoutResponse(url="")
# Paid→paid tier change: if the user already has a Stripe subscription,
# modify it in-place with proration instead of creating a new Checkout
# Session. This preserves remaining paid time and avoids double-charging.
@@ -901,14 +950,14 @@ async def update_subscription_tier(
try:
modified = await modify_stripe_subscription_for_tier(user_id, tier)
if modified:
return SubscriptionCheckoutResponse(url="")
return await get_subscription_status(user_id)
# modify_stripe_subscription_for_tier returns False when no active
# Stripe subscription exists — i.e. the user has an admin-granted
# paid tier with no Stripe record. In that case, update the DB
# tier directly (same as the FREE-downgrade path for admin-granted
# users) rather than sending them through a new Checkout Session.
await set_subscription_tier(user_id, tier)
return SubscriptionCheckoutResponse(url="")
return await get_subscription_status(user_id)
except ValueError as e:
raise HTTPException(status_code=422, detail=str(e))
except stripe.StripeError as e:
@@ -978,7 +1027,9 @@ async def update_subscription_tier(
),
)
return SubscriptionCheckoutResponse(url=url)
status = await get_subscription_status(user_id)
status.url = url
return status
@v1_router.post(
@@ -1043,6 +1094,18 @@ async def stripe_webhook(request: Request):
):
await sync_subscription_from_stripe(data_object)
# `subscription_schedule.updated` is deliberately omitted: our own
# `SubscriptionSchedule.create` + `.modify` calls in
# `_schedule_downgrade_at_period_end` would fire that event right back at us
# and loop redundant traffic through this handler. We only care about state
# transitions (released / completed); phase advance to the new price is
# already covered by `customer.subscription.updated`.
if event_type in (
"subscription_schedule.released",
"subscription_schedule.completed",
):
await sync_subscription_schedule_from_stripe(data_object)
if event_type == "invoice.payment_failed":
await handle_subscription_payment_failure(data_object)

View File

@@ -15,14 +15,16 @@ import re
import shutil
import tempfile
import uuid
from collections.abc import AsyncGenerator, Sequence
from collections.abc import AsyncGenerator, Mapping, Sequence
from dataclasses import dataclass, field
from functools import partial
from typing import TYPE_CHECKING, Any, cast
import orjson
from langfuse import propagate_attributes
from openai.types import CompletionUsage
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionToolParam
from openai.types.completion_usage import PromptTokensDetails
from opentelemetry import trace as otel_trace
from backend.copilot.config import CopilotLlmModel, CopilotMode
@@ -45,7 +47,7 @@ from backend.copilot.pending_messages import (
drain_pending_messages,
format_pending_as_user_message,
)
from backend.copilot.prompting import get_baseline_supplement, get_graphiti_supplement
from backend.copilot.prompting import SHARED_TOOL_NOTES, get_graphiti_supplement
from backend.copilot.response_model import (
StreamBaseResponse,
StreamError,
@@ -126,6 +128,78 @@ _MAX_INLINE_IMAGE_BYTES = 20 * 1024 * 1024
# Matches characters unsafe for filenames.
_UNSAFE_FILENAME = re.compile(r"[^\w.\-]")
# OpenRouter-specific extra_body flag that embeds the real generation cost
# into the final usage chunk. Module-level constant so we don't reallocate
# an identical dict on every streaming call.
_OPENROUTER_INCLUDE_USAGE_COST = {"usage": {"include": True}}
def _extract_usage_cost(usage: CompletionUsage) -> float | None:
"""Return the provider-reported USD cost on a streaming usage chunk.
OpenRouter piggybacks a ``cost`` field on the OpenAI-compatible usage
object when the request body includes ``usage: {"include": True}``.
The OpenAI SDK's typed ``CompletionUsage`` does not declare it, so we
read it off ``model_extra`` (the pydantic v2 container for extras) to
keep the access fully typed — no ``getattr``.
Returns ``None`` when the field is absent, explicitly null,
non-numeric, non-finite, or negative. Invalid values (including
present-but-null) are logged here — they indicate a provider bug
worth chasing; plain absences are silent so the caller can dedupe
the "missing cost" warning per stream.
"""
extras = usage.model_extra or {}
if "cost" not in extras:
return None
raw = extras["cost"]
if raw is None:
logger.error("[Baseline] usage.cost is present but null")
return None
try:
val = float(raw)
except (TypeError, ValueError):
logger.error("[Baseline] usage.cost is not numeric: %r", raw)
return None
if not math.isfinite(val) or val < 0:
logger.error("[Baseline] usage.cost is non-finite or negative: %r", val)
return None
return val
def _extract_cache_creation_tokens(ptd: PromptTokensDetails) -> int:
"""Return cache-write token count from an OpenAI-compatible
``PromptTokensDetails``, handling provider-specific field names and
SDK-version shape differences.
Two shapes we care about:
- **OpenRouter** (our primary baseline provider) streams the cache-write
count as ``cache_write_tokens``. Newer ``openai-python`` versions
declare this as a typed attribute on ``PromptTokensDetails``; older
versions expose it only in ``model_extra``. Verified empirically:
cold-cache request returns ``cache_write_tokens`` > 0, warm-cache
request returns ``cached_tokens`` > 0 and ``cache_write_tokens`` = 0.
- **Direct Anthropic API** uses ``cache_creation_input_tokens`` —
never a typed attribute on the OpenAI SDK, always lives in
``model_extra``.
Lookup order: typed attr → ``model_extra`` (OpenRouter) → ``model_extra``
(Anthropic-native). ``getattr`` handles both the typed-attr case
(newer SDK) and the no-such-attr case (older SDK) — we can't only use
``model_extra`` because when the field is typed it's filtered out of
``model_extra``, leaving us at 0 on the modern happy path.
"""
typed_val = getattr(ptd, "cache_write_tokens", None)
if typed_val:
return int(typed_val)
extras = ptd.model_extra or {}
return int(
extras.get("cache_write_tokens")
or extras.get("cache_creation_input_tokens")
or 0
)
async def _prepare_baseline_attachments(
file_ids: list[str],
@@ -267,6 +341,10 @@ class _BaselineStreamState:
turn_cache_read_tokens: int = 0
turn_cache_creation_tokens: int = 0
cost_usd: float | None = None
# Tracks whether we've already warned about a missing `cost` field in
# the usage chunk this stream, so non-OpenRouter providers don't
# generate one warning per streaming call.
cost_missing_logged: bool = False
thinking_stripper: _ThinkingStripper = field(default_factory=_ThinkingStripper)
session_messages: list[ChatMessage] = field(default_factory=list)
# Tracks how much of ``assistant_text`` has already been flushed to
@@ -274,6 +352,137 @@ class _BaselineStreamState:
# block only appends the *new* assistant text (avoiding duplication of
# round-1 text when round-1 entries were cleared from session_messages).
_flushed_assistant_text_len: int = 0
# Memoised system-message dict with cache_control applied. The system
# prompt is static within a session, so we build it once on the first
# LLM round and reuse the same dict on subsequent rounds — avoiding
# an O(N) dict-copy of the growing ``messages`` list on every tool-call
# iteration. ``None`` means "not yet computed" (or the first message
# wasn't a system role, so no marking applies).
cached_system_message: dict[str, Any] | None = None
def _is_anthropic_model(model: str) -> bool:
"""Return True if *model* routes to Anthropic (native or via OpenRouter).
Cache-control markers on message content + the ``anthropic-beta`` header
are Anthropic-specific. OpenAI rejects the unknown ``cache_control``
field with a 400 ("Extra inputs are not permitted") and Grok / other
providers behave similarly. OpenRouter strips unknown headers but
passes through ``cache_control`` on the body regardless of provider —
which would also fail when OpenRouter routes to a non-Anthropic model.
Examples that return True:
- ``anthropic/claude-sonnet-4-6`` (OpenRouter route)
- ``claude-3-5-sonnet-20241022`` (direct Anthropic API)
- ``anthropic.claude-3-5-sonnet`` (Bedrock-style)
False for ``openai/gpt-4o``, ``google/gemini-2.5-pro``, ``xai/grok-4``
etc.
"""
lowered = model.lower()
return "claude" in lowered or lowered.startswith("anthropic")
def _fresh_ephemeral_cache_control() -> dict[str, str]:
    """Return a FRESH ephemeral ``cache_control`` dict on every call.

    The ``ttl`` comes from :attr:`ChatConfig.baseline_prompt_cache_ttl`
    (default ``1h``), keeping the static prefix warm across many users'
    requests in the same workspace cache. Anthropic caches are keyed
    per-workspace, so every copilot user reading the same system prompt
    hits the same cached entry.

    A shared module-level dict would let any downstream mutation (e.g. the
    OpenAI SDK normalising fields in-place) poison every future request's
    marker; building a new dict is O(1), so the safety margin costs nothing.
    """
    marker: dict[str, str] = {"type": "ephemeral"}
    marker["ttl"] = config.baseline_prompt_cache_ttl
    return marker
def _fresh_anthropic_caching_headers() -> dict[str, str]:
"""Return a FRESH ``extra_headers`` dict requesting the Anthropic
prompt-caching beta.
Same reasoning as :func:`_fresh_ephemeral_cache_control`: never hand a
shared module-level dict to third-party SDKs. OpenRouter auto-forwards
cache_control for Anthropic routes without this header, but passing it
makes the intent unambiguous on-wire and is a no-op for non-Anthropic
providers (unknown headers are dropped).
"""
return {"anthropic-beta": "prompt-caching-2024-07-31"}
def _mark_tools_with_cache_control(
    tools: Sequence[Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Return a copy of *tools* with ``cache_control`` on the last entry.

    Marking the last tool sets a cache breakpoint that covers the whole
    tool-schema block as one cacheable prefix segment. Extracted from
    :func:`_mark_system_message_with_cache_control` so callers can
    precompute the marked tool list once per session — the tool set is
    static within a request, and the ~43 dict-copies would otherwise run on
    every LLM round of the tool-call loop.

    **Only call this for Anthropic model routes.** Non-Anthropic providers
    (OpenAI, Grok, Gemini) reject the unknown ``cache_control`` field with
    a 400 schema-validation error. Gate via :func:`_is_anthropic_model`.
    """
    marked: list[dict[str, Any]] = [dict(tool) for tool in tools]
    if not marked:
        return marked
    # Rebuild the final entry so the caller's original mapping is untouched.
    last = dict(marked[-1])
    last["cache_control"] = _fresh_ephemeral_cache_control()
    marked[-1] = last
    return marked
def _build_cached_system_message(
    system_message: Mapping[str, Any],
) -> dict[str, Any]:
    """Return a copy of *system_message* with ``cache_control`` applied.

    Anthropic's cache uses prefix-match with up to 4 explicit breakpoints.
    Together with the last-tool marker this yields two cache segments — the
    system block alone, and system+all-tools — so requests sharing only the
    system prefix still get a partial cache hit.

    The message is rebuilt via a shallow copy so any unknown fields the
    caller set (e.g. ``name``) survive the transformation. Non-Anthropic
    models silently ignore the markers.

    Returns the shallow-copied original unchanged when the content shape is
    unsupported (missing / non-string / empty) — callers should splice it
    into the message list as-is in that case.
    """
    marked = dict(system_message)
    text = marked.get("content")
    if not isinstance(text, str) or not text:
        return marked
    marked["content"] = [
        {
            "type": "text",
            "text": text,
            "cache_control": _fresh_ephemeral_cache_control(),
        }
    ]
    return marked
def _mark_system_message_with_cache_control(
    messages: Sequence[Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Return a copy of *messages* with ``cache_control`` on the system block.

    Thin wrapper around :func:`_build_cached_system_message` that preserves
    the original list shape. Hot-loop callers should prefer the memoised
    path in ``_baseline_llm_caller`` (which builds the cached system dict
    once per session); this function is retained for call sites outside the
    tool-call loop, where per-call copying is acceptable.
    """
    result: list[dict[str, Any]] = [dict(message) for message in messages]
    if result and result[0].get("role") == "system":
        result[0] = _build_cached_system_message(result[0])
    return result
async def _baseline_llm_caller(
@@ -292,26 +501,53 @@ async def _baseline_llm_caller(
state.thinking_stripper = _ThinkingStripper()
round_text = ""
response = None # initialized before try so finally block can access it
try:
client = _get_openai_client()
typed_messages = cast(list[ChatCompletionMessageParam], messages)
if tools:
typed_tools = cast(list[ChatCompletionToolParam], tools)
response = await client.chat.completions.create(
model=state.model,
messages=typed_messages,
tools=typed_tools,
stream=True,
stream_options={"include_usage": True},
)
# Cache markers are Anthropic-specific. For OpenAI/Grok/other
# providers, leaving them on would trigger a 400 ("Extra inputs
# are not permitted" on cache_control). Tools were precomputed
# in stream_chat_completion_baseline via _mark_tools_with_cache_control
# (only when the model was Anthropic), so on non-Anthropic routes
# tools ship without cache_control on the last entry too.
#
# `extra_body` `usage.include=true` asks OpenRouter to embed the real
# generation cost into the final usage chunk — required by the
# cost-based rate limiter in routes.py. Separate from the Anthropic
# caching headers, always sent.
is_anthropic = _is_anthropic_model(state.model)
if is_anthropic:
# Build the cached system dict once per session and splice it in
# on each round. The full ``messages`` list grows with every
# tool call, so copying the entire list just to mutate index 0
# scales with conversation length (sentry flagged this); this
# splice touches only list slots, not message contents.
if (
state.cached_system_message is None
and messages
and messages[0].get("role") == "system"
):
state.cached_system_message = _build_cached_system_message(messages[0])
if state.cached_system_message is not None and messages:
final_messages = [state.cached_system_message, *messages[1:]]
else:
final_messages = messages
extra_headers = _fresh_anthropic_caching_headers()
else:
response = await client.chat.completions.create(
model=state.model,
messages=typed_messages,
stream=True,
stream_options={"include_usage": True},
)
final_messages = messages
extra_headers = None
typed_messages = cast(list[ChatCompletionMessageParam], final_messages)
create_kwargs: dict[str, Any] = {
"model": state.model,
"messages": typed_messages,
"stream": True,
"stream_options": {"include_usage": True},
"extra_body": _OPENROUTER_INCLUDE_USAGE_COST,
}
if extra_headers:
create_kwargs["extra_headers"] = extra_headers
if tools:
create_kwargs["tools"] = cast(list[ChatCompletionToolParam], list(tools))
response = await client.chat.completions.create(**create_kwargs)
tool_calls_by_index: dict[int, dict[str, str]] = {}
# Iterate under an inner try/finally so early exits (cancel, tool-call
@@ -323,18 +559,33 @@ async def _baseline_llm_caller(
if chunk.usage:
state.turn_prompt_tokens += chunk.usage.prompt_tokens or 0
state.turn_completion_tokens += chunk.usage.completion_tokens or 0
# Extract cache token details when available (OpenAI /
# OpenRouter include these in prompt_tokens_details).
ptd = getattr(chunk.usage, "prompt_tokens_details", None)
ptd = chunk.usage.prompt_tokens_details
if ptd:
state.turn_cache_read_tokens += (
getattr(ptd, "cached_tokens", 0) or 0
)
# cache_creation_input_tokens is reported by some providers
# (e.g. Anthropic native) but not standard OpenAI streaming.
state.turn_cache_read_tokens += ptd.cached_tokens or 0
state.turn_cache_creation_tokens += (
getattr(ptd, "cache_creation_input_tokens", 0) or 0
_extract_cache_creation_tokens(ptd)
)
cost = _extract_usage_cost(chunk.usage)
if cost is not None:
state.cost_usd = (state.cost_usd or 0.0) + cost
elif (
"cost" not in (chunk.usage.model_extra or {})
and not state.cost_missing_logged
):
# Field absent (non-OpenRouter route, or OpenRouter
# misconfigured) — warn once per stream so error
# monitoring picks up persistent misses without
# flooding. Invalid values already logged inside
# _extract_usage_cost, so no duplicate warning here.
logger.warning(
"[Baseline] usage chunk missing cost (model=%s, "
"prompt=%s, completion=%s) — rate-limit will "
"skip this call",
state.model,
chunk.usage.prompt_tokens,
chunk.usage.completion_tokens,
)
state.cost_missing_logged = True
delta = chunk.choices[0].delta if chunk.choices else None
if not delta:
@@ -394,20 +645,6 @@ async def _baseline_llm_caller(
state.text_started = False
state.text_block_id = str(uuid.uuid4())
finally:
# Extract OpenRouter cost from response headers (in finally so we
# capture cost even when the stream errors mid-way — we already paid).
# Accumulate across multi-round tool-calling turns.
try:
# Access undocumented _response attribute — same pattern as
# extract_openrouter_cost() in blocks/llm.py.
cost_header = response._response.headers.get("x-total-cost") # type: ignore[attr-defined]
if cost_header:
cost = float(cost_header)
if math.isfinite(cost) and cost >= 0:
state.cost_usd = (state.cost_usd or 0.0) + cost
except (AttributeError, ValueError):
pass
# Always persist partial text so the session history stays consistent,
# even when the stream is interrupted by an exception.
state.assistant_text += round_text
@@ -1112,7 +1349,7 @@ async def stream_chat_completion_baseline(
graphiti_enabled = await is_enabled_for_user(user_id)
graphiti_supplement = get_graphiti_supplement() if graphiti_enabled else ""
system_prompt = base_system_prompt + get_baseline_supplement() + graphiti_supplement
system_prompt = base_system_prompt + SHARED_TOOL_NOTES + graphiti_supplement
# Warm context: pre-load relevant facts from Graphiti on first turn.
# Use the pre-drain count so pending messages drained at turn start
@@ -1262,6 +1499,18 @@ async def stream_chat_completion_baseline(
if permissions is not None:
tools = _filter_tools_by_permissions(tools, permissions)
# Pre-mark cache_control on the last tool schema once per session. The
# tool set is static within a request, so doing this here (instead of in
# _baseline_llm_caller) avoids re-copying ~43 tool dicts on every LLM
# round of the tool-call loop.
#
# Only apply to Anthropic routes — OpenAI/Grok/other providers would
# 400 on the unknown ``cache_control`` field inside tool definitions.
if _is_anthropic_model(active_model):
tools = cast(
list[ChatCompletionToolParam], _mark_tools_with_cache_control(tools)
)
# Propagate execution context so tool handlers can read session-level flags.
set_execution_context(
user_id,
@@ -1649,6 +1898,8 @@ async def stream_chat_completion_baseline(
prompt_tokens=billed_prompt,
completion_tokens=state.turn_completion_tokens,
total_tokens=billed_prompt + state.turn_completion_tokens,
cache_read_tokens=state.turn_cache_read_tokens,
cache_creation_tokens=state.turn_cache_creation_tokens,
)
yield StreamFinish()

View File

@@ -11,8 +11,16 @@ from openai.types.chat import ChatCompletionToolParam
from backend.copilot.baseline.service import (
_baseline_conversation_updater,
_baseline_llm_caller,
_BaselineStreamState,
_build_cached_system_message,
_compress_session_messages,
_extract_cache_creation_tokens,
_fresh_anthropic_caching_headers,
_fresh_ephemeral_cache_control,
_is_anthropic_model,
_mark_system_message_with_cache_control,
_mark_tools_with_cache_control,
)
from backend.copilot.model import ChatMessage
from backend.copilot.transcript_builder import TranscriptBuilder
@@ -574,37 +582,87 @@ class TestPrepareBaselineAttachments:
assert blocks == []
_COST_MISSING = object()
def _make_usage_chunk(
*,
prompt_tokens: int = 0,
completion_tokens: int = 0,
cost: float | str | None | object = _COST_MISSING,
cached_tokens: int | None = None,
cache_creation_input_tokens: int | None = None,
):
"""Build a mock streaming chunk carrying usage (and optionally cost).
Provider-specific fields (``cost`` on usage, ``cache_creation_input_tokens``
on prompt_tokens_details) are set on ``model_extra`` because that's where
the baseline helper reads them from (typed ``CompletionUsage.model_extra``
rather than ``getattr``). Pass ``cost=None`` to emit an explicit-null cost
key; omit ``cost`` entirely to leave the key absent.
"""
chunk = MagicMock()
chunk.choices = []
chunk.usage = MagicMock()
chunk.usage.prompt_tokens = prompt_tokens
chunk.usage.completion_tokens = completion_tokens
usage_extras: dict[str, float | str | None] = {}
if cost is not _COST_MISSING:
usage_extras["cost"] = cost # type: ignore[assignment]
chunk.usage.model_extra = usage_extras
if cached_tokens is not None or cache_creation_input_tokens is not None:
# Build a real ``PromptTokensDetails`` so ``getattr(ptd,
# "cache_write_tokens", None)`` returns ``None`` on this SDK version
# (rather than a truthy MagicMock attribute) and the extraction
# helper's typed-attr vs model_extra fallback resolves correctly.
from openai.types.completion_usage import PromptTokensDetails
ptd = PromptTokensDetails.model_validate({"cached_tokens": cached_tokens or 0})
if cache_creation_input_tokens is not None:
if ptd.model_extra is None:
object.__setattr__(ptd, "__pydantic_extra__", {})
assert ptd.model_extra is not None
ptd.model_extra["cache_creation_input_tokens"] = cache_creation_input_tokens
chunk.usage.prompt_tokens_details = ptd
else:
chunk.usage.prompt_tokens_details = None
return chunk
def _make_stream_mock(*chunks):
"""Build an async streaming response mock that yields *chunks* in order."""
stream = MagicMock()
stream.close = AsyncMock()
async def aiter():
for c in chunks:
yield c
stream.__aiter__ = lambda self: aiter()
return stream
class TestBaselineCostExtraction:
"""Tests for x-total-cost header extraction in _baseline_llm_caller."""
"""Tests for ``usage.cost`` extraction in ``_baseline_llm_caller``.
Cost is read from the OpenRouter ``usage.cost`` field on the final
streaming chunk when the request body includes ``usage: {include: true}``
(handled by the baseline service via ``extra_body``).
"""
@pytest.mark.asyncio
async def test_cost_usd_extracted_from_response_header(self):
"""state.cost_usd is set from x-total-cost header when present."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
async def test_cost_usd_extracted_from_usage_chunk(self):
"""state.cost_usd is set from chunk.usage.cost when present."""
state = _BaselineStreamState(model="gpt-4o-mini")
# Build a mock raw httpx response with the cost header
mock_raw_response = MagicMock()
mock_raw_response.headers = {"x-total-cost": "0.0123"}
# Build a mock async streaming response that yields no chunks but has
# a _response attribute pointing to the mock httpx response
mock_stream_response = MagicMock()
mock_stream_response._response = mock_raw_response
async def empty_aiter():
return
yield # make it an async generator
mock_stream_response.__aiter__ = lambda self: empty_aiter()
chunk = _make_usage_chunk(
prompt_tokens=1000, completion_tokens=200, cost=0.0123
)
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=mock_stream_response
return_value=_make_stream_mock(chunk)
)
with patch(
@@ -622,29 +680,14 @@ class TestBaselineCostExtraction:
@pytest.mark.asyncio
async def test_cost_usd_accumulates_across_calls(self):
"""cost_usd accumulates when _baseline_llm_caller is called multiple times."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
state = _BaselineStreamState(model="gpt-4o-mini")
def make_stream_mock(cost: str) -> MagicMock:
mock_raw = MagicMock()
mock_raw.headers = {"x-total-cost": cost}
mock_stream = MagicMock()
mock_stream._response = mock_raw
async def empty_aiter():
return
yield
mock_stream.__aiter__ = lambda self: empty_aiter()
return mock_stream
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
side_effect=[make_stream_mock("0.01"), make_stream_mock("0.02")]
side_effect=[
_make_stream_mock(_make_usage_chunk(prompt_tokens=500, cost=0.01)),
_make_stream_mock(_make_usage_chunk(prompt_tokens=600, cost=0.02)),
]
)
with patch(
@@ -665,28 +708,64 @@ class TestBaselineCostExtraction:
assert state.cost_usd == pytest.approx(0.03)
@pytest.mark.asyncio
async def test_no_cost_when_header_absent(self):
"""state.cost_usd remains None when response has no x-total-cost header."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
async def test_cost_usd_accepts_string_value(self):
"""OpenRouter may emit cost as a string — it should still parse."""
state = _BaselineStreamState(model="gpt-4o-mini")
mock_raw = MagicMock()
mock_raw.headers = {}
mock_stream = MagicMock()
mock_stream._response = mock_raw
async def empty_aiter():
return
yield
mock_stream.__aiter__ = lambda self: empty_aiter()
chunk = _make_usage_chunk(prompt_tokens=10, cost="0.005")
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
mock_client.chat.completions.create = AsyncMock(
return_value=_make_stream_mock(chunk)
)
with patch(
"backend.copilot.baseline.service._get_openai_client",
return_value=mock_client,
):
await _baseline_llm_caller(
messages=[{"role": "user", "content": "hi"}],
tools=[],
state=state,
)
assert state.cost_usd == pytest.approx(0.005)
@pytest.mark.asyncio
async def test_cost_usd_none_when_usage_cost_missing(self):
    """state.cost_usd stays None when the usage chunk lacks a cost field."""
    # Omitting `cost` from _make_usage_chunk leaves the key absent from
    # usage.model_extra entirely — the non-OpenRouter-provider shape, as
    # opposed to an explicit null or invalid value.
    state = _BaselineStreamState(model="anthropic/claude-sonnet-4")
    chunk = _make_usage_chunk(prompt_tokens=1000, completion_tokens=500)
    mock_client = MagicMock()
    mock_client.chat.completions.create = AsyncMock(
        return_value=_make_stream_mock(chunk)
    )
    with patch(
        "backend.copilot.baseline.service._get_openai_client",
        return_value=mock_client,
    ):
        await _baseline_llm_caller(
            messages=[{"role": "user", "content": "hi"}],
            tools=[],
            state=state,
        )
    assert state.cost_usd is None
    # Token accumulators are still populated so the caller can log them.
    assert state.turn_prompt_tokens == 1000
    assert state.turn_completion_tokens == 500
@pytest.mark.asyncio
async def test_invalid_cost_string_leaves_cost_none(self):
"""A non-numeric cost value is rejected without raising."""
state = _BaselineStreamState(model="gpt-4o-mini")
chunk = _make_usage_chunk(prompt_tokens=10, cost="not-a-number")
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=_make_stream_mock(chunk)
)
with patch(
"backend.copilot.baseline.service._get_openai_client",
@@ -701,28 +780,73 @@ class TestBaselineCostExtraction:
assert state.cost_usd is None
@pytest.mark.asyncio
async def test_cost_extracted_even_when_stream_raises(self):
"""cost_usd is captured in the finally block even when streaming fails."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
async def test_negative_cost_is_ignored(self):
    """Guard against negative cost values (shouldn't happen but be safe)."""
    # A negative `cost` on the usage chunk must be rejected by the
    # extraction helper, leaving state.cost_usd untouched (None).
    state = _BaselineStreamState(model="gpt-4o-mini")
    chunk = _make_usage_chunk(prompt_tokens=10, cost=-0.01)
    mock_client = MagicMock()
    mock_client.chat.completions.create = AsyncMock(
        return_value=_make_stream_mock(chunk)
    )
    with patch(
        "backend.copilot.baseline.service._get_openai_client",
        return_value=mock_client,
    ):
        await _baseline_llm_caller(
            messages=[{"role": "user", "content": "hi"}],
            tools=[],
            state=state,
        )
    assert state.cost_usd is None
@pytest.mark.asyncio
async def test_explicit_null_cost_is_logged_and_ignored(self, caplog):
    """`{"cost": null}` is rejected and logged (not silently dropped)."""
    # `cost=None` puts an explicit null under the "cost" key — distinct
    # from the key being absent — which the service logs at ERROR level.
    state = _BaselineStreamState(model="openrouter/auto")
    chunk = _make_usage_chunk(prompt_tokens=10, cost=None)
    mock_client = MagicMock()
    mock_client.chat.completions.create = AsyncMock(
        return_value=_make_stream_mock(chunk)
    )
    with (
        patch(
            "backend.copilot.baseline.service._get_openai_client",
            return_value=mock_client,
        ),
        caplog.at_level("ERROR", logger="backend.copilot.baseline.service"),
    ):
        await _baseline_llm_caller(
            messages=[{"role": "user", "content": "hi"}],
            tools=[],
            state=state,
        )
    assert state.cost_usd is None
    # The rejection must surface in the logs so provider bugs are visible.
    assert any(
        "usage.cost is present but null" in rec.message for rec in caplog.records
    )
@pytest.mark.asyncio
async def test_cost_not_captured_when_stream_raises_mid_chunk(self):
"""If the stream aborts before emitting the usage chunk there is no cost."""
state = _BaselineStreamState(model="gpt-4o-mini")
mock_raw = MagicMock()
mock_raw.headers = {"x-total-cost": "0.005"}
mock_stream = MagicMock()
mock_stream._response = mock_raw
stream = MagicMock()
stream.close = AsyncMock()
async def failing_aiter():
raise RuntimeError("stream error")
yield # make it an async generator
mock_stream.__aiter__ = lambda self: failing_aiter()
stream.__aiter__ = lambda self: failing_aiter()
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
mock_client.chat.completions.create = AsyncMock(return_value=stream)
with (
patch(
@@ -737,16 +861,12 @@ class TestBaselineCostExtraction:
state=state,
)
assert state.cost_usd == pytest.approx(0.005)
# Stream aborted before yielding the usage chunk — cost stays None.
assert state.cost_usd is None
@pytest.mark.asyncio
async def test_no_cost_when_api_call_raises_before_stream(self):
"""finally block is safe when response is None (API call failed before yielding)."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
"""The helper is safe when the create() call itself raises."""
state = _BaselineStreamState(model="gpt-4o-mini")
mock_client = MagicMock()
@@ -767,84 +887,23 @@ class TestBaselineCostExtraction:
state=state,
)
# response was never assigned so cost extraction must not raise
assert state.cost_usd is None
@pytest.mark.asyncio
async def test_no_cost_when_header_missing(self):
"""cost_usd remains None when x-total-cost is absent."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
state = _BaselineStreamState(model="anthropic/claude-sonnet-4")
mock_raw = MagicMock()
mock_raw.headers = {} # no x-total-cost
mock_stream = MagicMock()
mock_stream._response = mock_raw
mock_chunk = MagicMock()
mock_chunk.usage = MagicMock()
mock_chunk.usage.prompt_tokens = 1000
mock_chunk.usage.completion_tokens = 500
mock_chunk.usage.prompt_tokens_details = None
mock_chunk.choices = []
async def chunk_aiter():
yield mock_chunk
mock_stream.__aiter__ = lambda self: chunk_aiter()
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
with patch(
"backend.copilot.baseline.service._get_openai_client",
return_value=mock_client,
):
await _baseline_llm_caller(
messages=[{"role": "user", "content": "hi"}],
tools=[],
state=state,
)
assert state.cost_usd is None
@pytest.mark.asyncio
async def test_cache_tokens_extracted_from_usage_details(self):
"""cache tokens are extracted from prompt_tokens_details.cached_tokens."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
state = _BaselineStreamState(model="openai/gpt-4o")
chunk = _make_usage_chunk(
prompt_tokens=1000,
completion_tokens=200,
cost=0.01,
cached_tokens=800,
)
state = _BaselineStreamState(model="openai/gpt-4o")
mock_raw = MagicMock()
mock_raw.headers = {"x-total-cost": "0.01"}
mock_stream = MagicMock()
mock_stream._response = mock_raw
# Create a chunk with prompt_tokens_details
mock_ptd = MagicMock()
mock_ptd.cached_tokens = 800
mock_chunk = MagicMock()
mock_chunk.usage = MagicMock()
mock_chunk.usage.prompt_tokens = 1000
mock_chunk.usage.completion_tokens = 200
mock_chunk.usage.prompt_tokens_details = mock_ptd
mock_chunk.choices = []
async def chunk_aiter():
yield mock_chunk
mock_stream.__aiter__ = lambda self: chunk_aiter()
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
mock_client.chat.completions.create = AsyncMock(
return_value=_make_stream_mock(chunk)
)
with patch(
"backend.copilot.baseline.service._get_openai_client",
@@ -861,37 +920,20 @@ class TestBaselineCostExtraction:
@pytest.mark.asyncio
async def test_cache_creation_tokens_extracted_from_usage_details(self):
"""cache_creation_tokens are extracted from prompt_tokens_details."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
"""cache_creation_input_tokens is extracted from prompt_tokens_details."""
state = _BaselineStreamState(model="openai/gpt-4o")
chunk = _make_usage_chunk(
prompt_tokens=1000,
completion_tokens=200,
cost=0.01,
cached_tokens=0,
cache_creation_input_tokens=500,
)
state = _BaselineStreamState(model="openai/gpt-4o")
mock_raw = MagicMock()
mock_raw.headers = {"x-total-cost": "0.01"}
mock_stream = MagicMock()
mock_stream._response = mock_raw
mock_ptd = MagicMock()
mock_ptd.cached_tokens = 0
mock_ptd.cache_creation_input_tokens = 500
mock_chunk = MagicMock()
mock_chunk.usage = MagicMock()
mock_chunk.usage.prompt_tokens = 1000
mock_chunk.usage.completion_tokens = 200
mock_chunk.usage.prompt_tokens_details = mock_ptd
mock_chunk.choices = []
async def chunk_aiter():
yield mock_chunk
mock_stream.__aiter__ = lambda self: chunk_aiter()
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
mock_client.chat.completions.create = AsyncMock(
return_value=_make_stream_mock(chunk)
)
with patch(
"backend.copilot.baseline.service._get_openai_client",
@@ -908,37 +950,17 @@ class TestBaselineCostExtraction:
@pytest.mark.asyncio
async def test_token_accumulators_track_across_multiple_calls(self):
"""Token accumulators grow correctly across multiple _baseline_llm_caller calls."""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
state = _BaselineStreamState(model="anthropic/claude-sonnet-4")
def make_stream(prompt_tokens: int, completion_tokens: int):
mock_raw = MagicMock()
mock_raw.headers = {} # no x-total-cost
mock_stream = MagicMock()
mock_stream._response = mock_raw
mock_chunk = MagicMock()
mock_chunk.usage = MagicMock()
mock_chunk.usage.prompt_tokens = prompt_tokens
mock_chunk.usage.completion_tokens = completion_tokens
mock_chunk.usage.prompt_tokens_details = None
mock_chunk.choices = []
async def chunk_aiter():
yield mock_chunk
mock_stream.__aiter__ = lambda self: chunk_aiter()
return mock_stream
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
side_effect=[
make_stream(1000, 200),
make_stream(1100, 300),
_make_stream_mock(
_make_usage_chunk(prompt_tokens=1000, completion_tokens=200)
),
_make_stream_mock(
_make_usage_chunk(prompt_tokens=1100, completion_tokens=300)
),
]
)
@@ -957,45 +979,33 @@ class TestBaselineCostExtraction:
state=state,
)
# No x-total-cost header and empty pricing table -- cost_usd remains None
# No usage.cost on either chunk → cost stays None, tokens still accumulate.
assert state.cost_usd is None
# Accumulators hold all tokens across both turns
assert state.turn_prompt_tokens == 2100
assert state.turn_completion_tokens == 500
@pytest.mark.parametrize(
"tools",
[
pytest.param([], id="no_tools"),
pytest.param([_make_tool("search")], id="with_tools"),
],
)
@pytest.mark.asyncio
async def test_cost_usd_remains_none_when_header_missing(self):
"""cost_usd stays None when x-total-cost header is absent.
async def test_baseline_requests_usage_include_extra_body(
self, tools: list[ChatCompletionToolParam]
):
"""The baseline call must pass extra_body={'usage': {'include': True}}.
Token counts are still tracked; persist_and_record_usage handles
the None cost by falling back to tracking_type='tokens'.
This guards the contract with OpenRouter that triggers inclusion of
the authoritative cost on the final usage chunk. Without it the
rate-limit counter stays at zero. Exercise both the no-tools and
tool-calling branches so a regression in either path trips the test.
"""
from backend.copilot.baseline.service import (
_baseline_llm_caller,
_BaselineStreamState,
)
state = _BaselineStreamState(model="anthropic/claude-sonnet-4")
mock_raw = MagicMock()
mock_raw.headers = {} # no x-total-cost
mock_stream = MagicMock()
mock_stream._response = mock_raw
mock_chunk = MagicMock()
mock_chunk.usage = MagicMock()
mock_chunk.usage.prompt_tokens = 1000
mock_chunk.usage.completion_tokens = 500
mock_chunk.usage.prompt_tokens_details = None
mock_chunk.choices = []
async def chunk_aiter():
yield mock_chunk
mock_stream.__aiter__ = lambda self: chunk_aiter()
state = _BaselineStreamState(model="gpt-4o-mini")
create_mock = AsyncMock(return_value=_make_stream_mock())
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(return_value=mock_stream)
mock_client.chat.completions.create = create_mock
with patch(
"backend.copilot.baseline.service._get_openai_client",
@@ -1003,13 +1013,15 @@ class TestBaselineCostExtraction:
):
await _baseline_llm_caller(
messages=[{"role": "user", "content": "hi"}],
tools=[],
tools=tools,
state=state,
)
assert state.cost_usd is None
assert state.turn_prompt_tokens == 1000
assert state.turn_completion_tokens == 500
create_mock.assert_awaited_once()
await_args = create_mock.await_args
assert await_args is not None
assert await_args.kwargs["extra_body"] == {"usage": {"include": True}}
assert await_args.kwargs["stream_options"] == {"include_usage": True}
class TestMidLoopPendingFlushOrdering:
@@ -1211,3 +1223,288 @@ class TestMidLoopPendingFlushOrdering:
assert assistant_msgs[1].tool_calls is None
# Crucially: only 2 assistant messages, not 3 (no duplicate)
assert len(assistant_msgs) == 2
class TestApplyPromptCacheMarkers:
    """Tests for _apply_prompt_cache_markers — Anthropic ephemeral
    cache_control markers on baseline OpenRouter requests."""

    def test_system_message_converted_to_content_blocks(self):
        """A string system prompt is wrapped into a single text block that
        carries the ephemeral cache_control marker; other messages stay as-is."""
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "hello"},
        ]
        cached_messages = _mark_system_message_with_cache_control(messages)
        assert cached_messages[0]["role"] == "system"
        assert cached_messages[0]["content"] == [
            {
                "type": "text",
                "text": "You are helpful.",
                "cache_control": {"type": "ephemeral", "ttl": "1h"},
            }
        ]
        # User message must be untouched.
        assert cached_messages[1] == {"role": "user", "content": "hello"}

    def test_system_message_preserves_unknown_fields(self):
        """Extra keys on the system message survive the content conversion."""
        # Future-proofing: a system message with extra keys (e.g. "name") must
        # keep them after the content-blocks conversion.
        messages = [
            {"role": "system", "content": "sys", "name": "developer"},
        ]
        cached_messages = _mark_system_message_with_cache_control(messages)
        assert cached_messages[0]["name"] == "developer"
        assert cached_messages[0]["role"] == "system"

    def test_last_tool_gets_cache_control(self):
        """The cache_control marker lands on the LAST tool only; earlier
        tools and the marked tool's other fields are left untouched."""
        tools = [
            {"type": "function", "function": {"name": "a"}},
            {"type": "function", "function": {"name": "b"}},
        ]
        cached_tools = _mark_tools_with_cache_control(tools)
        assert "cache_control" not in cached_tools[0]
        assert cached_tools[-1]["cache_control"] == {
            "type": "ephemeral",
            "ttl": "1h",
        }
        # Last tool's other fields preserved.
        assert cached_tools[-1]["function"] == {"name": "b"}

    def test_does_not_mutate_input(self):
        """Both helpers must leave their input structures unmodified."""
        messages = [{"role": "system", "content": "sys"}]
        tools = [{"type": "function", "function": {"name": "a"}}]
        _mark_system_message_with_cache_control(messages)
        _mark_tools_with_cache_control(tools)
        assert messages == [{"role": "system", "content": "sys"}]
        assert tools == [{"type": "function", "function": {"name": "a"}}]

    def test_no_system_message_safe(self):
        """No system message present → messages pass through unchanged."""
        messages = [{"role": "user", "content": "hi"}]
        cached_messages = _mark_system_message_with_cache_control(messages)
        assert cached_messages == messages

    def test_empty_tools_safe(self):
        """An empty tools array is returned as an empty list, not an error."""
        assert _mark_tools_with_cache_control([]) == []

    def test_non_string_system_content_left_untouched(self):
        """List-of-blocks system content is not re-wrapped or overwritten."""
        # If the content is already a list of blocks (e.g. caller pre-marked),
        # the helper must not overwrite it.
        pre_marked = [
            {
                "type": "text",
                "text": "sys",
                "cache_control": {"type": "ephemeral", "ttl": "1h"},
            }
        ]
        messages = [{"role": "system", "content": pre_marked}]
        cached_messages = _mark_system_message_with_cache_control(messages)
        assert cached_messages[0]["content"] == pre_marked

    def test_is_anthropic_model_matches_claude_and_anthropic_prefix(self):
        """Accepts slash-prefixed, bare-claude and dotted model ids,
        case-insensitively."""
        assert _is_anthropic_model("anthropic/claude-sonnet-4-6")
        assert _is_anthropic_model("claude-3-5-sonnet-20241022")
        assert _is_anthropic_model("anthropic.claude-3-5-sonnet-20241022-v2:0")
        assert _is_anthropic_model("ANTHROPIC/Claude-Opus")  # case insensitive

    def test_is_anthropic_model_rejects_other_providers(self):
        """Non-Anthropic model ids must be rejected (no cache markers)."""
        assert not _is_anthropic_model("openai/gpt-4o")
        assert not _is_anthropic_model("openai/gpt-5")
        assert not _is_anthropic_model("google/gemini-2.5-pro")
        assert not _is_anthropic_model("xai/grok-4")
        assert not _is_anthropic_model("meta-llama/llama-3.3-70b-instruct")

    def test_cache_control_uses_configured_ttl(self, monkeypatch):
        """TTL comes from ChatConfig.baseline_prompt_cache_ttl — defaults
        to 1h so the static prefix (system + tools) stays warm across
        workspace users past the 5-min default window."""
        from backend.copilot.baseline import service as bsvc

        assert bsvc.config.baseline_prompt_cache_ttl == "1h"
        cc = bsvc._fresh_ephemeral_cache_control()
        assert cc == {"type": "ephemeral", "ttl": "1h"}
        # Overriding the config value must flow straight through to the marker.
        monkeypatch.setattr(bsvc.config, "baseline_prompt_cache_ttl", "5m")
        assert bsvc._fresh_ephemeral_cache_control() == {
            "type": "ephemeral",
            "ttl": "5m",
        }

    def test_fresh_helpers_return_distinct_objects(self):
        """Regression guard: the `_fresh_*` helpers must return a NEW dict
        on every call. A future refactor returning a module-level constant
        would silently reintroduce the shared-mutable-state bug flagged
        during earlier review cycles."""
        assert _fresh_ephemeral_cache_control() is not _fresh_ephemeral_cache_control()
        assert (
            _fresh_anthropic_caching_headers() is not _fresh_anthropic_caching_headers()
        )

    def test_extract_cache_creation_tokens_openrouter_typed_attr(self):
        """Newer ``openai-python`` declares ``cache_write_tokens`` as a
        typed attribute on ``PromptTokensDetails`` — it no longer lands in
        ``model_extra``. Verified empirically against the production
        openai==1.113 installed in this venv: OpenRouter streaming
        response populates ``ptd.cache_write_tokens`` directly while
        ``ptd.model_extra`` is ``{}``.
        """
        from openai.types.completion_usage import PromptTokensDetails

        ptd = PromptTokensDetails.model_validate(
            {
                "audio_tokens": 0,
                "cached_tokens": 0,
                "cache_write_tokens": 4432,
                "video_tokens": 0,
            }
        )
        assert getattr(ptd, "cache_write_tokens", None) == 4432
        assert _extract_cache_creation_tokens(ptd) == 4432

    def test_extract_cache_creation_tokens_openrouter_model_extra(self):
        """Older SDKs that don't yet declare ``cache_write_tokens`` as a
        typed field leave it in ``model_extra`` — the helper must still
        find it there."""
        from openai.types.completion_usage import PromptTokensDetails

        ptd = PromptTokensDetails.model_validate({"cached_tokens": 0})
        # Force the value into model_extra (simulates the old SDK shape
        # where the field wasn't typed yet).
        if ptd.model_extra is None:
            # Pydantic v2 sometimes exposes __pydantic_extra__ as None when
            # extras are disabled; initialise to a dict to mutate safely.
            object.__setattr__(ptd, "__pydantic_extra__", {})
        assert ptd.model_extra is not None
        ptd.model_extra["cache_write_tokens"] = 7777
        assert _extract_cache_creation_tokens(ptd) == 7777

    def test_extract_cache_creation_tokens_anthropic_native_field(self):
        """Direct Anthropic API uses ``cache_creation_input_tokens`` —
        falls through as the final path when neither
        ``cache_write_tokens`` typed attr nor model_extra entry exists."""
        from openai.types.completion_usage import PromptTokensDetails

        ptd = PromptTokensDetails.model_validate({"cached_tokens": 0})
        if ptd.model_extra is None:
            object.__setattr__(ptd, "__pydantic_extra__", {})
        assert ptd.model_extra is not None
        ptd.model_extra["cache_creation_input_tokens"] = 2048
        assert _extract_cache_creation_tokens(ptd) == 2048

    def test_extract_cache_creation_tokens_absent(self):
        """Neither provider field present → 0 (non-Anthropic routes or
        cache-miss responses)."""
        from openai.types.completion_usage import PromptTokensDetails

        ptd = PromptTokensDetails.model_validate({"cached_tokens": 0})
        assert _extract_cache_creation_tokens(ptd) == 0

    def test_build_cached_system_message_applies_cache_control(self):
        """The single-message helper wraps the string content in a text block
        with an ephemeral cache_control marker."""
        out = _build_cached_system_message({"role": "system", "content": "hi"})
        assert out["role"] == "system"
        assert out["content"] == [
            {
                "type": "text",
                "text": "hi",
                "cache_control": {"type": "ephemeral", "ttl": "1h"},
            }
        ]

    def test_build_cached_system_message_preserves_extra_fields(self):
        """Unknown keys (e.g. ``name``) survive the transformation."""
        out = _build_cached_system_message(
            {"role": "system", "content": "sys", "name": "dev"}
        )
        assert out["name"] == "dev"
        assert out["role"] == "system"

    def test_build_cached_system_message_non_string_passthrough(self):
        """Pre-marked list content is returned as-is (shallow-copied)."""
        pre_marked = [
            {
                "type": "text",
                "text": "sys",
                "cache_control": {"type": "ephemeral", "ttl": "1h"},
            }
        ]
        out = _build_cached_system_message({"role": "system", "content": pre_marked})
        assert out["content"] is pre_marked

    @pytest.mark.asyncio
    async def test_baseline_llm_caller_memoises_cached_system_message(self):
        """The cached system dict is built once and reused across rounds.
        Guards against the perf regression where the entire (growing)
        ``messages`` list was copied on every tool-call iteration just to
        mark the static system prompt.
        """
        state = _BaselineStreamState(model="anthropic/claude-sonnet-4")
        chunk = _make_usage_chunk(prompt_tokens=10, completion_tokens=5)
        mock_client = MagicMock()
        mock_client.chat.completions.create = AsyncMock(
            side_effect=[_make_stream_mock(chunk), _make_stream_mock(chunk)]
        )
        messages: list[dict] = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "hi"},
        ]
        with patch(
            "backend.copilot.baseline.service._get_openai_client",
            return_value=mock_client,
        ):
            await _baseline_llm_caller(messages=messages, tools=[], state=state)
            first_cached = state.cached_system_message
            assert first_cached is not None
            # Simulate the tool-call loop growing ``messages`` between rounds.
            messages.append({"role": "assistant", "content": "ok"})
            messages.append({"role": "user", "content": "follow up"})
            await _baseline_llm_caller(messages=messages, tools=[], state=state)
        # Same dict instance reused — not rebuilt per round.
        assert state.cached_system_message is first_cached
        # Second call's first message is the memoised system dict (not a new copy).
        second_call_messages = mock_client.chat.completions.create.call_args_list[1][1][
            "messages"
        ]
        assert second_call_messages[0] is first_cached
        # And the tail messages were spliced in, not re-copied.
        assert second_call_messages[1] is messages[1]
        assert second_call_messages[-1] is messages[-1]

    @pytest.mark.asyncio
    async def test_baseline_llm_caller_skips_memoisation_for_non_anthropic(self):
        """Non-Anthropic routes pass messages through unmodified — no cache
        dict is built, no list splicing happens."""
        state = _BaselineStreamState(model="openai/gpt-4o")
        chunk = _make_usage_chunk(prompt_tokens=10, completion_tokens=5)
        mock_client = MagicMock()
        mock_client.chat.completions.create = AsyncMock(
            return_value=_make_stream_mock(chunk)
        )
        messages: list[dict] = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
        ]
        with patch(
            "backend.copilot.baseline.service._get_openai_client",
            return_value=mock_client,
        ):
            await _baseline_llm_caller(messages=messages, tools=[], state=state)
        assert state.cached_system_message is None
        # The exact same list object reaches the provider (no copy needed).
        call_messages = mock_client.chat.completions.create.call_args[1]["messages"]
        assert call_messages is messages

View File

@@ -101,25 +101,31 @@ class ChatConfig(BaseSettings):
description="Cache TTL in seconds for Langfuse prompt (0 to disable caching)",
)
# Rate limiting — token-based limits per day and per week.
# Per-turn token cost varies with context size: ~10-15K for early turns,
# ~30-50K mid-session, up to ~100K pre-compaction. Average across a
# session with compaction cycles is ~25-35K tokens/turn, so 2.5M daily
# allows ~70-100 turns/day.
# Rate limiting — cost-based limits per day and per week, stored in
# microdollars (1 USD = 1_000_000). The counter tracks the real
# generation cost reported by the provider (OpenRouter ``usage.cost``
# or Claude Agent SDK ``total_cost_usd``), so cache discounts and
# cross-model price differences are already reflected — no token
# weighting or model multiplier is applied on top.
# Checked at the HTTP layer (routes.py) before each turn.
#
# These are base limits for the FREE tier. Higher tiers (PRO, BUSINESS,
# These are base limits for the FREE tier. Higher tiers (PRO, BUSINESS,
# ENTERPRISE) multiply these by their tier multiplier (see
# rate_limit.TIER_MULTIPLIERS). User tier is stored in the
# rate_limit.TIER_MULTIPLIERS). User tier is stored in the
# User.subscriptionTier DB column and resolved inside
# get_global_rate_limits().
daily_token_limit: int = Field(
default=2_500_000,
description="Max tokens per day, resets at midnight UTC (0 = unlimited)",
#
# These defaults act as the ceiling when LaunchDarkly is unreachable;
# the live per-tier values come from the COPILOT_*_COST_LIMIT flags.
daily_cost_limit_microdollars: int = Field(
default=1_000_000,
description="Max cost per day in microdollars, resets at midnight UTC "
"(0 = unlimited).",
)
weekly_token_limit: int = Field(
default=12_500_000,
description="Max tokens per week, resets Monday 00:00 UTC (0 = unlimited)",
weekly_cost_limit_microdollars: int = Field(
default=5_000_000,
description="Max cost per week in microdollars, resets Monday 00:00 UTC "
"(0 = unlimited).",
)
# Cost (in credits / cents) to reset the daily rate limit using credits.
@@ -219,6 +225,18 @@ class ChatConfig(BaseSettings):
"from the prefix. Set to False to fall back to passing the system "
"prompt as a raw string.",
)
baseline_prompt_cache_ttl: str = Field(
default="1h",
description="TTL for the ephemeral prompt-cache markers on the baseline "
"OpenRouter path. Anthropic supports only `5m` (default, 1.25x input "
"price for the write) or `1h` (2x input price for the write). 1h is "
"strictly cheaper overall when the static prefix gets >7 reads per "
"write-window; since the system prompt + tools array is identical "
"across all users in our workspace, 1h is the default so cross-user "
"reads amortise the higher write cost. Anthropic has no longer "
"(24h, permanent) TTL option — see "
"https://platform.claude.com/docs/en/build-with-claude/prompt-caching.",
)
claude_agent_cli_path: str | None = Field(
default=None,
description="Optional explicit path to a Claude Code CLI binary. "

View File

@@ -34,6 +34,7 @@ from .utils import (
CancelCoPilotEvent,
CoPilotExecutionEntry,
create_copilot_queue_config,
get_session_lock_key,
)
logger = TruncatedLogger(logging.getLogger(__name__), prefix="[CoPilotExecutor]")
@@ -366,7 +367,7 @@ class CoPilotExecutor(AppProcess):
# Try to acquire cluster-wide lock
cluster_lock = ClusterLock(
redis=redis.get_redis(),
key=f"copilot:session:{session_id}:lock",
key=get_session_lock_key(session_id),
owner_id=self.executor_id,
timeout=settings.config.cluster_lock_timeout,
)

View File

@@ -82,6 +82,12 @@ COPILOT_CANCEL_EXCHANGE = Exchange(
)
COPILOT_CANCEL_QUEUE_NAME = "copilot_cancel_queue"
def get_session_lock_key(session_id: str) -> str:
    """Build the Redis key under which the executing pod holds the
    cluster-wide lock for the given copilot session."""
    return "copilot:session:" + session_id + ":lock"
# CoPilot operations can include extended thinking and agent generation
# which may take 30+ minutes to complete
COPILOT_CONSUMER_TIMEOUT_SECONDS = 60 * 60 # 1 hour

View File

@@ -8,10 +8,12 @@ handling the distinction between:
from functools import cache
from backend.copilot.tools import TOOL_REGISTRY
# Shared technical notes that apply to both SDK and baseline modes
_SHARED_TOOL_NOTES = """\
# Workflow rules appended to the system prompt on every copilot turn
# (baseline appends directly; SDK appends via the storage-supplement
# template). These are cross-tool rules (file sharing, @@agptfile: refs,
# tool-discovery priority, sub-agent etiquette) that don't belong on any
# individual tool schema.
SHARED_TOOL_NOTES = """\
### Sharing files
After `write_workspace_file`, embed the `download_url` in Markdown:
@@ -261,7 +263,7 @@ When a tool output contains `<tool-output-truncated workspace_path="...">`, the
full output is in workspace storage (NOT on the local filesystem). To access it:
- Use `read_workspace_file(path="...", offset=..., length=50000)` for reading sections.
- To process in the sandbox, use `read_workspace_file(path="...", save_to_path="{working_dir}/file.json")` first, then use `bash_exec` on the local copy.
{_SHARED_TOOL_NOTES}{extra_notes}"""
{SHARED_TOOL_NOTES}{extra_notes}"""
# Pre-built supplements for common environments
@@ -312,35 +314,6 @@ def _get_cloud_sandbox_supplement() -> str:
)
def _generate_tool_documentation() -> str:
    """Render a markdown section listing every currently-available tool.

    NOTE: only baseline mode (direct OpenAI API) consumes this — SDK mode
    gets tool schemas automatically and never needs the text. Rebuilding
    from TOOL_REGISTRY on each call keeps the documentation in sync with
    the actual tool implementations; all workflow guidance lives in the
    individual tool descriptions. Tools are listed alphabetically and
    filtered by ``is_available`` to match get_available_tools() behavior.
    """
    bullet_lines: list[str] = []
    for tool_name in sorted(TOOL_REGISTRY):
        registered_tool = TOOL_REGISTRY[tool_name]
        # Skip tools not usable in the current environment.
        if not registered_tool.is_available:
            continue
        schema = registered_tool.as_openai_tool()
        description = schema["function"].get("description", "No description available")
        # One bullet per tool, name in code style.
        bullet_lines.append(f"- **`{tool_name}`**: {description}\n")
    return "\n## AVAILABLE TOOLS\n\n" + "".join(bullet_lines)
_USER_FOLLOW_UP_NOTE = """
# `<user_follow_up>` blocks in tool output
@@ -438,17 +411,3 @@ You have access to persistent temporal memory tools that remember facts across s
- group_id is handled automatically by the system — never set it yourself.
- When storing, be specific about operational rules and instructions (e.g., "CC Sarah on client communications" not just "Sarah is the assistant").
"""
def get_baseline_supplement() -> str:
    """Return the system-prompt supplement for baseline mode (direct OpenAI API).

    The direct API does not hand Claude the tool schemas automatically, so
    the supplement prepends the auto-generated tool documentation to the
    shared technical notes (SDK-specific environment details excluded).

    Returns:
        The supplement string to append to the system prompt.
    """
    return _generate_tool_documentation() + _SHARED_TOOL_NOTES

View File

@@ -1,9 +1,16 @@
"""CoPilot rate limiting based on token usage.
"""CoPilot rate limiting based on generation cost.
Uses Redis fixed-window counters to track per-user token consumption
with configurable daily and weekly limits. Daily windows reset at
midnight UTC; weekly windows reset at ISO week boundary (Monday 00:00
UTC). Fails open when Redis is unavailable to avoid blocking users.
Uses Redis fixed-window counters to track per-user USD spend (stored as
microdollars, matching ``PlatformCostLog.cost_microdollars``) with
configurable daily and weekly limits. Daily windows reset at midnight UTC;
weekly windows reset at ISO week boundary (Monday 00:00 UTC). Fails open
when Redis is unavailable to avoid blocking users.
Storing microdollars rather than tokens means the counter already reflects
real model pricing (including cache discounts and provider surcharges), so
this module carries no pricing table — the cost comes from OpenRouter's
``usage.cost`` field (baseline) or the Claude Agent SDK's reported total
cost (SDK path).
"""
import asyncio
@@ -17,12 +24,15 @@ from redis.exceptions import RedisError
from backend.data.db_accessors import user_db
from backend.data.redis_client import get_redis_async
from backend.data.user import get_user_by_id
from backend.util.cache import cached
logger = logging.getLogger(__name__)
# Redis key prefixes
_USAGE_KEY_PREFIX = "copilot:usage"
# Redis key prefixes. Bumped from "copilot:usage" (token-based) to
# "copilot:cost" on the token→cost migration so stale counters do not
# get misinterpreted as microdollars (which would dramatically under-count).
_USAGE_KEY_PREFIX = "copilot:cost"
# ---------------------------------------------------------------------------
@@ -31,7 +41,7 @@ _USAGE_KEY_PREFIX = "copilot:usage"
class SubscriptionTier(str, Enum):
"""Subscription tiers with increasing token allowances.
"""Subscription tiers with increasing cost allowances.
Mirrors the ``SubscriptionTier`` enum in ``schema.prisma``.
Once ``prisma generate`` is run, this can be replaced with::
@@ -45,9 +55,9 @@ class SubscriptionTier(str, Enum):
ENTERPRISE = "ENTERPRISE"
# Multiplier applied to the base limits (from LD / config) for each tier.
# Intentionally int (not float): keeps limits as whole token counts and avoids
# floating-point rounding. If fractional multipliers are ever needed, change
# Multiplier applied to the base cost limits (from LD / config) for each tier.
# Intentionally int (not float): keeps limits as whole microdollars and avoids
# floating-point rounding. If fractional multipliers are ever needed, change
# the type and round the result in get_global_rate_limits().
TIER_MULTIPLIERS: dict[SubscriptionTier, int] = {
SubscriptionTier.FREE: 1,
@@ -60,17 +70,27 @@ DEFAULT_TIER = SubscriptionTier.FREE
class UsageWindow(BaseModel):
"""Usage within a single time window."""
"""Usage within a single time window.
``used`` and ``limit`` are in microdollars (1 USD = 1_000_000).
"""
used: int
limit: int = Field(
description="Maximum tokens allowed in this window. 0 means unlimited."
description="Maximum microdollars of spend allowed in this window. "
"0 means unlimited."
)
resets_at: datetime
class CoPilotUsageStatus(BaseModel):
"""Current usage status for a user across all windows."""
"""Current usage status for a user across all windows.
Internal representation used by server-side code that needs to compare
usage against limits (e.g. the reset-credits endpoint). The public API
returns ``CoPilotUsagePublic`` instead so that raw spend and limit
figures never leak to clients.
"""
daily: UsageWindow
weekly: UsageWindow
@@ -81,6 +101,68 @@ class CoPilotUsageStatus(BaseModel):
)
class UsageWindowPublic(BaseModel):
    """Public view of a usage window — only the percentage and reset time.
    Hides the raw spend and the cap so clients cannot derive per-turn cost
    or reverse-engineer platform margins. ``percent_used`` is capped at 100.
    """

    percent_used: float = Field(
        ge=0.0,
        le=100.0,
        description="Percentage of the window's allowance used (0-100). "
        "Clamped at 100 when over the cap.",
    )
    # Moment the window's counter resets (daily: midnight UTC; weekly:
    # Monday 00:00 UTC, per the module docstring).
    resets_at: datetime
class CoPilotUsagePublic(BaseModel):
    """Current usage status for a user — public (client-safe) shape."""

    daily: UsageWindowPublic | None = Field(
        default=None,
        description="Null when no daily cap is configured (unlimited).",
    )
    weekly: UsageWindowPublic | None = Field(
        default=None,
        description="Null when no weekly cap is configured (unlimited).",
    )
    tier: SubscriptionTier = DEFAULT_TIER
    reset_cost: int = Field(
        default=0,
        description="Credit cost (in cents) to reset the daily limit. 0 = feature disabled.",
    )

    @classmethod
    def from_status(cls, status: CoPilotUsageStatus) -> "CoPilotUsagePublic":
        """Project the internal status onto the client-safe schema."""

        def window(w: UsageWindow) -> UsageWindowPublic | None:
            # limit <= 0 means "unlimited" → map to None so the client can
            # hide the gauge instead of rendering a meaningless percentage.
            if w.limit <= 0:
                return None
            # When at/over the cap, snap to exactly 100.0 so the UI's
            # rounded display and its exhaustion check (`percent_used >= 100`)
            # agree. Without this, e.g. 99.95% would render as "100% used"
            # via Math.round but fail the exhaustion check, leaving the
            # reset button hidden while the bar appears full.
            if w.used >= w.limit:
                pct = 100.0
            else:
                pct = round(100.0 * w.used / w.limit, 1)
            return UsageWindowPublic(
                percent_used=pct,
                resets_at=w.resets_at,
            )

        return cls(
            daily=window(status.daily),
            weekly=window(status.weekly),
            tier=status.tier,
            reset_cost=status.reset_cost,
        )
class RateLimitExceeded(Exception):
"""Raised when a user exceeds their CoPilot usage limit."""
@@ -102,8 +184,8 @@ class RateLimitExceeded(Exception):
async def get_usage_status(
user_id: str,
daily_token_limit: int,
weekly_token_limit: int,
daily_cost_limit: int,
weekly_cost_limit: int,
rate_limit_reset_cost: int = 0,
tier: SubscriptionTier = DEFAULT_TIER,
) -> CoPilotUsageStatus:
@@ -111,13 +193,13 @@ async def get_usage_status(
Args:
user_id: The user's ID.
daily_token_limit: Max tokens per day (0 = unlimited).
weekly_token_limit: Max tokens per week (0 = unlimited).
daily_cost_limit: Max microdollars of spend per day (0 = unlimited).
weekly_cost_limit: Max microdollars of spend per week (0 = unlimited).
rate_limit_reset_cost: Credit cost (cents) to reset daily limit (0 = disabled).
tier: The user's rate-limit tier (included in the response).
Returns:
CoPilotUsageStatus with current usage and limits.
CoPilotUsageStatus with current usage and limits in microdollars.
"""
now = datetime.now(UTC)
daily_used = 0
@@ -136,12 +218,12 @@ async def get_usage_status(
return CoPilotUsageStatus(
daily=UsageWindow(
used=daily_used,
limit=daily_token_limit,
limit=daily_cost_limit,
resets_at=_daily_reset_time(now=now),
),
weekly=UsageWindow(
used=weekly_used,
limit=weekly_token_limit,
limit=weekly_cost_limit,
resets_at=_weekly_reset_time(now=now),
),
tier=tier,
@@ -151,22 +233,22 @@ async def get_usage_status(
async def check_rate_limit(
user_id: str,
daily_token_limit: int,
weekly_token_limit: int,
daily_cost_limit: int,
weekly_cost_limit: int,
) -> None:
"""Check if user is within rate limits. Raises RateLimitExceeded if not.
This is a pre-turn soft check. The authoritative usage counter is updated
by ``record_token_usage()`` after the turn completes. Under concurrency,
by ``record_cost_usage()`` after the turn completes. Under concurrency,
two parallel turns may both pass this check against the same snapshot.
This is acceptable because token-based limits are approximate by nature
(the exact token count is unknown until after generation).
This is acceptable because cost-based limits are approximate by nature
(the exact cost is unknown until after generation).
Fails open: if Redis is unavailable, allows the request.
"""
# Short-circuit: when both limits are 0 (unlimited) skip the Redis
# round-trip entirely.
if daily_token_limit <= 0 and weekly_token_limit <= 0:
if daily_cost_limit <= 0 and weekly_cost_limit <= 0:
return
now = datetime.now(UTC)
@@ -182,26 +264,25 @@ async def check_rate_limit(
logger.warning("Redis unavailable for rate limit check, allowing request")
return
# Worst-case overshoot: N concurrent requests × ~15K tokens each.
if daily_token_limit > 0 and daily_used >= daily_token_limit:
if daily_cost_limit > 0 and daily_used >= daily_cost_limit:
raise RateLimitExceeded("daily", _daily_reset_time(now=now))
if weekly_token_limit > 0 and weekly_used >= weekly_token_limit:
if weekly_cost_limit > 0 and weekly_used >= weekly_cost_limit:
raise RateLimitExceeded("weekly", _weekly_reset_time(now=now))
async def reset_daily_usage(user_id: str, daily_token_limit: int = 0) -> bool:
"""Reset a user's daily token usage counter in Redis.
async def reset_daily_usage(user_id: str, daily_cost_limit: int = 0) -> bool:
"""Reset a user's daily cost usage counter in Redis.
Called after a user pays credits to extend their daily limit.
Also reduces the weekly usage counter by ``daily_token_limit`` tokens
Also reduces the weekly usage counter by ``daily_cost_limit`` microdollars
(clamped to 0) so the user effectively gets one extra day's worth of
weekly capacity.
Args:
user_id: The user's ID.
daily_token_limit: The configured daily token limit. When positive,
the weekly counter is reduced by this amount.
daily_cost_limit: The configured daily cost limit in microdollars.
When positive, the weekly counter is reduced by this amount.
Returns False if Redis is unavailable so the caller can handle
compensation (fail-closed for billed operations, unlike the read-only
@@ -217,12 +298,12 @@ async def reset_daily_usage(user_id: str, daily_token_limit: int = 0) -> bool:
# counter is not decremented — which would let the caller refund
# credits even though the daily limit was already reset.
d_key = _daily_key(user_id, now=now)
w_key = _weekly_key(user_id, now=now) if daily_token_limit > 0 else None
w_key = _weekly_key(user_id, now=now) if daily_cost_limit > 0 else None
pipe = redis.pipeline(transaction=True)
pipe.delete(d_key)
if w_key is not None:
pipe.decrby(w_key, daily_token_limit)
pipe.decrby(w_key, daily_cost_limit)
results = await pipe.execute()
# Clamp negative weekly counter to 0 (best-effort; not critical).
@@ -295,84 +376,40 @@ async def increment_daily_reset_count(user_id: str) -> None:
logger.warning("Redis unavailable for tracking reset count")
async def record_token_usage(
async def record_cost_usage(
user_id: str,
prompt_tokens: int,
completion_tokens: int,
*,
cache_read_tokens: int = 0,
cache_creation_tokens: int = 0,
model_cost_multiplier: float = 1.0,
cost_microdollars: int,
) -> None:
"""Record token usage for a user across all windows.
"""Record a user's generation spend against daily and weekly counters.
Uses cost-weighted counting so cached tokens don't unfairly penalise
multi-turn conversations. Anthropic's pricing:
- uncached input: 100%
- cache creation: 25%
- cache read: 10%
- output: 100%
``prompt_tokens`` should be the *uncached* input count (``input_tokens``
from the API response). Cache counts are passed separately.
``model_cost_multiplier`` scales the final weighted total to reflect
relative model cost. Use 5.0 for Opus (5× more expensive than Sonnet)
so that Opus turns deplete the rate limit faster, proportional to cost.
``cost_microdollars`` is the real generation cost reported by the
provider (OpenRouter's ``usage.cost`` or the Claude Agent SDK's
``total_cost_usd`` converted to microdollars). Because the provider
cost already reflects model pricing and cache discounts, this function
carries no pricing table or weighting — it just increments counters.
Args:
user_id: The user's ID.
prompt_tokens: Uncached input tokens.
completion_tokens: Output tokens.
cache_read_tokens: Tokens served from prompt cache (10% cost).
cache_creation_tokens: Tokens written to prompt cache (25% cost).
model_cost_multiplier: Relative model cost factor (1.0 = Sonnet, 5.0 = Opus).
cost_microdollars: Spend to record in microdollars (1 USD = 1_000_000).
Non-positive values are ignored.
"""
prompt_tokens = max(0, prompt_tokens)
completion_tokens = max(0, completion_tokens)
cache_read_tokens = max(0, cache_read_tokens)
cache_creation_tokens = max(0, cache_creation_tokens)
weighted_input = (
prompt_tokens
+ round(cache_creation_tokens * 0.25)
+ round(cache_read_tokens * 0.1)
)
total = round(
(weighted_input + completion_tokens) * max(1.0, model_cost_multiplier)
)
if total <= 0:
cost_microdollars = max(0, cost_microdollars)
if cost_microdollars <= 0:
return
raw_total = (
prompt_tokens + cache_read_tokens + cache_creation_tokens + completion_tokens
)
logger.info(
"Recording token usage for %s: raw=%d, weighted=%d, multiplier=%.1fx "
"(uncached=%d, cache_read=%d@10%%, cache_create=%d@25%%, output=%d)",
user_id[:8],
raw_total,
total,
model_cost_multiplier,
prompt_tokens,
cache_read_tokens,
cache_creation_tokens,
completion_tokens,
)
logger.info("Recording copilot spend: %d microdollars", cost_microdollars)
now = datetime.now(UTC)
try:
redis = await get_redis_async()
# transaction=False: these are independent INCRBY+EXPIRE pairs on
# separate keys — no cross-key atomicity needed. Skipping
# MULTI/EXEC avoids the overhead. If the connection drops between
# INCRBY and EXPIRE the key survives until the next date-based key
# rotation (daily/weekly), so the memory-leak risk is negligible.
pipe = redis.pipeline(transaction=False)
# Use MULTI/EXEC so each INCRBY/EXPIRE pair is atomic — guarantees
# the TTL is set even if the connection drops mid-pipeline, so
# counters can never survive past their date-based rotation window.
pipe = redis.pipeline(transaction=True)
# Daily counter (expires at next midnight UTC)
d_key = _daily_key(user_id, now=now)
pipe.incrby(d_key, total)
pipe.incrby(d_key, cost_microdollars)
seconds_until_daily_reset = int(
(_daily_reset_time(now=now) - now).total_seconds()
)
@@ -380,7 +417,7 @@ async def record_token_usage(
# Weekly counter (expires end of week)
w_key = _weekly_key(user_id, now=now)
pipe.incrby(w_key, total)
pipe.incrby(w_key, cost_microdollars)
seconds_until_weekly_reset = int(
(_weekly_reset_time(now=now) - now).total_seconds()
)
@@ -389,8 +426,8 @@ async def record_token_usage(
await pipe.execute()
except (RedisError, ConnectionError, OSError):
logger.warning(
"Redis unavailable for recording token usage (tokens=%d)",
total,
"Redis unavailable for recording cost usage (microdollars=%d)",
cost_microdollars,
)
@@ -459,8 +496,20 @@ get_user_tier.cache_delete = _fetch_user_tier.cache_delete # type: ignore[attr-
async def set_user_tier(user_id: str, tier: SubscriptionTier) -> None:
"""Persist the user's rate-limit tier to the database.
Also invalidates the ``get_user_tier`` cache for this user so that
subsequent rate-limit checks immediately see the new tier.
Invalidates every cache that keys off the user's subscription tier so the
change is visible immediately: this function's own ``get_user_tier``, the
shared ``get_user_by_id`` (which exposes ``user.subscription_tier``), and
``get_pending_subscription_change`` (since an admin override can invalidate
a cached ``cancel_at_period_end`` or schedule-based pending state).
If the user has an active Stripe subscription whose current price does not
match ``tier``, Stripe will keep billing the old price and the next
``customer.subscription.updated`` webhook will overwrite the DB tier back
to whatever Stripe has. Proper reconciliation (cancelling or modifying the
Stripe subscription when an admin overrides the tier) is out of scope for
this PR — it changes the admin contract and needs its own test coverage.
For now we emit a ``WARNING`` so drift surfaces via Sentry until that
follow-up lands.
Raises:
prisma.errors.RecordNotFoundError: If the user does not exist.
@@ -469,8 +518,113 @@ async def set_user_tier(user_id: str, tier: SubscriptionTier) -> None:
where={"id": user_id},
data={"subscriptionTier": tier.value},
)
# Invalidate cached tier so rate-limit checks pick up the change immediately.
get_user_tier.cache_delete(user_id) # type: ignore[attr-defined]
# Local import required: backend.data.credit imports backend.copilot.rate_limit
# (via get_user_tier in credit.py's _invalidate_user_tier_caches), so a
# top-level ``from backend.data.credit import ...`` here would create a
# circular import at module-load time.
from backend.data.credit import get_pending_subscription_change
get_user_by_id.cache_delete(user_id) # type: ignore[attr-defined]
get_pending_subscription_change.cache_delete(user_id) # type: ignore[attr-defined]
# The DB write above is already committed; the drift check is best-effort
# diagnostic logging. Fire-and-forget so admin bulk ops don't wait on a
# Stripe roundtrip. The inner helper wraps its body in a timeout + broad
# except so background task errors still surface via logs rather than as
# "task exception never retrieved" warnings. Cancellation on request
# shutdown is acceptable — the drift warning is non-load-bearing.
asyncio.ensure_future(_drift_check_background(user_id, tier))
async def _drift_check_background(user_id: str, tier: SubscriptionTier) -> None:
"""Run the Stripe drift check in the background, logging rather than raising."""
try:
await asyncio.wait_for(
_warn_if_stripe_subscription_drifts(user_id, tier),
timeout=5.0,
)
logger.debug(
"set_user_tier: drift check completed for user=%s admin_tier=%s",
user_id,
tier.value,
)
except asyncio.TimeoutError:
logger.warning(
"set_user_tier: drift check timed out for user=%s admin_tier=%s",
user_id,
tier.value,
)
except asyncio.CancelledError:
# Request may have completed and the event loop is cancelling tasks —
# the drift log is non-critical, so accept cancellation silently.
raise
except Exception:
logger.exception(
"set_user_tier: drift check background task failed for"
" user=%s admin_tier=%s",
user_id,
tier.value,
)
async def _warn_if_stripe_subscription_drifts(
user_id: str, new_tier: SubscriptionTier
) -> None:
"""Emit a WARNING when an admin tier override leaves an active Stripe sub on a
mismatched price.
The warning is diagnostic only: Stripe remains the billing source of truth,
so the next ``customer.subscription.updated`` webhook will reset the DB
tier. Surfacing the drift here lets ops catch admin overrides that bypass
the intended Checkout / Portal cancel flows before users notice surprise
charges.
"""
# Local imports: see note in ``set_user_tier`` about the credit <-> rate_limit
# circular. These helpers (``_get_active_subscription``,
# ``get_subscription_price_id``) live in credit.py alongside the rest of
# the Stripe billing code.
from backend.data.credit import _get_active_subscription, get_subscription_price_id
try:
user = await get_user_by_id(user_id)
if not getattr(user, "stripe_customer_id", None):
return
sub = await _get_active_subscription(user.stripe_customer_id)
if sub is None:
return
items = sub["items"].data
if not items:
return
price = items[0].price
current_price_id = price if isinstance(price, str) else price.id
# The LaunchDarkly-backed price lookup must live inside this try/except:
# an LD SDK failure (network, token revoked) here would otherwise
# propagate past set_user_tier's already-committed DB write and turn a
# best-effort diagnostic into a 500 on admin tier writes.
expected_price_id = await get_subscription_price_id(new_tier)
except Exception:
logger.debug(
"_warn_if_stripe_subscription_drifts: drift lookup failed for"
" user=%s; skipping drift warning",
user_id,
exc_info=True,
)
return
if expected_price_id is not None and expected_price_id == current_price_id:
return
logger.warning(
"Admin tier override will drift from Stripe: user=%s admin_tier=%s"
" stripe_sub=%s stripe_price=%s expected_price=%s — the next"
" customer.subscription.updated webhook will reconcile the DB tier"
" back to whatever Stripe has; cancel or modify the Stripe subscription"
" if you intended the admin override to stick.",
user_id,
new_tier.value,
sub.id,
current_price_id,
expected_price_id,
)
async def get_global_rate_limits(
@@ -480,37 +634,41 @@ async def get_global_rate_limits(
) -> tuple[int, int, SubscriptionTier]:
"""Resolve global rate limits from LaunchDarkly, falling back to config.
The base limits (from LD or config) are multiplied by the user's
tier multiplier so that higher tiers receive proportionally larger
allowances.
Values are microdollars. The base limits (from LD or config) are
multiplied by the user's tier multiplier so that higher tiers receive
proportionally larger allowances.
Args:
user_id: User ID for LD flag evaluation context.
config_daily: Fallback daily limit from ChatConfig.
config_weekly: Fallback weekly limit from ChatConfig.
config_daily: Fallback daily cost limit (microdollars) from ChatConfig.
config_weekly: Fallback weekly cost limit (microdollars) from ChatConfig.
Returns:
(daily_token_limit, weekly_token_limit, tier) 3-tuple.
(daily_cost_limit, weekly_cost_limit, tier) — limits in microdollars.
"""
# Lazy import to avoid circular dependency:
# rate_limit -> feature_flag -> settings -> ... -> rate_limit
from backend.util.feature_flag import Flag, get_feature_flag_value
daily_raw = await get_feature_flag_value(
Flag.COPILOT_DAILY_TOKEN_LIMIT.value, user_id, config_daily
)
weekly_raw = await get_feature_flag_value(
Flag.COPILOT_WEEKLY_TOKEN_LIMIT.value, user_id, config_weekly
# Fetch daily + weekly flags in parallel — each LD evaluation is an
# independent network round-trip, so gather cuts latency roughly in half.
daily_raw, weekly_raw = await asyncio.gather(
get_feature_flag_value(
Flag.COPILOT_DAILY_COST_LIMIT.value, user_id, config_daily
),
get_feature_flag_value(
Flag.COPILOT_WEEKLY_COST_LIMIT.value, user_id, config_weekly
),
)
try:
daily = max(0, int(daily_raw))
except (TypeError, ValueError):
logger.warning("Invalid LD value for daily token limit: %r", daily_raw)
logger.warning("Invalid LD value for daily cost limit: %r", daily_raw)
daily = config_daily
try:
weekly = max(0, int(weekly_raw))
except (TypeError, ValueError):
logger.warning("Invalid LD value for weekly token limit: %r", weekly_raw)
logger.warning("Invalid LD value for weekly cost limit: %r", weekly_raw)
weekly = config_weekly
# Apply tier multiplier

View File

@@ -24,7 +24,7 @@ from .rate_limit import (
get_usage_status,
get_user_tier,
increment_daily_reset_count,
record_token_usage,
record_cost_usage,
release_reset_lock,
reset_daily_usage,
reset_user_usage,
@@ -82,7 +82,7 @@ class TestGetUsageStatus:
return_value=mock_redis,
):
status = await get_usage_status(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert isinstance(status, CoPilotUsageStatus)
@@ -98,7 +98,7 @@ class TestGetUsageStatus:
side_effect=ConnectionError("Redis down"),
):
status = await get_usage_status(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert status.daily.used == 0
@@ -115,7 +115,7 @@ class TestGetUsageStatus:
return_value=mock_redis,
):
status = await get_usage_status(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert status.daily.used == 0
@@ -132,7 +132,7 @@ class TestGetUsageStatus:
return_value=mock_redis,
):
status = await get_usage_status(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert status.daily.used == 500
@@ -148,7 +148,7 @@ class TestGetUsageStatus:
return_value=mock_redis,
):
status = await get_usage_status(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
now = datetime.now(UTC)
@@ -174,7 +174,7 @@ class TestCheckRateLimit:
):
# Should not raise
await check_rate_limit(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
@pytest.mark.asyncio
@@ -188,7 +188,7 @@ class TestCheckRateLimit:
):
with pytest.raises(RateLimitExceeded) as exc_info:
await check_rate_limit(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert exc_info.value.window == "daily"
@@ -203,7 +203,7 @@ class TestCheckRateLimit:
):
with pytest.raises(RateLimitExceeded) as exc_info:
await check_rate_limit(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
assert exc_info.value.window == "weekly"
@@ -216,7 +216,7 @@ class TestCheckRateLimit:
):
# Should not raise
await check_rate_limit(
_USER, daily_token_limit=10000, weekly_token_limit=50000
_USER, daily_cost_limit=10000, weekly_cost_limit=50000
)
@pytest.mark.asyncio
@@ -229,15 +229,15 @@ class TestCheckRateLimit:
return_value=mock_redis,
):
# Should not raise — limits of 0 mean unlimited
await check_rate_limit(_USER, daily_token_limit=0, weekly_token_limit=0)
await check_rate_limit(_USER, daily_cost_limit=0, weekly_cost_limit=0)
# ---------------------------------------------------------------------------
# record_token_usage
# record_cost_usage
# ---------------------------------------------------------------------------
class TestRecordTokenUsage:
class TestRecordCostUsage:
@staticmethod
def _make_pipeline_mock() -> MagicMock:
"""Create a pipeline mock with sync methods and async execute."""
@@ -255,27 +255,40 @@ class TestRecordTokenUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
await record_cost_usage(_USER, cost_microdollars=123_456)
# Should call incrby twice (daily + weekly) with total=150
# Should call incrby twice (daily + weekly) with the same cost
incrby_calls = mock_pipe.incrby.call_args_list
assert len(incrby_calls) == 2
assert incrby_calls[0].args[1] == 150 # daily
assert incrby_calls[1].args[1] == 150 # weekly
assert incrby_calls[0].args[1] == 123_456 # daily
assert incrby_calls[1].args[1] == 123_456 # weekly
@pytest.mark.asyncio
async def test_skips_when_zero_tokens(self):
async def test_skips_when_cost_is_zero(self):
mock_redis = AsyncMock()
with patch(
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await record_token_usage(_USER, prompt_tokens=0, completion_tokens=0)
await record_cost_usage(_USER, cost_microdollars=0)
# Should not call pipeline at all
mock_redis.pipeline.assert_not_called()
@pytest.mark.asyncio
async def test_skips_when_cost_is_negative(self):
"""Negative costs are clamped to zero and skip the pipeline."""
mock_redis = AsyncMock()
with patch(
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await record_cost_usage(_USER, cost_microdollars=-10)
mock_redis.pipeline.assert_not_called()
@pytest.mark.asyncio
async def test_sets_expire_on_both_keys(self):
"""Pipeline should call expire for both daily and weekly keys."""
@@ -287,7 +300,7 @@ class TestRecordTokenUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
await record_cost_usage(_USER, cost_microdollars=5_000)
expire_calls = mock_pipe.expire.call_args_list
assert len(expire_calls) == 2
@@ -308,32 +321,7 @@ class TestRecordTokenUsage:
side_effect=ConnectionError("Redis down"),
):
# Should not raise
await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
@pytest.mark.asyncio
async def test_cost_weighted_counting(self):
"""Cached tokens should be weighted: cache_read=10%, cache_create=25%."""
mock_pipe = self._make_pipeline_mock()
mock_redis = AsyncMock()
mock_redis.pipeline = lambda **_kw: mock_pipe
with patch(
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await record_token_usage(
_USER,
prompt_tokens=100, # uncached → 100
completion_tokens=50, # output → 50
cache_read_tokens=10000, # 10% → 1000
cache_creation_tokens=400, # 25% → 100
)
# Expected weighted total: 100 + 1000 + 100 + 50 = 1250
incrby_calls = mock_pipe.incrby.call_args_list
assert len(incrby_calls) == 2
assert incrby_calls[0].args[1] == 1250 # daily
assert incrby_calls[1].args[1] == 1250 # weekly
await record_cost_usage(_USER, cost_microdollars=5_000)
@pytest.mark.asyncio
async def test_handles_redis_error_during_pipeline_execute(self):
@@ -348,7 +336,7 @@ class TestRecordTokenUsage:
return_value=mock_redis,
):
# Should not raise — fail-open
await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
await record_cost_usage(_USER, cost_microdollars=5_000)
# ---------------------------------------------------------------------------
@@ -581,6 +569,80 @@ class TestSetUserTier:
assert tier_after == SubscriptionTier.ENTERPRISE
@pytest.mark.asyncio
async def test_drift_check_swallows_launchdarkly_failure(self):
"""LaunchDarkly price-id lookup failures inside the drift check must
never bubble up and 500 the admin tier write — the DB update is
already committed by the time we check drift."""
mock_prisma = AsyncMock()
mock_prisma.update = AsyncMock(return_value=None)
mock_user = MagicMock()
mock_user.stripe_customer_id = "cus_abc"
mock_sub = MagicMock()
mock_sub.id = "sub_abc"
mock_sub["items"].data = [MagicMock(price=MagicMock(id="price_mismatch"))]
with (
patch(
"backend.copilot.rate_limit.PrismaUser.prisma",
return_value=mock_prisma,
),
patch(
"backend.copilot.rate_limit.get_user_by_id",
new_callable=AsyncMock,
return_value=mock_user,
),
patch(
"backend.data.credit._get_active_subscription",
new_callable=AsyncMock,
return_value=mock_sub,
),
patch(
"backend.data.credit.get_subscription_price_id",
new_callable=AsyncMock,
side_effect=RuntimeError("LD SDK not initialized"),
),
):
# Must NOT raise — drift check is best-effort diagnostic only.
await set_user_tier(_USER, SubscriptionTier.PRO)
mock_prisma.update.assert_awaited_once()
@pytest.mark.asyncio
async def test_drift_check_timeout_is_bounded(self):
"""A Stripe call that stalls on the 80s SDK default must not block the
admin tier write — set_user_tier wraps the drift check in a 5s timeout
and logs + returns on TimeoutError."""
import asyncio as _asyncio
mock_prisma = AsyncMock()
mock_prisma.update = AsyncMock(return_value=None)
async def _never_returns(_user_id: str, _tier):
await _asyncio.sleep(60)
with (
patch(
"backend.copilot.rate_limit.PrismaUser.prisma",
return_value=mock_prisma,
),
patch(
"backend.copilot.rate_limit._warn_if_stripe_subscription_drifts",
side_effect=_never_returns,
),
patch(
"backend.copilot.rate_limit.asyncio.wait_for",
new_callable=AsyncMock,
side_effect=_asyncio.TimeoutError,
),
):
await set_user_tier(_USER, SubscriptionTier.PRO)
# Set_user_tier still completed — the drift timeout did not propagate.
mock_prisma.update.assert_awaited_once()
# ---------------------------------------------------------------------------
# get_global_rate_limits with tiers
@@ -745,7 +807,7 @@ class TestTierLimitsRespected:
assert tier == SubscriptionTier.PRO
# Should NOT raise — 3M < 12.5M
await check_rate_limit(
_USER, daily_token_limit=daily, weekly_token_limit=weekly
_USER, daily_cost_limit=daily, weekly_cost_limit=weekly
)
@pytest.mark.asyncio
@@ -779,7 +841,7 @@ class TestTierLimitsRespected:
# Should raise — 2.5M >= 2.5M
with pytest.raises(RateLimitExceeded):
await check_rate_limit(
_USER, daily_token_limit=daily, weekly_token_limit=weekly
_USER, daily_cost_limit=daily, weekly_cost_limit=weekly
)
@pytest.mark.asyncio
@@ -811,7 +873,7 @@ class TestTierLimitsRespected:
assert tier == SubscriptionTier.ENTERPRISE
# Should NOT raise — 100M < 150M
await check_rate_limit(
_USER, daily_token_limit=daily, weekly_token_limit=weekly
_USER, daily_cost_limit=daily, weekly_cost_limit=weekly
)
@@ -838,7 +900,7 @@ class TestResetDailyUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
result = await reset_daily_usage(_USER, daily_token_limit=10000)
result = await reset_daily_usage(_USER, daily_cost_limit=10000)
assert result is True
mock_pipe.delete.assert_called_once()
@@ -854,7 +916,7 @@ class TestResetDailyUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await reset_daily_usage(_USER, daily_token_limit=10000)
await reset_daily_usage(_USER, daily_cost_limit=10000)
mock_pipe.decrby.assert_called_once()
mock_redis.set.assert_not_called() # 35000 > 0, no clamp needed
@@ -870,14 +932,14 @@ class TestResetDailyUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await reset_daily_usage(_USER, daily_token_limit=10000)
await reset_daily_usage(_USER, daily_cost_limit=10000)
mock_pipe.decrby.assert_called_once()
mock_redis.set.assert_called_once()
@pytest.mark.asyncio
async def test_no_weekly_reduction_when_daily_limit_zero(self):
"""When daily_token_limit is 0, weekly counter should not be touched."""
"""When daily_cost_limit is 0, weekly counter should not be touched."""
mock_pipe = self._make_pipeline_mock()
mock_pipe.execute = AsyncMock(return_value=[1]) # only delete result
mock_redis = AsyncMock()
@@ -887,7 +949,7 @@ class TestResetDailyUsage:
"backend.copilot.rate_limit.get_redis_async",
return_value=mock_redis,
):
await reset_daily_usage(_USER, daily_token_limit=0)
await reset_daily_usage(_USER, daily_cost_limit=0)
mock_pipe.delete.assert_called_once()
mock_pipe.decrby.assert_not_called()
@@ -898,7 +960,7 @@ class TestResetDailyUsage:
"backend.copilot.rate_limit.get_redis_async",
side_effect=ConnectionError("Redis down"),
):
result = await reset_daily_usage(_USER, daily_token_limit=10000)
result = await reset_daily_usage(_USER, daily_cost_limit=10000)
assert result is False

View File

@@ -16,14 +16,14 @@ from backend.util.exceptions import InsufficientBalanceError
# Minimal config mock matching ChatConfig fields used by the endpoint.
def _make_config(
rate_limit_reset_cost: int = 500,
daily_token_limit: int = 2_500_000,
weekly_token_limit: int = 12_500_000,
daily_cost_limit_microdollars: int = 10_000_000,
weekly_cost_limit_microdollars: int = 50_000_000,
max_daily_resets: int = 5,
):
cfg = MagicMock()
cfg.rate_limit_reset_cost = rate_limit_reset_cost
cfg.daily_token_limit = daily_token_limit
cfg.weekly_token_limit = weekly_token_limit
cfg.daily_cost_limit_microdollars = daily_cost_limit_microdollars
cfg.weekly_cost_limit_microdollars = weekly_cost_limit_microdollars
cfg.max_daily_resets = max_daily_resets
return cfg
@@ -77,10 +77,10 @@ class TestResetCopilotUsage:
assert "not available" in exc_info.value.detail
async def test_no_daily_limit_returns_400(self):
"""When daily_token_limit=0 (unlimited), endpoint returns 400."""
"""When daily_cost_limit=0 (unlimited), endpoint returns 400."""
with (
patch(f"{_MODULE}.config", _make_config(daily_token_limit=0)),
patch(f"{_MODULE}.config", _make_config(daily_cost_limit_microdollars=0)),
patch(f"{_MODULE}.settings", _mock_settings()),
_mock_rate_limits(daily=0),
):

View File

@@ -94,21 +94,23 @@ def test_agent_options_accepts_required_fields():
def test_agent_options_accepts_system_prompt_preset_with_exclude_dynamic_sections():
"""Verify ClaudeAgentOptions accepts the exact preset dict _build_system_prompt_value produces.
The production code always includes ``exclude_dynamic_sections=True`` in the preset
dict. This compat test mirrors that exact shape so any SDK version that starts
rejecting unknown keys will be caught here rather than at runtime.
The Turn 1 (non-resume) code path includes ``exclude_dynamic_sections=True`` in
the preset dict for cross-user caching. This compat test mirrors that exact
shape so any SDK version that starts rejecting unknown keys will be caught
here rather than at runtime.
"""
from claude_agent_sdk import ClaudeAgentOptions
from claude_agent_sdk.types import SystemPromptPreset
from .service import _build_system_prompt_value
# Call the production helper directly so this test is tied to the real
# dict shape rather than a hand-rolled copy.
preset = _build_system_prompt_value("custom system prompt", cross_user_cache=True)
assert isinstance(
preset, dict
), "_build_system_prompt_value must return a dict when caching is on"
assert preset.get("exclude_dynamic_sections") is True, (
"Turn 1 must strip dynamic sections to keep the prefix cacheable " "cross-user"
)
sdk_preset = cast(SystemPromptPreset, preset)
opts = ClaudeAgentOptions(system_prompt=sdk_preset)
@@ -116,8 +118,9 @@ def test_agent_options_accepts_system_prompt_preset_with_exclude_dynamic_section
def test_build_system_prompt_value_returns_plain_string_when_cross_user_cache_off():
"""When cross_user_cache=False (e.g. on --resume turns), the helper must return
a plain string so the preset+resume crash is avoided."""
"""When cross_user_cache=False (feature flag disabled globally), the
helper returns a plain string; the CLI will receive --system-prompt
(replace-mode) and skip the preset entirely."""
from .service import _build_system_prompt_value
result = _build_system_prompt_value("my prompt", cross_user_cache=False)
@@ -262,6 +265,12 @@ _KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
"2.1.97", # claude-agent-sdk 0.1.58 -- OpenRouter-safe only with
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 (injected by
# build_sdk_env() in env.py).
"2.1.116", # claude-agent-sdk 0.1.64 -- first bundled version that
# fixes the --resume + excludeDynamicSections=True crash
# (introduced in 2.1.98), unlocking cross-user prompt
# cache reads on every resumed SDK turn. Still requires
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1. Verified
# OpenRouter-safe via cli_openrouter_compat_test.py.
}
)

View File

@@ -165,11 +165,6 @@ _MAX_STREAM_ATTEMPTS = 3
# self-correct. The limit is generous to allow recovery attempts.
_EMPTY_TOOL_CALL_LIMIT = 5
# Cost multiplier for Opus model turns — Opus is ~5× more expensive than Sonnet
# ($15/$75 vs $3/$15 per M tokens). Applied to rate-limit counters so Opus
# turns deplete quota proportionally faster.
_OPUS_COST_MULTIPLIER = 5.0
# User-facing error shown when the empty-tool-call circuit breaker trips.
_CIRCUIT_BREAKER_ERROR_MSG = (
"AutoPilot was unable to complete the tool call "
@@ -725,22 +720,20 @@ def _resolve_fallback_model() -> str | None:
return _normalize_model_name(raw)
async def _resolve_model_and_multiplier(
async def _resolve_sdk_model_for_request(
model: "CopilotLlmModel | None",
session_id: str,
) -> tuple[str | None, float]:
"""Resolve the SDK model string and rate-limit cost multiplier for a turn.
) -> str | None:
"""Resolve the SDK model string for a turn.
Priority (highest first):
1. Explicit per-request ``model`` tier from the frontend toggle.
2. Global config default (``_resolve_sdk_model()``).
Returns a ``(sdk_model, cost_multiplier)`` pair.
``sdk_model`` is ``None`` when the Claude Code subscription default applies.
``cost_multiplier`` is 5.0 for Opus, 1.0 otherwise.
Returns ``None`` when the Claude Code subscription default applies.
Rate-limit accounting no longer applies a multiplier — the real turn
cost (reported by the SDK) already reflects model-pricing differences.
"""
sdk_model = _resolve_sdk_model()
if model == "advanced":
sdk_model = _normalize_model_name(config.advanced_model)
logger.info(
@@ -748,7 +741,7 @@ async def _resolve_model_and_multiplier(
session_id[:12] if session_id else "?",
sdk_model,
)
return sdk_model, _OPUS_COST_MULTIPLIER
return sdk_model
if model == "standard":
# Reset to config default — respects subscription mode (None = CLI default).
@@ -758,13 +751,9 @@ async def _resolve_model_and_multiplier(
session_id[:12] if session_id else "?",
sdk_model or "subscription-default",
)
return sdk_model, 1.0
return sdk_model
# No per-request override; derive multiplier from final resolved model.
cost_multiplier = (
_OPUS_COST_MULTIPLIER if sdk_model and "opus" in sdk_model else 1.0
)
return sdk_model, cost_multiplier
return _resolve_sdk_model()
_MAX_TRANSIENT_BACKOFF_SECONDS = 30
@@ -847,16 +836,25 @@ def _is_fallback_stderr(line: str) -> bool:
def _build_system_prompt_value(
system_prompt: str,
*,
cross_user_cache: bool,
) -> str | SystemPromptPreset:
"""Build the ``system_prompt`` argument for :class:`ClaudeAgentOptions`.
When *cross_user_cache* is enabled, returns a :class:`SystemPromptPreset`
dict so the Claude Code default prompt becomes a cacheable prefix shared
across all users; our custom *system_prompt* is appended after it.
with ``exclude_dynamic_sections=True`` so every turn — Turn 1 *and*
resumed turns — shares the same static prefix and hits the cross-user
prompt cache. Our custom *system_prompt* is appended after the preset.
When disabled (or if the SDK is too old to support ``SystemPromptPreset``),
the raw *system_prompt* string is returned unchanged.
Requires CLI ≥ 2.1.98 (older CLIs crash when ``excludeDynamicSections``
is combined with ``--resume``). The SDK bundles CLI 2.1.116 at
``claude-agent-sdk >= 0.1.64``, so the pin in ``pyproject.toml`` is
the single source of truth — no external install needed.
When *cross_user_cache* is disabled, the raw *system_prompt* string is
returned. Note this causes the CLI to REPLACE its built-in prompt via
``--system-prompt`` (vs ``--append-system-prompt`` for the preset),
which loses Claude Code's default prompt and its cache markers entirely.
An empty *system_prompt* is accepted: the preset dict will have
``append: ""`` which the SDK treats as no custom suffix.
@@ -2895,7 +2893,6 @@ async def stream_chat_completion_sdk(
# Defaults ensure the finally block can always reference these safely even when
# an early return (e.g. sdk_cwd error) skips their normal assignment below.
sdk_model: str | None = None
model_cost_multiplier: float = 1.0
# Make sure there is no more code between the lock acquisition and try-block.
try:
@@ -3012,10 +3009,8 @@ async def stream_chat_completion_sdk(
mcp_server = create_copilot_mcp_server(use_e2b=use_e2b)
# Resolve model and cost multiplier (request tier → config default).
sdk_model, model_cost_multiplier = await _resolve_model_and_multiplier(
model, session_id
)
# Resolve model (request tier → config default).
sdk_model = await _resolve_sdk_model_for_request(model, session_id)
# Track SDK-internal compaction (PreCompact hook → start, next msg → end)
compaction = CompactionTracker()
@@ -3050,15 +3045,17 @@ async def stream_chat_completion_sdk(
sid,
)
# Use SystemPromptPreset for cross-user prompt caching.
# WORKAROUND: CLI 2.1.97 (sdk 0.1.58) exits code 1 when
# excludeDynamicSections=True is in the initialize request AND
# --resume is active. Disable the preset on resumed turns.
# Turn 1 still gets the preset (no --resume).
_cross_user = config.claude_agent_cross_user_prompt_cache and not use_resume
# Use SystemPromptPreset with exclude_dynamic_sections=True on
# every turn — including resumed ones — so all turns share the
# same static prefix and hit the cross-user prompt cache.
#
# Requires CLI ≥ 2.1.98 (older CLIs crash when excludeDynamicSections
# is combined with --resume). claude-agent-sdk >= 0.1.64 bundles
# CLI 2.1.116, so the pin in pyproject.toml is sufficient — no
# external install or env-var override needed.
system_prompt_value = _build_system_prompt_value(
system_prompt,
cross_user_cache=_cross_user,
cross_user_cache=config.claude_agent_cross_user_prompt_cache,
)
sdk_options_kwargs: dict[str, Any] = {
@@ -3415,15 +3412,12 @@ async def stream_chat_completion_sdk(
# fail with "Session ID already in use".
sdk_options_kwargs_retry.pop("resume", None)
sdk_options_kwargs_retry.pop("session_id", None)
# Recompute system_prompt for retry — ctx.use_resume may have
# changed (context reduction enabled --resume). CLI 2.1.97
# crashes when excludeDynamicSections=True is combined with
# --resume, so disable the cross-user preset on resumed turns.
_cross_user_retry = (
config.claude_agent_cross_user_prompt_cache and not ctx.use_resume
)
# Recompute system_prompt for retry — the preset is safe on
# every turn (requires CLI 2.1.98, installed in the Docker
# image and configured via CHAT_CLAUDE_AGENT_CLI_PATH).
sdk_options_kwargs_retry["system_prompt"] = _build_system_prompt_value(
system_prompt, cross_user_cache=_cross_user_retry
system_prompt,
cross_user_cache=config.claude_agent_cross_user_prompt_cache,
)
state.options = ClaudeAgentOptions(**sdk_options_kwargs_retry) # type: ignore[arg-type] # dynamic kwargs
# Retry intentionally omits prior_messages (transcript+gap context) and
@@ -3813,7 +3807,6 @@ async def stream_chat_completion_sdk(
cost_usd=turn_cost_usd,
model=sdk_model or config.model,
provider="anthropic",
model_cost_multiplier=model_cost_multiplier,
)
# --- Persist session messages ---

View File

@@ -177,70 +177,18 @@ class TestPromptSupplement:
assert "## Tool notes" in local_supplement
assert "## Tool notes" in e2b_supplement
def test_baseline_supplement_includes_tool_docs(self):
"""Baseline mode MUST include tool documentation (direct API needs it)."""
from backend.copilot.prompting import get_baseline_supplement
def test_baseline_supplement_has_shared_notes_no_tool_list(self):
"""Baseline now relies on the OpenAI tools array for schemas and only
appends SHARED_TOOL_NOTES (workflow rules not present in any schema).
The old auto-generated ``## AVAILABLE TOOLS`` list is gone — it was
~4.3K tokens of pure duplication of the tools array."""
from backend.copilot.prompting import SHARED_TOOL_NOTES
supplement = get_baseline_supplement()
# MUST have tool list section
assert "## AVAILABLE TOOLS" in supplement
# Should NOT have environment-specific notes (SDK-only)
assert "## Tool notes" not in supplement
def test_baseline_supplement_includes_key_tools(self):
"""Baseline supplement should document all essential tools."""
from backend.copilot.prompting import get_baseline_supplement
from backend.copilot.tools import TOOL_REGISTRY
docs = get_baseline_supplement()
# Core agent workflow tools (always available)
assert "`create_agent`" in docs
assert "`run_agent`" in docs
assert "`find_library_agent`" in docs
assert "`edit_agent`" in docs
# MCP integration (always available)
assert "`run_mcp_tool`" in docs
# Folder management (always available)
assert "`create_folder`" in docs
# Browser tools only if available (Playwright may not be installed in CI)
if (
TOOL_REGISTRY.get("browser_navigate")
and TOOL_REGISTRY["browser_navigate"].is_available
):
assert "`browser_navigate`" in docs
def test_baseline_supplement_includes_workflows(self):
"""Baseline supplement should include workflow guidance in tool descriptions."""
from backend.copilot.prompting import get_baseline_supplement
docs = get_baseline_supplement()
# Workflows are now in individual tool descriptions (not separate sections)
# Check that key workflow concepts appear in tool descriptions
assert "agent_json" in docs or "find_block" in docs
assert "run_mcp_tool" in docs
def test_baseline_supplement_completeness(self):
"""All available tools from TOOL_REGISTRY should appear in baseline supplement."""
from backend.copilot.prompting import get_baseline_supplement
from backend.copilot.tools import TOOL_REGISTRY
docs = get_baseline_supplement()
# Verify each available registered tool is documented
# (matches _generate_tool_documentation which filters by is_available)
for tool_name, tool in TOOL_REGISTRY.items():
if not tool.is_available:
continue
assert (
f"`{tool_name}`" in docs
), f"Tool '{tool_name}' missing from baseline supplement"
assert "## AVAILABLE TOOLS" not in SHARED_TOOL_NOTES
# Keep the high-value workflow rules that are NOT in any tool schema.
assert "@@agptfile:" in SHARED_TOOL_NOTES
assert "Tool Discovery Priority" in SHARED_TOOL_NOTES
assert "run_sub_session" in SHARED_TOOL_NOTES
def test_pause_task_scheduled_before_transcript_upload(self):
"""Pause is scheduled as a background task before transcript upload begins.
@@ -284,21 +232,6 @@ class TestPromptSupplement:
# concurrently during upload's first yield. The ordering guarantee is
# that create_task is CALLED before upload is AWAITED (see source order).
def test_baseline_supplement_no_duplicate_tools(self):
"""No tool should appear multiple times in baseline supplement."""
from backend.copilot.prompting import get_baseline_supplement
from backend.copilot.tools import TOOL_REGISTRY
docs = get_baseline_supplement()
# Count occurrences of each available tool in the entire supplement
for tool_name, tool in TOOL_REGISTRY.items():
if not tool.is_available:
continue
# Count how many times this tool appears as a bullet point
count = docs.count(f"- **`{tool_name}`**")
assert count == 1, f"Tool '{tool_name}' appears {count} times (should be 1)"
# ---------------------------------------------------------------------------
# _cleanup_sdk_tool_results — orchestration + rate-limiting
@@ -700,6 +633,17 @@ class TestSystemPromptPreset:
assert result["append"] == ""
assert result["exclude_dynamic_sections"] is True
def test_resume_and_fresh_share_the_same_static_prefix(self):
"""Every turn (fresh + --resume) must emit the same preset dict
so the cross-user cache prefix match works on all turns. This
relies on CLI ≥ 2.1.98 (installed in the Docker image); older
CLIs would crash on --resume + excludeDynamicSections=True."""
fresh = _build_system_prompt_value("sys", cross_user_cache=True)
resumed = _build_system_prompt_value("sys", cross_user_cache=True)
assert fresh == resumed
assert isinstance(fresh, dict)
assert fresh.get("exclude_dynamic_sections") is True
def test_default_config_is_enabled(self, _clean_config_env):
"""The default value for claude_agent_cross_user_prompt_cache is True."""
cfg = cfg_mod.ChatConfig(

View File

@@ -35,7 +35,7 @@ from backend.data.redis_client import get_redis_async
from backend.data.redis_helpers import hash_compare_and_set
from .config import ChatConfig
from .executor.utils import COPILOT_CONSUMER_TIMEOUT_SECONDS
from .executor.utils import COPILOT_CONSUMER_TIMEOUT_SECONDS, get_session_lock_key
from .response_model import (
ResponseType,
StreamBaseResponse,
@@ -851,6 +851,15 @@ async def mark_session_completed(
logger.debug(f"Session {session_id} already completed/failed, skipping")
return False
# Force-release the executor's cluster lock so the next enqueued turn can
# acquire it immediately. The lock holder's on_run_done will also release
# (idempotent delete); doing it here unblocks cases where the task hangs
# past the cancel timeout or a pod crash leaves the lock orphaned.
try:
await redis.delete(get_session_lock_key(session_id))
except RedisError as e:
logger.warning(f"Failed to release cluster lock for session {session_id}: {e}")
if error_message and not skip_error_publish:
try:
await publish_chunk(turn_id, StreamError(errorText=error_message))

View File

@@ -4,8 +4,10 @@ import asyncio
from unittest.mock import AsyncMock, patch
import pytest
from redis.exceptions import RedisError
from backend.copilot import stream_registry
from backend.copilot.executor.utils import get_session_lock_key
@pytest.fixture(autouse=True)
@@ -221,3 +223,115 @@ async def test_stream_and_publish_consumer_break_then_aclose_releases_inner():
await wrapper.aclose()
assert inner_finally_ran.is_set()
# ---------------------------------------------------------------------------
# mark_session_completed: the atomic meta flip to completed/failed must also
# release the per-session cluster lock, so the next enqueued turn's run
# handler can acquire it without waiting for the TTL (5 min default).
# ---------------------------------------------------------------------------
class _FakeRedis:
    """Hand-rolled async-Redis stand-in covering only what
    mark_session_completed touches: ``hgetall`` + ``delete``."""

    def __init__(self, meta: dict[str, str]):
        self._meta = dict(meta)
        self.deleted_keys: list[str] = []
        # AsyncMock wrapper so individual tests can inspect or replace it.
        self.delete = AsyncMock(side_effect=self._record_delete)

    async def _record_delete(self, *keys: str):
        # Mirror DEL semantics: drop each key and report how many were named.
        for key in keys:
            self.deleted_keys.append(key)
            self._meta.pop(key, None)
        return len(keys)

    async def hgetall(self, _key: str):
        # Always serve the single session-meta hash, copied defensively.
        return {**self._meta}
@pytest.mark.asyncio
async def test_mark_session_completed_releases_cluster_lock_on_success():
    """Winning the CAS flip must also DELETE the session's cluster lock so a
    session stuck behind a stale lock becomes claimable right away."""
    redis_stub = _FakeRedis({"status": "running", "turn_id": "turn-1"})
    get_redis = AsyncMock(return_value=redis_stub)
    cas_wins = AsyncMock(return_value=True)
    with (
        patch.object(stream_registry, "get_redis_async", new=get_redis),
        patch.object(stream_registry, "hash_compare_and_set", new=cas_wins),
        patch.object(stream_registry, "publish_chunk", new=AsyncMock()),
        patch.object(
            stream_registry.chat_db(),
            "set_turn_duration",
            new=AsyncMock(),
            create=True,
        ),
    ):
        outcome = await stream_registry.mark_session_completed("sess-1")
    assert outcome is True
    assert get_session_lock_key("sess-1") in redis_stub.deleted_keys
@pytest.mark.asyncio
async def test_mark_session_completed_skips_lock_release_when_already_completed():
    """Losing the CAS race means another caller finished the session first:
    their already-released lock must not be deleted out from under them, and
    StreamFinish must not be published a second time (the winner already did)."""
    redis_stub = _FakeRedis({"status": "completed", "turn_id": "turn-1"})
    chunk_publisher = AsyncMock()
    cas_loses = AsyncMock(return_value=False)
    with (
        patch.object(
            stream_registry, "get_redis_async", new=AsyncMock(return_value=redis_stub)
        ),
        patch.object(stream_registry, "hash_compare_and_set", new=cas_loses),
        patch.object(stream_registry, "publish_chunk", new=chunk_publisher),
    ):
        outcome = await stream_registry.mark_session_completed("sess-1")
    assert outcome is False
    assert get_session_lock_key("sess-1") not in redis_stub.deleted_keys
    finish_was_published = any(
        isinstance(call.args[1], stream_registry.StreamFinish)
        for call in chunk_publisher.call_args_list
    )
    assert (
        not finish_was_published
    ), "StreamFinish must NOT be re-published on the CAS-no-op branch"
@pytest.mark.asyncio
async def test_mark_session_completed_survives_lock_release_redis_error():
    """Redis failing mid lock-DELETE must not stop the StreamFinish publish —
    otherwise the client's SSE stream would hang on the stale meta status
    until Redis recovers."""
    redis_stub = _FakeRedis({"status": "running", "turn_id": "turn-1"})
    redis_stub.delete = AsyncMock(side_effect=RedisError("boom"))
    chunk_publisher = AsyncMock()
    with (
        patch.object(
            stream_registry, "get_redis_async", new=AsyncMock(return_value=redis_stub)
        ),
        patch.object(
            stream_registry, "hash_compare_and_set", new=AsyncMock(return_value=True)
        ),
        patch.object(stream_registry, "publish_chunk", new=chunk_publisher),
        patch.object(
            stream_registry.chat_db(),
            "set_turn_duration",
            new=AsyncMock(),
            create=True,
        ),
    ):
        outcome = await stream_registry.mark_session_completed("sess-1")
    assert outcome is True
    finish_was_published = any(
        isinstance(call.args[1], stream_registry.StreamFinish)
        for call in chunk_publisher.call_args_list
    )
    assert (
        finish_was_published
    ), "StreamFinish must still be published even if lock DELETE raises"

View File

@@ -1,9 +1,9 @@
"""Shared token-usage persistence and rate-limit recording.
"""Shared usage persistence and rate-limit recording.
Both the baseline (OpenRouter) and SDK (Anthropic) service layers need to:
1. Append a ``Usage`` record to the session.
2. Log the turn's token counts.
3. Record weighted usage in Redis for rate-limiting.
2. Log the turn's token counts and cost.
3. Record the real generation cost in Redis for rate-limiting.
4. Write a PlatformCostLog entry for admin cost tracking.
This module extracts that common logic so both paths stay in sync.
@@ -19,7 +19,7 @@ from backend.data.db_accessors import platform_cost_db
from backend.data.platform_cost import PlatformCostEntry, usd_to_microdollars
from .model import ChatSession, Usage
from .rate_limit import record_token_usage
from .rate_limit import record_cost_usage
logger = logging.getLogger(__name__)
@@ -96,9 +96,14 @@ async def persist_and_record_usage(
cost_usd: float | str | None = None,
model: str | None = None,
provider: str = "open_router",
model_cost_multiplier: float = 1.0,
) -> int:
"""Persist token usage to session and record for rate limiting.
"""Persist token usage to session and record generation cost for rate limiting.
Rate-limit counters are charged in microdollars against the provider's
reported cost (``cost_usd``), so cache discounts and cross-model pricing
differences are already reflected. When cost is unknown the turn is
logged but the rate-limit counter is left alone — the caller logs an
error at the point the absence is detected.
Args:
session: The chat session to append usage to (may be None on error).
@@ -108,11 +113,11 @@ async def persist_and_record_usage(
cache_read_tokens: Tokens served from prompt cache (Anthropic only).
cache_creation_tokens: Tokens written to prompt cache (Anthropic only).
log_prefix: Prefix for log messages (e.g. "[SDK]", "[Baseline]").
cost_usd: Optional cost for logging (float from SDK, str otherwise).
cost_usd: Real generation cost for the turn (float from SDK or parsed
from OpenRouter usage.cost). ``None`` means the provider did not
report a cost and rate limiting is skipped for this turn.
model: Model identifier for cost log attribution.
provider: Cost provider name (e.g. "anthropic", "open_router").
model_cost_multiplier: Relative model cost factor for rate limiting
(1.0 = Sonnet/default, 5.0 = Opus). Scales the token counter so
more expensive models deplete the rate limit proportionally faster.
Returns:
The computed total_tokens (prompt + completion; cache excluded).
@@ -156,37 +161,51 @@ async def persist_and_record_usage(
else:
logger.info(
f"{log_prefix} Turn usage: prompt={prompt_tokens}, completion={completion_tokens},"
f" total={total_tokens}"
f" total={total_tokens}, cost_usd={cost_usd}"
)
if user_id:
cost_float: float | None = None
if cost_usd is not None:
try:
await record_token_usage(
user_id=user_id,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cache_read_tokens=cache_read_tokens,
cache_creation_tokens=cache_creation_tokens,
model_cost_multiplier=model_cost_multiplier,
val = float(cost_usd)
except (ValueError, TypeError):
logger.error(
"%s cost_usd is not numeric: %r — rate limit skipped",
log_prefix,
cost_usd,
)
except Exception as usage_err:
logger.warning("%s Failed to record token usage: %s", log_prefix, usage_err)
else:
if not math.isfinite(val):
logger.error(
"%s cost_usd is non-finite: %r — rate limit skipped",
log_prefix,
val,
)
elif val < 0:
logger.warning(
"%s cost_usd %s is negative — skipping rate-limit + cost log",
log_prefix,
val,
)
else:
cost_float = val
cost_microdollars = usd_to_microdollars(cost_float)
if user_id and cost_microdollars is not None and cost_microdollars > 0:
# record_cost_usage() owns its fail-open handling for Redis/network
# errors. Don't wrap with a broad except here — unexpected accounting
# bugs should surface instead of being silently logged as warnings.
await record_cost_usage(
user_id=user_id,
cost_microdollars=cost_microdollars,
)
# Log to PlatformCostLog for admin cost dashboard.
# Include entries where cost_usd is set even if token count is 0
# (e.g. fully-cached Anthropic responses where only cache tokens
# accumulate a charge without incrementing total_tokens).
if user_id and (total_tokens > 0 or cost_usd is not None):
cost_float = None
if cost_usd is not None:
try:
val = float(cost_usd)
if math.isfinite(val) and val >= 0:
cost_float = val
except (ValueError, TypeError):
pass
cost_microdollars = usd_to_microdollars(cost_float)
if user_id and (total_tokens > 0 or cost_float is not None):
session_id = session.session_id if session else None
if cost_float is not None:

View File

@@ -37,7 +37,7 @@ class TestTotalTokens:
async def test_returns_prompt_plus_completion(self):
"""total_tokens = prompt + completion (cache excluded from total)."""
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
total = await persist_and_record_usage(
@@ -63,7 +63,7 @@ class TestTotalTokens:
async def test_cache_tokens_excluded_from_total(self):
"""Cache tokens are stored separately and not added to total_tokens."""
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
total = await persist_and_record_usage(
@@ -81,7 +81,7 @@ class TestTotalTokens:
async def test_baseline_path_no_cache(self):
"""Baseline (OpenRouter) path passes no cache tokens; total = prompt + completion."""
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
total = await persist_and_record_usage(
@@ -97,7 +97,7 @@ class TestTotalTokens:
async def test_sdk_path_with_cache(self):
"""SDK (Anthropic) path passes cache tokens; total still = prompt + completion."""
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
total = await persist_and_record_usage(
@@ -123,7 +123,7 @@ class TestSessionPersistence:
async def test_appends_usage_to_session(self):
session = _make_session()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
await persist_and_record_usage(
@@ -144,7 +144,7 @@ class TestSessionPersistence:
async def test_appends_cache_breakdown_to_session(self):
session = _make_session()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
await persist_and_record_usage(
@@ -163,7 +163,7 @@ class TestSessionPersistence:
async def test_multiple_turns_append_multiple_records(self):
session = _make_session()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
await persist_and_record_usage(
@@ -178,7 +178,7 @@ class TestSessionPersistence:
async def test_none_session_does_not_raise(self):
"""When session is None (e.g. error path), no exception should be raised."""
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
):
total = await persist_and_record_usage(
@@ -210,10 +210,11 @@ class TestSessionPersistence:
class TestRateLimitRecording:
@pytest.mark.asyncio
async def test_calls_record_token_usage_when_user_id_present(self):
async def test_calls_record_cost_usage_when_cost_and_user_id_present(self):
"""Rate-limit counter is charged with the real provider cost (microdollars)."""
mock_record = AsyncMock()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new=mock_record,
):
await persist_and_record_usage(
@@ -223,22 +224,35 @@ class TestRateLimitRecording:
completion_tokens=50,
cache_read_tokens=1000,
cache_creation_tokens=200,
cost_usd=0.0123,
)
mock_record.assert_awaited_once_with(
user_id="user-abc",
prompt_tokens=100,
completion_tokens=50,
cache_read_tokens=1000,
cache_creation_tokens=200,
model_cost_multiplier=1.0,
cost_microdollars=12_300,
)
@pytest.mark.asyncio
async def test_skips_record_when_cost_is_missing(self):
"""Without a provider cost we have no authoritative figure to charge."""
mock_record = AsyncMock()
with patch(
"backend.copilot.token_tracking.record_cost_usage",
new=mock_record,
):
await persist_and_record_usage(
session=None,
user_id="user-abc",
prompt_tokens=100,
completion_tokens=50,
)
mock_record.assert_not_awaited()
@pytest.mark.asyncio
async def test_skips_record_when_user_id_is_none(self):
"""Anonymous sessions should not create Redis keys."""
mock_record = AsyncMock()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new=mock_record,
):
await persist_and_record_usage(
@@ -246,32 +260,38 @@ class TestRateLimitRecording:
user_id=None,
prompt_tokens=100,
completion_tokens=50,
cost_usd=0.001,
)
mock_record.assert_not_awaited()
@pytest.mark.asyncio
async def test_record_failure_does_not_raise(self):
"""A Redis error in record_token_usage should be swallowed (fail-open)."""
mock_record = AsyncMock(side_effect=ConnectionError("Redis down"))
async def test_record_usage_bubbles_unexpected_error(self):
"""Unexpected errors from record_cost_usage must propagate.
record_cost_usage() owns its own (RedisError, ConnectionError, OSError)
fail-open handling. Anything else is a real accounting bug and
should not be silently swallowed at this layer.
"""
mock_record = AsyncMock(side_effect=RuntimeError("boom"))
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new=mock_record,
):
# Should not raise
total = await persist_and_record_usage(
session=None,
user_id="user-xyz",
prompt_tokens=100,
completion_tokens=50,
)
assert total == 150
with pytest.raises(RuntimeError, match="boom"):
await persist_and_record_usage(
session=None,
user_id="user-xyz",
prompt_tokens=100,
completion_tokens=50,
cost_usd=0.002,
)
@pytest.mark.asyncio
async def test_skips_record_when_zero_tokens(self):
"""Returns 0 before calling record_token_usage when tokens are zero."""
async def test_skips_record_when_zero_tokens_and_no_cost(self):
"""Returns 0 before calling record_cost_usage when there is nothing to record."""
mock_record = AsyncMock()
with patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new=mock_record,
):
await persist_and_record_usage(
@@ -295,7 +315,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -336,7 +356,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -369,7 +389,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -394,7 +414,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -423,7 +443,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -452,7 +472,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -479,7 +499,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -509,7 +529,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(
@@ -545,7 +565,7 @@ class TestPlatformCostLogging:
mock_log = AsyncMock()
with (
patch(
"backend.copilot.token_tracking.record_token_usage",
"backend.copilot.token_tracking.record_cost_usage",
new_callable=AsyncMock,
),
patch(

View File

@@ -15,7 +15,7 @@ from prisma.enums import (
OnboardingStep,
SubscriptionTier,
)
from prisma.errors import UniqueViolationError
from prisma.errors import PrismaError, UniqueViolationError
from prisma.models import CreditRefundRequest, CreditTransaction, User, UserBalance
from prisma.types import CreditRefundRequestCreateInput, CreditTransactionWhereInput
from pydantic import BaseModel
@@ -1280,6 +1280,12 @@ async def set_subscription_tier(user_id: str, tier: SubscriptionTier) -> None:
from backend.copilot.rate_limit import get_user_tier # local import avoids circular
get_user_tier.cache_delete(user_id) # type: ignore[attr-defined]
# Invalidate the pending-change cache too — an admin tier override or the
# webhook-driven phase transition means any cached pending-change state
# (schedule, cancel_at_period_end) is likely stale. Without this the
# billing page can show a pending change for up to 30s after the tier
# has already flipped.
get_pending_subscription_change.cache_delete(user_id)
async def _cancel_customer_subscriptions(
@@ -1330,6 +1336,21 @@ async def _cancel_customer_subscriptions(
continue
seen_ids.add(sub_id)
if at_period_end:
# Stripe rejects modify(cancel_at_period_end=True) with 400 when a
# Subscription Schedule is attached (e.g. the user previously
# queued a paid→paid downgrade and is now clicking "Cancel").
# Release the schedule first so the cancel flag can be set; the
# schedule's pending phase change is superseded by the cancel.
existing_schedule = sub.schedule
if existing_schedule:
schedule_id = (
existing_schedule
if isinstance(existing_schedule, str)
else existing_schedule.id
)
await _release_schedule_ignoring_terminal(
schedule_id, "_cancel_customer_subscriptions"
)
await run_in_threadpool(
stripe.Subscription.modify, sub_id, cancel_at_period_end=True
)
@@ -1366,6 +1387,8 @@ async def cancel_stripe_subscription(user_id: str) -> bool:
cancelled_count = await _cancel_customer_subscriptions(
customer_id, at_period_end=True
)
if cancelled_count > 0:
get_pending_subscription_change.cache_delete(user_id)
return cancelled_count > 0
except stripe.StripeError:
logger.warning(
@@ -1415,18 +1438,224 @@ async def get_proration_credit_cents(user_id: str, monthly_cost_cents: int) -> i
return 0
# Ordered from least- to most-privileged. Used to distinguish upgrades
# (move right) from downgrades (move left); ENTERPRISE is admin-managed and
# never reached via self-service flows.
# NOTE: _tier_rank() indexes into this tuple, so any new tier must be
# inserted at its correct rank position.
_TIER_ORDER: tuple[SubscriptionTier, ...] = (
    SubscriptionTier.FREE,
    SubscriptionTier.PRO,
    SubscriptionTier.BUSINESS,
    SubscriptionTier.ENTERPRISE,
)
def _tier_rank(tier: SubscriptionTier) -> int:
    """Return *tier*'s position in ``_TIER_ORDER`` (0 = FREE, highest = ENTERPRISE).

    Raises ValueError for a tier not listed in ``_TIER_ORDER``.
    """
    return _TIER_ORDER.index(tier)
def is_tier_upgrade(current: SubscriptionTier, target: SubscriptionTier) -> bool:
    """True when *target* sits strictly above *current* in ``_TIER_ORDER``."""
    return _tier_rank(current) < _tier_rank(target)
def is_tier_downgrade(current: SubscriptionTier, target: SubscriptionTier) -> bool:
    """True when *target* sits strictly below *current* in ``_TIER_ORDER``."""
    return _tier_rank(current) > _tier_rank(target)
class PendingChangeUnknown(Exception):
    """Raised when the pending-change state cannot be determined (e.g. the
    LaunchDarkly price-id lookup failed). Deliberately propagates past the
    ``@cached`` wrapper so the next request retries instead of serving a
    stale ``None`` for the whole TTL window."""
async def _get_active_subscription(customer_id: str) -> stripe.Subscription | None:
    """Look up the customer's current subscription.

    Queries the ``active`` status first, then ``trialing``; returns the
    first match, or None when the customer has neither.
    """
    for sub_status in ("active", "trialing"):
        page = await stripe.Subscription.list_async(
            customer=customer_id, status=sub_status, limit=1
        )
        if page.data:
            return page.data[0]
    return None
# Substrings Stripe uses in InvalidRequestError messages when the schedule is
# already in a terminal state (released / completed / canceled) and therefore
# cannot be released again. We only swallow the error when one of these appears;
# anything else (typo'd schedule id, wrong subscription, 404, etc.) must
# propagate so bugs aren't masked as silent no-ops.
# Matching is case-insensitive: the caller lowercases the Stripe message
# before testing these markers.
_TERMINAL_SCHEDULE_ERROR_SUBSTRINGS = (
    "already been released",
    "already released",
    "already been completed",
    "already completed",
    "already been canceled",
    "already been cancelled",
    "already canceled",
    "already cancelled",
    "is not active",
    "is not in a state",
)
async def _release_schedule_ignoring_terminal(
    schedule_id: str, log_context: str
) -> bool:
    """Release a Stripe schedule, tolerating already-terminal schedules.

    Returns True when the release call succeeded, False when Stripe reports
    the schedule is already in a terminal (released / completed / canceled)
    state. Any other Stripe error — including non-terminal
    InvalidRequestErrors such as typo'd ids or 404s — is re-raised so the
    caller surfaces the failure instead of silently masking a bug.
    """
    try:
        await stripe.SubscriptionSchedule.release_async(schedule_id)
    except stripe.InvalidRequestError as e:
        message = getattr(e, "user_message", None) or str(e)
        lowered = message.lower()
        is_terminal = any(
            marker in lowered for marker in _TERMINAL_SCHEDULE_ERROR_SUBSTRINGS
        )
        if not is_terminal:
            logger.warning(
                "%s: schedule %s release failed with non-terminal"
                " InvalidRequestError (%s); re-raising",
                log_context,
                schedule_id,
                message,
            )
            raise
        logger.warning(
            "%s: schedule %s not releasable (%s); treating as already released",
            log_context,
            schedule_id,
            message,
        )
        return False
    return True
async def _schedule_downgrade_at_period_end(
    sub: stripe.Subscription,
    new_price_id: str,
    user_id: str,
    tier: SubscriptionTier,
) -> None:
    """Create a Subscription Schedule that defers a tier change to period end.

    Stripe's Subscription Schedule drives an existing subscription through a
    series of phases. By keeping the current price for the remainder of the
    billing period and switching to ``new_price_id`` afterwards, the user does
    NOT receive an immediate proration charge and keeps their current tier
    until period end.

    Stripe allows at most one active schedule per subscription and rejects
    ``SubscriptionSchedule.create`` if either (a) a schedule is already
    attached to the subscription or (b) ``cancel_at_period_end=True`` is set.
    Both conditions mean the user is overwriting a pending change they made
    earlier (e.g. BUSINESS→FREE cancel, now switching to BUSINESS→PRO
    downgrade). We clear the conflicting state first so the new schedule can
    be created. These defensive reads serialize through Stripe's own atomic
    operations — by the time modify/release returns, the subscription is in a
    known-clean state for the subsequent create.

    Args:
        sub: The live active/trialing subscription to schedule against.
        new_price_id: Stripe price the subscription switches to at period end.
        user_id: Platform user id, stored in schedule metadata for webhooks.
        tier: Target tier, stored as ``pending_tier`` metadata.

    Raises:
        ValueError: If the subscription has no line items to read a price from.
        stripe.StripeError: Propagated from any failing Stripe call (after a
            best-effort rollback of the half-created schedule).
    """
    sub_id = sub.id
    # ``sub["items"]`` (dict-item) rather than ``sub.items`` because the latter
    # is shadowed by Python's dict.items() method on StripeObject.
    items = sub["items"].data
    if not items:
        raise ValueError(f"Subscription {sub_id} has no items; cannot schedule")
    price = items[0].price
    current_price_id = price if isinstance(price, str) else price.id
    period_start: int = sub["current_period_start"]
    period_end: int = sub["current_period_end"]

    # A pending cancel would make SubscriptionSchedule.create fail; the new
    # scheduled downgrade supersedes it, so clear the flag first.
    if sub.cancel_at_period_end:
        await stripe.Subscription.modify_async(sub_id, cancel_at_period_end=False)
        logger.info(
            "_schedule_downgrade_at_period_end: cleared cancel_at_period_end"
            " on sub %s for user %s before scheduling downgrade",
            sub_id,
            user_id,
        )

    # Likewise, an already-attached schedule (earlier pending change) must be
    # released before a new one can be created.
    if sub.schedule:
        existing_schedule_id = (
            sub.schedule if isinstance(sub.schedule, str) else sub.schedule.id
        )
        await _release_schedule_ignoring_terminal(
            existing_schedule_id, "_schedule_downgrade_at_period_end"
        )

    # Create + modify as a two-step transaction. If modify fails (network,
    # Stripe 500) the created schedule is orphaned AND attached to the
    # subscription, which blocks any future Stripe-side change until manually
    # released. Roll back by releasing the orphan, then re-raise so the caller
    # sees the original failure.
    schedule = await stripe.SubscriptionSchedule.create_async(from_subscription=sub_id)
    try:
        await stripe.SubscriptionSchedule.modify_async(
            schedule.id,
            phases=[
                {
                    # Phase 1: keep the current price until the paid-for
                    # period ends; no proration either way.
                    "items": [{"price": current_price_id, "quantity": 1}],
                    "start_date": period_start,
                    "end_date": period_end,
                    "proration_behavior": "none",
                },
                {
                    # Phase 2: switch to the downgraded price at period end.
                    "items": [{"price": new_price_id, "quantity": 1}],
                    "proration_behavior": "none",
                },
            ],
            metadata={"user_id": user_id, "pending_tier": tier.value},
        )
    except stripe.StripeError:
        logger.exception(
            "_schedule_downgrade_at_period_end: modify failed for schedule %s"
            " on sub %s user %s; attempting rollback release",
            schedule.id,
            sub_id,
            user_id,
        )
        try:
            await _release_schedule_ignoring_terminal(
                schedule.id, "_schedule_downgrade_at_period_end_rollback"
            )
        except stripe.StripeError:
            logger.exception(
                "_schedule_downgrade_at_period_end: rollback release also failed"
                " for orphaned schedule %s on sub %s user %s; manual cleanup"
                " required",
                schedule.id,
                sub_id,
                user_id,
            )
        raise
    # BUGFIX: the old format string ("... for user %s%s at %d") glued user_id
    # and tier together with no separator, and the prefix named the wrong
    # function. Label each field explicitly instead.
    logger.info(
        "_schedule_downgrade_at_period_end: scheduled sub %s downgrade"
        " for user %s to tier %s at %d",
        sub_id,
        user_id,
        tier,
        period_end,
    )
async def modify_stripe_subscription_for_tier(
user_id: str, tier: SubscriptionTier
) -> bool:
"""Modify an existing Stripe subscription to a new paid tier using proration.
"""Change a Stripe subscription to a new paid tier.
For paid→paid tier changes (e.g. PROBUSINESS), modifying the existing
subscription is preferable to cancelling + creating a new one via Checkout:
Stripe handles proration automatically, crediting unused time on the old plan
and charging the pro-rated amount for the new plan in the same billing cycle.
Upgrades (e.g. PROBUSINESS) apply immediately via ``stripe.Subscription.modify``
with ``proration_behavior="create_prorations"``: Stripe credits unused time on
the old plan and charges the pro-rated amount for the new plan in the same
billing cycle.
Downgrades (e.g. BUSINESS→PRO) are deferred to the end of the current billing
period via a Stripe Subscription Schedule: the user keeps their current tier
for the time they already paid for, and the new tier takes effect when the
next invoice is generated. The DB tier flip happens via the webhook fired
when the schedule advances to its next phase.
Returns:
True — a subscription was found and modified successfully.
True — a subscription was found and modified/scheduled successfully.
False — no active/trialing subscription exists (e.g. admin-granted tier or
first-time paid signup); caller should fall back to Checkout.
@@ -1437,41 +1666,262 @@ async def modify_stripe_subscription_for_tier(
if not price_id:
raise ValueError(f"No Stripe price ID configured for tier {tier}")
# Guard: only proceed if the user already has a Stripe customer ID. Calling
# get_stripe_customer_id for a user with no Stripe record (e.g. admin-granted tier)
# would create an orphaned customer object if the subsequent Subscription.list call
# fails. Return False early so the API layer falls back to Checkout instead.
user = await get_user_by_id(user_id)
if not user.stripe_customer_id:
return False
current_tier = user.subscription_tier or SubscriptionTier.FREE
customer_id = user.stripe_customer_id
for status in ("active", "trialing"):
subscriptions = await run_in_threadpool(
stripe.Subscription.list, customer=customer_id, status=status, limit=1
)
if not subscriptions.data:
continue
sub = subscriptions.data[0]
sub_id = sub["id"]
items = sub.get("items", {}).get("data", [])
if not items:
continue
item_id = items[0]["id"]
await run_in_threadpool(
stripe.Subscription.modify,
sub_id,
items=[{"id": item_id, "price": price_id}],
proration_behavior="create_prorations",
)
sub = await _get_active_subscription(user.stripe_customer_id)
if sub is None:
return False
items = sub["items"].data
if not items:
return False
sub_id = sub.id
# Invalidate the cache unconditionally on exit (success OR failure): any
# Stripe mutation below — clearing cancel_at_period_end, releasing an old
# schedule, creating a new one — may have landed partially before an error
# was raised, and the cached pending-change state would otherwise go stale
# for up to 30s until the TTL expires.
try:
if is_tier_downgrade(current_tier, tier):
await _schedule_downgrade_at_period_end(sub, price_id, user_id, tier)
return True
# Upgrade path. If a schedule is attached from a previous pending
# downgrade, release it first — an upgrade expresses the user's
# intent to be on this tier immediately, which overrides any pending
# deferred change. Ignore terminal-state errors from release.
if sub.schedule:
existing_schedule_id = (
sub.schedule if isinstance(sub.schedule, str) else sub.schedule.id
)
await _release_schedule_ignoring_terminal(
existing_schedule_id, "modify_stripe_subscription_for_tier"
)
# If a paid→FREE cancel is pending (cancel_at_period_end=True), clear it
# as part of the upgrade — the user is explicitly choosing to stay on a
# paid tier. Without this, the sub would be upgraded AND still cancelled
# at period end, leaving a confusing dual state.
modify_kwargs: dict = {
"items": [{"id": items[0].id, "price": price_id}],
"proration_behavior": "create_prorations",
}
if sub.cancel_at_period_end:
modify_kwargs["cancel_at_period_end"] = False
await stripe.Subscription.modify_async(sub_id, **modify_kwargs)
# Flip the DB tier immediately. The customer.subscription.updated webhook
# will also fire and set it again — idempotent. Without this synchronous
# update, the UI refetches before the webhook lands and shows the old
# tier, making the upgrade look like a no-op to the user.
#
# Swallow DB-write exceptions here: Stripe is authoritative and the
# modify above already succeeded (the user has been charged). If the
# DB write fails and we re-raised, the API would return 5xx and the UI
# would surface a failed upgrade to a user who was already charged.
# The customer.subscription.updated webhook will reconcile the DB shortly.
#
# Only catch actual DB/connection failures — letting KeyError,
# AttributeError etc. propagate so programming errors surface in Sentry
# instead of being silently masked as benign DB-write-swallow events.
try:
await set_subscription_tier(user_id, tier)
except (PrismaError, ConnectionError, asyncio.TimeoutError):
logger.exception(
"modify_stripe_subscription_for_tier: Stripe modify on sub %s"
" succeeded for user %s%s but DB tier flip failed; webhook"
" will reconcile",
sub_id,
user_id,
tier,
)
logger.info(
"modify_stripe_subscription_for_tier: modified sub %s for user %s%s",
"modify_stripe_subscription_for_tier: upgraded sub %s for user %s%s",
sub_id,
user_id,
tier,
)
return True
return False
finally:
get_pending_subscription_change.cache_delete(user_id)
async def release_pending_subscription_schedule(user_id: str) -> bool:
    """Cancel any pending subscription change (scheduled downgrade or cancellation).

    Two pending-change mechanisms can be attached to a Stripe subscription:

    - **Subscription Schedule** (paid→paid downgrade): ``stripe.SubscriptionSchedule.release``
      detaches the schedule and lets the subscription continue on its current
      phase's price.
    - **cancel_at_period_end=True** (paid→FREE cancel): clearing that flag via
      ``stripe.Subscription.modify`` keeps the subscription active indefinitely.

    Args:
        user_id: Internal user id; resolved to a Stripe customer via the DB.

    Returns:
        True if a pending change was found and reverted, False otherwise.

    Raises:
        stripe.StripeError: re-raised when clearing ``cancel_at_period_end``
            fails (after logging a partial-release warning if the schedule was
            already released in the same call).
    """
    user = await get_user_by_id(user_id)
    if not user.stripe_customer_id:
        # No Stripe customer record (e.g. admin-granted tier): nothing to do.
        return False

    sub = await _get_active_subscription(user.stripe_customer_id)
    if sub is None:
        return False

    sub_id = sub.id
    did_anything = False
    schedule_released = False
    schedule_id: str | None = None
    try:
        if sub.schedule:
            # `schedule` may arrive expanded (object) or as a bare id string.
            schedule_id = (
                sub.schedule if isinstance(sub.schedule, str) else sub.schedule.id
            )
            schedule_released = await _release_schedule_ignoring_terminal(
                schedule_id, "release_pending_subscription_schedule"
            )
            if schedule_released:
                logger.info(
                    "release_pending_subscription_schedule: released schedule %s for user %s",
                    schedule_id,
                    user_id,
                )
                did_anything = True
        if sub.cancel_at_period_end:
            try:
                await stripe.Subscription.modify_async(
                    sub_id, cancel_at_period_end=False
                )
            except stripe.StripeError:
                # Partial success: the schedule release above already landed but
                # the cancel flag could not be cleared — surface this loudly
                # before re-raising so an operator can reconcile manually.
                if schedule_released:
                    logger.exception(
                        "release_pending_subscription_schedule: partial release"
                        " — schedule %s released but cancel_at_period_end clear"
                        " failed on sub %s for user %s; manual reconciliation"
                        " may be needed",
                        schedule_id,
                        sub_id,
                        user_id,
                    )
                raise
            did_anything = True
            logger.info(
                "release_pending_subscription_schedule: cleared cancel_at_period_end"
                " on sub %s for user %s",
                sub_id,
                user_id,
            )
    finally:
        # Bust the cached pending-change view even on partial failure, so the
        # UI never shows a stale pending badge after a user-visible action.
        if did_anything:
            get_pending_subscription_change.cache_delete(user_id)
    return did_anything
@cached(ttl_seconds=30, maxsize=512, cache_none=True, shared_cache=True)
async def get_pending_subscription_change(
    user_id: str,
) -> tuple[SubscriptionTier, datetime] | None:
    """Return ``(pending_tier, effective_at)`` when a change is queued, else ``None``.

    Reflects both Subscription Schedule phase transitions (paid→paid downgrade)
    and ``cancel_at_period_end=True`` (paid→FREE cancel).

    Cached for 30 seconds per user_id. *Why the cache exists:* this function
    runs on every dashboard/home fetch and would otherwise fire
    2× Subscription.list + 1× Schedule.retrieve per page load. A busy user
    polling the billing page would quickly brush up against Stripe's per-API
    rate limits; the 30s TTL absorbs dashboard polling while being short
    enough that the UI reconciles quickly after a downgrade / cancel action.

    *Invalidation contract.* Every call-site that mutates Stripe state which
    could change the pending-change answer MUST call
    ``get_pending_subscription_change.cache_delete(user_id)`` so the UI never
    shows a stale pending badge after a user-visible action. Current
    invalidators (keep this list in sync when adding new mutators):

    - ``set_subscription_tier`` — admin or webhook-driven tier flip.
    - ``modify_stripe_subscription_for_tier`` — ``finally`` block (covers
      upgrade path clear + downgrade-schedule create + any partial failure).
    - ``release_pending_subscription_schedule`` — ``finally`` block when a
      schedule release OR ``cancel_at_period_end`` clear succeeded.
    - ``cancel_stripe_subscription`` — after scheduling period-end cancel.
    - ``sync_subscription_from_stripe`` — webhook entry point.
    - ``set_user_tier`` (``backend.copilot.rate_limit``) — admin tier override
      invalidates any cached pending state keyed off the old tier.

    Raises:
        PendingChangeUnknown: when neither PRO nor BUSINESS price ids could be
            resolved — raised (instead of returning None) deliberately so the
            failure is NOT cached by ``cache_none=True`` and the next request
            retries fresh.
    """
    user = await get_user_by_id(user_id)
    if not user.stripe_customer_id:
        # Short-circuit for users with no Stripe customer (admin-granted tiers,
        # FREE-only users): skip the Stripe API calls entirely.
        return None

    pro_price, biz_price = await asyncio.gather(
        get_subscription_price_id(SubscriptionTier.PRO),
        get_subscription_price_id(SubscriptionTier.BUSINESS),
    )
    price_to_tier: dict[str, SubscriptionTier] = {}
    if pro_price:
        price_to_tier[pro_price] = SubscriptionTier.PRO
    if biz_price:
        price_to_tier[biz_price] = SubscriptionTier.BUSINESS
    if not price_to_tier:
        logger.warning(
            "get_pending_subscription_change: no Stripe price IDs resolvable for"
            " PRO/BUSINESS (LaunchDarkly fetch failed?); raising to bypass the"
            " None cache so the next request retries fresh"
        )
        raise PendingChangeUnknown(
            "Stripe price lookup failed; pending-change state cannot be determined"
        )

    sub = await _get_active_subscription(user.stripe_customer_id)
    if sub is None:
        return None

    period_end = sub.current_period_end
    if not isinstance(period_end, int):
        # Defensive: Stripe should always send an int epoch here; treat a
        # malformed value as "no pending change" rather than crashing the UI.
        return None
    effective_at = datetime.fromtimestamp(period_end, tz=timezone.utc)

    # A pending cancel (paid→FREE) takes precedence over any schedule.
    if sub.cancel_at_period_end:
        return SubscriptionTier.FREE, effective_at

    if not sub.schedule:
        return None
    # `schedule` may arrive expanded (object) or as a bare id string.
    schedule_id = sub.schedule if isinstance(sub.schedule, str) else sub.schedule.id
    schedule = await stripe.SubscriptionSchedule.retrieve_async(schedule_id)
    return _next_phase_tier_and_start(schedule, price_to_tier)
def _next_phase_tier_and_start(
    schedule: stripe.SubscriptionSchedule,
    price_to_tier: dict[str, SubscriptionTier],
) -> tuple[SubscriptionTier, datetime] | None:
    """Return (tier, start_datetime) of the phase that follows the active one.

    Using the phase's own ``start_date`` (not the subscription's current_period_end)
    is correct even for schedules created outside this flow — a dashboard-authored
    schedule can have phase transitions at arbitrary timestamps.

    Args:
        schedule: The Stripe Subscription Schedule whose phases are scanned.
        price_to_tier: Map of known Stripe price ids to subscription tiers.

    Returns:
        ``(tier, start)`` for the first future phase whose price id is known,
        or ``None`` when no future phase maps to a known tier.
    """
    now = int(time.time())
    for phase in schedule.phases or []:
        # Skip the currently-active phase (start in the past) and any phase
        # whose start_date is missing/non-integer.
        if not isinstance(phase.start_date, int) or phase.start_date <= now:
            continue
        # ``phase["items"]`` because ``phase.items`` is shadowed by dict.items().
        items = phase["items"] or []
        if not items:
            continue
        price = items[0].price
        price_id = price if isinstance(price, str) else price.id
        if price_id in price_to_tier:
            return price_to_tier[price_id], datetime.fromtimestamp(
                phase.start_date, tz=timezone.utc
            )
        # Unknown price on a future phase (e.g. a dashboard-authored schedule
        # with a price we don't sell): log and keep scanning later phases.
        logger.warning(
            "next_phase_tier_and_start: unknown price %s on schedule %s",
            price_id,
            schedule.id,
        )
    return None
async def get_auto_top_up(user_id: str) -> AutoTopUpConfig:
@@ -1732,6 +2182,50 @@ async def sync_subscription_from_stripe(stripe_subscription: dict) -> None:
# cancel the old sub.
await _cleanup_stale_subscriptions(customer_id, new_sub_id)
await set_subscription_tier(user.id, tier)
# Tier changed — bust any cached pending-change view so the next
# dashboard fetch reflects the new state immediately.
get_pending_subscription_change.cache_delete(user.id)
async def sync_subscription_schedule_from_stripe(stripe_schedule: dict) -> None:
    """Sync the DB tier from a ``subscription_schedule.*`` webhook event.

    Stripe fires ``subscription_schedule.released`` / ``.completed`` /
    ``.updated`` when a schedule advances phases or is detached. The regular
    ``customer.subscription.updated`` webhook with the new price covers the
    phase transition in most cases, but listening to schedule events is a
    safety net that also catches releases done via the Stripe dashboard.

    The schedule payload doesn't carry the active price directly — it carries
    a ``subscription`` id that we look up to get the current item. Both event
    sources funnel through ``sync_subscription_from_stripe``, which is
    idempotent, so out-of-order webhook deliveries converge to the same state.

    Args:
        stripe_schedule: Raw ``subscription_schedule`` object from the webhook.
    """
    # On release Stripe clears `subscription` and moves the id to
    # `released_subscription`; check both so `.released` events — the main
    # reason we listen to schedule webhooks — are still processed.
    subscription_id = (
        stripe_schedule.get("subscription")
        or stripe_schedule.get("released_subscription")
    )
    if not (isinstance(subscription_id, str) and subscription_id):
        logger.warning(
            "sync_subscription_schedule_from_stripe: no 'subscription' id; skipping"
        )
        return

    try:
        subscription = await stripe.Subscription.retrieve_async(subscription_id)
    except stripe.StripeError:
        logger.warning(
            "sync_subscription_schedule_from_stripe: failed to retrieve sub %s",
            subscription_id,
        )
        return

    await sync_subscription_from_stripe(dict(subscription))
async def handle_subscription_payment_failure(invoice: dict) -> None:

View File

@@ -4,7 +4,7 @@ import asyncio
import logging
import threading
import time
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any, cast
if TYPE_CHECKING:
from redis import Redis
@@ -12,6 +12,17 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# Lua CAS release: only delete the key if the stored value still matches our
# owner_id. Returns 1 on delete, 0 on no-op. This makes release() safe against
# the race where an external caller (e.g. mark_session_completed's force-release)
# deletes our key and a new owner acquires it before our release() fires — without
# the CAS guard, release() would wipe the successor's valid lock.
_RELEASE_LUA = (
"if redis.call('get', KEYS[1]) == ARGV[1] then "
"return redis.call('del', KEYS[1]) "
"else return 0 end"
)
class ClusterLock:
"""Simple Redis-based distributed lock for preventing duplicate execution."""
@@ -116,13 +127,18 @@ class ClusterLock:
return False
def release(self):
"""Release the lock."""
"""Release the lock.
Owner-checked: only deletes the Redis key if the stored value still
matches our owner_id. Prevents wiping a successor's lock when the
original key was force-released externally and re-acquired.
"""
with self._refresh_lock:
if self._last_refresh == 0:
return
try:
self.redis.delete(self.key)
self.redis.eval(_RELEASE_LUA, 1, self.key, self.owner_id)
except Exception:
pass
@@ -237,13 +253,18 @@ class AsyncClusterLock:
return False
async def release(self):
"""Release the lock."""
"""Release the lock.
Owner-checked: only deletes the Redis key if the stored value still
matches our owner_id. Prevents wiping a successor's lock when the
original key was force-released externally and re-acquired.
"""
async with self._refresh_lock:
if self._last_refresh == 0:
return
try:
await self.redis.delete(self.key)
await cast(Any, self.redis.eval(_RELEASE_LUA, 1, self.key, self.owner_id))
except Exception:
pass

View File

@@ -108,6 +108,33 @@ class TestClusterLockBasic:
new_lock = ClusterLock(redis_client, lock_key, new_owner_id, timeout=60)
assert new_lock.try_acquire() == new_owner_id
def test_release_does_not_wipe_successor_lock(self, redis_client, lock_key):
"""Releasing after external delete+reacquire must NOT delete successor.
Race: an external caller force-deletes the lock key, a new owner
acquires it, then the original ClusterLock.release() runs. Owner-checked
release must leave the successor's key intact.
"""
owner_a = str(uuid.uuid4())
owner_b = str(uuid.uuid4())
lock_a = ClusterLock(redis_client, lock_key, owner_a, timeout=60)
assert lock_a.try_acquire() == owner_a
# External force-release (e.g. mark_session_completed).
redis_client.delete(lock_key)
# Successor acquires the same key.
lock_b = ClusterLock(redis_client, lock_key, owner_b, timeout=60)
assert lock_b.try_acquire() == owner_b
# Original releases — must be a no-op on Redis because value != owner_a.
lock_a.release()
# Successor's lock is still intact.
assert redis_client.exists(lock_key) == 1
assert redis_client.get(lock_key).decode("utf-8") == owner_b
class TestClusterLockRefresh:
"""Lock refresh and TTL management."""

View File

@@ -42,8 +42,8 @@ class Flag(str, Enum):
CHAT = "chat"
CHAT_MODE_OPTION = "chat-mode-option"
COPILOT_SDK = "copilot-sdk"
COPILOT_DAILY_TOKEN_LIMIT = "copilot-daily-token-limit"
COPILOT_WEEKLY_TOKEN_LIMIT = "copilot-weekly-token-limit"
COPILOT_DAILY_COST_LIMIT = "copilot-daily-cost-limit-microdollars"
COPILOT_WEEKLY_COST_LIMIT = "copilot-weekly-cost-limit-microdollars"
STRIPE_PRICE_PRO = "stripe-price-id-pro"
STRIPE_PRICE_BUSINESS = "stripe-price-id-business"
GRAPHITI_MEMORY = "graphiti-memory"

View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
[[package]]
name = "agentmail"
@@ -909,18 +909,18 @@ files = [
[[package]]
name = "claude-agent-sdk"
version = "0.1.58"
version = "0.1.64"
description = "Python SDK for Claude Code"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_arm64.whl", hash = "sha256:69197950809754c4f06bba8261f2d99c3f9605b6cc1c13d3409d0eb82fb4ee64"},
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:75d60883fc5e2070bccd8d9b19505fe16af8e049120c03821e9dc8c826cca434"},
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:7bf4eb0f00ec944a7b63eb94788f120dfb0460c348a525235c7d6641805acc1d"},
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:650d298a3d3c0dcdde4b5f1dbf52f472ff0b0ec82987b27ffa2a4e0e72928408"},
{file = "claude_agent_sdk-0.1.58-py3-none-win_amd64.whl", hash = "sha256:2c2130a7ffe06ed4f88d56b217a5091c91c9bcb1a69cfd94d5dcf0d2946d8c55"},
{file = "claude_agent_sdk-0.1.58.tar.gz", hash = "sha256:77bee8fd60be033cb870def46c2ab1625a512fa8a3de4ff8d766664ffb16d6a6"},
{file = "claude_agent_sdk-0.1.64-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4cf47a9e40c0a683a05afff4fac1e3d5ea7965b1e9f72a8e266c8d2efbf65904"},
{file = "claude_agent_sdk-0.1.64-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:7fe765c6482c74bc6b0b4491ad3bddd1349c25f4cdf4483191c68ea9c1336825"},
{file = "claude_agent_sdk-0.1.64-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:605eebf46e7590e4f878572c2743954fba3f3530dfd99e10ff3b8b41a9fee757"},
{file = "claude_agent_sdk-0.1.64-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:bbb1373ee0b4494e2db24aa10d312d22b86895b4b8f18eb5b58f99f14d827237"},
{file = "claude_agent_sdk-0.1.64-py3-none-win_amd64.whl", hash = "sha256:453fa251e2a4aeed580c72d4c7b2cb98fc8d8d26012798126f5cb11a9829cd71"},
{file = "claude_agent_sdk-0.1.64.tar.gz", hash = "sha256:147e513cb45095b57c37d74b8d01dd41b5f3ec7f70e408edce43a6590159c27d"},
]
[package.dependencies]
@@ -930,6 +930,8 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["anyio[trio] (>=4.0.0)", "mypy (>=1.0.0)", "pytest (>=7.0.0)", "pytest-asyncio (>=0.20.0)", "pytest-cov (>=4.0.0)", "ruff (>=0.1.0)"]
examples = ["asyncpg (>=0.27.0)", "boto3 (>=1.28.0)", "fakeredis (>=2.20.0)", "moto[s3] (>=5.0.0)", "redis (>=4.2.0)"]
otel = ["opentelemetry-api (>=1.20.0)"]
[[package]]
name = "cleo"
@@ -8929,4 +8931,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "c4cc6a0a26869a167ce182b178224554135d89d8ffa4605257d17b3f495cdf59"
content-hash = "529e1acbb1213421ef617f9dab309787cf81ea5d787eeffebc1bd38a42daf976"

View File

@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
cachetools = "^5.5.0"
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
claude-agent-sdk = "^0.1.64" # bundled CLI 2.1.116 -- 2.1.98+ fixes the --resume + excludeDynamicSections crash that used to force a per-turn 33K cache write. CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
click = "^8.2.0"
cryptography = "^46.0"
discord-py = "^2.5.2"

View File

@@ -1,9 +1,9 @@
{
"daily_token_limit": 2500000,
"daily_tokens_used": 500000,
"daily_cost_limit_microdollars": 2500000,
"daily_cost_used_microdollars": 500000,
"tier": "FREE",
"user_email": "target@example.com",
"user_id": "5e53486c-cf57-477e-ba2a-cb02dc828e1c",
"weekly_token_limit": 12500000,
"weekly_tokens_used": 3000000
"weekly_cost_limit_microdollars": 12500000,
"weekly_cost_used_microdollars": 3000000
}

View File

@@ -1,9 +1,9 @@
{
"daily_token_limit": 2500000,
"daily_tokens_used": 0,
"daily_cost_limit_microdollars": 2500000,
"daily_cost_used_microdollars": 0,
"tier": "FREE",
"user_email": "target@example.com",
"user_id": "5e53486c-cf57-477e-ba2a-cb02dc828e1c",
"weekly_token_limit": 12500000,
"weekly_tokens_used": 0
"weekly_cost_limit_microdollars": 12500000,
"weekly_cost_used_microdollars": 0
}

View File

@@ -1,9 +1,9 @@
{
"daily_token_limit": 2500000,
"daily_tokens_used": 0,
"daily_cost_limit_microdollars": 2500000,
"daily_cost_used_microdollars": 0,
"tier": "FREE",
"user_email": "target@example.com",
"user_id": "5e53486c-cf57-477e-ba2a-cb02dc828e1c",
"weekly_token_limit": 12500000,
"weekly_tokens_used": 3000000
"weekly_cost_limit_microdollars": 12500000,
"weekly_cost_used_microdollars": 3000000
}

View File

@@ -1,10 +1,6 @@
"use client";
export function formatTokens(tokens: number): string {
if (tokens >= 1_000_000) return `${(tokens / 1_000_000).toFixed(1)}M`;
if (tokens >= 1_000) return `${(tokens / 1_000).toFixed(0)}K`;
return tokens.toString();
}
import { formatMicrodollarsAsUsd } from "@/app/(platform)/copilot/components/usageHelpers";
export function UsageBar({ used, limit }: { used: number; limit: number }) {
if (limit === 0) {
@@ -17,8 +13,8 @@ export function UsageBar({ used, limit }: { used: number; limit: number }) {
return (
<div className="space-y-1">
<div className="flex justify-between text-sm">
<span>{formatTokens(used)} used</span>
<span>{formatTokens(limit)} limit</span>
<span>{formatMicrodollarsAsUsd(used)} spent</span>
<span>{formatMicrodollarsAsUsd(limit)} limit</span>
</div>
<div className="h-2 w-full rounded-full bg-gray-200">
<div

View File

@@ -0,0 +1,31 @@
import { render, screen } from "@/tests/integrations/test-utils";
import { describe, expect, it } from "vitest";
import { UsageBar } from "../UsageBar";
describe("UsageBar", () => {
it('renders "Unlimited" when limit is 0', () => {
render(<UsageBar used={100} limit={0} />);
expect(screen.getByText("Unlimited")).toBeDefined();
});
it("renders spent + limit in USD", () => {
render(<UsageBar used={1_500_000} limit={10_000_000} />);
expect(screen.getByText("$1.50 spent")).toBeDefined();
expect(screen.getByText("$10.00 limit")).toBeDefined();
});
it("renders the computed percentage", () => {
render(<UsageBar used={500_000} limit={1_000_000} />);
expect(screen.getByText("50.0% used")).toBeDefined();
});
it("clamps percentage at 100% when over limit", () => {
render(<UsageBar used={2_000_000} limit={1_000_000} />);
expect(screen.getByText("100.0% used")).toBeDefined();
});
it("clamps percentage at 0% for negative used", () => {
render(<UsageBar used={-100} limit={1_000_000} />);
expect(screen.getByText("0.0% used")).toBeDefined();
});
});

View File

@@ -88,8 +88,9 @@ export function RateLimitDisplay({
}
const nothingToReset = resetWeekly
? data.daily_tokens_used === 0 && data.weekly_tokens_used === 0
: data.daily_tokens_used === 0;
? data.daily_cost_used_microdollars === 0 &&
data.weekly_cost_used_microdollars === 0
: data.daily_cost_used_microdollars === 0;
return (
<div className={className ?? "rounded-md border bg-white p-6"}>
@@ -133,17 +134,17 @@ export function RateLimitDisplay({
<div className="grid grid-cols-2 gap-6">
<div className="space-y-2">
<h3 className="text-sm font-medium text-gray-700">Daily Usage</h3>
<h3 className="text-sm font-medium text-gray-700">Daily Spend</h3>
<UsageBar
used={data.daily_tokens_used}
limit={data.daily_token_limit}
used={data.daily_cost_used_microdollars}
limit={data.daily_cost_limit_microdollars}
/>
</div>
<div className="space-y-2">
<h3 className="text-sm font-medium text-gray-700">Weekly Usage</h3>
<h3 className="text-sm font-medium text-gray-700">Weekly Spend</h3>
<UsageBar
used={data.weekly_tokens_used}
limit={data.weekly_token_limit}
used={data.weekly_cost_used_microdollars}
limit={data.weekly_cost_limit_microdollars}
/>
</div>
</div>

View File

@@ -30,10 +30,10 @@ function makeData(
return {
user_id: "user-abc-123",
user_email: "alice@example.com",
daily_token_limit: 10000,
weekly_token_limit: 50000,
daily_tokens_used: 2500,
weekly_tokens_used: 10000,
daily_cost_limit_microdollars: 10_000_000,
weekly_cost_limit_microdollars: 50_000_000,
daily_cost_used_microdollars: 2_500_000,
weekly_cost_used_microdollars: 10_000_000,
tier: "FREE",
...overrides,
};
@@ -113,8 +113,8 @@ describe("RateLimitDisplay", () => {
it("renders daily and weekly usage sections", () => {
render(<RateLimitDisplay data={makeData()} onReset={vi.fn()} />);
expect(screen.getByText("Daily Usage")).toBeDefined();
expect(screen.getByText("Weekly Usage")).toBeDefined();
expect(screen.getByText("Daily Spend")).toBeDefined();
expect(screen.getByText("Weekly Spend")).toBeDefined();
});
it("renders reset scope dropdown and reset button", () => {
@@ -126,7 +126,7 @@ describe("RateLimitDisplay", () => {
it("disables reset button when nothing to reset", () => {
render(
<RateLimitDisplay
data={makeData({ daily_tokens_used: 0 })}
data={makeData({ daily_cost_used_microdollars: 0 })}
onReset={vi.fn()}
/>,
);
@@ -137,7 +137,7 @@ describe("RateLimitDisplay", () => {
it("enables reset button when there is usage to reset", () => {
render(
<RateLimitDisplay
data={makeData({ daily_tokens_used: 100 })}
data={makeData({ daily_cost_used_microdollars: 100_000 })}
onReset={vi.fn()}
/>,
);
@@ -174,7 +174,7 @@ describe("RateLimitDisplay", () => {
render(
<RateLimitDisplay
data={makeData({ weekly_tokens_used: 100 })}
data={makeData({ weekly_cost_used_microdollars: 100_000 })}
onReset={onReset}
/>,
);

View File

@@ -174,10 +174,10 @@ describe("RateLimitManager", () => {
rateLimitData: {
user_id: "user-123",
user_email: "alice@example.com",
daily_token_limit: 10000,
weekly_token_limit: 50000,
daily_tokens_used: 2500,
weekly_tokens_used: 10000,
daily_cost_limit_microdollars: 10_000_000,
weekly_cost_limit_microdollars: 50_000_000,
daily_cost_used_microdollars: 2_500_000,
weekly_cost_used_microdollars: 10_000_000,
tier: "FREE",
},
});
@@ -197,10 +197,10 @@ describe("RateLimitManager", () => {
rateLimitData: {
user_id: "user-123",
user_email: "alice@example.com",
daily_token_limit: 10000,
weekly_token_limit: 50000,
daily_tokens_used: 2500,
weekly_tokens_used: 10000,
daily_cost_limit_microdollars: 10_000_000,
weekly_cost_limit_microdollars: 50_000_000,
daily_cost_used_microdollars: 2_500_000,
weekly_cost_used_microdollars: 10_000_000,
tier: "FREE",
},
});

View File

@@ -28,10 +28,10 @@ function makeRateLimitResponse(overrides = {}) {
return {
user_id: "user-123",
user_email: "alice@example.com",
daily_token_limit: 10000,
weekly_token_limit: 50000,
daily_tokens_used: 2500,
weekly_tokens_used: 10000,
daily_cost_limit_microdollars: 10_000_000,
weekly_cost_limit_microdollars: 50_000_000,
daily_cost_used_microdollars: 2_500_000,
weekly_cost_used_microdollars: 10_000_000,
tier: "FREE",
...overrides,
};
@@ -229,8 +229,12 @@ describe("useRateLimitManager", () => {
});
it("handleReset calls reset endpoint and updates data", async () => {
const initial = makeRateLimitResponse({ daily_tokens_used: 5000 });
const after = makeRateLimitResponse({ daily_tokens_used: 0 });
const initial = makeRateLimitResponse({
daily_cost_used_microdollars: 5_000_000,
});
const after = makeRateLimitResponse({
daily_cost_used_microdollars: 0,
});
mockGetV2GetUserRateLimit.mockResolvedValue({ status: 200, data: initial });
mockPostV2ResetUserRateLimitUsage.mockResolvedValue({
status: 200,
@@ -338,7 +342,9 @@ describe("useRateLimitManager", () => {
});
it("handleReset throws when endpoint returns non-200 status", async () => {
const initial = makeRateLimitResponse({ daily_tokens_used: 5000 });
const initial = makeRateLimitResponse({
daily_cost_used_microdollars: 5_000_000,
});
mockGetV2GetUserRateLimit.mockResolvedValue({ status: 200, data: initial });
mockPostV2ResetUserRateLimitUsage.mockResolvedValue({ status: 500 });

View File

@@ -1,6 +1,6 @@
"use client";
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
import { toast } from "@/components/molecules/Toast/use-toast";
import useCredits from "@/hooks/useCredits";
@@ -125,7 +125,7 @@ export function CopilotPage() {
isError: usageError,
} = useGetV2GetCopilotUsage({
query: {
select: (res) => res.data as CoPilotUsageStatus,
select: (res) => res.data as CoPilotUsagePublic,
refetchInterval: 30000,
staleTime: 10000,
},
@@ -258,9 +258,7 @@ export function CopilotPage() {
resetCost={resetCost ?? 0}
resetMessage={rateLimitMessage ?? ""}
isWeeklyExhausted={
hasUsage &&
usage.weekly.limit > 0 &&
usage.weekly.used >= usage.weekly.limit
hasUsage && !!usage.weekly && usage.weekly.percent_used >= 100
}
hasInsufficientCredits={hasInsufficientCredits}
isBillingEnabled={isBillingEnabled}

View File

@@ -39,13 +39,23 @@ vi.mock("@/components/ui/sidebar", () => ({
),
}));
// Mock hooks that hit the network
// Mock hooks that hit the network. Exercise the `select` callback so its
// line counts as covered alongside the rest of the options.
vi.mock("@/app/api/__generated__/endpoints/chat/chat", () => ({
useGetV2GetCopilotUsage: () => ({
data: undefined,
isSuccess: false,
isError: false,
}),
useGetV2GetCopilotUsage: (opts: {
query?: { select?: (r: { data: unknown }) => unknown };
}) => {
const data = {
daily: null,
weekly: null,
tier: "FREE",
reset_cost: 0,
};
if (typeof opts?.query?.select === "function") {
opts.query.select({ data });
}
return { data: undefined, isSuccess: false, isError: false };
},
}));
vi.mock("@/hooks/useCredits", () => ({
default: () => ({ credits: null, fetchCredits: vi.fn() }),

View File

@@ -1,4 +1,4 @@
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
import useCredits from "@/hooks/useCredits";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
@@ -14,9 +14,9 @@ import { UsagePanelContent } from "./UsagePanelContent";
export { UsagePanelContent, formatResetTime } from "./UsagePanelContent";
export function UsageLimits() {
const { data: usage, isLoading } = useGetV2GetCopilotUsage({
const { data: usage, isSuccess } = useGetV2GetCopilotUsage({
query: {
select: (res) => res.data as CoPilotUsageStatus,
select: (res) => res.data as CoPilotUsagePublic,
refetchInterval: 30000,
staleTime: 10000,
},
@@ -28,8 +28,8 @@ export function UsageLimits() {
const hasInsufficientCredits =
credits !== null && resetCost != null && credits < resetCost;
if (isLoading || !usage?.daily || !usage?.weekly) return null;
if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null;
if (!isSuccess || !usage) return null;
if (!usage.daily && !usage.weekly) return null;
return (
<Popover>

View File

@@ -1,4 +1,4 @@
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
import { Button } from "@/components/atoms/Button/Button";
import Link from "next/link";
import { formatCents, formatResetTime } from "../usageHelpers";
@@ -8,22 +8,17 @@ export { formatResetTime };
function UsageBar({
label,
used,
limit,
percentUsed,
resetsAt,
}: {
label: string;
used: number;
limit: number;
percentUsed: number;
resetsAt: Date | string;
}) {
if (limit <= 0) return null;
const rawPercent = (used / limit) * 100;
const percent = Math.min(100, Math.round(rawPercent));
const percent = Math.min(100, Math.max(0, Math.round(percentUsed)));
const isHigh = percent >= 80;
const percentLabel =
used > 0 && percent === 0 ? "<1% used" : `${percent}% used`;
percentUsed > 0 && percent === 0 ? "<1% used" : `${percent}% used`;
return (
<div className="flex flex-col gap-1">
@@ -38,10 +33,15 @@ function UsageBar({
</div>
<div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
<div
role="progressbar"
aria-label={`${label} usage`}
aria-valuemin={0}
aria-valuemax={100}
aria-valuenow={percent}
className={`h-full rounded-full transition-[width] duration-300 ease-out ${
isHigh ? "bg-orange-500" : "bg-blue-500"
}`}
style={{ width: `${Math.max(used > 0 ? 1 : 0, percent)}%` }}
style={{ width: `${Math.max(percent > 0 ? 1 : 0, percent)}%` }}
/>
</div>
</div>
@@ -79,21 +79,19 @@ export function UsagePanelContent({
isBillingEnabled = false,
onCreditChange,
}: {
usage: CoPilotUsageStatus;
usage: CoPilotUsagePublic;
showBillingLink?: boolean;
hasInsufficientCredits?: boolean;
isBillingEnabled?: boolean;
onCreditChange?: () => void;
}) {
const hasDailyLimit = usage.daily.limit > 0;
const hasWeeklyLimit = usage.weekly.limit > 0;
const isDailyExhausted =
hasDailyLimit && usage.daily.used >= usage.daily.limit;
const isWeeklyExhausted =
hasWeeklyLimit && usage.weekly.used >= usage.weekly.limit;
const daily = usage.daily;
const weekly = usage.weekly;
const isDailyExhausted = !!daily && daily.percent_used >= 100;
const isWeeklyExhausted = !!weekly && weekly.percent_used >= 100;
const resetCost = usage.reset_cost ?? 0;
if (!hasDailyLimit && !hasWeeklyLimit) {
if (!daily && !weekly) {
return (
<div className="text-xs text-neutral-500">No usage limits configured</div>
);
@@ -113,20 +111,18 @@ export function UsagePanelContent({
<span className="text-[11px] text-neutral-500">{tierLabel} plan</span>
)}
</div>
{hasDailyLimit && (
{daily && (
<UsageBar
label="Today"
used={usage.daily.used}
limit={usage.daily.limit}
resetsAt={usage.daily.resets_at}
percentUsed={daily.percent_used}
resetsAt={daily.resets_at}
/>
)}
{hasWeeklyLimit && (
{weekly && (
<UsageBar
label="This week"
used={usage.weekly.used}
limit={usage.weekly.limit}
resetsAt={usage.weekly.resets_at}
percentUsed={weekly.percent_used}
resetsAt={weekly.resets_at}
/>
)}
{isDailyExhausted &&

View File

@@ -2,10 +2,19 @@ import { render, screen, cleanup } from "@/tests/integrations/test-utils";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UsageLimits } from "../UsageLimits";
// Mock the generated Orval hook
// Mock the generated Orval hook, exercising the `select` callback so its
// line counts as covered alongside the rest of the options.
const mockUseGetV2GetCopilotUsage = vi.fn();
vi.mock("@/app/api/__generated__/endpoints/chat/chat", () => ({
useGetV2GetCopilotUsage: (opts: unknown) => mockUseGetV2GetCopilotUsage(opts),
useGetV2GetCopilotUsage: (opts: {
query?: { select?: (r: { data: unknown }) => unknown };
}) => {
const ret = mockUseGetV2GetCopilotUsage(opts) as { data?: unknown };
if (ret?.data !== undefined && typeof opts?.query?.select === "function") {
opts.query.select({ data: ret.data });
}
return ret;
},
}));
// Mock Popover to render children directly (Radix portals don't work in happy-dom)
@@ -27,22 +36,24 @@ afterEach(() => {
});
function makeUsage({
dailyUsed = 500,
dailyLimit = 10000,
weeklyUsed = 2000,
weeklyLimit = 50000,
dailyPercent = 5,
weeklyPercent = 4,
tier = "FREE",
}: {
dailyUsed?: number;
dailyLimit?: number;
weeklyUsed?: number;
weeklyLimit?: number;
dailyPercent?: number | null;
weeklyPercent?: number | null;
tier?: string;
} = {}) {
const future = new Date(Date.now() + 3600 * 1000); // 1h from now
const future = new Date(Date.now() + 3600 * 1000).toISOString();
return {
daily: { used: dailyUsed, limit: dailyLimit, resets_at: future },
weekly: { used: weeklyUsed, limit: weeklyLimit, resets_at: future },
daily:
dailyPercent === null
? null
: { percent_used: dailyPercent, resets_at: future },
weekly:
weeklyPercent === null
? null
: { percent_used: weeklyPercent, resets_at: future },
tier,
};
}
@@ -51,7 +62,7 @@ describe("UsageLimits", () => {
it("renders nothing while loading", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: undefined,
isLoading: true,
isSuccess: false,
});
const { container } = render(<UsageLimits />);
expect(container.innerHTML).toBe("");
@@ -59,8 +70,8 @@ describe("UsageLimits", () => {
it("renders nothing when no limits are configured", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyLimit: 0, weeklyLimit: 0 }),
isLoading: false,
data: makeUsage({ dailyPercent: null, weeklyPercent: null }),
isSuccess: true,
});
const { container } = render(<UsageLimits />);
expect(container.innerHTML).toBe("");
@@ -69,16 +80,16 @@ describe("UsageLimits", () => {
it("renders the usage button when limits exist", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage(),
isLoading: false,
isSuccess: true,
});
render(<UsageLimits />);
expect(screen.getByRole("button", { name: /usage limits/i })).toBeDefined();
});
it("displays daily and weekly usage percentages", () => {
it("displays daily and weekly percentage", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyUsed: 5000, dailyLimit: 10000 }),
isLoading: false,
data: makeUsage({ dailyPercent: 50, weeklyPercent: 4 }),
isSuccess: true,
});
render(<UsageLimits />);
@@ -88,14 +99,10 @@ describe("UsageLimits", () => {
expect(screen.getByText("Usage limits")).toBeDefined();
});
it("shows only weekly bar when daily limit is 0", () => {
it("shows only weekly bar when daily is null", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({
dailyLimit: 0,
weeklyUsed: 25000,
weeklyLimit: 50000,
}),
isLoading: false,
data: makeUsage({ dailyPercent: null, weeklyPercent: 50 }),
isSuccess: true,
});
render(<UsageLimits />);
@@ -103,20 +110,22 @@ describe("UsageLimits", () => {
expect(screen.queryByText("Today")).toBeNull();
});
it("caps percentage at 100% when over limit", () => {
it("caps bar width at 100% when over limit", () => {
// 150% exercises the clamp — 100% exactly is merely exhausted, not over.
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyUsed: 15000, dailyLimit: 10000 }),
isLoading: false,
data: makeUsage({ dailyPercent: 150 }),
isSuccess: true,
});
render(<UsageLimits />);
expect(screen.getByText("100% used")).toBeDefined();
const dailyBar = screen.getByRole("progressbar", { name: /today usage/i });
expect(dailyBar.getAttribute("aria-valuenow")).toBe("100");
});
it("displays the user tier label", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ tier: "PRO" }),
isLoading: false,
isSuccess: true,
});
render(<UsageLimits />);
@@ -126,7 +135,7 @@ describe("UsageLimits", () => {
it("shows learn more link to credits page", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage(),
isLoading: false,
isSuccess: true,
});
render(<UsageLimits />);

View File

@@ -6,7 +6,7 @@ import {
} from "@/tests/integrations/test-utils";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UsagePanelContent } from "../UsagePanelContent";
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
const mockResetUsage = vi.fn();
vi.mock("../../../hooks/useResetRateLimit", () => ({
@@ -20,36 +20,38 @@ afterEach(() => {
function makeUsage(
overrides: Partial<{
dailyUsed: number;
dailyLimit: number;
weeklyUsed: number;
weeklyLimit: number;
dailyPercent: number | null;
weeklyPercent: number | null;
tier: string;
resetCost: number;
}> = {},
): CoPilotUsageStatus {
): CoPilotUsagePublic {
const {
dailyUsed = 500,
dailyLimit = 10000,
weeklyUsed = 2000,
weeklyLimit = 50000,
dailyPercent = 5,
weeklyPercent = 4,
tier = "FREE",
resetCost = 100,
} = overrides;
const future = new Date(Date.now() + 3600 * 1000);
const future = new Date(Date.now() + 3600 * 1000).toISOString();
return {
daily: { used: dailyUsed, limit: dailyLimit, resets_at: future },
weekly: { used: weeklyUsed, limit: weeklyLimit, resets_at: future },
daily:
dailyPercent === null
? null
: { percent_used: dailyPercent, resets_at: future },
weekly:
weeklyPercent === null
? null
: { percent_used: weeklyPercent, resets_at: future },
tier,
reset_cost: resetCost,
} as CoPilotUsageStatus;
} as CoPilotUsagePublic;
}
describe("UsagePanelContent", () => {
it("renders 'No usage limits configured' when both limits are zero", () => {
it("renders 'No usage limits configured' when both windows are null", () => {
render(
<UsagePanelContent
usage={makeUsage({ dailyLimit: 0, weeklyLimit: 0 })}
usage={makeUsage({ dailyPercent: null, weeklyPercent: null })}
/>,
);
expect(screen.getByText("No usage limits configured")).toBeDefined();
@@ -58,11 +60,7 @@ describe("UsagePanelContent", () => {
it("renders the reset button when daily limit is exhausted", () => {
render(
<UsagePanelContent
usage={makeUsage({
dailyUsed: 10000,
dailyLimit: 10000,
resetCost: 50,
})}
usage={makeUsage({ dailyPercent: 100, resetCost: 50 })}
/>,
);
expect(screen.getByText(/Reset daily limit/)).toBeDefined();
@@ -72,10 +70,8 @@ describe("UsagePanelContent", () => {
render(
<UsagePanelContent
usage={makeUsage({
dailyUsed: 10000,
dailyLimit: 10000,
weeklyUsed: 50000,
weeklyLimit: 50000,
dailyPercent: 100,
weeklyPercent: 100,
resetCost: 50,
})}
/>,
@@ -86,11 +82,7 @@ describe("UsagePanelContent", () => {
it("calls resetUsage when the reset button is clicked", () => {
render(
<UsagePanelContent
usage={makeUsage({
dailyUsed: 10000,
dailyLimit: 10000,
resetCost: 50,
})}
usage={makeUsage({ dailyPercent: 100, resetCost: 50 })}
/>,
);
fireEvent.click(screen.getByText(/Reset daily limit/));
@@ -100,15 +92,21 @@ describe("UsagePanelContent", () => {
it("renders 'Add credits' link when insufficient credits", () => {
render(
<UsagePanelContent
usage={makeUsage({
dailyUsed: 10000,
dailyLimit: 10000,
resetCost: 50,
})}
usage={makeUsage({ dailyPercent: 100, resetCost: 50 })}
hasInsufficientCredits={true}
isBillingEnabled={true}
/>,
);
expect(screen.getByText("Add credits to reset")).toBeDefined();
});
it("renders percent used in the usage bar", () => {
render(<UsagePanelContent usage={makeUsage({ dailyPercent: 25 })} />);
expect(screen.getByText("25% used")).toBeDefined();
});
it("renders '<1% used' when usage is greater than 0 but rounds to 0", () => {
render(<UsagePanelContent usage={makeUsage({ dailyPercent: 0.3 })} />);
expect(screen.getByText("<1% used")).toBeDefined();
});
});

View File

@@ -0,0 +1,76 @@
import { describe, expect, it } from "vitest";
import {
formatCents,
formatMicrodollarsAsUsd,
formatResetTime,
} from "../usageHelpers";
// formatCents: plain cents -> "$X.YY" via toFixed(2); unlike
// formatMicrodollarsAsUsd there is no sub-cent "<$0.01" special case.
describe("formatCents", () => {
it("formats whole dollars", () => {
expect(formatCents(500)).toBe("$5.00");
});
it("formats zero", () => {
expect(formatCents(0)).toBe("$0.00");
});
it("formats fractional cents", () => {
expect(formatCents(1999)).toBe("$19.99");
});
});
// formatMicrodollarsAsUsd: 1_000_000 microdollars == $1.00. Positive amounts
// that would round to $0.00 render as "<$0.01" so tiny usage is still visible.
describe("formatMicrodollarsAsUsd", () => {
it("formats zero as $0.00", () => {
expect(formatMicrodollarsAsUsd(0)).toBe("$0.00");
});
it("formats whole dollar amounts", () => {
expect(formatMicrodollarsAsUsd(1_500_000)).toBe("$1.50");
});
it("formats amounts that round to $0.00 but are > 0 as <$0.01", () => {
expect(formatMicrodollarsAsUsd(999)).toBe("<$0.01");
});
it("formats exactly one cent as $0.01", () => {
expect(formatMicrodollarsAsUsd(10_000)).toBe("$0.01");
});
it("formats negative input with toFixed semantics (no special case)", () => {
// Negative should never come from the backend, but the helper is
// safe — it simply passes through `toFixed`.
expect(formatMicrodollarsAsUsd(-1_500_000)).toBe("$-1.50");
});
it("formats very large values without truncating", () => {
expect(formatMicrodollarsAsUsd(1_234_567_890)).toBe("$1234.57");
});
});
// formatResetTime takes an injectable `now` as its second argument, which is
// what makes these assertions deterministic — no fake timers needed.
describe("formatResetTime", () => {
it("returns 'now' when reset time is in the past", () => {
const now = new Date("2026-04-21T12:00:00Z");
const past = new Date("2026-04-21T11:59:00Z");
expect(formatResetTime(past, now)).toBe("now");
});
it("renders sub-hour resets as minutes", () => {
const now = new Date("2026-04-21T12:00:00Z");
const future = new Date("2026-04-21T12:15:00Z");
expect(formatResetTime(future, now)).toBe("in 15m");
});
it("renders same-day resets as 'Xh Ym'", () => {
const now = new Date("2026-04-21T12:00:00Z");
const future = new Date("2026-04-21T14:30:00Z");
expect(formatResetTime(future, now)).toBe("in 2h 30m");
});
it("renders future-day resets as a localized date string", () => {
const now = new Date("2026-04-21T12:00:00Z");
const future = new Date("2026-04-24T12:00:00Z");
// Not asserting exact format (localized), just that it's not the
// minute/hour form.
expect(formatResetTime(future, now)).not.toMatch(/^in \d/);
});
});

View File

@@ -2,6 +2,12 @@ export function formatCents(cents: number): string {
return `$${(cents / 100).toFixed(2)}`;
}
/**
 * Render a microdollar amount (1e-6 USD) as a dollar string, e.g. "$1.50".
 *
 * Positive amounts too small to display at two decimals render as "<$0.01"
 * so non-zero usage is never shown as "$0.00". Negative input is passed
 * straight through `toFixed` (e.g. "$-1.50") — no special-casing.
 */
export function formatMicrodollarsAsUsd(microdollars: number): string {
  const usd = microdollars / 1_000_000;
  const isSubCentPositive = microdollars > 0 && usd < 0.01;
  return isSubCentPositive ? "<$0.01" : `$${usd.toFixed(2)}`;
}
export function formatResetTime(
resetsAt: Date | string,
now: Date = new Date(),

View File

@@ -1,6 +1,6 @@
"use client";
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
import type { LibraryAgent } from "@/app/api/__generated__/models/libraryAgent";
import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
import {
@@ -42,9 +42,9 @@ export function BriefingTabContent({ activeTab, agents }: Props) {
}
function UsageSection() {
const { data: usage } = useGetV2GetCopilotUsage({
const { data: usage, isSuccess } = useGetV2GetCopilotUsage({
query: {
select: (res) => res.data as CoPilotUsageStatus,
select: (res) => res.data as CoPilotUsagePublic,
refetchInterval: 30000,
staleTime: 10000,
},
@@ -56,7 +56,8 @@ function UsageSection() {
const hasInsufficientCredits =
credits !== null && resetCost != null && credits < resetCost;
if (!usage?.daily || !usage?.weekly) return null;
if (!isSuccess || !usage) return null;
if (!usage.daily && !usage.weekly) return null;
return (
<div className="py-2">
@@ -80,19 +81,17 @@ function UsageSection() {
)}
</div>
<div className="mt-4 grid grid-cols-1 gap-6 sm:grid-cols-2">
{usage.daily.limit > 0 && (
{usage.daily && (
<UsageMeter
label="Today"
used={usage.daily.used}
limit={usage.daily.limit}
percentUsed={usage.daily.percent_used}
resetsAt={usage.daily.resets_at}
/>
)}
{usage.weekly.limit > 0 && (
{usage.weekly && (
<UsageMeter
label="This week"
used={usage.weekly.used}
limit={usage.weekly.limit}
percentUsed={usage.weekly.percent_used}
resetsAt={usage.weekly.resets_at}
/>
)}
@@ -244,14 +243,12 @@ function UsageFooter({
hasInsufficientCredits,
onCreditChange,
}: {
usage: CoPilotUsageStatus;
usage: CoPilotUsagePublic;
hasInsufficientCredits: boolean;
onCreditChange?: () => void;
}) {
const isDailyExhausted =
usage.daily.limit > 0 && usage.daily.used >= usage.daily.limit;
const isWeeklyExhausted =
usage.weekly.limit > 0 && usage.weekly.used >= usage.weekly.limit;
const isDailyExhausted = !!usage.daily && usage.daily.percent_used >= 100;
const isWeeklyExhausted = !!usage.weekly && usage.weekly.percent_used >= 100;
const resetCost = usage.reset_cost ?? 0;
const { resetUsage, isPending } = useResetRateLimit({ onCreditChange });
@@ -294,22 +291,17 @@ function UsageFooter({
function UsageMeter({
label,
used,
limit,
percentUsed,
resetsAt,
}: {
label: string;
used: number;
limit: number;
percentUsed: number;
resetsAt: Date | string;
}) {
if (limit <= 0) return null;
const rawPercent = (used / limit) * 100;
const percent = Math.min(100, Math.round(rawPercent));
const percent = Math.min(100, Math.max(0, Math.round(percentUsed)));
const isHigh = percent >= 80;
const percentLabel =
used > 0 && percent === 0 ? "<1% used" : `${percent}% used`;
percentUsed > 0 && percent === 0 ? "<1% used" : `${percent}% used`;
return (
<div className="flex flex-col gap-2">
@@ -323,20 +315,20 @@ function UsageMeter({
</div>
<div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
<div
role="progressbar"
aria-label={`${label} usage`}
aria-valuemin={0}
aria-valuemax={100}
aria-valuenow={percent}
className={`h-full rounded-full transition-[width] duration-300 ease-out ${
isHigh ? "bg-orange-500" : "bg-blue-500"
}`}
style={{ width: `${Math.max(used > 0 ? 1 : 0, percent)}%` }}
style={{ width: `${Math.max(percent > 0 ? 1 : 0, percent)}%` }}
/>
</div>
<div className="flex items-baseline justify-between">
<Text variant="small" className="tabular-nums text-neutral-500">
{used.toLocaleString()} / {limit.toLocaleString()}
</Text>
<Text variant="small" className="text-neutral-400">
Resets {formatResetTime(resetsAt)}
</Text>
</div>
<Text variant="small" className="text-neutral-400">
Resets {formatResetTime(resetsAt)}
</Text>
</div>
);
}

View File

@@ -0,0 +1,212 @@
import { render, screen, cleanup } from "@/tests/integrations/test-utils";
import { afterEach, describe, expect, it, vi } from "vitest";
import { BriefingTabContent } from "../BriefingTabContent";
const mockUseGetV2GetCopilotUsage = vi.fn();
vi.mock("@/app/api/__generated__/endpoints/chat/chat", () => ({
useGetV2GetCopilotUsage: (opts: {
query?: { select?: (r: { data: unknown }) => unknown };
}) => {
const ret = mockUseGetV2GetCopilotUsage(opts) as { data?: unknown };
// Exercise the `select` callback so its line counts as covered.
if (ret?.data !== undefined && typeof opts?.query?.select === "function") {
opts.query.select({ data: ret.data });
}
return ret;
},
}));
const mockUseGetFlag = vi.fn();
vi.mock("@/services/feature-flags/use-get-flag", async () => {
const actual = await vi.importActual<
typeof import("@/services/feature-flags/use-get-flag")
>("@/services/feature-flags/use-get-flag");
return {
...actual,
useGetFlag: (flag: unknown) => mockUseGetFlag(flag),
};
});
const mockUseCredits = vi.fn();
vi.mock("@/hooks/useCredits", () => ({
default: (opts: unknown) => mockUseCredits(opts),
}));
const mockResetUsage = vi.fn();
vi.mock("@/app/(platform)/copilot/hooks/useResetRateLimit", () => ({
useResetRateLimit: () => ({
resetUsage: mockResetUsage,
isPending: false,
}),
}));
afterEach(() => {
cleanup();
mockUseGetV2GetCopilotUsage.mockReset();
mockUseGetFlag.mockReset();
mockUseCredits.mockReset();
mockResetUsage.mockReset();
});
/**
 * Build a CoPilotUsagePublic-shaped fixture for the UsageSection tests.
 * Pass `null` for dailyPercent/weeklyPercent to omit that usage window
 * (the "no limit configured" shape).
 */
function makeUsage({
  dailyPercent = 5,
  weeklyPercent = 4,
  tier = "FREE",
  resetCost = 500,
}: {
  dailyPercent?: number | null;
  weeklyPercent?: number | null;
  tier?: string;
  resetCost?: number;
} = {}) {
  // Reset timestamps sit one hour in the future so meters render as pending.
  const resetsAt = new Date(Date.now() + 3600 * 1000).toISOString();
  const window = (percent: number | null) =>
    percent === null ? null : { percent_used: percent, resets_at: resetsAt };
  return {
    daily: window(dailyPercent),
    weekly: window(weeklyPercent),
    tier,
    reset_cost: resetCost,
  };
}
// Integration-style suite for the UsageSection rendered by BriefingTabContent.
// Relies on the module-level mocks for useGetV2GetCopilotUsage, useGetFlag,
// useCredits and useResetRateLimit declared earlier in this file.
describe("BriefingTabContent — UsageSection", () => {
it("renders nothing when usage fetch has not succeeded", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: undefined,
isSuccess: false,
});
mockUseGetFlag.mockReturnValue(false);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
const { container } = render(
<BriefingTabContent activeTab="all" agents={[]} />,
);
expect(container.innerHTML).toBe("");
});
it("renders nothing when both windows are null (no limits configured)", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyPercent: null, weeklyPercent: null }),
isSuccess: true,
});
mockUseGetFlag.mockReturnValue(false);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
const { container } = render(
<BriefingTabContent activeTab="all" agents={[]} />,
);
expect(container.innerHTML).toBe("");
});
it("renders tier badge + daily+weekly meters at normal usage", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyPercent: 12, weeklyPercent: 4, tier: "PRO" }),
isSuccess: true,
});
// Billing flag on -> "Manage billing" link is expected below.
mockUseGetFlag.mockReturnValue(true);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
render(<BriefingTabContent activeTab="all" agents={[]} />);
expect(screen.getByText("Usage limits")).toBeDefined();
expect(screen.getByText("Pro plan")).toBeDefined();
expect(screen.getByText("12% used")).toBeDefined();
expect(screen.getByText("4% used")).toBeDefined();
expect(screen.getByText("Today")).toBeDefined();
expect(screen.getByText("This week")).toBeDefined();
expect(screen.getByText("Manage billing")).toBeDefined();
});
it("shows reset button when daily limit is exhausted and user has credits", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyPercent: 100, weeklyPercent: 40, resetCost: 500 }),
isSuccess: true,
});
mockUseGetFlag.mockReturnValue(true);
// credits (1000) >= resetCost (500) -> reset CTA available.
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
render(<BriefingTabContent activeTab="all" agents={[]} />);
expect(screen.getByText(/Reset daily limit/)).toBeDefined();
});
it("shows 'Add credits' CTA when daily exhausted but user lacks credits", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyPercent: 100, weeklyPercent: 40, resetCost: 500 }),
isSuccess: true,
});
mockUseGetFlag.mockReturnValue(true);
// credits (10) < resetCost (500) -> the "Add credits" path replaces reset.
mockUseCredits.mockReturnValue({ credits: 10, fetchCredits: vi.fn() });
render(<BriefingTabContent activeTab="all" agents={[]} />);
expect(screen.getByText("Add credits to reset")).toBeDefined();
expect(screen.queryByText(/Reset daily limit/)).toBeNull();
});
it("hides reset CTAs when the weekly limit is also exhausted", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({
dailyPercent: 100,
weeklyPercent: 100,
resetCost: 500,
}),
isSuccess: true,
});
mockUseGetFlag.mockReturnValue(true);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
render(<BriefingTabContent activeTab="all" agents={[]} />);
expect(screen.queryByText(/Reset daily limit/)).toBeNull();
expect(screen.queryByText("Add credits to reset")).toBeNull();
});
it("renders <1% used when percent is >0 but rounds to 0", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: makeUsage({ dailyPercent: 0.4, weeklyPercent: 0 }),
isSuccess: true,
});
mockUseGetFlag.mockReturnValue(false);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
render(<BriefingTabContent activeTab="all" agents={[]} />);
expect(screen.getByText("<1% used")).toBeDefined();
});
it("dispatches to ExecutionListSection for running/attention/completed tabs", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: undefined,
isSuccess: false,
});
mockUseGetFlag.mockReturnValue(false);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
for (const tab of ["running", "attention", "completed"] as const) {
const { unmount } = render(
<BriefingTabContent activeTab={tab} agents={[]} />,
);
// Empty list -> EmptyMessage renders for each of the execution tabs.
expect(
screen.getByText(/No agents|No recently completed/i),
).toBeDefined();
unmount();
}
});
it("dispatches to AgentListSection for listening/scheduled/idle tabs", () => {
mockUseGetV2GetCopilotUsage.mockReturnValue({
data: undefined,
isSuccess: false,
});
mockUseGetFlag.mockReturnValue(false);
mockUseCredits.mockReturnValue({ credits: 1000, fetchCredits: vi.fn() });
for (const tab of ["listening", "scheduled", "idle"] as const) {
const { unmount } = render(
<BriefingTabContent activeTab={tab} agents={[]} />,
);
expect(screen.getByText(/No/i)).toBeDefined();
unmount();
}
});
});

View File

@@ -4,42 +4,14 @@ import { Button } from "@/components/ui/button";
import { Dialog } from "@/components/molecules/Dialog/Dialog";
import { Skeleton } from "@/components/atoms/Skeleton/Skeleton";
import { useSubscriptionTierSection } from "./useSubscriptionTierSection";
type TierInfo = {
key: string;
label: string;
multiplier: string;
description: string;
};
const TIERS: TierInfo[] = [
{
key: "FREE",
label: "Free",
multiplier: "1x",
description: "Base AutoPilot capacity with standard rate limits",
},
{
key: "PRO",
label: "Pro",
multiplier: "5x",
description: "5x AutoPilot capacity — run 5× more tasks per day/week",
},
{
key: "BUSINESS",
label: "Business",
multiplier: "20x",
description: "20x AutoPilot capacity — ideal for teams and heavy workloads",
},
];
const TIER_ORDER = ["FREE", "PRO", "BUSINESS", "ENTERPRISE"];
function formatCost(cents: number, tierKey: string): string {
if (tierKey === "FREE") return "Free";
if (cents === 0) return "Pricing available soon";
return `$${(cents / 100).toFixed(2)}/mo`;
}
import { PendingChangeBanner } from "./components/PendingChangeBanner/PendingChangeBanner";
import {
TIERS,
TIER_ORDER,
formatCost,
formatPendingDate,
getTierLabel,
} from "./helpers";
export function SubscriptionTierSection() {
const {
@@ -55,10 +27,14 @@ export function SubscriptionTierSection() {
isPaymentEnabled,
changeTier,
handleTierChange,
cancelPendingChange,
} = useSubscriptionTierSection();
const [confirmDowngradeTo, setConfirmDowngradeTo] = useState<string | null>(
null,
);
const [confirmReplacePendingTo, setConfirmReplacePendingTo] = useState<
string | null
>(null);
if (isLoading) {
return (
@@ -115,6 +91,34 @@ export function SubscriptionTierSection() {
await changeTier(tier);
}
async function confirmReplacePending() {
if (!confirmReplacePendingTo) return;
const tier = confirmReplacePendingTo;
setConfirmReplacePendingTo(null);
handleTierChange(tier, currentTier, setConfirmDowngradeTo);
}
const pendingTierFromSubscription = subscription.pending_tier ?? null;
const hasPendingChange =
pendingTierFromSubscription !== null &&
pendingTierFromSubscription !== currentTier;
function onTierButtonClick(targetTierKey: string) {
// If a pending change is queued and the user clicks a DIFFERENT non-current,
// non-pending tier, surface a confirmation so they don't silently overwrite
// their own scheduled change. The on-card button for the pending tier itself
// is already disabled; the primary cancel path is the banner.
if (
hasPendingChange &&
targetTierKey !== pendingTierFromSubscription &&
targetTierKey !== currentTier
) {
setConfirmReplacePendingTo(targetTierKey);
return;
}
handleTierChange(targetTierKey, currentTier, setConfirmDowngradeTo);
}
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Subscription Plan</h3>
@@ -128,6 +132,16 @@ export function SubscriptionTierSection() {
</p>
)}
{hasPendingChange && pendingTierFromSubscription ? (
<PendingChangeBanner
currentTier={currentTier}
pendingTier={pendingTierFromSubscription}
pendingEffectiveAt={subscription.pending_tier_effective_at}
onKeepCurrent={() => void cancelPendingChange()}
isBusy={isPending}
/>
) : null}
<div className="grid grid-cols-1 gap-3 sm:grid-cols-3">
{TIERS.map((tier) => {
const isCurrent = currentTier === tier.key;
@@ -137,6 +151,8 @@ export function SubscriptionTierSection() {
const isUpgrade = targetIdx > currentIdx;
const isDowngrade = targetIdx < currentIdx;
const isThisPending = pendingTier === tier.key;
const isScheduledTier =
hasPendingChange && pendingTierFromSubscription === tier.key;
return (
<div
@@ -171,22 +187,18 @@ export function SubscriptionTierSection() {
<Button
className="w-full"
variant={isUpgrade ? "default" : "outline"}
disabled={isPending}
onClick={() =>
handleTierChange(
tier.key,
currentTier,
setConfirmDowngradeTo,
)
}
disabled={isPending || isScheduledTier}
onClick={() => onTierButtonClick(tier.key)}
>
{isThisPending
? "Updating..."
: isUpgrade
? `Upgrade to ${tier.label}`
: isDowngrade
? `Downgrade to ${tier.label}`
: `Switch to ${tier.label}`}
: isScheduledTier
? "Scheduled"
: isUpgrade
? `Upgrade to ${tier.label}`
: isDowngrade
? `Downgrade to ${tier.label}`
: `Switch to ${tier.label}`}
</Button>
)}
</div>
@@ -196,9 +208,9 @@ export function SubscriptionTierSection() {
{currentTier !== "FREE" && isPaymentEnabled && (
<p className="text-sm text-neutral-500">
Your subscription is managed through Stripe. Upgrades and paid-tier
changes take effect immediately; downgrades to Free are scheduled for
the end of the current billing period.
Your subscription is managed through Stripe. Upgrades take effect
immediately. Downgrades take effect at the end of your current billing
period.
</p>
)}
@@ -215,7 +227,7 @@ export function SubscriptionTierSection() {
<p className="text-sm text-neutral-600 dark:text-neutral-400">
{confirmDowngradeTo === "FREE"
? "Downgrading to Free will schedule your subscription to cancel at the end of your current billing period. You keep your current plan until then."
: `Switching to ${TIERS.find((t) => t.key === confirmDowngradeTo)?.label ?? confirmDowngradeTo} will take effect immediately.`}{" "}
: `Switching to ${TIERS.find((t) => t.key === confirmDowngradeTo)?.label ?? confirmDowngradeTo} will take effect at the end of your current billing period. You keep your current plan until then.`}{" "}
Are you sure?
</p>
<Dialog.Footer>
@@ -235,6 +247,42 @@ export function SubscriptionTierSection() {
</Dialog.Content>
</Dialog>
<Dialog
title="Replace pending change?"
controlled={{
isOpen: !!confirmReplacePendingTo,
set: (open) => {
if (!open) setConfirmReplacePendingTo(null);
},
}}
>
<Dialog.Content>
<p className="text-sm text-neutral-600 dark:text-neutral-400">
You have a pending change to{" "}
{getTierLabel(pendingTierFromSubscription ?? "")}
{subscription.pending_tier_effective_at
? ` scheduled for ${formatPendingDate(subscription.pending_tier_effective_at)}`
: ""}
. Switching to {getTierLabel(confirmReplacePendingTo ?? "")} will
replace it. Continue?
</p>
<Dialog.Footer>
<Button
variant="outline"
onClick={() => setConfirmReplacePendingTo(null)}
>
Cancel
</Button>
<Button
variant="destructive"
onClick={() => void confirmReplacePending()}
>
Replace pending change
</Button>
</Dialog.Footer>
</Dialog.Content>
</Dialog>
<Dialog
title="Confirm Upgrade"
controlled={{

View File

@@ -71,17 +71,23 @@ function makeSubscription({
monthlyCost = 0,
tierCosts = { FREE: 0, PRO: 1999, BUSINESS: 4999, ENTERPRISE: 0 },
prorationCreditCents = 0,
pendingTier = null as string | null,
pendingTierEffectiveAt = null as Date | string | null,
}: {
tier?: string;
monthlyCost?: number;
tierCosts?: Record<string, number>;
prorationCreditCents?: number;
pendingTier?: string | null;
pendingTierEffectiveAt?: Date | string | null;
} = {}) {
return {
tier,
monthly_cost: monthlyCost,
tier_costs: tierCosts,
proration_credit_cents: prorationCreditCents,
pending_tier: pendingTier,
pending_tier_effective_at: pendingTierEffectiveAt,
};
}
@@ -92,6 +98,7 @@ function setupMocks({
mutateFn = vi.fn().mockResolvedValue({ status: 200, data: { url: "" } }),
isPending = false,
variables = undefined as { data?: { tier?: string } } | undefined,
refetchFn = vi.fn(),
} = {}) {
// The hook uses select: (data) => (data.status === 200 ? data.data : null)
// so the data value returned by the hook is already the transformed subscription object.
@@ -100,13 +107,14 @@ function setupMocks({
data: subscription,
isLoading,
error: queryError,
refetch: vi.fn(),
refetch: refetchFn,
});
mockUseUpdateSubscriptionTier.mockReturnValue({
mutateAsync: mutateFn,
isPending,
variables,
});
return { refetchFn, mutateFn };
}
afterEach(() => {
@@ -355,4 +363,229 @@ describe("SubscriptionTierSection", () => {
// No toast should fire — the user simply abandoned checkout
expect(mockToast).not.toHaveBeenCalled();
});
it("renders pending-change banner when pending_tier is set", () => {
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
});
render(<SubscriptionTierSection />);
expect(screen.getByText(/scheduled to downgrade to/i)).toBeDefined();
// Banner "Keep Business" button — the only Keep button, since the on-card
// duplicate was removed in favour of the banner.
expect(
screen.getAllByRole("button", { name: /keep business/i }),
).toHaveLength(1);
});
it("does not render pending-change banner when pending_tier is null", () => {
setupMocks({
subscription: makeSubscription({ tier: "BUSINESS", pendingTier: null }),
});
render(<SubscriptionTierSection />);
expect(screen.queryByText(/scheduled to downgrade/i)).toBeNull();
expect(screen.queryByRole("button", { name: /keep business/i })).toBeNull();
});
it("clicking Keep [CurrentTier] in banner submits a same-tier update and refetches", async () => {
// The cancel-pending route was collapsed into POST /credits/subscription as
// a same-tier request. Clicking "Keep BUSINESS" calls useUpdateSubscriptionTier
// with tier === current tier so the backend releases any pending schedule.
const mutateFn = vi
.fn()
.mockResolvedValue({ status: 200, data: { url: "", tier: "BUSINESS" } });
const refetchFn = vi.fn();
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
mutateFn,
refetchFn,
});
render(<SubscriptionTierSection />);
fireEvent.click(screen.getByRole("button", { name: /keep business/i }));
await waitFor(() => {
expect(mutateFn).toHaveBeenCalledWith(
expect.objectContaining({
data: expect.objectContaining({ tier: "BUSINESS" }),
}),
);
expect(refetchFn).toHaveBeenCalled();
});
expect(mockToast).toHaveBeenCalledWith(
expect.objectContaining({
title: "Pending subscription change cancelled.",
}),
);
});
it("uses end-of-period copy for paid→paid downgrade confirmation", () => {
setupMocks({ subscription: makeSubscription({ tier: "BUSINESS" }) });
render(<SubscriptionTierSection />);
fireEvent.click(screen.getByRole("button", { name: /downgrade to pro/i }));
const dialog = screen.getByRole("dialog");
expect(dialog.textContent).toMatch(
/switching to pro will take effect at the end of your current billing period/i,
);
expect(dialog.textContent).toMatch(
/you keep your current plan until then/i,
);
expect(dialog.textContent).not.toMatch(/take effect immediately/i);
});
it("shows destructive toast, tierError and still refetches when cancel-pending fails", async () => {
// The catch branch inside cancelPendingChange is load-bearing: it surfaces
// the error to the user AND re-issues a refetch so the UI reconciles if
// the server actually succeeded (webhook delivered after our client-side
// error).
const mutateFn = vi
.fn()
.mockRejectedValue(new Error("Stripe webhook failed"));
const refetchFn = vi.fn();
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
mutateFn,
refetchFn,
});
render(<SubscriptionTierSection />);
const keepButtons = screen.getAllByRole("button", {
name: /keep business/i,
});
fireEvent.click(keepButtons[0]);
await waitFor(() => {
expect(screen.getByRole("alert")).toBeDefined();
expect(screen.getByText(/stripe webhook failed/i)).toBeDefined();
});
expect(mockToast).toHaveBeenCalledWith(
expect.objectContaining({
title: "Failed to cancel pending change",
variant: "destructive",
}),
);
expect(refetchFn).toHaveBeenCalled();
});
it("disables the tier button that matches the pending tier so users can't overwrite their own scheduled change by mis-click", () => {
// User is on BUSINESS and has a pending downgrade to PRO. The "Downgrade
// to Pro" button must be disabled + labelled "Scheduled" so the primary
// cancel path stays the banner. Other tier buttons (FREE here) remain
// clickable — the user can still overwrite their pending change by
// picking a different target; backend handles that.
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
});
render(<SubscriptionTierSection />);
const scheduledBtn = screen.getByRole("button", { name: /scheduled/i });
expect(scheduledBtn).toBeDefined();
expect((scheduledBtn as HTMLButtonElement).disabled).toBe(true);
// The non-pending tier (FREE) button is still clickable.
const freeBtn = screen.getByRole("button", { name: /downgrade to free/i });
expect((freeBtn as HTMLButtonElement).disabled).toBe(false);
});
it("shows replace-pending dialog when clicking a non-pending tier while a pending change exists, and fires the mutation after confirm", async () => {
// User is on BUSINESS with a pending downgrade to PRO. Clicking FREE (a
// tier that is neither current nor the pending target) must NOT silently
// overwrite the pending schedule — it must open a confirmation dialog.
// Only after the user explicitly confirms should changeTier (→ its own
// downgrade confirm for paid→FREE) fire.
const mutateFn = vi
.fn()
.mockResolvedValue({ status: 200, data: { url: "" } });
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
mutateFn,
});
render(<SubscriptionTierSection />);
// Clicking FREE while PRO is pending surfaces the replace-pending dialog
// before anything mutates.
fireEvent.click(screen.getByRole("button", { name: /downgrade to free/i }));
expect(screen.getByRole("dialog")).toBeDefined();
expect(screen.getByText(/replace pending change/i)).toBeDefined();
expect(mutateFn).not.toHaveBeenCalled();
// Confirm the replace: the replace-pending dialog closes and the
// downgrade-to-FREE dialog takes over (because FREE is a downgrade).
fireEvent.click(
screen.getByRole("button", { name: /replace pending change/i }),
);
// Now the "Confirm Downgrade" dialog should be open — confirm it to fire
// the mutation.
fireEvent.click(screen.getByRole("button", { name: /confirm downgrade/i }));
await waitFor(() => {
expect(mutateFn).toHaveBeenCalledWith(
expect.objectContaining({
data: expect.objectContaining({ tier: "FREE" }),
}),
);
});
});
it("dismisses replace-pending dialog on Cancel without mutating", () => {
const mutateFn = vi
.fn()
.mockResolvedValue({ status: 200, data: { url: "" } });
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "PRO",
pendingTierEffectiveAt: new Date("2026-11-15T00:00:00Z"),
}),
mutateFn,
});
render(<SubscriptionTierSection />);
fireEvent.click(screen.getByRole("button", { name: /downgrade to free/i }));
expect(screen.getByRole("dialog")).toBeDefined();
fireEvent.click(screen.getByRole("button", { name: /^cancel$/i }));
expect(screen.queryByRole("dialog")).toBeNull();
expect(mutateFn).not.toHaveBeenCalled();
});
it("renders FREE cancellation copy in banner when pending_tier is FREE", () => {
setupMocks({
subscription: makeSubscription({
tier: "BUSINESS",
pendingTier: "FREE",
pendingTierEffectiveAt: new Date("2026-05-15T00:00:00Z"),
}),
});
render(<SubscriptionTierSection />);
// Cancellation copy — distinct from the generic downgrade phrasing.
expect(
screen.getByText(/scheduled to cancel your subscription on/i),
).toBeDefined();
expect(screen.getByText(/May 15, 2026/)).toBeDefined();
// Must NOT render the "downgrade to" phrasing on FREE cancellation.
expect(screen.queryByText(/scheduled to downgrade to/i)).toBeNull();
});
});

View File

@@ -0,0 +1,60 @@
import { Button } from "@/components/ui/button";
import { formatPendingDate, getTierLabel } from "../../helpers";
interface Props {
currentTier: string;
pendingTier: string;
pendingEffectiveAt: Date | string | null | undefined;
onKeepCurrent: () => void;
isBusy: boolean;
}
export function PendingChangeBanner({
currentTier,
pendingTier,
pendingEffectiveAt,
onKeepCurrent,
isBusy,
}: Props) {
// Backend invariant: pending_tier_effective_at is always populated when
// pending_tier is set. Bail early if the date is missing so the sentence
// always reads with a date instead of a null-fallback branch.
if (!pendingEffectiveAt) return null;
const pendingLabel = getTierLabel(pendingTier);
const currentLabel = getTierLabel(currentTier);
const dateText = formatPendingDate(pendingEffectiveAt);
const isCancellation = pendingTier === "FREE";
return (
<div
role="status"
aria-live="polite"
className="flex flex-col gap-2 rounded-md border border-violet-500 bg-violet-50 px-3 py-2 text-sm text-violet-800 sm:flex-row sm:items-center sm:justify-between"
>
<p>
{isCancellation ? (
<>
Scheduled to cancel your subscription on{" "}
<span className="font-semibold">{dateText}</span>.
</>
) : (
<>
Scheduled to downgrade to{" "}
<span className="font-semibold">{pendingLabel}</span> on{" "}
<span className="font-semibold">{dateText}</span>.
</>
)}
</p>
<Button
variant="outline"
size="sm"
disabled={isBusy}
onClick={onKeepCurrent}
>
{isBusy ? "Cancelling..." : `Keep ${currentLabel}`}
</Button>
</div>
);
}

View File

@@ -0,0 +1,54 @@
/** Static metadata used to render one subscription-tier card. */
export interface TierInfo {
  key: string;
  label: string;
  multiplier: string;
  description: string;
}

// Self-serve tiers shown in the UI. ENTERPRISE is deliberately absent here
// (not user-selectable) but still ranked in TIER_ORDER below so tier
// comparisons work for enterprise accounts.
export const TIERS: TierInfo[] = [
  {
    key: "FREE",
    label: "Free",
    multiplier: "1x",
    description: "Base AutoPilot capacity with standard rate limits",
  },
  {
    key: "PRO",
    label: "Pro",
    multiplier: "5x",
    description: "5x AutoPilot capacity — run 5× more tasks per day/week",
  },
  {
    key: "BUSINESS",
    label: "Business",
    multiplier: "20x",
    description: "20x AutoPilot capacity — ideal for teams and heavy workloads",
  },
];

// Ascending rank of all tiers, used to classify a change as upgrade vs downgrade.
export const TIER_ORDER = ["FREE", "PRO", "BUSINESS", "ENTERPRISE"];
/**
 * Human-readable monthly price for a tier.
 *
 * FREE always reads "Free"; a zero price on any paid tier means pricing has
 * not been configured yet, so a placeholder is shown instead of "$0.00/mo".
 */
export function formatCost(cents: number, tierKey: string): string {
  if (tierKey === "FREE") {
    return "Free";
  }
  return cents === 0
    ? "Pricing available soon"
    : `$${(cents / 100).toFixed(2)}/mo`;
}
/**
 * Display label for a tier key.
 *
 * Looks the key up in TIERS first; unknown keys (e.g. "ENTERPRISE") fall
 * back to Title-casing the raw key.
 */
export function getTierLabel(tierKey: string): string {
  const known = TIERS.find((tier) => tier.key === tierKey);
  if (known) {
    return known.label;
  }
  return tierKey.charAt(0) + tierKey.slice(1).toLowerCase();
}
/**
 * Format a pending-change effective date as e.g. "Nov 15, 2026".
 *
 * Pinned to en-US so SSR and CSR produce the same string — passing
 * `undefined` picks up the server's locale during prerender and the
 * browser's locale on hydration, which triggers a React hydration
 * mismatch warning.
 */
export function formatPendingDate(value: Date | string): string {
  const parsed = typeof value === "string" ? new Date(value) : value;
  return parsed.toLocaleDateString("en-US", {
    month: "short",
    day: "numeric",
    year: "numeric",
  });
}

View File

@@ -117,6 +117,47 @@ export function useSubscriptionTierSection() {
await changeTier(tier);
}
/**
 * Cancel a pending subscription change by re-submitting the user's current
 * tier. On success, refetches the subscription and confirms via toast; on
 * failure, surfaces the error and still refetches so the UI reconciles.
 */
async function cancelPendingChange() {
  if (!subscription) return;
  setTierError(null);

  // Computed once: both redirect fields carry the same current-page URL.
  const returnUrl = `${window.location.origin}${window.location.pathname}`;
  try {
    // "Stay on my current tier" is a same-tier POST: the backend collapses
    // cancel-pending into update-tier and releases any pending schedule.
    // success_url/cancel_url are unused in this branch (no Stripe Checkout
    // is created) but are sent to satisfy the request schema.
    await doUpdateTier({
      data: {
        tier: subscription.tier as SubscriptionTierRequestTier,
        success_url: returnUrl,
        cancel_url: returnUrl,
      },
    });
    await refetch();
    toast({
      title: "Pending subscription change cancelled.",
    });
  } catch (err: unknown) {
    const message =
      err instanceof Error
        ? err.message
        : "Failed to cancel pending subscription change";
    setTierError(message);
    toast({
      title: "Failed to cancel pending change",
      description: message,
      variant: "destructive",
    });
    // Refetch on error so the UI reconciles if the server actually
    // succeeded (e.g. webhook delivered after our client-side error).
    // Swallow refetch errors — we already have the primary error for display.
    try {
      await refetch();
    } catch {
      // intentional
    }
  }
}
const pendingTier =
isPending && variables?.data?.tier ? variables.data.tier : null;
@@ -133,5 +174,6 @@ export function useSubscriptionTierSection() {
isPaymentEnabled,
changeTier,
handleTierChange,
cancelPendingChange,
};
}

View File

@@ -13,7 +13,7 @@ import { RefundModal } from "./RefundModal";
import { SubscriptionTierSection } from "./components/SubscriptionTierSection/SubscriptionTierSection";
import { CreditTransaction } from "@/lib/autogpt-server-api";
import { UsagePanelContent } from "@/app/(platform)/copilot/components/UsageLimits/UsageLimits";
import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
import type { CoPilotUsagePublic } from "@/app/api/__generated__/models/coPilotUsagePublic";
import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
import {
@@ -27,16 +27,16 @@ import {
function CoPilotUsageSection() {
const router = useRouter();
const { data: usage, isLoading } = useGetV2GetCopilotUsage({
const { data: usage, isSuccess } = useGetV2GetCopilotUsage({
query: {
select: (res) => res.data as CoPilotUsageStatus,
select: (res) => res.data as CoPilotUsagePublic,
refetchInterval: 30000,
staleTime: 10000,
},
});
if (isLoading || !usage?.daily || !usage?.weekly) return null;
if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null;
if (!isSuccess || !usage) return null;
if (!usage.daily && !usage.weekly) return null;
return (
<div className="my-6 space-y-4">

View File

@@ -1836,7 +1836,7 @@
}
},
"429": {
"description": "Token rate-limit or call-frequency cap exceeded"
"description": "Cost rate-limit or call-frequency cap exceeded"
}
}
}
@@ -1922,14 +1922,14 @@
"get": {
"tags": ["v2", "chat", "chat"],
"summary": "Get Copilot Usage",
"description": "Get CoPilot usage status for the authenticated user.\n\nReturns current token usage vs limits for daily and weekly windows.\nGlobal defaults sourced from LaunchDarkly (falling back to config).\nIncludes the user's rate-limit tier.",
"description": "Get CoPilot usage status for the authenticated user.\n\nReturns the percentage of the daily/weekly allowance used — not the\nraw spend or cap — so clients cannot derive per-turn cost or platform\nmargins. Global defaults sourced from LaunchDarkly (falling back to\nconfig). Includes the user's rate-limit tier.",
"operationId": "getV2GetCopilotUsage",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/CoPilotUsageStatus" }
"schema": { "$ref": "#/components/schemas/CoPilotUsagePublic" }
}
}
},
@@ -1944,7 +1944,7 @@
"post": {
"tags": ["v2", "chat", "chat"],
"summary": "Reset Copilot Usage",
"description": "Reset the daily CoPilot rate limit by spending credits.\n\nAllows users who have hit their daily token limit to spend credits\nto reset their daily usage counter and continue working.\nReturns 400 if the feature is disabled or the user is not over the limit.\nReturns 402 if the user has insufficient credits.",
"description": "Reset the daily CoPilot rate limit by spending credits.\n\nAllows users who have hit their daily cost limit to spend credits\nto reset their daily usage counter and continue working.\nReturns 400 if the feature is disabled or the user is not over the limit.\nReturns 402 if the user has insufficient credits.",
"operationId": "postV2ResetCopilotUsage",
"responses": {
"200": {
@@ -2513,7 +2513,7 @@
},
"post": {
"tags": ["v1", "credits"],
"summary": "Start a Stripe Checkout session to upgrade subscription tier",
"summary": "Update subscription tier or start a Stripe Checkout session",
"operationId": "updateSubscriptionTier",
"requestBody": {
"content": {
@@ -2531,7 +2531,7 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SubscriptionCheckoutResponse"
"$ref": "#/components/schemas/SubscriptionStatusResponse"
}
}
}
@@ -9254,10 +9254,22 @@
"title": "ClarifyingQuestion",
"description": "A question that needs user clarification."
},
"CoPilotUsageStatus": {
"CoPilotUsagePublic": {
"properties": {
"daily": { "$ref": "#/components/schemas/UsageWindow" },
"weekly": { "$ref": "#/components/schemas/UsageWindow" },
"daily": {
"anyOf": [
{ "$ref": "#/components/schemas/UsageWindowPublic" },
{ "type": "null" }
],
"description": "Null when no daily cap is configured (unlimited)."
},
"weekly": {
"anyOf": [
{ "$ref": "#/components/schemas/UsageWindowPublic" },
{ "type": "null" }
],
"description": "Null when no weekly cap is configured (unlimited)."
},
"tier": {
"$ref": "#/components/schemas/SubscriptionTier",
"default": "FREE"
@@ -9270,9 +9282,8 @@
}
},
"type": "object",
"required": ["daily", "weekly"],
"title": "CoPilotUsageStatus",
"description": "Current usage status for a user across all windows."
"title": "CoPilotUsagePublic",
"description": "Current usage status for a user — public (client-safe) shape."
},
"ContentType": {
"type": "string",
@@ -13074,8 +13085,8 @@
"description": "Credit balance after charge (in cents)"
},
"usage": {
"$ref": "#/components/schemas/CoPilotUsageStatus",
"description": "Updated usage status after reset"
"$ref": "#/components/schemas/CoPilotUsagePublic",
"description": "Updated usage status after reset (percentages only)"
}
},
"type": "object",
@@ -14286,12 +14297,6 @@
"enum": ["DRAFT", "PENDING", "APPROVED", "REJECTED"],
"title": "SubmissionStatus"
},
"SubscriptionCheckoutResponse": {
"properties": { "url": { "type": "string", "title": "Url" } },
"type": "object",
"required": ["url"],
"title": "SubscriptionCheckoutResponse"
},
"SubscriptionStatusResponse": {
"properties": {
"tier": {
@@ -14308,6 +14313,26 @@
"proration_credit_cents": {
"type": "integer",
"title": "Proration Credit Cents"
},
"pending_tier": {
"anyOf": [
{ "type": "string", "enum": ["FREE", "PRO", "BUSINESS"] },
{ "type": "null" }
],
"title": "Pending Tier"
},
"pending_tier_effective_at": {
"anyOf": [
{ "type": "string", "format": "date-time" },
{ "type": "null" }
],
"title": "Pending Tier Effective At"
},
"url": {
"type": "string",
"title": "Url",
"description": "Populated only when POST /credits/subscription starts a Stripe Checkout Session (FREE → paid upgrade). Empty string in all other branches — the client redirects to this URL when non-empty.",
"default": ""
}
},
"type": "object",
@@ -14323,7 +14348,7 @@
"type": "string",
"enum": ["FREE", "PRO", "BUSINESS", "ENTERPRISE"],
"title": "SubscriptionTier",
"description": "Subscription tiers with increasing token allowances.\n\nMirrors the ``SubscriptionTier`` enum in ``schema.prisma``.\nOnce ``prisma generate`` is run, this can be replaced with::\n\n from prisma.enums import SubscriptionTier"
"description": "Subscription tiers with increasing cost allowances.\n\nMirrors the ``SubscriptionTier`` enum in ``schema.prisma``.\nOnce ``prisma generate`` is run, this can be replaced with::\n\n from prisma.enums import SubscriptionTier"
},
"SubscriptionTierRequest": {
"properties": {
@@ -16000,13 +16025,14 @@
"required": ["timezone"],
"title": "UpdateTimezoneRequest"
},
"UsageWindow": {
"UsageWindowPublic": {
"properties": {
"used": { "type": "integer", "title": "Used" },
"limit": {
"type": "integer",
"title": "Limit",
"description": "Maximum tokens allowed in this window. 0 means unlimited."
"percent_used": {
"type": "number",
"maximum": 100.0,
"minimum": 0.0,
"title": "Percent Used",
"description": "Percentage of the window's allowance used (0-100). Clamped at 100 when over the cap."
},
"resets_at": {
"type": "string",
@@ -16015,9 +16041,9 @@
}
},
"type": "object",
"required": ["used", "limit", "resets_at"],
"title": "UsageWindow",
"description": "Usage within a single time window."
"required": ["percent_used", "resets_at"],
"title": "UsageWindowPublic",
"description": "Public view of a usage window — only the percentage and reset time.\n\nHides the raw spend and the cap so clients cannot derive per-turn cost\nor reverse-engineer platform margins. ``percent_used`` is capped at 100."
},
"UserCostSummary": {
"properties": {
@@ -16258,31 +16284,31 @@
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "User Email"
},
"daily_token_limit": {
"daily_cost_limit_microdollars": {
"type": "integer",
"title": "Daily Token Limit"
"title": "Daily Cost Limit Microdollars"
},
"weekly_token_limit": {
"weekly_cost_limit_microdollars": {
"type": "integer",
"title": "Weekly Token Limit"
"title": "Weekly Cost Limit Microdollars"
},
"daily_tokens_used": {
"daily_cost_used_microdollars": {
"type": "integer",
"title": "Daily Tokens Used"
"title": "Daily Cost Used Microdollars"
},
"weekly_tokens_used": {
"weekly_cost_used_microdollars": {
"type": "integer",
"title": "Weekly Tokens Used"
"title": "Weekly Cost Used Microdollars"
},
"tier": { "$ref": "#/components/schemas/SubscriptionTier" }
},
"type": "object",
"required": [
"user_id",
"daily_token_limit",
"weekly_token_limit",
"daily_tokens_used",
"weekly_tokens_used",
"daily_cost_limit_microdollars",
"weekly_cost_limit_microdollars",
"daily_cost_used_microdollars",
"weekly_cost_used_microdollars",
"tier"
],
"title": "UserRateLimitResponse"