fix: restore size="icon" on sidebar trigger button

Merge remote-tracking branch 'origin/dev' into feat/tracking-cost-block
2026-03-17 03:00:27 -04:00 · 2026-03-17 12:06:39 +07:00 · 2026-03-17 07:10:44 +07:00 · 2026-03-17 06:16:46 +07:00 · 2026-03-17 06:14:41 +07:00 · 2026-03-16 23:08:18 +00:00
31 changed files with 2361 additions and 346 deletions
--- a/.github/workflows/platform-backend-ci.yml
+++ b/.github/workflows/platform-backend-ci.yml
@@ -5,12 +5,14 @@ on:
    branches: [master, dev, ci-test*]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/backend/**"
      - "autogpt_platform/autogpt_libs/**"
  pull_request:
    branches: [master, dev, release-*]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/backend/**"
      - "autogpt_platform/autogpt_libs/**"
  merge_group:
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -120,175 +120,6 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          exitOnceUploaded: true

-  e2e_test:
-    name: end-to-end tests
-    runs-on: big-boi
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-        with:
-          submodules: recursive
-
-      - name: Set up Platform - Copy default supabase .env
-        run: |
-          cp ../.env.default ../.env
-
-      - name: Set up Platform - Copy backend .env and set OpenAI API key
-        run: |
-          cp ../backend/.env.default ../backend/.env
-          echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
-        env:
-          # Used by E2E test data script to generate embeddings for approved store agents
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-
-      - name: Set up Platform - Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          driver: docker-container
-          driver-opts: network=host
-
-      - name: Set up Platform - Expose GHA cache to docker buildx CLI
-        uses: crazy-max/ghaction-github-runtime@v4
-
-      - name: Set up Platform - Build Docker images (with cache)
-        working-directory: autogpt_platform
-        run: |
-          pip install pyyaml
-
-          # Resolve extends and generate a flat compose file that bake can understand
-          docker compose -f docker-compose.yml config > docker-compose.resolved.yml
-
-          # Add cache configuration to the resolved compose file
-          python ../.github/workflows/scripts/docker-ci-fix-compose-build-cache.py \
-            --source docker-compose.resolved.yml \
-            --cache-from "type=gha" \
-            --cache-to "type=gha,mode=max" \
-            --backend-hash "${{ hashFiles('autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/poetry.lock', 'autogpt_platform/backend/backend') }}" \
-            --frontend-hash "${{ hashFiles('autogpt_platform/frontend/Dockerfile', 'autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/src') }}" \
-            --git-ref "${{ github.ref }}"
-
-          # Build with bake using the resolved compose file (now includes cache config)
-          docker buildx bake --allow=fs.read=.. -f docker-compose.resolved.yml --load
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Cache E2E test data
-        id: e2e-data-cache
-        uses: actions/cache@v5
-        with:
-          path: /tmp/e2e_test_data.sql
-          key: e2e-test-data-${{ hashFiles('autogpt_platform/backend/test/e2e_test_data.py', 'autogpt_platform/backend/migrations/**', '.github/workflows/platform-frontend-ci.yml') }}
-
-      - name: Set up Platform - Start Supabase DB + Auth
-        run: |
-          docker compose -f ../docker-compose.resolved.yml up -d db auth --no-build
-          echo "Waiting for database to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done'
-          echo "Waiting for auth service to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -c "SELECT 1 FROM auth.users LIMIT 1" 2>/dev/null; do sleep 2; done' || echo "Auth schema check timeout, continuing..."
-
-      - name: Set up Platform - Run migrations
-        run: |
-          echo "Running migrations..."
-          docker compose -f ../docker-compose.resolved.yml run --rm migrate
-          echo "✅ Migrations completed"
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Load cached E2E test data
-        if: steps.e2e-data-cache.outputs.cache-hit == 'true'
-        run: |
-          echo "✅ Found cached E2E test data, restoring..."
-          {
-            echo "SET session_replication_role = 'replica';"
-            cat /tmp/e2e_test_data.sql
-            echo "SET session_replication_role = 'origin';"
-          } | docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -b
-          # Refresh materialized views after restore
-          docker compose -f ../docker-compose.resolved.yml exec -T db \
-            psql -U postgres -d postgres -b -c "SET search_path TO platform; SELECT refresh_store_materialized_views();" || true
-
-          echo "✅ E2E test data restored from cache"
-
-      - name: Set up Platform - Start (all other services)
-        run: |
-          docker compose -f ../docker-compose.resolved.yml up -d --no-build
-          echo "Waiting for rest_server to be ready..."
-          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Create E2E test data
-        if: steps.e2e-data-cache.outputs.cache-hit != 'true'
-        run: |
-          echo "Creating E2E test data..."
-          docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.resolved.yml ps -q rest_server):/tmp/e2e_test_data.py
-          docker compose -f ../docker-compose.resolved.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
-            echo "❌ E2E test data creation failed!"
-            docker compose -f ../docker-compose.resolved.yml logs --tail=50 rest_server
-            exit 1
-          }
-
-          # Dump auth.users + platform schema for cache (two separate dumps)
-          echo "Dumping database for cache..."
-          {
-            docker compose -f ../docker-compose.resolved.yml exec -T db \
-              pg_dump -U postgres --data-only --column-inserts \
-              --table='auth.users' postgres
-            docker compose -f ../docker-compose.resolved.yml exec -T db \
-              pg_dump -U postgres --data-only --column-inserts \
-              --schema=platform \
-              --exclude-table='platform._prisma_migrations' \
-              --exclude-table='platform.apscheduler_jobs' \
-              --exclude-table='platform.apscheduler_jobs_batched_notifications' \
-              postgres
-          } > /tmp/e2e_test_data.sql
-
-          echo "✅ Database dump created for caching ($(wc -l < /tmp/e2e_test_data.sql) lines)"
-
-      - name: Set up tests - Enable corepack
-        run: corepack enable
-
-      - name: Set up tests - Set up Node
-        uses: actions/setup-node@v6
-        with:
-          node-version: "22.18.0"
-          cache: "pnpm"
-          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
-
-      - name: Set up tests - Install dependencies
-        run: pnpm install --frozen-lockfile
-
-      - name: Set up tests - Install browser 'chromium'
-        run: pnpm playwright install --with-deps chromium
-
-      - name: Run Playwright tests
-        run: pnpm test:no-build
-        continue-on-error: false
-
-      - name: Upload Playwright report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: playwright-report
-          path: playwright-report
-          if-no-files-found: ignore
-          retention-days: 3
-
-      - name: Upload Playwright test results
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: playwright-test-results
-          path: test-results
-          if-no-files-found: ignore
-          retention-days: 3
-
-      - name: Print Final Docker Compose logs
-        if: always()
-        run: docker compose -f ../docker-compose.resolved.yml logs
-
  integration_test:
    runs-on: ubuntu-latest
    needs: setup
--- a/.github/workflows/platform-fullstack-ci.yml
+++ b/.github/workflows/platform-fullstack-ci.yml
@@ -1,14 +1,18 @@
-name: AutoGPT Platform - Frontend CI
+name: AutoGPT Platform - Full-stack CI

 on:
  push:
    branches: [master, dev]
    paths:
      - ".github/workflows/platform-fullstack-ci.yml"
+      - ".github/workflows/scripts/docker-ci-fix-compose-build-cache.py"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/**"
  pull_request:
    paths:
      - ".github/workflows/platform-fullstack-ci.yml"
+      - ".github/workflows/scripts/docker-ci-fix-compose-build-cache.py"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/**"
  merge_group:

@@ -24,42 +28,28 @@ defaults:
 jobs:
  setup:
    runs-on: ubuntu-latest
-    outputs:
-      cache-key: ${{ steps.cache-key.outputs.key }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

-      - name: Set up Node.js
-        uses: actions/setup-node@v6
-        with:
-          node-version: "22.18.0"
-
      - name: Enable corepack
        run: corepack enable

-      - name: Generate cache key
-        id: cache-key
-        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
-
-      - name: Cache dependencies
-        uses: actions/cache@v5
+      - name: Set up Node
+        uses: actions/setup-node@v6
        with:
-          path: ~/.pnpm-store
-          key: ${{ steps.cache-key.outputs.key }}
-          restore-keys: |
-            ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
-            ${{ runner.os }}-pnpm-
+          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml

-      - name: Install dependencies
+      - name: Install dependencies to populate cache
        run: pnpm install --frozen-lockfile

-  types:
-    runs-on: big-boi
+  check-api-types:
+    name: check API types
+    runs-on: ubuntu-latest
    needs: setup
-    strategy:
-      fail-fast: false

    steps:
      - name: Checkout repository
@@ -67,70 +57,256 @@ jobs:
        with:
          submodules: recursive

-      - name: Set up Node.js
+      # ------------------------ Backend setup ------------------------
+
+      - name: Set up Backend - Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Set up Backend - Install Poetry
+        working-directory: autogpt_platform/backend
+        run: |
+          POETRY_VERSION=$(python ../../.github/workflows/scripts/get_package_version_from_lockfile.py poetry)
+          echo "Installing Poetry version ${POETRY_VERSION}"
+          curl -sSL https://install.python-poetry.org | POETRY_VERSION=$POETRY_VERSION python3 -
+
+      - name: Set up Backend - Set up dependency cache
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
+
+      - name: Set up Backend - Install dependencies
+        working-directory: autogpt_platform/backend
+        run: poetry install
+
+      - name: Set up Backend - Generate Prisma client
+        working-directory: autogpt_platform/backend
+        run: poetry run prisma generate && poetry run gen-prisma-stub
+
+      - name: Set up Frontend - Export OpenAPI schema from Backend
+        working-directory: autogpt_platform/backend
+        run: poetry run export-api-schema --output ../frontend/src/app/api/openapi.json
+
+      # ------------------------ Frontend setup ------------------------
+
+      - name: Set up Frontend - Enable corepack
+        run: corepack enable
+
+      - name: Set up Frontend - Set up Node
        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml

-      - name: Enable corepack
-        run: corepack enable
-
-      - name: Copy default supabase .env
-        run: |
-          cp ../.env.default ../.env
-
-      - name: Copy backend .env
-        run: |
-          cp ../backend/.env.default ../backend/.env
-
-      - name: Run docker compose
-        run: |
-          docker compose -f ../docker-compose.yml --profile local up -d deps_backend
-
-      - name: Restore dependencies cache
-        uses: actions/cache@v5
-        with:
-          path: ~/.pnpm-store
-          key: ${{ needs.setup.outputs.cache-key }}
-          restore-keys: |
-            ${{ runner.os }}-pnpm-
-
-      - name: Install dependencies
+      - name: Set up Frontend - Install dependencies
        run: pnpm install --frozen-lockfile

-      - name: Setup .env
-        run: cp .env.default .env
-
-      - name: Wait for services to be ready
-        run: |
-          echo "Waiting for rest_server to be ready..."
-          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
-          echo "Waiting for database to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done' || echo "Database ready check timeout, continuing..."
-
-      - name: Generate API queries
-        run: pnpm generate:api:force
+      - name: Set up Frontend - Format OpenAPI schema
+        id: format-schema
+        run: pnpm prettier --write ./src/app/api/openapi.json

      - name: Check for API schema changes
        run: |
          if ! git diff --exit-code src/app/api/openapi.json; then
            echo "❌ API schema changes detected in src/app/api/openapi.json"
            echo ""
-            echo "The openapi.json file has been modified after running 'pnpm generate:api-all'."
+            echo "The openapi.json file has been modified after exporting the API schema."
            echo "This usually means changes have been made in the BE endpoints without updating the Frontend."
            echo "The API schema is now out of sync with the Front-end queries."
            echo ""
            echo "To fix this:"
-            echo "1. Pull the backend 'docker compose pull && docker compose up -d --build --force-recreate'"
-            echo "2. Run 'pnpm generate:api' locally"
-            echo "3. Run 'pnpm types' locally"
-            echo "4. Fix any TypeScript errors that may have been introduced"
-            echo "5. Commit and push your changes"
+            echo "\nIn the backend directory:"
+            echo "1. Run 'poetry run export-api-schema --output ../frontend/src/app/api/openapi.json'"
+            echo "\nIn the frontend directory:"
+            echo "2. Run 'pnpm prettier --write src/app/api/openapi.json'"
+            echo "3. Run 'pnpm generate:api'"
+            echo "4. Run 'pnpm types'"
+            echo "5. Fix any TypeScript errors that may have been introduced"
+            echo "6. Commit and push your changes"
            echo ""
            exit 1
          else
            echo "✅ No API schema changes detected"
          fi

-      - name: Run Typescript checks
+      - name: Set up Frontend - Generate API client
+        id: generate-api-client
+        run: pnpm orval --config ./orval.config.ts
+        # Continue with type generation & check even if there are schema changes
+        if: success() || (steps.format-schema.outcome == 'success')
+
+      - name: Check for TypeScript errors
        run: pnpm types
+        if: success() || (steps.generate-api-client.outcome == 'success')
+
+  e2e_test:
+    name: end-to-end tests
+    runs-on: big-boi
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          submodules: recursive
+
+      - name: Set up Platform - Copy default supabase .env
+        run: |
+          cp ../.env.default ../.env
+
+      - name: Set up Platform - Copy backend .env and set OpenAI API key
+        run: |
+          cp ../backend/.env.default ../backend/.env
+          echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
+        env:
+          # Used by E2E test data script to generate embeddings for approved store agents
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      - name: Set up Platform - Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          driver: docker-container
+          driver-opts: network=host
+
+      - name: Set up Platform - Expose GHA cache to docker buildx CLI
+        uses: crazy-max/ghaction-github-runtime@v4
+
+      - name: Set up Platform - Build Docker images (with cache)
+        working-directory: autogpt_platform
+        run: |
+          pip install pyyaml
+
+          # Resolve extends and generate a flat compose file that bake can understand
+          docker compose -f docker-compose.yml config > docker-compose.resolved.yml
+
+          # Add cache configuration to the resolved compose file
+          python ../.github/workflows/scripts/docker-ci-fix-compose-build-cache.py \
+            --source docker-compose.resolved.yml \
+            --cache-from "type=gha" \
+            --cache-to "type=gha,mode=max" \
+            --backend-hash "${{ hashFiles('autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/poetry.lock', 'autogpt_platform/backend/backend/**') }}" \
+            --frontend-hash "${{ hashFiles('autogpt_platform/frontend/Dockerfile', 'autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/src/**') }}" \
+            --git-ref "${{ github.ref }}"
+
+          # Build with bake using the resolved compose file (now includes cache config)
+          docker buildx bake --allow=fs.read=.. -f docker-compose.resolved.yml --load
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Cache E2E test data
+        id: e2e-data-cache
+        uses: actions/cache@v5
+        with:
+          path: /tmp/e2e_test_data.sql
+          key: e2e-test-data-${{ hashFiles('autogpt_platform/backend/test/e2e_test_data.py', 'autogpt_platform/backend/migrations/**', '.github/workflows/platform-fullstack-ci.yml') }}
+
+      - name: Set up Platform - Start Supabase DB + Auth
+        run: |
+          docker compose -f ../docker-compose.resolved.yml up -d db auth --no-build
+          echo "Waiting for database to be ready..."
+          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done'
+          echo "Waiting for auth service to be ready..."
+          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -c "SELECT 1 FROM auth.users LIMIT 1" 2>/dev/null; do sleep 2; done' || echo "Auth schema check timeout, continuing..."
+
+      - name: Set up Platform - Run migrations
+        run: |
+          echo "Running migrations..."
+          docker compose -f ../docker-compose.resolved.yml run --rm migrate
+          echo "✅ Migrations completed"
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Load cached E2E test data
+        if: steps.e2e-data-cache.outputs.cache-hit == 'true'
+        run: |
+          echo "✅ Found cached E2E test data, restoring..."
+          {
+            echo "SET session_replication_role = 'replica';"
+            cat /tmp/e2e_test_data.sql
+            echo "SET session_replication_role = 'origin';"
+          } | docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -b
+          # Refresh materialized views after restore
+          docker compose -f ../docker-compose.resolved.yml exec -T db \
+            psql -U postgres -d postgres -b -c "SET search_path TO platform; SELECT refresh_store_materialized_views();" || true
+
+          echo "✅ E2E test data restored from cache"
+
+      - name: Set up Platform - Start (all other services)
+        run: |
+          docker compose -f ../docker-compose.resolved.yml up -d --no-build
+          echo "Waiting for rest_server to be ready..."
+          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Create E2E test data
+        if: steps.e2e-data-cache.outputs.cache-hit != 'true'
+        run: |
+          echo "Creating E2E test data..."
+          docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.resolved.yml ps -q rest_server):/tmp/e2e_test_data.py
+          docker compose -f ../docker-compose.resolved.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
+            echo "❌ E2E test data creation failed!"
+            docker compose -f ../docker-compose.resolved.yml logs --tail=50 rest_server
+            exit 1
+          }
+
+          # Dump auth.users + platform schema for cache (two separate dumps)
+          echo "Dumping database for cache..."
+          {
+            docker compose -f ../docker-compose.resolved.yml exec -T db \
+              pg_dump -U postgres --data-only --column-inserts \
+              --table='auth.users' postgres
+            docker compose -f ../docker-compose.resolved.yml exec -T db \
+              pg_dump -U postgres --data-only --column-inserts \
+              --schema=platform \
+              --exclude-table='platform._prisma_migrations' \
+              --exclude-table='platform.apscheduler_jobs' \
+              --exclude-table='platform.apscheduler_jobs_batched_notifications' \
+              postgres
+          } > /tmp/e2e_test_data.sql
+
+          echo "✅ Database dump created for caching ($(wc -l < /tmp/e2e_test_data.sql) lines)"
+
+      - name: Set up tests - Enable corepack
+        run: corepack enable
+
+      - name: Set up tests - Set up Node
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
+
+      - name: Set up tests - Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Set up tests - Install browser 'chromium'
+        run: pnpm playwright install --with-deps chromium
+
+      - name: Run Playwright tests
+        run: pnpm test:no-build
+        continue-on-error: false
+
+      - name: Upload Playwright report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-report
+          path: playwright-report
+          if-no-files-found: ignore
+          retention-days: 3
+
+      - name: Upload Playwright test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-test-results
+          path: test-results
+          if-no-files-found: ignore
+          retention-days: 3
+
+      - name: Print Final Docker Compose logs
+        if: always()
+        run: docker compose -f ../docker-compose.resolved.yml logs
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -8,7 +8,7 @@ from typing import Annotated
 from uuid import uuid4

 from autogpt_libs import auth
-from fastapi import APIRouter, Depends, HTTPException, Query, Response, Security
+from fastapi import APIRouter, HTTPException, Query, Response, Security
 from fastapi.responses import StreamingResponse
 from prisma.models import UserWorkspaceFile
 from pydantic import BaseModel, Field, field_validator
@@ -27,6 +27,12 @@ from backend.copilot.model import (
    get_user_sessions,
    update_session_title,
 )
+from backend.copilot.rate_limit import (
+    CoPilotUsageStatus,
+    RateLimitExceeded,
+    check_rate_limit,
+    get_usage_status,
+)
 from backend.copilot.response_model import StreamError, StreamFinish, StreamHeartbeat
 from backend.copilot.tools.e2b_sandbox import kill_sandbox
 from backend.copilot.tools.models import (
@@ -120,6 +126,8 @@ class SessionDetailResponse(BaseModel):
    user_id: str | None
    messages: list[dict]
    active_stream: ActiveStreamInfo | None = None  # Present if stream is still active
+    total_prompt_tokens: int = 0
+    total_completion_tokens: int = 0


 class SessionSummaryResponse(BaseModel):
@@ -207,7 +215,7 @@ async def list_sessions(
            }
        except Exception:
            logger.warning(
-                "Failed to fetch processing status from Redis; " "defaulting to empty"
+                "Failed to fetch processing status from Redis; defaulting to empty"
            )

    return ListSessionsResponse(
@@ -229,7 +237,7 @@ async def list_sessions(
    "/sessions",
 )
 async def create_session(
-    user_id: Annotated[str, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> CreateSessionResponse:
    """
    Create a new chat session.
@@ -348,7 +356,7 @@ async def update_session_title_route(
 )
 async def get_session(
    session_id: str,
-    user_id: Annotated[str | None, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> SessionDetailResponse:
    """
    Retrieve the details of a specific chat session.
@@ -389,6 +397,10 @@ async def get_session(
            last_message_id=last_message_id,
        )

+    # Sum token usage from session
+    total_prompt = sum(u.prompt_tokens for u in session.usage)
+    total_completion = sum(u.completion_tokens for u in session.usage)
+
    return SessionDetailResponse(
        id=session.session_id,
        created_at=session.started_at.isoformat(),
@@ -396,6 +408,25 @@ async def get_session(
        user_id=session.user_id or None,
        messages=messages,
        active_stream=active_stream_info,
+        total_prompt_tokens=total_prompt,
+        total_completion_tokens=total_completion,
+    )
+
+
+@router.get(
+    "/usage",
+)
+async def get_copilot_usage(
+    user_id: Annotated[str, Security(auth.get_user_id)],
+) -> CoPilotUsageStatus:
+    """Get CoPilot usage status for the authenticated user.
+
+    Returns current token usage vs limits for daily and weekly windows.
+    """
+    return await get_usage_status(
+        user_id=user_id,
+        daily_token_limit=config.daily_token_limit,
+        weekly_token_limit=config.weekly_token_limit,
    )


@@ -405,7 +436,7 @@ async def get_session(
 )
 async def cancel_session_task(
    session_id: str,
-    user_id: Annotated[str | None, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> CancelSessionResponse:
    """Cancel the active streaming task for a session.

@@ -450,7 +481,7 @@ async def cancel_session_task(
 async def stream_chat_post(
    session_id: str,
    request: StreamChatRequest,
-    user_id: str | None = Depends(auth.get_user_id),
+    user_id: str = Security(auth.get_user_id),
 ):
    """
    Stream chat responses for a session (POST with context support).
@@ -467,7 +498,7 @@ async def stream_chat_post(
    Args:
        session_id: The chat session identifier to associate with the streamed messages.
        request: Request body containing message, is_user_message, and optional context.
-        user_id: Optional authenticated user ID.
+        user_id: Authenticated user ID.
    Returns:
        StreamingResponse: SSE-formatted response chunks.

@@ -476,9 +507,7 @@ async def stream_chat_post(
    import time

    stream_start_time = time.perf_counter()
-    log_meta = {"component": "ChatStream", "session_id": session_id}
-    if user_id:
-        log_meta["user_id"] = user_id
+    log_meta = {"component": "ChatStream", "session_id": session_id, "user_id": user_id}

    logger.info(
        f"[TIMING] stream_chat_post STARTED, session={session_id}, "
@@ -496,6 +525,18 @@ async def stream_chat_post(
        },
    )

+    # Pre-turn rate limit check (token-based).
+    # check_rate_limit short-circuits internally when both limits are 0.
+    if user_id:
+        try:
+            await check_rate_limit(
+                user_id=user_id,
+                daily_token_limit=config.daily_token_limit,
+                weekly_token_limit=config.weekly_token_limit,
+            )
+        except RateLimitExceeded as e:
+            raise HTTPException(status_code=429, detail=str(e)) from e
+
    # Enrich message with file metadata if file_ids are provided.
    # Also sanitise file_ids so only validated, workspace-scoped IDs are
    # forwarded downstream (e.g. to the executor via enqueue_copilot_turn).
@@ -730,7 +771,7 @@ async def stream_chat_post(
 )
 async def resume_session_stream(
    session_id: str,
-    user_id: str | None = Depends(auth.get_user_id),
+    user_id: str = Security(auth.get_user_id),
 ):
    """
    Resume an active stream for a session.
--- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py
@@ -1,5 +1,6 @@
-"""Tests for chat API routes: session title update, file attachment validation, and suggested prompts."""
+"""Tests for chat API routes: session title update, file attachment validation, usage, rate limiting, and suggested prompts."""

+from datetime import UTC, datetime, timedelta
 from unittest.mock import AsyncMock, MagicMock

 import fastapi
@@ -251,6 +252,156 @@ def test_file_ids_scoped_to_workspace(mocker: pytest_mock.MockFixture):
    assert call_kwargs["where"]["isDeleted"] is False


+# ─── Rate limit → 429 ─────────────────────────────────────────────────
+
+
+def test_stream_chat_returns_429_on_daily_rate_limit(mocker: pytest_mock.MockFixture):
+    """When check_rate_limit raises RateLimitExceeded for daily limit the endpoint returns 429."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    # Ensure the rate-limit branch is entered by setting a non-zero limit.
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1)),
+    )
+
+    response = client.post(
+        "/sessions/sess-1/stream",
+        json={"message": "hello"},
+    )
+    assert response.status_code == 429
+    assert "daily" in response.json()["detail"].lower()
+
+
+def test_stream_chat_returns_429_on_weekly_rate_limit(mocker: pytest_mock.MockFixture):
+    """When check_rate_limit raises RateLimitExceeded for weekly limit the endpoint returns 429."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    resets_at = datetime.now(UTC) + timedelta(days=3)
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=RateLimitExceeded("weekly", resets_at),
+    )
+
+    response = client.post(
+        "/sessions/sess-1/stream",
+        json={"message": "hello"},
+    )
+    assert response.status_code == 429
+    detail = response.json()["detail"].lower()
+    assert "weekly" in detail
+    assert "resets in" in detail
+
+
+def test_stream_chat_429_includes_reset_time(mocker: pytest_mock.MockFixture):
+    """The 429 response detail should include the human-readable reset time."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=RateLimitExceeded(
+            "daily", datetime.now(UTC) + timedelta(hours=2, minutes=30)
+        ),
+    )
+
+    response = client.post(
+        "/sessions/sess-1/stream",
+        json={"message": "hello"},
+    )
+    assert response.status_code == 429
+    detail = response.json()["detail"]
+    assert "2h" in detail
+    assert "Resets in" in detail
+
+
+# ─── Usage endpoint ───────────────────────────────────────────────────
+
+
+def _mock_usage(
+    mocker: pytest_mock.MockerFixture,
+    *,
+    daily_used: int = 500,
+    weekly_used: int = 2000,
+) -> AsyncMock:
+    """Mock get_usage_status to return a predictable CoPilotUsageStatus."""
+    from backend.copilot.rate_limit import CoPilotUsageStatus, UsageWindow
+
+    resets_at = datetime.now(UTC) + timedelta(days=1)
+    status = CoPilotUsageStatus(
+        daily=UsageWindow(used=daily_used, limit=10000, resets_at=resets_at),
+        weekly=UsageWindow(used=weekly_used, limit=50000, resets_at=resets_at),
+    )
+    return mocker.patch(
+        "backend.api.features.chat.routes.get_usage_status",
+        new_callable=AsyncMock,
+        return_value=status,
+    )
+
+
+def test_usage_returns_daily_and_weekly(
+    mocker: pytest_mock.MockerFixture,
+    test_user_id: str,
+) -> None:
+    """GET /usage returns daily and weekly usage."""
+    mock_get = _mock_usage(mocker, daily_used=500, weekly_used=2000)
+
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+
+    response = client.get("/usage")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["daily"]["used"] == 500
+    assert data["weekly"]["used"] == 2000
+
+    mock_get.assert_called_once_with(
+        user_id=test_user_id,
+        daily_token_limit=10000,
+        weekly_token_limit=50000,
+    )
+
+
+def test_usage_uses_config_limits(
+    mocker: pytest_mock.MockerFixture,
+    test_user_id: str,
+) -> None:
+    """The endpoint forwards daily_token_limit and weekly_token_limit from config."""
+    mock_get = _mock_usage(mocker)
+
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 99999)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 77777)
+
+    response = client.get("/usage")
+
+    assert response.status_code == 200
+    mock_get.assert_called_once_with(
+        user_id=test_user_id,
+        daily_token_limit=99999,
+        weekly_token_limit=77777,
+    )
+
+
+def test_usage_rejects_unauthenticated_request() -> None:
+    """GET /usage should return 401 when no valid JWT is provided."""
+    unauthenticated_app = fastapi.FastAPI()
+    unauthenticated_app.include_router(chat_routes.router)
+    unauthenticated_client = fastapi.testclient.TestClient(unauthenticated_app)
+
+    response = unauthenticated_client.get("/usage")
+
+    assert response.status_code == 401
+
+
 # ─── Suggested prompts endpoint ──────────────────────────────────────


--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -36,13 +36,15 @@ from backend.copilot.response_model import (
    StreamToolInputAvailable,
    StreamToolInputStart,
    StreamToolOutputAvailable,
+    StreamUsage,
 )
 from backend.copilot.service import (
    _build_system_prompt,
    _generate_session_title,
-    client,
+    _get_openai_client,
    config,
 )
+from backend.copilot.token_tracking import persist_and_record_usage
 from backend.copilot.tools import execute_tool, get_available_tools
 from backend.copilot.tracking import track_user_message
 from backend.util.exceptions import NotFoundError
@@ -89,7 +91,7 @@ async def _compress_session_messages(
        result = await compress_context(
            messages=messages_dict,
            model=config.model,
-            client=client,
+            client=_get_openai_client(),
        )
    except Exception as e:
        logger.warning("[Baseline] Context compression with LLM failed: %s", e)
@@ -221,6 +223,10 @@ async def stream_chat_completion_baseline(
    text_block_id = str(uuid.uuid4())
    text_started = False
    step_open = False
+    # Token usage accumulators — populated from streaming chunks
+    turn_prompt_tokens = 0
+    turn_completion_tokens = 0
+    _stream_error = False  # Track whether an error occurred during streaming
    try:
        for _round in range(_MAX_TOOL_ROUNDS):
            # Open a new step for each LLM round
@@ -232,16 +238,31 @@ async def stream_chat_completion_baseline(
                model=config.model,
                messages=openai_messages,
                stream=True,
+                stream_options={"include_usage": True},
            )
            if tools:
                create_kwargs["tools"] = tools
-            response = await client.chat.completions.create(**create_kwargs)  # type: ignore[arg-type]  # dynamic kwargs
+            response = await _get_openai_client().chat.completions.create(**create_kwargs)  # type: ignore[arg-type]  # dynamic kwargs

            # Accumulate streamed response (text + tool calls)
            round_text = ""
            tool_calls_by_index: dict[int, dict[str, str]] = {}

            async for chunk in response:
+                # Capture token usage from the streaming chunk.
+                # OpenRouter normalises all providers into OpenAI format
+                # where prompt_tokens already includes cached tokens
+                # (unlike Anthropic's native API). Use += to sum all
+                # tool-call rounds since each API call is independent.
+                # NOTE: stream_options={"include_usage": True} is not
+                # universally supported — some providers (Mistral, Llama
+                # via OpenRouter) always return chunk.usage=None. When
+                # that happens, tokens stay 0 and the tiktoken fallback
+                # below activates. Fail-open: one round is estimated.
+                if chunk.usage:
+                    turn_prompt_tokens += chunk.usage.prompt_tokens or 0
+                    turn_completion_tokens += chunk.usage.completion_tokens or 0
+
                delta = chunk.choices[0].delta if chunk.choices else None
                if not delta:
                    continue
@@ -394,6 +415,7 @@ async def stream_chat_completion_baseline(
            )

    except Exception as e:
+        _stream_error = True
        error_msg = str(e) or type(e).__name__
        logger.error("[Baseline] Streaming error: %s", error_msg, exc_info=True)
        # Close any open text/step before emitting error
@@ -411,6 +433,49 @@ async def stream_chat_completion_baseline(
            except Exception:
                logger.warning("[Baseline] Langfuse trace context teardown failed")

+        # Fallback: estimate tokens via tiktoken when the provider does
+        # not honour stream_options={"include_usage": True}.
+        # Count the full message list (system + history + turn) since
+        # each API call sends the complete context window.
+        # NOTE: This estimates one round's prompt tokens. Multi-round tool-calling
+        # turns consume prompt tokens on each API call, so the total is underestimated.
+        # Skip fallback when an error occurred and no output was produced —
+        # charging rate-limit tokens for completely failed requests is unfair.
+        if (
+            turn_prompt_tokens == 0
+            and turn_completion_tokens == 0
+            and not (_stream_error and not assistant_text)
+        ):
+            from backend.util.prompt import (
+                estimate_token_count,
+                estimate_token_count_str,
+            )
+
+            turn_prompt_tokens = max(
+                estimate_token_count(openai_messages, model=config.model), 1
+            )
+            turn_completion_tokens = estimate_token_count_str(
+                assistant_text, model=config.model
+            )
+            logger.info(
+                "[Baseline] No streaming usage reported; estimated tokens: "
+                "prompt=%d, completion=%d",
+                turn_prompt_tokens,
+                turn_completion_tokens,
+            )
+
+        # Persist token usage to session and record for rate limiting.
+        # NOTE: OpenRouter folds cached tokens into prompt_tokens, so we
+        # cannot break out cache_read/cache_creation weights. Users on the
+        # baseline path may be slightly over-counted vs the SDK path.
+        await persist_and_record_usage(
+            session=session,
+            user_id=user_id,
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            log_prefix="[Baseline]",
+        )
+
        # Persist assistant response
        if assistant_text:
            session.messages.append(
@@ -421,4 +486,16 @@ async def stream_chat_completion_baseline(
        except Exception as persist_err:
            logger.error("[Baseline] Failed to persist session: %s", persist_err)

+    # Yield usage and finish AFTER try/finally (not inside finally).
+    # PEP 525 prohibits yielding from finally in async generators during
+    # aclose() — doing so raises RuntimeError on client disconnect.
+    # On GeneratorExit the client is already gone, so unreachable yields
+    # are harmless; on normal completion they reach the SSE stream.
+    if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
+        yield StreamUsage(
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            total_tokens=turn_prompt_tokens + turn_completion_tokens,
+        )
+
    yield StreamFinish()
--- a/autogpt_platform/backend/backend/copilot/config.py
+++ b/autogpt_platform/backend/backend/copilot/config.py
@@ -70,6 +70,27 @@ class ChatConfig(BaseSettings):
        description="Cache TTL in seconds for Langfuse prompt (0 to disable caching)",
    )

+    # Rate limiting — token-based limits per day and per week.
+    # Per-turn token cost varies with context size: ~10-15K for early turns,
+    # ~30-50K mid-session, up to ~100K pre-compaction. Average across a
+    # session with compaction cycles is ~25-35K tokens/turn, so 2.5M daily
+    # allows ~70-100 turns/day.
+    # Checked at the HTTP layer (routes.py) before each turn.
+    #
+    # TODO: These are deploy-time constants applied identically to every user.
+    #  If per-user or per-plan limits are needed (e.g., free tier vs paid), these
+    #  must move to the database (e.g., a UserPlan table) and get_usage_status /
+    #  check_rate_limit would look up each user's specific limits instead of
+    #  reading config.daily_token_limit / config.weekly_token_limit.
+    daily_token_limit: int = Field(
+        default=2_500_000,
+        description="Max tokens per day, resets at midnight UTC (0 = unlimited)",
+    )
+    weekly_token_limit: int = Field(
+        default=12_500_000,
+        description="Max tokens per week, resets Monday 00:00 UTC (0 = unlimited)",
+    )
+
    # Claude Agent SDK Configuration
    use_claude_agent_sdk: bool = Field(
        default=True,
--- a/autogpt_platform/backend/backend/copilot/model.py
+++ b/autogpt_platform/backend/backend/copilot/model.py
@@ -73,6 +73,9 @@ class Usage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
+    # Cache breakdown (Anthropic-specific; zero for non-Anthropic models)
+    cache_read_tokens: int = 0
+    cache_creation_tokens: int = 0


 class ChatSessionInfo(BaseModel):
@@ -98,7 +101,10 @@ class ChatSessionInfo(BaseModel):
            prisma_session.successfulAgentSchedules, default={}
        )

-        # Calculate usage from token counts
+        # Calculate usage from token counts.
+        # NOTE: Per-turn cache_read_tokens / cache_creation_tokens breakdown
+        # is lost after persistence — the DB only stores aggregate prompt and
+        # completion totals. This is a known limitation.
        usage = []
        if prisma_session.totalPromptTokens or prisma_session.totalCompletionTokens:
            usage.append(
--- a/autogpt_platform/backend/backend/copilot/rate_limit.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit.py
@@ -0,0 +1,266 @@
+"""CoPilot rate limiting based on token usage.
+
+Uses Redis fixed-window counters to track per-user token consumption
+with configurable daily and weekly limits. Daily windows reset at
+midnight UTC; weekly windows reset at ISO week boundary (Monday 00:00
+UTC). Fails open when Redis is unavailable to avoid blocking users.
+"""
+
+import asyncio
+import logging
+from datetime import UTC, datetime, timedelta
+
+from pydantic import BaseModel, Field
+from redis.exceptions import RedisError
+
+from backend.data.redis_client import get_redis_async
+
+logger = logging.getLogger(__name__)
+
+# Redis key prefixes
+_USAGE_KEY_PREFIX = "copilot:usage"
+
+
+class UsageWindow(BaseModel):
+    """Usage within a single time window."""
+
+    used: int
+    limit: int = Field(
+        description="Maximum tokens allowed in this window. 0 means unlimited."
+    )
+    resets_at: datetime
+
+
+class CoPilotUsageStatus(BaseModel):
+    """Current usage status for a user across all windows."""
+
+    daily: UsageWindow
+    weekly: UsageWindow
+
+
+class RateLimitExceeded(Exception):
+    """Raised when a user exceeds their CoPilot usage limit."""
+
+    def __init__(self, window: str, resets_at: datetime):
+        self.window = window
+        self.resets_at = resets_at
+        delta = resets_at - datetime.now(UTC)
+        total_secs = delta.total_seconds()
+        if total_secs <= 0:
+            time_str = "now"
+        else:
+            hours = int(total_secs // 3600)
+            minutes = int((total_secs % 3600) // 60)
+            time_str = f"{hours}h {minutes}m" if hours > 0 else f"{minutes}m"
+        super().__init__(
+            f"You've reached your {window} usage limit. Resets in {time_str}."
+        )
+
+
+async def get_usage_status(
+    user_id: str,
+    daily_token_limit: int,
+    weekly_token_limit: int,
+) -> CoPilotUsageStatus:
+    """Get current usage status for a user.
+
+    Args:
+        user_id: The user's ID.
+        daily_token_limit: Max tokens per day (0 = unlimited).
+        weekly_token_limit: Max tokens per week (0 = unlimited).
+
+    Returns:
+        CoPilotUsageStatus with current usage and limits.
+    """
+    now = datetime.now(UTC)
+    daily_used = 0
+    weekly_used = 0
+    try:
+        redis = await get_redis_async()
+        daily_raw, weekly_raw = await asyncio.gather(
+            redis.get(_daily_key(user_id, now=now)),
+            redis.get(_weekly_key(user_id, now=now)),
+        )
+        daily_used = int(daily_raw or 0)
+        weekly_used = int(weekly_raw or 0)
+    except (RedisError, ConnectionError, OSError):
+        logger.warning("Redis unavailable for usage status, returning zeros")
+
+    return CoPilotUsageStatus(
+        daily=UsageWindow(
+            used=daily_used,
+            limit=daily_token_limit,
+            resets_at=_daily_reset_time(now=now),
+        ),
+        weekly=UsageWindow(
+            used=weekly_used,
+            limit=weekly_token_limit,
+            resets_at=_weekly_reset_time(now=now),
+        ),
+    )
+
+
+async def check_rate_limit(
+    user_id: str,
+    daily_token_limit: int,
+    weekly_token_limit: int,
+) -> None:
+    """Check if user is within rate limits. Raises RateLimitExceeded if not.
+
+    This is a pre-turn soft check. The authoritative usage counter is updated
+    by ``record_token_usage()`` after the turn completes. Under concurrency,
+    two parallel turns may both pass this check against the same snapshot.
+    This is acceptable because token-based limits are approximate by nature
+    (the exact token count is unknown until after generation).
+
+    Fails open: if Redis is unavailable, allows the request.
+    """
+    # Short-circuit: when both limits are 0 (unlimited) skip the Redis
+    # round-trip entirely.
+    if daily_token_limit <= 0 and weekly_token_limit <= 0:
+        return
+
+    now = datetime.now(UTC)
+    try:
+        redis = await get_redis_async()
+        daily_raw, weekly_raw = await asyncio.gather(
+            redis.get(_daily_key(user_id, now=now)),
+            redis.get(_weekly_key(user_id, now=now)),
+        )
+        daily_used = int(daily_raw or 0)
+        weekly_used = int(weekly_raw or 0)
+    except (RedisError, ConnectionError, OSError):
+        logger.warning("Redis unavailable for rate limit check, allowing request")
+        return
+
+    # Worst-case overshoot: N concurrent requests × ~15K tokens each.
+    if daily_token_limit > 0 and daily_used >= daily_token_limit:
+        raise RateLimitExceeded("daily", _daily_reset_time(now=now))
+
+    if weekly_token_limit > 0 and weekly_used >= weekly_token_limit:
+        raise RateLimitExceeded("weekly", _weekly_reset_time(now=now))
+
+
+async def record_token_usage(
+    user_id: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+    *,
+    cache_read_tokens: int = 0,
+    cache_creation_tokens: int = 0,
+) -> None:
+    """Record token usage for a user across all windows.
+
+    Uses cost-weighted counting so cached tokens don't unfairly penalise
+    multi-turn conversations. Anthropic's pricing:
+      - uncached input: 100%
+      - cache creation:  25%
+      - cache read:      10%
+      - output:         100%
+
+    ``prompt_tokens`` should be the *uncached* input count (``input_tokens``
+    from the API response). Cache counts are passed separately.
+
+    Args:
+        user_id: The user's ID.
+        prompt_tokens: Uncached input tokens.
+        completion_tokens: Output tokens.
+        cache_read_tokens: Tokens served from prompt cache (10% cost).
+        cache_creation_tokens: Tokens written to prompt cache (25% cost).
+    """
+    prompt_tokens = max(0, prompt_tokens)
+    completion_tokens = max(0, completion_tokens)
+    cache_read_tokens = max(0, cache_read_tokens)
+    cache_creation_tokens = max(0, cache_creation_tokens)
+
+    weighted_input = (
+        prompt_tokens
+        + round(cache_creation_tokens * 0.25)
+        + round(cache_read_tokens * 0.1)
+    )
+    total = weighted_input + completion_tokens
+    if total <= 0:
+        return
+
+    raw_total = (
+        prompt_tokens + cache_read_tokens + cache_creation_tokens + completion_tokens
+    )
+    logger.info(
+        "Recording token usage for %s: raw=%d, weighted=%d "
+        "(uncached=%d, cache_read=%d@10%%, cache_create=%d@25%%, output=%d)",
+        user_id[:8],
+        raw_total,
+        total,
+        prompt_tokens,
+        cache_read_tokens,
+        cache_creation_tokens,
+        completion_tokens,
+    )
+
+    now = datetime.now(UTC)
+    try:
+        redis = await get_redis_async()
+        # transaction=False: these are independent INCRBY+EXPIRE pairs on
+        # separate keys — no cross-key atomicity needed.  Skipping
+        # MULTI/EXEC avoids the overhead.  If the connection drops between
+        # INCRBY and EXPIRE the key survives until the next date-based key
+        # rotation (daily/weekly), so the memory-leak risk is negligible.
+        pipe = redis.pipeline(transaction=False)
+
+        # Daily counter (expires at next midnight UTC)
+        d_key = _daily_key(user_id, now=now)
+        pipe.incrby(d_key, total)
+        seconds_until_daily_reset = int(
+            (_daily_reset_time(now=now) - now).total_seconds()
+        )
+        pipe.expire(d_key, max(seconds_until_daily_reset, 1))
+
+        # Weekly counter (expires end of week)
+        w_key = _weekly_key(user_id, now=now)
+        pipe.incrby(w_key, total)
+        seconds_until_weekly_reset = int(
+            (_weekly_reset_time(now=now) - now).total_seconds()
+        )
+        pipe.expire(w_key, max(seconds_until_weekly_reset, 1))
+
+        await pipe.execute()
+    except (RedisError, ConnectionError, OSError):
+        logger.warning(
+            "Redis unavailable for recording token usage (tokens=%d)",
+            total,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+def _daily_key(user_id: str, now: datetime | None = None) -> str:
+    if now is None:
+        now = datetime.now(UTC)
+    return f"{_USAGE_KEY_PREFIX}:daily:{user_id}:{now.strftime('%Y-%m-%d')}"
+
+
+def _weekly_key(user_id: str, now: datetime | None = None) -> str:
+    if now is None:
+        now = datetime.now(UTC)
+    year, week, _ = now.isocalendar()
+    return f"{_USAGE_KEY_PREFIX}:weekly:{user_id}:{year}-W{week:02d}"
+
+
+def _daily_reset_time(now: datetime | None = None) -> datetime:
+    """Calculate when the current daily window resets (next midnight UTC)."""
+    if now is None:
+        now = datetime.now(UTC)
+    return now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
+
+
+def _weekly_reset_time(now: datetime | None = None) -> datetime:
+    """Calculate when the current weekly window resets (next Monday 00:00 UTC)."""
+    if now is None:
+        now = datetime.now(UTC)
+    days_until_monday = (7 - now.weekday()) % 7 or 7
+    return now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(
+        days=days_until_monday
+    )
--- a/autogpt_platform/backend/backend/copilot/rate_limit_test.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit_test.py
@@ -0,0 +1,334 @@
+"""Unit tests for CoPilot rate limiting."""
+
+from datetime import UTC, datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from redis.exceptions import RedisError
+
+from .rate_limit import (
+    CoPilotUsageStatus,
+    RateLimitExceeded,
+    check_rate_limit,
+    get_usage_status,
+    record_token_usage,
+)
+
+_USER = "test-user-rl"
+
+
+# ---------------------------------------------------------------------------
+# RateLimitExceeded
+# ---------------------------------------------------------------------------
+
+
+class TestRateLimitExceeded:
+    def test_message_contains_window_name(self):
+        exc = RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1))
+        assert "daily" in str(exc)
+
+    def test_message_contains_reset_time(self):
+        exc = RateLimitExceeded(
+            "weekly", datetime.now(UTC) + timedelta(hours=2, minutes=30)
+        )
+        msg = str(exc)
+        # Allow for slight timing drift (29m or 30m)
+        assert "2h " in msg
+        assert "Resets in" in msg
+
+    def test_message_minutes_only_when_under_one_hour(self):
+        exc = RateLimitExceeded("daily", datetime.now(UTC) + timedelta(minutes=15))
+        msg = str(exc)
+        assert "Resets in" in msg
+        # Should not have "0h"
+        assert "0h" not in msg
+
+    def test_message_says_now_when_resets_at_is_in_the_past(self):
+        """Negative delta (clock skew / stale TTL) should say 'now', not '-1h -30m'."""
+        exc = RateLimitExceeded("daily", datetime.now(UTC) - timedelta(minutes=5))
+        assert "Resets in now" in str(exc)
+
+
+# ---------------------------------------------------------------------------
+# get_usage_status
+# ---------------------------------------------------------------------------
+
+
+class TestGetUsageStatus:
+    @pytest.mark.asyncio
+    async def test_returns_redis_values(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["500", "2000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert isinstance(status, CoPilotUsageStatus)
+        assert status.daily.used == 500
+        assert status.daily.limit == 10000
+        assert status.weekly.used == 2000
+        assert status.weekly.limit == 50000
+
+    @pytest.mark.asyncio
+    async def test_returns_zeros_when_redis_unavailable(self):
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 0
+        assert status.weekly.used == 0
+
+    @pytest.mark.asyncio
+    async def test_partial_none_daily_counter(self):
+        """Daily counter is None (new day), weekly has usage."""
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=[None, "3000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 0
+        assert status.weekly.used == 3000
+
+    @pytest.mark.asyncio
+    async def test_partial_none_weekly_counter(self):
+        """Weekly counter is None (start of week), daily has usage."""
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["500", None])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 500
+        assert status.weekly.used == 0
+
+    @pytest.mark.asyncio
+    async def test_resets_at_daily_is_next_midnight_utc(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["0", "0"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        now = datetime.now(UTC)
+        # Daily reset should be within 24h
+        assert status.daily.resets_at > now
+        assert status.daily.resets_at <= now + timedelta(hours=24, seconds=5)
+
+
+# ---------------------------------------------------------------------------
+# check_rate_limit
+# ---------------------------------------------------------------------------
+
+
+class TestCheckRateLimit:
+    @pytest.mark.asyncio
+    async def test_allows_when_under_limit(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["100", "200"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise
+            await check_rate_limit(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+    @pytest.mark.asyncio
+    async def test_raises_when_daily_limit_exceeded(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["10000", "200"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            with pytest.raises(RateLimitExceeded) as exc_info:
+                await check_rate_limit(
+                    _USER, daily_token_limit=10000, weekly_token_limit=50000
+                )
+            assert exc_info.value.window == "daily"
+
+    @pytest.mark.asyncio
+    async def test_raises_when_weekly_limit_exceeded(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["100", "50000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            with pytest.raises(RateLimitExceeded) as exc_info:
+                await check_rate_limit(
+                    _USER, daily_token_limit=10000, weekly_token_limit=50000
+                )
+            assert exc_info.value.window == "weekly"
+
+    @pytest.mark.asyncio
+    async def test_allows_when_redis_unavailable(self):
+        """Fail-open: allow requests when Redis is down."""
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            # Should not raise
+            await check_rate_limit(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+    @pytest.mark.asyncio
+    async def test_skips_check_when_limit_is_zero(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["999999", "999999"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise — limits of 0 mean unlimited
+            await check_rate_limit(_USER, daily_token_limit=0, weekly_token_limit=0)
+
+
+# ---------------------------------------------------------------------------
+# record_token_usage
+# ---------------------------------------------------------------------------
+
+
+class TestRecordTokenUsage:
+    @staticmethod
+    def _make_pipeline_mock() -> MagicMock:
+        """Create a pipeline mock with sync methods and async execute."""
+        pipe = MagicMock()
+        pipe.execute = AsyncMock(return_value=[])
+        return pipe
+
+    @pytest.mark.asyncio
+    async def test_increments_redis_counters(self):
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+        # Should call incrby twice (daily + weekly) with total=150
+        incrby_calls = mock_pipe.incrby.call_args_list
+        assert len(incrby_calls) == 2
+        assert incrby_calls[0].args[1] == 150  # daily
+        assert incrby_calls[1].args[1] == 150  # weekly
+
+    @pytest.mark.asyncio
+    async def test_skips_when_zero_tokens(self):
+        mock_redis = AsyncMock()
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=0, completion_tokens=0)
+
+        # Should not call pipeline at all
+        mock_redis.pipeline.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_sets_expire_on_both_keys(self):
+        """Pipeline should call expire for both daily and weekly keys."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+        expire_calls = mock_pipe.expire.call_args_list
+        assert len(expire_calls) == 2
+
+        # Daily key TTL should be positive (seconds until next midnight)
+        daily_ttl = expire_calls[0].args[1]
+        assert daily_ttl >= 1
+
+        # Weekly key TTL should be positive (seconds until next Monday)
+        weekly_ttl = expire_calls[1].args[1]
+        assert weekly_ttl >= 1
+
+    @pytest.mark.asyncio
+    async def test_handles_redis_failure_gracefully(self):
+        """Should not raise when Redis is unavailable."""
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            # Should not raise
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+    @pytest.mark.asyncio
+    async def test_cost_weighted_counting(self):
+        """Cached tokens should be weighted: cache_read=10%, cache_create=25%."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(
+                _USER,
+                prompt_tokens=100,  # uncached → 100
+                completion_tokens=50,  # output → 50
+                cache_read_tokens=10000,  # 10% → 1000
+                cache_creation_tokens=400,  # 25% → 100
+            )
+
+        # Expected weighted total: 100 + 1000 + 100 + 50 = 1250
+        incrby_calls = mock_pipe.incrby.call_args_list
+        assert len(incrby_calls) == 2
+        assert incrby_calls[0].args[1] == 1250  # daily
+        assert incrby_calls[1].args[1] == 1250  # weekly
+
+    @pytest.mark.asyncio
+    async def test_handles_redis_error_during_pipeline_execute(self):
+        """Should not raise when pipeline.execute() fails with RedisError."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_pipe.execute = AsyncMock(side_effect=RedisError("Pipeline failed"))
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise — fail-open
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
--- a/autogpt_platform/backend/backend/copilot/response_model.py
+++ b/autogpt_platform/backend/backend/copilot/response_model.py
@@ -186,12 +186,43 @@ class StreamToolOutputAvailable(StreamBaseResponse):


 class StreamUsage(StreamBaseResponse):
-    """Token usage statistics."""
+    """Token usage statistics.
+
+    Emitted as an SSE comment so the Vercel AI SDK parser ignores it
+    (it uses z.strictObject() and rejects unknown event types).
+    Usage data is recorded server-side (session DB + Redis counters).
+    """

    type: ResponseType = ResponseType.USAGE
-    promptTokens: int = Field(..., description="Number of prompt tokens")
-    completionTokens: int = Field(..., description="Number of completion tokens")
-    totalTokens: int = Field(..., description="Total number of tokens")
+    prompt_tokens: int = Field(
+        ...,
+        serialization_alias="promptTokens",
+        description="Number of uncached prompt tokens",
+    )
+    completion_tokens: int = Field(
+        ...,
+        serialization_alias="completionTokens",
+        description="Number of completion tokens",
+    )
+    total_tokens: int = Field(
+        ...,
+        serialization_alias="totalTokens",
+        description="Total number of tokens (raw, not weighted)",
+    )
+    cache_read_tokens: int = Field(
+        default=0,
+        serialization_alias="cacheReadTokens",
+        description="Prompt tokens served from cache (10% cost)",
+    )
+    cache_creation_tokens: int = Field(
+        default=0,
+        serialization_alias="cacheCreationTokens",
+        description="Prompt tokens written to cache (25% cost)",
+    )
+
+    def to_sse(self) -> str:
+        """Emit as SSE comment so the AI SDK parser ignores it."""
+        return f": usage {self.model_dump_json(exclude_none=True, by_alias=True)}\n\n"


 class StreamError(StreamBaseResponse):
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -55,12 +55,14 @@ from ..response_model import (
    StreamTextDelta,
    StreamToolInputAvailable,
    StreamToolOutputAvailable,
+    StreamUsage,
 )
 from ..service import (
    _build_system_prompt,
    _generate_session_title,
    _is_langfuse_configured,
 )
+from ..token_tracking import persist_and_record_usage
 from ..tools.e2b_sandbox import get_or_create_sandbox, pause_sandbox_direct
 from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path
 from ..tracking import track_user_message
@@ -736,6 +738,13 @@ async def stream_chat_completion_sdk(
    _otel_ctx: Any = None

    # Make sure there is no more code between the lock acquisition and try-block.
+    # Token usage accumulators — populated from ResultMessage at end of turn
+    turn_prompt_tokens = 0  # uncached input tokens only
+    turn_completion_tokens = 0
+    turn_cache_read_tokens = 0
+    turn_cache_creation_tokens = 0
+    turn_cost_usd: float | None = None
+
    try:
        # Build system prompt (reuses non-SDK path with Langfuse support).
        # Pre-compute the cwd here so the exact working directory path can be
@@ -1112,7 +1121,7 @@ async def stream_chat_completion_sdk(
                                - len(adapter.resolved_tool_calls),
                            )

-                    # Log ResultMessage details for debugging
+                    # Log ResultMessage details and capture token usage
                    if isinstance(sdk_msg, ResultMessage):
                        logger.info(
                            "%s Received: ResultMessage %s "
@@ -1131,6 +1140,33 @@ async def stream_chat_completion_sdk(
                                sdk_msg.result or "(no error message provided)",
                            )

+                        # Capture token usage from ResultMessage.
+                        # Anthropic reports cached tokens separately:
+                        #   input_tokens = uncached only
+                        #   cache_read_input_tokens = served from cache
+                        #   cache_creation_input_tokens = written to cache
+                        if sdk_msg.usage:
+                            turn_prompt_tokens += sdk_msg.usage.get("input_tokens", 0)
+                            turn_cache_read_tokens += sdk_msg.usage.get(
+                                "cache_read_input_tokens", 0
+                            )
+                            turn_cache_creation_tokens += sdk_msg.usage.get(
+                                "cache_creation_input_tokens", 0
+                            )
+                            turn_completion_tokens += sdk_msg.usage.get(
+                                "output_tokens", 0
+                            )
+                            logger.info(
+                                "%s Token usage: uncached=%d, cache_read=%d, cache_create=%d, output=%d",
+                                log_prefix,
+                                turn_prompt_tokens,
+                                turn_cache_read_tokens,
+                                turn_cache_creation_tokens,
+                                turn_completion_tokens,
+                            )
+                        if sdk_msg.total_cost_usd is not None:
+                            turn_cost_usd = sdk_msg.total_cost_usd
+
                    # Emit compaction end if SDK finished compacting.
                    # When compaction ends, sync TranscriptBuilder with the
                    # CLI's active context so they stay identical.
@@ -1347,6 +1383,26 @@ async def stream_chat_completion_sdk(
            ) and not has_appended_assistant:
                session.messages.append(assistant_response)

+        # Emit token usage to the client (must be in try to reach SSE stream).
+        # Session persistence of usage is in finally to stay consistent with
+        # rate-limit recording even if an exception interrupts between here
+        # and the finally block.
+        if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
+            # total_tokens = prompt (uncached input) + completion (output).
+            # Cache tokens are tracked separately and excluded from total
+            # so that the semantics match the baseline path (OpenRouter)
+            # which folds cache into prompt_tokens. Keeping total_tokens
+            # = prompt + completion everywhere makes cross-path comparisons
+            # and session-level aggregation consistent.
+            total_tokens = turn_prompt_tokens + turn_completion_tokens
+            yield StreamUsage(
+                prompt_tokens=turn_prompt_tokens,
+                completion_tokens=turn_completion_tokens,
+                total_tokens=total_tokens,
+                cache_read_tokens=turn_cache_read_tokens,
+                cache_creation_tokens=turn_cache_creation_tokens,
+            )
+
        # Transcript upload is handled exclusively in the finally block
        # to avoid double-uploads (the success path used to upload the
        # old resume file, then the finally block overwrote it with the
@@ -1411,6 +1467,20 @@ async def stream_chat_completion_sdk(
            except Exception:
                logger.warning("OTEL context teardown failed", exc_info=True)

+        # --- Persist token usage to session + rate-limit counters ---
+        # Both must live in finally so they stay consistent even when an
+        # exception interrupts the try block after StreamUsage was yielded.
+        await persist_and_record_usage(
+            session=session,
+            user_id=user_id,
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            cache_read_tokens=turn_cache_read_tokens,
+            cache_creation_tokens=turn_cache_creation_tokens,
+            log_prefix=log_prefix,
+            cost_usd=turn_cost_usd,
+        )
+
        # --- Persist session messages ---
        # This MUST run in finally to persist messages even when the generator
        # is stopped early (e.g., user clicks stop, processor breaks stream loop).
--- a/autogpt_platform/backend/backend/copilot/service.py
+++ b/autogpt_platform/backend/backend/copilot/service.py
@@ -28,10 +28,24 @@ logger = logging.getLogger(__name__)

 config = ChatConfig()
 settings = Settings()
-client = LangfuseAsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+
+_client: LangfuseAsyncOpenAI | None = None
+_langfuse = None


-langfuse = get_client()
+def _get_openai_client() -> LangfuseAsyncOpenAI:
+    global _client
+    if _client is None:
+        _client = LangfuseAsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+    return _client
+
+
+def _get_langfuse():
+    global _langfuse
+    if _langfuse is None:
+        _langfuse = get_client()
+    return _langfuse
+

 # Default system prompt used when Langfuse is not configured
 # Provides minimal baseline tone and personality - all workflow, tools, and
@@ -84,7 +98,7 @@ async def _get_system_prompt_template(context: str) -> str:
                else "latest"
            )
            prompt = await asyncio.to_thread(
-                langfuse.get_prompt,
+                _get_langfuse().get_prompt,
                config.langfuse_prompt_name,
                label=label,
                cache_ttl_seconds=config.langfuse_prompt_cache_ttl,
@@ -158,7 +172,7 @@ async def _generate_session_title(
            "environment": settings.config.app_env.value,
        }

-        response = await client.chat.completions.create(
+        response = await _get_openai_client().chat.completions.create(
            model=config.title_model,
            messages=[
                {
--- a/autogpt_platform/backend/backend/copilot/token_tracking.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking.py
@@ -0,0 +1,93 @@
+"""Shared token-usage persistence and rate-limit recording.
+
+Both the baseline (OpenRouter) and SDK (Anthropic) service layers need to:
+  1. Append a ``Usage`` record to the session.
+  2. Log the turn's token counts.
+  3. Record weighted usage in Redis for rate-limiting.
+
+This module extracts that common logic so both paths stay in sync.
+"""
+
+import logging
+
+from .model import ChatSession, Usage
+from .rate_limit import record_token_usage
+
+logger = logging.getLogger(__name__)
+
+
+async def persist_and_record_usage(
+    *,
+    session: ChatSession | None,
+    user_id: str | None,
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read_tokens: int = 0,
+    cache_creation_tokens: int = 0,
+    log_prefix: str = "",
+    cost_usd: float | str | None = None,
+) -> int:
+    """Persist token usage to session and record for rate limiting.
+
+    Args:
+        session: The chat session to append usage to (may be None on error).
+        user_id: User ID for rate-limit counters (skipped if None).
+        prompt_tokens: Uncached input tokens.
+        completion_tokens: Output tokens.
+        cache_read_tokens: Tokens served from prompt cache (Anthropic only).
+        cache_creation_tokens: Tokens written to prompt cache (Anthropic only).
+        log_prefix: Prefix for log messages (e.g. "[SDK]", "[Baseline]").
+        cost_usd: Optional cost for logging (float from SDK, str otherwise).
+
+    Returns:
+        The computed total_tokens (prompt + completion; cache excluded).
+    """
+    prompt_tokens = max(0, prompt_tokens)
+    completion_tokens = max(0, completion_tokens)
+    cache_read_tokens = max(0, cache_read_tokens)
+    cache_creation_tokens = max(0, cache_creation_tokens)
+
+    if prompt_tokens <= 0 and completion_tokens <= 0:
+        return 0
+
+    # total_tokens = prompt + completion. Cache tokens are tracked
+    # separately and excluded from total so both baseline and SDK
+    # paths share the same semantics.
+    total_tokens = prompt_tokens + completion_tokens
+
+    if session is not None:
+        session.usage.append(
+            Usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_creation_tokens=cache_creation_tokens,
+            )
+        )
+
+    if cache_read_tokens or cache_creation_tokens:
+        logger.info(
+            f"{log_prefix} Turn usage: uncached={prompt_tokens}, "
+            f"cache_read={cache_read_tokens}, cache_create={cache_creation_tokens}, "
+            f"output={completion_tokens}, total={total_tokens}, cost_usd={cost_usd}"
+        )
+    else:
+        logger.info(
+            f"{log_prefix} Turn usage: prompt={prompt_tokens}, "
+            f"completion={completion_tokens}, total={total_tokens}"
+        )
+
+    if user_id:
+        try:
+            await record_token_usage(
+                user_id=user_id,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_creation_tokens=cache_creation_tokens,
+            )
+        except Exception as usage_err:
+            logger.warning(f"{log_prefix} Failed to record token usage: {usage_err}")
+
+    return total_tokens
--- a/autogpt_platform/backend/backend/copilot/token_tracking_test.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking_test.py
@@ -0,0 +1,281 @@
+"""Unit tests for token_tracking.persist_and_record_usage.
+
+Covers both the baseline (prompt+completion only) and SDK (with cache breakdown)
+calling conventions, session persistence, and rate-limit recording.
+"""
+
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from .model import ChatSession, Usage
+from .token_tracking import persist_and_record_usage
+
+
+def _make_session() -> ChatSession:
+    """Return a minimal in-memory ChatSession for testing."""
+    return ChatSession(
+        session_id="sess-test",
+        user_id="user-test",
+        title=None,
+        messages=[],
+        usage=[],
+        started_at=datetime.now(UTC),
+        updated_at=datetime.now(UTC),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Return value / total_tokens semantics
+# ---------------------------------------------------------------------------
+
+
+class TestTotalTokens:
+    @pytest.mark.asyncio
+    async def test_returns_prompt_plus_completion(self):
+        """total_tokens = prompt + completion (cache excluded from total)."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=300,
+                completion_tokens=200,
+            )
+        assert total == 500
+
+    @pytest.mark.asyncio
+    async def test_returns_zero_when_no_tokens(self):
+        """Returns 0 early when both prompt and completion are zero."""
+        total = await persist_and_record_usage(
+            session=None,
+            user_id=None,
+            prompt_tokens=0,
+            completion_tokens=0,
+        )
+        assert total == 0
+
+    @pytest.mark.asyncio
+    async def test_cache_tokens_excluded_from_total(self):
+        """Cache tokens are stored separately and not added to total_tokens."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+                cache_read_tokens=5000,
+                cache_creation_tokens=200,
+            )
+        # total = prompt + completion only (5000 + 200 cache excluded)
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_baseline_path_no_cache(self):
+        """Baseline (OpenRouter) path passes no cache tokens; total = prompt + completion."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="u1",
+                prompt_tokens=1000,
+                completion_tokens=400,
+                log_prefix="[Baseline]",
+            )
+        assert total == 1400
+
+    @pytest.mark.asyncio
+    async def test_sdk_path_with_cache(self):
+        """SDK (Anthropic) path passes cache tokens; total still = prompt + completion."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="u2",
+                prompt_tokens=200,
+                completion_tokens=100,
+                cache_read_tokens=8000,
+                cache_creation_tokens=400,
+                log_prefix="[SDK]",
+                cost_usd=0.0015,
+            )
+        assert total == 300
+
+
+# ---------------------------------------------------------------------------
+# Session persistence
+# ---------------------------------------------------------------------------
+
+
+class TestSessionPersistence:
+    @pytest.mark.asyncio
+    async def test_appends_usage_to_session(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert len(session.usage) == 1
+        usage: Usage = session.usage[0]
+        assert usage.prompt_tokens == 100
+        assert usage.completion_tokens == 50
+        assert usage.total_tokens == 150
+        assert usage.cache_read_tokens == 0
+        assert usage.cache_creation_tokens == 0
+
+    @pytest.mark.asyncio
+    async def test_appends_cache_breakdown_to_session(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session,
+                user_id=None,
+                prompt_tokens=200,
+                completion_tokens=80,
+                cache_read_tokens=3000,
+                cache_creation_tokens=500,
+            )
+        usage: Usage = session.usage[0]
+        assert usage.cache_read_tokens == 3000
+        assert usage.cache_creation_tokens == 500
+
+    @pytest.mark.asyncio
+    async def test_multiple_turns_append_multiple_records(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session, user_id=None, prompt_tokens=100, completion_tokens=50
+            )
+            await persist_and_record_usage(
+                session=session, user_id=None, prompt_tokens=200, completion_tokens=70
+            )
+        assert len(session.usage) == 2
+
+    @pytest.mark.asyncio
+    async def test_none_session_does_not_raise(self):
+        """When session is None (e.g. error path), no exception should be raised."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_no_append_when_zero_tokens(self):
+        """When tokens are zero, function returns early — session unchanged."""
+        session = _make_session()
+        total = await persist_and_record_usage(
+            session=session,
+            user_id=None,
+            prompt_tokens=0,
+            completion_tokens=0,
+        )
+        assert total == 0
+        assert len(session.usage) == 0
+
+
+# ---------------------------------------------------------------------------
+# Rate-limit recording
+# ---------------------------------------------------------------------------
+
+
+class TestRateLimitRecording:
+    @pytest.mark.asyncio
+    async def test_calls_record_token_usage_when_user_id_present(self):
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id="user-abc",
+                prompt_tokens=100,
+                completion_tokens=50,
+                cache_read_tokens=1000,
+                cache_creation_tokens=200,
+            )
+        mock_record.assert_awaited_once_with(
+            user_id="user-abc",
+            prompt_tokens=100,
+            completion_tokens=50,
+            cache_read_tokens=1000,
+            cache_creation_tokens=200,
+        )
+
+    @pytest.mark.asyncio
+    async def test_skips_record_when_user_id_is_none(self):
+        """Anonymous sessions should not create Redis keys."""
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        mock_record.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_record_failure_does_not_raise(self):
+        """A Redis error in record_token_usage should be swallowed (fail-open)."""
+        mock_record = AsyncMock(side_effect=ConnectionError("Redis down"))
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            # Should not raise
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="user-xyz",
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_skips_record_when_zero_tokens(self):
+        """Returns 0 before calling record_token_usage when tokens are zero."""
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id="user-abc",
+                prompt_tokens=0,
+                completion_tokens=0,
+            )
+        mock_record.assert_not_awaited()
--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -8,11 +8,13 @@ from pydantic_core import PydanticUndefined

 from backend.blocks._base import AnyBlockSchema
 from backend.copilot.constants import COPILOT_NODE_PREFIX, COPILOT_SESSION_PREFIX
-from backend.data.db_accessors import workspace_db
+from backend.data.credit import UsageTransactionMetadata
+from backend.data.db_accessors import credit_db, workspace_db
 from backend.data.execution import ExecutionContext
 from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
+from backend.executor.utils import block_usage_cost
 from backend.integrations.creds_manager import IntegrationCredentialsManager
-from backend.util.exceptions import BlockError
+from backend.util.exceptions import BlockError, InsufficientBalanceError
 from backend.util.type import coerce_inputs_to_schema

 from .models import BlockOutputResponse, ErrorResponse, ToolResponseBase
@@ -115,6 +117,21 @@ async def execute_block(
        # Coerce non-matching data types to the expected input schema.
        coerce_inputs_to_schema(input_data, block.input_schema)

+        # Pre-execution credit check (courtesy; spend_credits is atomic)
+        cost, cost_filter = block_usage_cost(block, input_data)
+        has_cost = cost > 0
+        _credit_db = credit_db()
+        if has_cost:
+            balance = await _credit_db.get_credits(user_id)
+            if balance < cost:
+                return ErrorResponse(
+                    message=(
+                        f"Insufficient credits to run '{block.name}'. "
+                        "Please top up your credits to continue."
+                    ),
+                    session_id=session_id,
+                )
+
        # Execute the block and collect outputs
        outputs: dict[str, list[Any]] = defaultdict(list)
        async for output_name, output_data in block.execute(
@@ -123,6 +140,51 @@ async def execute_block(
        ):
            outputs[output_name].append(output_data)

+        # Charge credits for block execution
+        if has_cost:
+            try:
+                await _credit_db.spend_credits(
+                    user_id=user_id,
+                    cost=cost,
+                    metadata=UsageTransactionMetadata(
+                        graph_exec_id=synthetic_graph_id,
+                        graph_id=synthetic_graph_id,
+                        node_id=synthetic_node_id,
+                        node_exec_id=node_exec_id,
+                        block_id=block_id,
+                        block=block.name,
+                        input=cost_filter,
+                        reason="copilot_block_execution",
+                    ),
+                )
+            except Exception as e:
+                # Block already executed (with possible side effects). Never
+                # return ErrorResponse here — the user received output and
+                # deserves it. Log the billing failure for reconciliation.
+                leak_type = (
+                    "INSUFFICIENT_BALANCE"
+                    if isinstance(e, InsufficientBalanceError)
+                    else "UNEXPECTED_ERROR"
+                )
+                logger.error(
+                    "BILLING_LEAK[%s]: block executed but credit charge failed — "
+                    "user_id=%s, block_id=%s, node_exec_id=%s, cost=%s: %s",
+                    leak_type,
+                    user_id,
+                    block_id,
+                    node_exec_id,
+                    cost,
+                    e,
+                    extra={
+                        "json_fields": {
+                            "billing_leak": True,
+                            "leak_type": leak_type,
+                            "user_id": user_id,
+                            "cost": str(cost),
+                        }
+                    },
+                )
+
        return BlockOutputResponse(
            message=f"Block '{block.name}' executed successfully",
            block_id=block_id,
@@ -133,14 +195,14 @@ async def execute_block(
        )

    except BlockError as e:
-        logger.warning(f"Block execution failed: {e}")
+        logger.warning("Block execution failed: %s", e)
        return ErrorResponse(
            message=f"Block execution failed: {e}",
            error=str(e),
            session_id=session_id,
        )
    except Exception as e:
-        logger.error(f"Unexpected error executing block: {e}", exc_info=True)
+        logger.error("Unexpected error executing block: %s", e, exc_info=True)
        return ErrorResponse(
            message=f"Failed to execute block: {str(e)}",
            error=str(e),
--- a/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
@@ -1,18 +1,197 @@
-"""Tests for execute_block type coercion in helpers.py.
-
-Verifies that execute_block() coerces string input values to match the block's
-expected input types, mirroring the executor's validate_exec() logic.
-This is critical for @@agptfile: expansion, where file content is always a string
-but the block may expect structured types (e.g. list[list[str]]).
-"""
+"""Tests for execute_block — credit charging and type coercion."""

+from collections.abc import AsyncIterator
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

+from backend.blocks._base import BlockType
 from backend.copilot.tools.helpers import execute_block
-from backend.copilot.tools.models import BlockOutputResponse
+from backend.copilot.tools.models import BlockOutputResponse, ErrorResponse
+
+_USER = "test-user-helpers"
+_SESSION = "test-session-helpers"
+
+
+def _make_block(block_id: str = "block-1", name: str = "TestBlock"):
+    """Create a minimal mock block for execute_block()."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.block_type = BlockType.STANDARD
+
+    mock.input_schema = MagicMock()
+    mock.input_schema.get_credentials_fields_info.return_value = {}
+
+    async def _execute(
+        input_data: dict, **kwargs: Any
+    ) -> AsyncIterator[tuple[str, Any]]:
+        yield "result", "ok"
+
+    mock.execute = _execute
+    return mock
+
+
+def _patch_workspace():
+    """Patch workspace_db to return a mock workspace."""
+    mock_workspace = MagicMock()
+    mock_workspace.id = "ws-1"
+    mock_ws_db = MagicMock()
+    mock_ws_db.get_or_create_workspace = AsyncMock(return_value=mock_workspace)
+    return patch("backend.copilot.tools.helpers.workspace_db", return_value=mock_ws_db)
+
+
+def _patch_credit_db(
+    get_credits_return: int = 100,
+    spend_credits_side_effect: Any = None,
+):
+    """Patch credit_db accessor to return a mock credit adapter."""
+    mock_credit = MagicMock()
+    mock_credit.get_credits = AsyncMock(return_value=get_credits_return)
+    if spend_credits_side_effect is not None:
+        mock_credit.spend_credits = AsyncMock(side_effect=spend_credits_side_effect)
+    else:
+        mock_credit.spend_credits = AsyncMock()
+    return (
+        patch(
+            "backend.copilot.tools.helpers.credit_db",
+            return_value=mock_credit,
+        ),
+        mock_credit,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Credit charging tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio(loop_scope="session")
+class TestExecuteBlockCreditCharging:
+    async def test_charges_credits_when_cost_is_positive(self):
+        """Block with cost > 0 should call spend_credits after execution."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(get_credits_return=100)
+
+        with (
+            _patch_workspace(),
+            patch(
+                "backend.copilot.tools.helpers.block_usage_cost",
+                return_value=(10, {"key": "val"}),
+            ),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={"text": "hello"},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+        mock_credit.spend_credits.assert_awaited_once()
+        call_kwargs = mock_credit.spend_credits.call_args.kwargs
+        assert call_kwargs["cost"] == 10
+        assert call_kwargs["metadata"].reason == "copilot_block_execution"
+
+    async def test_returns_error_when_insufficient_credits_before_exec(self):
+        """Pre-execution check should return ErrorResponse when balance < cost."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(get_credits_return=5)
+
+        with (
+            _patch_workspace(),
+            patch(
+                "backend.copilot.tools.helpers.block_usage_cost",
+                return_value=(10, {}),
+            ),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, ErrorResponse)
+        assert "Insufficient credits" in result.message
+
+    async def test_no_charge_when_cost_is_zero(self):
+        """Block with cost 0 should not call spend_credits."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db()
+
+        with (
+            _patch_workspace(),
+            patch(
+                "backend.copilot.tools.helpers.block_usage_cost",
+                return_value=(0, {}),
+            ),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+        # Credit functions should not be called at all for zero-cost blocks
+        mock_credit.get_credits.assert_not_awaited()
+        mock_credit.spend_credits.assert_not_awaited()
+
+    async def test_returns_output_on_post_exec_insufficient_balance(self):
+        """If charging fails after execution, output is still returned (block already ran)."""
+        from backend.util.exceptions import InsufficientBalanceError
+
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(
+            get_credits_return=15,
+            spend_credits_side_effect=InsufficientBalanceError(
+                "Low balance", _USER, 5, 10
+            ),
+        )
+
+        with (
+            _patch_workspace(),
+            patch(
+                "backend.copilot.tools.helpers.block_usage_cost",
+                return_value=(10, {}),
+            ),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        # Block already executed (with side effects), so output is returned
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+
+
+# ---------------------------------------------------------------------------
+# Type coercion tests
+# ---------------------------------------------------------------------------


 def _make_block_schema(annotations: dict[str, Any]) -> MagicMock:
@@ -28,7 +207,7 @@ def _make_block_schema(annotations: dict[str, Any]) -> MagicMock:
    return schema


-def _make_block(
+def _make_coerce_block(
    block_id: str,
    name: str,
    annotations: dict[str, Any],
@@ -60,7 +239,7 @@ _TEST_USER_ID = "test-user-coerce"
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_nested_list():
    """JSON string → list[list[str]] (Google Sheets CSV import case)."""
-    block = _make_block(
+    block = _make_coerce_block(
        "sheets-write",
        "Google Sheets Write",
        {"values": list[list[str]], "spreadsheet_id": str},
@@ -103,7 +282,7 @@ async def test_coerce_json_string_to_nested_list():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_list():
    """JSON string → list[str]."""
-    block = _make_block(
+    block = _make_coerce_block(
        "list-block",
        "List Block",
        {"items": list[str]},
@@ -135,7 +314,7 @@ async def test_coerce_json_string_to_list():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_dict():
    """JSON string → dict[str, str]."""
-    block = _make_block(
+    block = _make_coerce_block(
        "dict-block",
        "Dict Block",
        {"config": dict[str, str]},
@@ -167,7 +346,7 @@ async def test_coerce_json_string_to_dict():
@pytest.mark.asyncio(loop_scope="session")
 async def test_no_coercion_when_type_matches():
    """Already-correct types pass through without coercion."""
-    block = _make_block(
+    block = _make_coerce_block(
        "pass-through",
        "Pass Through",
        {"values": list[list[str]], "name": str},
@@ -201,7 +380,7 @@ async def test_no_coercion_when_type_matches():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_string_to_int():
    """String number → int."""
-    block = _make_block(
+    block = _make_coerce_block(
        "int-block",
        "Int Block",
        {"count": int},
@@ -234,7 +413,7 @@ async def test_coerce_string_to_int():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_skips_none_values():
    """None values are not coerced (they may be optional fields)."""
-    block = _make_block(
+    block = _make_coerce_block(
        "optional-block",
        "Optional Block",
        {"data": list[str], "label": str},
@@ -267,7 +446,7 @@ async def test_coerce_skips_none_values():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_union_type_preserves_valid_member():
    """Union-typed fields should not be coerced when the value matches a member."""
-    block = _make_block(
+    block = _make_coerce_block(
        "union-block",
        "Union Block",
        {"content": str | list[str]},
@@ -301,7 +480,7 @@ async def test_coerce_union_type_preserves_valid_member():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_inner_elements_of_generic():
    """Inner elements of generic containers are recursively coerced."""
-    block = _make_block(
+    block = _make_coerce_block(
        "inner-coerce",
        "Inner Coerce",
        {"values": list[str]},
--- a/autogpt_platform/backend/backend/data/db_accessors.py
+++ b/autogpt_platform/backend/backend/data/db_accessors.py
@@ -129,3 +129,16 @@ def review_db():
        review_db = get_database_manager_async_client()

    return review_db
+
+
+def credit_db():
+    if db.is_connected():
+        from backend.data import db_manager as _credit_db
+
+        credit_db = _credit_db
+    else:
+        from backend.util.clients import get_database_manager_async_client
+
+        credit_db = get_database_manager_async_client()
+
+    return credit_db
--- a/autogpt_platform/backend/backend/data/db_manager.py
+++ b/autogpt_platform/backend/backend/data/db_manager.py
@@ -148,6 +148,11 @@ async def _get_credits(user_id: str) -> int:
    return await user_credit_model.get_credits(user_id)


+# Public aliases used by db_accessors.credit_db() when Prisma is connected
+get_credits = _get_credits
+spend_credits = _spend_credits
+
+
 class DatabaseManager(AppService):
    """Database connection pooling service.

@@ -512,6 +517,10 @@ class DatabaseManagerAsyncClient(AppServiceClient):
    list_workspace_files = d.list_workspace_files
    soft_delete_workspace_file = d.soft_delete_workspace_file

+    # ============ Credits ============ #
+    spend_credits = d.spend_credits
+    get_credits = d.get_credits
+
    # ============ Understanding ============ #
    get_business_understanding = d.get_business_understanding
    upsert_business_understanding = d.upsert_business_understanding
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
@@ -1,14 +1,8 @@
 "use client";

-import {
-  DropdownMenu,
-  DropdownMenuContent,
-  DropdownMenuItem,
-  DropdownMenuTrigger,
-} from "@/components/molecules/DropdownMenu/DropdownMenu";
 import { SidebarProvider } from "@/components/ui/sidebar";
 import { cn } from "@/lib/utils";
-import { DotsThree, UploadSimple } from "@phosphor-icons/react";
+import { UploadSimple } from "@phosphor-icons/react";
 import { useCallback, useRef, useState } from "react";
 import { ChatContainer } from "./components/ChatContainer/ChatContainer";
 import { ChatSidebar } from "./components/ChatSidebar/ChatSidebar";
@@ -89,10 +83,9 @@ export function CopilotPage() {
    handleDrawerOpenChange,
    handleSelectSession,
    handleNewChat,
-    // Delete functionality
+    // Delete functionality (available via ChatSidebar context menu on all viewports)
    sessionToDelete,
    isDeleting,
-    handleDeleteClick,
    handleConfirmDelete,
    handleCancelDelete,
  } = useCopilotPage();
@@ -148,38 +141,6 @@ export function CopilotPage() {
            isUploadingFiles={isUploadingFiles}
            droppedFiles={droppedFiles}
            onDroppedFilesConsumed={handleDroppedFilesConsumed}
-            headerSlot={
-              isMobile && sessionId ? (
-                <div className="flex justify-end">
-                  <DropdownMenu>
-                    <DropdownMenuTrigger asChild>
-                      <button
-                        className="rounded p-1.5 hover:bg-neutral-100"
-                        aria-label="More actions"
-                      >
-                        <DotsThree className="h-5 w-5 text-neutral-600" />
-                      </button>
-                    </DropdownMenuTrigger>
-                    <DropdownMenuContent align="end">
-                      <DropdownMenuItem
-                        onClick={() => {
-                          const session = sessions.find(
-                            (s) => s.id === sessionId,
-                          );
-                          if (session) {
-                            handleDeleteClick(session.id, session.title);
-                          }
-                        }}
-                        disabled={isDeleting}
-                        className="text-red-600 focus:bg-red-50 focus:text-red-600"
-                      >
-                        Delete chat
-                      </DropdownMenuItem>
-                    </DropdownMenuContent>
-                  </DropdownMenu>
-                </div>
-              ) : undefined
-            }
          />
        </div>
      </div>
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
@@ -2,7 +2,6 @@
 import { ChatInput } from "@/app/(platform)/copilot/components/ChatInput/ChatInput";
 import { UIDataTypes, UIMessage, UITools } from "ai";
 import { LayoutGroup, motion } from "framer-motion";
-import { ReactNode } from "react";
 import { ChatMessagesContainer } from "../ChatMessagesContainer/ChatMessagesContainer";
 import { CopilotChatActionsProvider } from "../CopilotChatActionsProvider/CopilotChatActionsProvider";
 import { EmptySession } from "../EmptySession/EmptySession";
@@ -21,7 +20,6 @@ export interface ChatContainerProps {
  onSend: (message: string, files?: File[]) => void | Promise<void>;
  onStop: () => void;
  isUploadingFiles?: boolean;
-  headerSlot?: ReactNode;
  /** Files dropped onto the chat window. */
  droppedFiles?: File[];
  /** Called after droppedFiles have been consumed by ChatInput. */
@@ -40,7 +38,6 @@ export const ChatContainer = ({
  onSend,
  onStop,
  isUploadingFiles,
-  headerSlot,
  droppedFiles,
  onDroppedFilesConsumed,
 }: ChatContainerProps) => {
@@ -63,7 +60,6 @@ export const ChatContainer = ({
                status={status}
                error={error}
                isLoading={isLoadingSession}
-                headerSlot={headerSlot}
                sessionID={sessionId}
              />
              <motion.div
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
@@ -30,7 +30,6 @@ interface Props {
  status: string;
  error: Error | undefined;
  isLoading: boolean;
-  headerSlot?: React.ReactNode;
  sessionID?: string | null;
 }

@@ -102,7 +101,6 @@ export function ChatMessagesContainer({
  status,
  error,
  isLoading,
-  headerSlot,
  sessionID,
 }: Props) {
  const lastMessage = messages[messages.length - 1];
@@ -135,7 +133,6 @@ export function ChatMessagesContainer({
  return (
    <Conversation className="min-h-0 flex-1">
      <ConversationContent className="flex flex-1 flex-col gap-6 px-3 py-6">
-        {headerSlot}
        {isLoading && messages.length === 0 && (
          <div
            className="flex flex-1 items-center justify-center"
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
@@ -37,6 +37,7 @@ import { useCopilotUIStore } from "../../store";
 import { NotificationToggle } from "./components/NotificationToggle/NotificationToggle";
 import { DeleteChatDialog } from "../DeleteChatDialog/DeleteChatDialog";
 import { PulseLoader } from "../PulseLoader/PulseLoader";
+import { UsageLimits } from "../UsageLimits/UsageLimits";

 export function ChatSidebar() {
  const { state } = useSidebar();
@@ -256,11 +257,10 @@ export function ChatSidebar() {
                <Text variant="h3" size="body-medium">
                  Your chats
                </Text>
-                <div className="relative left-5 flex items-center gap-1">
+                <div className="flex items-center">
+                  <UsageLimits />
                  <NotificationToggle />
-                  <div className="relative left-1">
-                    <SidebarTrigger />
-                  </div>
+                  <SidebarTrigger />
                </div>
              </div>
              {sessionId ? (
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/components/NotificationToggle/NotificationToggle.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/components/NotificationToggle/NotificationToggle.tsx
@@ -7,6 +7,7 @@ import {
  PopoverTrigger,
 } from "@/components/molecules/Popover/Popover";
 import { toast } from "@/components/molecules/Toast/use-toast";
+import { Button } from "@/components/ui/button";
 import { cn } from "@/lib/utils";
 import { Bell, BellRinging, BellSlash } from "@phosphor-icons/react";
 import { useCopilotUIStore } from "../../../../store";
@@ -48,10 +49,7 @@ export function NotificationToggle() {
  return (
    <Popover>
      <PopoverTrigger asChild>
-        <button
-          className="rounded p-1 text-black transition-colors hover:bg-zinc-50"
-          aria-label="Notification settings"
-        >
+        <Button variant="ghost" size="icon" aria-label="Notification settings">
          {!isNotificationsEnabled ? (
            <BellSlash className="!size-5" />
          ) : isSoundEnabled ? (
@@ -59,7 +57,7 @@ export function NotificationToggle() {
          ) : (
            <Bell className="!size-5" />
          )}
-        </button>
+        </Button>
      </PopoverTrigger>
      <PopoverContent align="start" className="w-56 p-3">
        <div className="flex flex-col gap-3">
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
@@ -0,0 +1,38 @@
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/molecules/Popover/Popover";
+import { Button } from "@/components/ui/button";
+import { ChartBar } from "@phosphor-icons/react";
+import { UsagePanelContent } from "./UsagePanelContent";
+
+export { UsagePanelContent, formatResetTime } from "./UsagePanelContent";
+
+export function UsageLimits() {
+  const { data: usage, isLoading } = useGetV2GetCopilotUsage({
+    query: {
+      select: (res) => res.data as CoPilotUsageStatus,
+      refetchInterval: 30000,
+      staleTime: 10000,
+    },
+  });
+
+  if (isLoading || !usage) return null;
+  if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null;
+
+  return (
+    <Popover>
+      <PopoverTrigger asChild>
+        <Button variant="ghost" size="icon" aria-label="Usage limits">
+          <ChartBar className="!size-5" weight="light" />
+        </Button>
+      </PopoverTrigger>
+      <PopoverContent align="start" className="w-64 p-3">
+        <UsagePanelContent usage={usage} />
+      </PopoverContent>
+    </Popover>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsagePanelContent.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsagePanelContent.tsx
@@ -0,0 +1,118 @@
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import Link from "next/link";
+
+export function formatResetTime(
+  resetsAt: Date | string,
+  now: Date = new Date(),
+): string {
+  const resetDate =
+    typeof resetsAt === "string" ? new Date(resetsAt) : resetsAt;
+  const diffMs = resetDate.getTime() - now.getTime();
+  if (diffMs <= 0) return "now";
+
+  const hours = Math.floor(diffMs / (1000 * 60 * 60));
+
+  // Under 24h: show relative time ("in 4h 23m")
+  if (hours < 24) {
+    const minutes = Math.floor((diffMs % (1000 * 60 * 60)) / (1000 * 60));
+    if (hours > 0) return `in ${hours}h ${minutes}m`;
+    return `in ${minutes}m`;
+  }
+
+  // Over 24h: show day and time in local timezone ("Mon 12:00 AM PST")
+  return resetDate.toLocaleString(undefined, {
+    weekday: "short",
+    hour: "numeric",
+    minute: "2-digit",
+    timeZoneName: "short",
+  });
+}
+
+function UsageBar({
+  label,
+  used,
+  limit,
+  resetsAt,
+}: {
+  label: string;
+  used: number;
+  limit: number;
+  resetsAt: Date | string;
+}) {
+  if (limit <= 0) return null;
+
+  const rawPercent = (used / limit) * 100;
+  const percent = Math.min(100, Math.round(rawPercent));
+  const isHigh = percent >= 80;
+  const percentLabel =
+    used > 0 && percent === 0 ? "<1% used" : `${percent}% used`;
+
+  return (
+    <div className="flex flex-col gap-1">
+      <div className="flex items-baseline justify-between">
+        <span className="text-xs font-medium text-neutral-700">{label}</span>
+        <span className="text-[11px] tabular-nums text-neutral-500">
+          {percentLabel}
+        </span>
+      </div>
+      <div className="text-[10px] text-neutral-400">
+        Resets {formatResetTime(resetsAt)}
+      </div>
+      <div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
+        <div
+          className={`h-full rounded-full transition-[width] duration-300 ease-out ${
+            isHigh ? "bg-orange-500" : "bg-blue-500"
+          }`}
+          style={{ width: `${Math.max(used > 0 ? 1 : 0, percent)}%` }}
+        />
+      </div>
+    </div>
+  );
+}
+
+export function UsagePanelContent({
+  usage,
+  showBillingLink = true,
+}: {
+  usage: CoPilotUsageStatus;
+  showBillingLink?: boolean;
+}) {
+  const hasDailyLimit = usage.daily.limit > 0;
+  const hasWeeklyLimit = usage.weekly.limit > 0;
+
+  if (!hasDailyLimit && !hasWeeklyLimit) {
+    return (
+      <div className="text-xs text-neutral-500">No usage limits configured</div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col gap-3">
+      <div className="text-xs font-semibold text-neutral-800">Usage limits</div>
+      {hasDailyLimit && (
+        <UsageBar
+          label="Today"
+          used={usage.daily.used}
+          limit={usage.daily.limit}
+          resetsAt={usage.daily.resets_at}
+        />
+      )}
+      {hasWeeklyLimit && (
+        <UsageBar
+          label="This week"
+          used={usage.weekly.used}
+          limit={usage.weekly.limit}
+          resetsAt={usage.weekly.resets_at}
+        />
+      )}
+      {showBillingLink && (
+        <Link
+          href="/profile/credits"
+          className="text-[11px] text-blue-600 hover:underline"
+        >
+          Learn more about usage limits
+        </Link>
+      )}
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/tests/UsageLimits.test.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/tests/UsageLimits.test.tsx
@@ -0,0 +1,124 @@
+import { render, screen, cleanup } from "@/tests/integrations/test-utils";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { UsageLimits } from "../UsageLimits";
+
+// Mock the generated Orval hook
+const mockUseGetV2GetCopilotUsage = vi.fn();
+vi.mock("@/app/api/__generated__/endpoints/chat/chat", () => ({
+  useGetV2GetCopilotUsage: (opts: unknown) => mockUseGetV2GetCopilotUsage(opts),
+}));
+
+// Mock Popover to render children directly (Radix portals don't work in happy-dom)
+vi.mock("@/components/molecules/Popover/Popover", () => ({
+  Popover: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  PopoverTrigger: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  PopoverContent: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+}));
+
+afterEach(() => {
+  cleanup();
+  mockUseGetV2GetCopilotUsage.mockReset();
+});
+
+function makeUsage({
+  dailyUsed = 500,
+  dailyLimit = 10000,
+  weeklyUsed = 2000,
+  weeklyLimit = 50000,
+}: {
+  dailyUsed?: number;
+  dailyLimit?: number;
+  weeklyUsed?: number;
+  weeklyLimit?: number;
+} = {}) {
+  const future = new Date(Date.now() + 3600 * 1000); // 1h from now
+  return {
+    daily: { used: dailyUsed, limit: dailyLimit, resets_at: future },
+    weekly: { used: weeklyUsed, limit: weeklyLimit, resets_at: future },
+  };
+}
+
+describe("UsageLimits", () => {
+  it("renders nothing while loading", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: undefined,
+      isLoading: true,
+    });
+    const { container } = render(<UsageLimits />);
+    expect(container.innerHTML).toBe("");
+  });
+
+  it("renders nothing when no limits are configured", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyLimit: 0, weeklyLimit: 0 }),
+      isLoading: false,
+    });
+    const { container } = render(<UsageLimits />);
+    expect(container.innerHTML).toBe("");
+  });
+
+  it("renders the usage button when limits exist", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage(),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+    expect(screen.getByRole("button", { name: /usage limits/i })).toBeDefined();
+  });
+
+  it("displays daily and weekly usage percentages", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyUsed: 5000, dailyLimit: 10000 }),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("50% used")).toBeDefined();
+    expect(screen.getByText("Today")).toBeDefined();
+    expect(screen.getByText("This week")).toBeDefined();
+    expect(screen.getByText("Usage limits")).toBeDefined();
+  });
+
+  it("shows only weekly bar when daily limit is 0", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({
+        dailyLimit: 0,
+        weeklyUsed: 25000,
+        weeklyLimit: 50000,
+      }),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("This week")).toBeDefined();
+    expect(screen.queryByText("Today")).toBeNull();
+  });
+
+  it("caps percentage at 100% when over limit", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyUsed: 15000, dailyLimit: 10000 }),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("100% used")).toBeDefined();
+  });
+
+  it("shows learn more link to credits page", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage(),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    const link = screen.getByText("Learn more about usage limits");
+    expect(link).toBeDefined();
+    expect(link.closest("a")?.getAttribute("href")).toBe("/profile/credits");
+  });
+});
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts
@@ -1,4 +1,5 @@
 import {
+  getGetV2GetCopilotUsageQueryKey,
  getGetV2GetSessionQueryKey,
  postV2CancelSessionTask,
 } from "@/app/api/__generated__/endpoints/chat/chat";
@@ -177,12 +178,41 @@ export function useCopilotStream({
    onError: (error) => {
      if (!sessionId) return;

+      // Detect rate limit (429) responses and show reset time to the user.
+      // The SDK throws a plain Error whose message is the raw response body
+      // (FastAPI returns {"detail": "...usage limit..."} for 429s).
+      let errorDetail: string = error.message;
+      try {
+        const parsed = JSON.parse(error.message) as unknown;
+        if (
+          typeof parsed === "object" &&
+          parsed !== null &&
+          "detail" in parsed &&
+          typeof (parsed as { detail: unknown }).detail === "string"
+        ) {
+          errorDetail = (parsed as { detail: string }).detail;
+        }
+      } catch {
+        // Not JSON — use message as-is
+      }
+      const isRateLimited = errorDetail.toLowerCase().includes("usage limit");
+      if (isRateLimited) {
+        toast({
+          title: "Usage limit reached",
+          description:
+            errorDetail ||
+            "You've reached your usage limit. Please try again later.",
+          variant: "destructive",
+        });
+        return;
+      }
+
      // Detect authentication failures (from getAuthHeaders or 401 responses)
      const isAuthError =
-        error.message.includes("Authentication failed") ||
-        error.message.includes("Unauthorized") ||
-        error.message.includes("Not authenticated") ||
-        error.message.toLowerCase().includes("401");
+        errorDetail.includes("Authentication failed") ||
+        errorDetail.includes("Unauthorized") ||
+        errorDetail.includes("Not authenticated") ||
+        errorDetail.toLowerCase().includes("401");
      if (isAuthError) {
        toast({
          title: "Authentication error",
@@ -307,6 +337,9 @@ export function useCopilotStream({
      queryClient.invalidateQueries({
        queryKey: getGetV2GetSessionQueryKey(sessionId),
      });
+      queryClient.invalidateQueries({
+        queryKey: getGetV2GetCopilotUsageQueryKey(),
+      });
      if (status === "ready") {
        reconnectAttemptsRef.current = 0;
        hasShownDisconnectToast.current = false;
--- a/autogpt_platform/frontend/src/app/(platform)/profile/(user)/credits/page.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/profile/(user)/credits/page.tsx
@@ -11,6 +11,9 @@ import {

 import { RefundModal } from "./RefundModal";
 import { CreditTransaction } from "@/lib/autogpt-server-api";
+import { UsagePanelContent } from "@/app/(platform)/copilot/components/UsageLimits/UsageLimits";
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";

 import {
  Table,
@@ -21,6 +24,32 @@ import {
  TableRow,
 } from "@/components/__legacy__/ui/table";

+function CoPilotUsageSection() {
+  const router = useRouter();
+  const { data: usage, isLoading } = useGetV2GetCopilotUsage({
+    query: {
+      select: (res) => res.data as CoPilotUsageStatus,
+      refetchInterval: 30000,
+      staleTime: 10000,
+    },
+  });
+
+  if (isLoading || !usage) return null;
+  if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null;
+
+  return (
+    <div className="my-6 space-y-4">
+      <h3 className="text-lg font-medium">CoPilot Usage Limits</h3>
+      <div className="rounded-lg border border-neutral-200 p-4">
+        <UsagePanelContent usage={usage} showBillingLink={false} />
+      </div>
+      <Button className="w-full" onClick={() => router.push("/copilot")}>
+        Open CoPilot
+      </Button>
+    </div>
+  );
+}
+
 export default function CreditsPage() {
  const api = useBackendAPI();
  const {
@@ -237,11 +266,13 @@ export default function CreditsPage() {
              </Button>
            )}
          </form>
+
+          {/* CoPilot Usage Limits */}
+          <CoPilotUsageSection />
        </div>

        <div className="my-6 space-y-4">
          {/* Payment Portal */}
-
          <h3 className="text-lg font-medium">Manage Your Payment Methods</h3>
          <p className="text-neutral-600">
            You can manage your cards and see your payment history in the
--- a/autogpt_platform/frontend/src/app/api/openapi.json
+++ b/autogpt_platform/frontend/src/app/api/openapi.json
@@ -1267,7 +1267,7 @@
      "post": {
        "tags": ["v2", "chat", "chat"],
        "summary": "Stream Chat Post",
-        "description": "Stream chat responses for a session (POST with context support).\n\nStreams the AI/completion responses in real time over Server-Sent Events (SSE), including:\n  - Text fragments as they are generated\n  - Tool call UI elements (if invoked)\n  - Tool execution results\n\nThe AI generation runs in a background task that continues even if the client disconnects.\nAll chunks are written to a per-turn Redis stream for reconnection support. If the client\ndisconnects, they can reconnect using GET /sessions/{session_id}/stream to resume.\n\nArgs:\n    session_id: The chat session identifier to associate with the streamed messages.\n    request: Request body containing message, is_user_message, and optional context.\n    user_id: Optional authenticated user ID.\nReturns:\n    StreamingResponse: SSE-formatted response chunks.",
+        "description": "Stream chat responses for a session (POST with context support).\n\nStreams the AI/completion responses in real time over Server-Sent Events (SSE), including:\n  - Text fragments as they are generated\n  - Tool call UI elements (if invoked)\n  - Tool execution results\n\nThe AI generation runs in a background task that continues even if the client disconnects.\nAll chunks are written to a per-turn Redis stream for reconnection support. If the client\ndisconnects, they can reconnect using GET /sessions/{session_id}/stream to resume.\n\nArgs:\n    session_id: The chat session identifier to associate with the streamed messages.\n    request: Request body containing message, is_user_message, and optional context.\n    user_id: Authenticated user ID.\nReturns:\n    StreamingResponse: SSE-formatted response chunks.",
        "operationId": "postV2StreamChatPost",
        "security": [{ "HTTPBearerJWT": [] }],
        "parameters": [
@@ -1382,6 +1382,28 @@
        "security": [{ "HTTPBearerJWT": [] }]
      }
    },
+    "/api/chat/usage": {
+      "get": {
+        "tags": ["v2", "chat", "chat"],
+        "summary": "Get Copilot Usage",
+        "description": "Get CoPilot usage status for the authenticated user.\n\nReturns current token usage vs limits for daily and weekly windows.",
+        "operationId": "getV2GetCopilotUsage",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": { "$ref": "#/components/schemas/CoPilotUsageStatus" }
+              }
+            }
+          },
+          "401": {
+            "$ref": "#/components/responses/HTTP401NotAuthenticatedError"
+          }
+        },
+        "security": [{ "HTTPBearerJWT": [] }]
+      }
+    },
    "/api/credits": {
      "get": {
        "tags": ["v1", "credits"],
@@ -8455,6 +8477,16 @@
        "title": "ClarifyingQuestion",
        "description": "A question that needs user clarification."
      },
+      "CoPilotUsageStatus": {
+        "properties": {
+          "daily": { "$ref": "#/components/schemas/UsageWindow" },
+          "weekly": { "$ref": "#/components/schemas/UsageWindow" }
+        },
+        "type": "object",
+        "required": ["daily", "weekly"],
+        "title": "CoPilotUsageStatus",
+        "description": "Current usage status for a user across all windows."
+      },
      "ContentType": {
        "type": "string",
        "enum": [
@@ -12190,6 +12222,16 @@
              { "$ref": "#/components/schemas/ActiveStreamInfo" },
              { "type": "null" }
            ]
+          },
+          "total_prompt_tokens": {
+            "type": "integer",
+            "title": "Total Prompt Tokens",
+            "default": 0
+          },
+          "total_completion_tokens": {
+            "type": "integer",
+            "title": "Total Completion Tokens",
+            "default": 0
          }
        },
        "type": "object",
@@ -14587,6 +14629,25 @@
        "required": ["timezone"],
        "title": "UpdateTimezoneRequest"
      },
+      "UsageWindow": {
+        "properties": {
+          "used": { "type": "integer", "title": "Used" },
+          "limit": {
+            "type": "integer",
+            "title": "Limit",
+            "description": "Maximum tokens allowed in this window. 0 means unlimited."
+          },
+          "resets_at": {
+            "type": "string",
+            "format": "date-time",
+            "title": "Resets At"
+          }
+        },
+        "type": "object",
+        "required": ["used", "limit", "resets_at"],
+        "title": "UsageWindow",
+        "description": "Usage within a single time window."
+      },
      "UserHistoryResponse": {
        "properties": {
          "history": {
--- a/autogpt_platform/frontend/src/components/ui/sidebar.tsx
+++ b/autogpt_platform/frontend/src/components/ui/sidebar.tsx
@@ -288,6 +288,7 @@ const SidebarTrigger = React.forwardRef<
      ref={ref}
      data-sidebar="trigger"
      variant="ghost"
+      size="icon"
      onClick={(event) => {
        onClick?.(event);
        toggleSidebar();