fix: restore size="icon" on sidebar trigger button

Merge remote-tracking branch 'origin/dev' into feat/tracking-cost-block
2026-03-17 03:00:27 -04:00 · 2026-03-17 12:06:39 +07:00 · 2026-03-17 07:10:44 +07:00 · 2026-03-17 06:16:46 +07:00 · 2026-03-17 06:14:41 +07:00 · 2026-03-16 23:08:18 +00:00
57 changed files with 2955 additions and 613 deletions
--- a/.github/workflows/platform-backend-ci.yml
+++ b/.github/workflows/platform-backend-ci.yml
@@ -5,12 +5,14 @@ on:
    branches: [master, dev, ci-test*]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/backend/**"
      - "autogpt_platform/autogpt_libs/**"
  pull_request:
    branches: [master, dev, release-*]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/backend/**"
      - "autogpt_platform/autogpt_libs/**"
  merge_group:
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -120,175 +120,6 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          exitOnceUploaded: true

-  e2e_test:
-    name: end-to-end tests
-    runs-on: big-boi
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-        with:
-          submodules: recursive
-
-      - name: Set up Platform - Copy default supabase .env
-        run: |
-          cp ../.env.default ../.env
-
-      - name: Set up Platform - Copy backend .env and set OpenAI API key
-        run: |
-          cp ../backend/.env.default ../backend/.env
-          echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
-        env:
-          # Used by E2E test data script to generate embeddings for approved store agents
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-
-      - name: Set up Platform - Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          driver: docker-container
-          driver-opts: network=host
-
-      - name: Set up Platform - Expose GHA cache to docker buildx CLI
-        uses: crazy-max/ghaction-github-runtime@v4
-
-      - name: Set up Platform - Build Docker images (with cache)
-        working-directory: autogpt_platform
-        run: |
-          pip install pyyaml
-
-          # Resolve extends and generate a flat compose file that bake can understand
-          docker compose -f docker-compose.yml config > docker-compose.resolved.yml
-
-          # Add cache configuration to the resolved compose file
-          python ../.github/workflows/scripts/docker-ci-fix-compose-build-cache.py \
-            --source docker-compose.resolved.yml \
-            --cache-from "type=gha" \
-            --cache-to "type=gha,mode=max" \
-            --backend-hash "${{ hashFiles('autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/poetry.lock', 'autogpt_platform/backend/backend') }}" \
-            --frontend-hash "${{ hashFiles('autogpt_platform/frontend/Dockerfile', 'autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/src') }}" \
-            --git-ref "${{ github.ref }}"
-
-          # Build with bake using the resolved compose file (now includes cache config)
-          docker buildx bake --allow=fs.read=.. -f docker-compose.resolved.yml --load
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Cache E2E test data
-        id: e2e-data-cache
-        uses: actions/cache@v5
-        with:
-          path: /tmp/e2e_test_data.sql
-          key: e2e-test-data-${{ hashFiles('autogpt_platform/backend/test/e2e_test_data.py', 'autogpt_platform/backend/migrations/**', '.github/workflows/platform-frontend-ci.yml') }}
-
-      - name: Set up Platform - Start Supabase DB + Auth
-        run: |
-          docker compose -f ../docker-compose.resolved.yml up -d db auth --no-build
-          echo "Waiting for database to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done'
-          echo "Waiting for auth service to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -c "SELECT 1 FROM auth.users LIMIT 1" 2>/dev/null; do sleep 2; done' || echo "Auth schema check timeout, continuing..."
-
-      - name: Set up Platform - Run migrations
-        run: |
-          echo "Running migrations..."
-          docker compose -f ../docker-compose.resolved.yml run --rm migrate
-          echo "✅ Migrations completed"
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Load cached E2E test data
-        if: steps.e2e-data-cache.outputs.cache-hit == 'true'
-        run: |
-          echo "✅ Found cached E2E test data, restoring..."
-          {
-            echo "SET session_replication_role = 'replica';"
-            cat /tmp/e2e_test_data.sql
-            echo "SET session_replication_role = 'origin';"
-          } | docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -b
-          # Refresh materialized views after restore
-          docker compose -f ../docker-compose.resolved.yml exec -T db \
-            psql -U postgres -d postgres -b -c "SET search_path TO platform; SELECT refresh_store_materialized_views();" || true
-
-          echo "✅ E2E test data restored from cache"
-
-      - name: Set up Platform - Start (all other services)
-        run: |
-          docker compose -f ../docker-compose.resolved.yml up -d --no-build
-          echo "Waiting for rest_server to be ready..."
-          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
-        env:
-          NEXT_PUBLIC_PW_TEST: true
-
-      - name: Set up tests - Create E2E test data
-        if: steps.e2e-data-cache.outputs.cache-hit != 'true'
-        run: |
-          echo "Creating E2E test data..."
-          docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.resolved.yml ps -q rest_server):/tmp/e2e_test_data.py
-          docker compose -f ../docker-compose.resolved.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
-            echo "❌ E2E test data creation failed!"
-            docker compose -f ../docker-compose.resolved.yml logs --tail=50 rest_server
-            exit 1
-          }
-
-          # Dump auth.users + platform schema for cache (two separate dumps)
-          echo "Dumping database for cache..."
-          {
-            docker compose -f ../docker-compose.resolved.yml exec -T db \
-              pg_dump -U postgres --data-only --column-inserts \
-              --table='auth.users' postgres
-            docker compose -f ../docker-compose.resolved.yml exec -T db \
-              pg_dump -U postgres --data-only --column-inserts \
-              --schema=platform \
-              --exclude-table='platform._prisma_migrations' \
-              --exclude-table='platform.apscheduler_jobs' \
-              --exclude-table='platform.apscheduler_jobs_batched_notifications' \
-              postgres
-          } > /tmp/e2e_test_data.sql
-
-          echo "✅ Database dump created for caching ($(wc -l < /tmp/e2e_test_data.sql) lines)"
-
-      - name: Set up tests - Enable corepack
-        run: corepack enable
-
-      - name: Set up tests - Set up Node
-        uses: actions/setup-node@v6
-        with:
-          node-version: "22.18.0"
-          cache: "pnpm"
-          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
-
-      - name: Set up tests - Install dependencies
-        run: pnpm install --frozen-lockfile
-
-      - name: Set up tests - Install browser 'chromium'
-        run: pnpm playwright install --with-deps chromium
-
-      - name: Run Playwright tests
-        run: pnpm test:no-build
-        continue-on-error: false
-
-      - name: Upload Playwright report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: playwright-report
-          path: playwright-report
-          if-no-files-found: ignore
-          retention-days: 3
-
-      - name: Upload Playwright test results
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: playwright-test-results
-          path: test-results
-          if-no-files-found: ignore
-          retention-days: 3
-
-      - name: Print Final Docker Compose logs
-        if: always()
-        run: docker compose -f ../docker-compose.resolved.yml logs
-
  integration_test:
    runs-on: ubuntu-latest
    needs: setup
--- a/.github/workflows/platform-fullstack-ci.yml
+++ b/.github/workflows/platform-fullstack-ci.yml
@@ -1,14 +1,18 @@
-name: AutoGPT Platform - Frontend CI
+name: AutoGPT Platform - Full-stack CI

 on:
  push:
    branches: [master, dev]
    paths:
      - ".github/workflows/platform-fullstack-ci.yml"
+      - ".github/workflows/scripts/docker-ci-fix-compose-build-cache.py"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/**"
  pull_request:
    paths:
      - ".github/workflows/platform-fullstack-ci.yml"
+      - ".github/workflows/scripts/docker-ci-fix-compose-build-cache.py"
+      - ".github/workflows/scripts/get_package_version_from_lockfile.py"
      - "autogpt_platform/**"
  merge_group:

@@ -24,42 +28,28 @@ defaults:
 jobs:
  setup:
    runs-on: ubuntu-latest
-    outputs:
-      cache-key: ${{ steps.cache-key.outputs.key }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

-      - name: Set up Node.js
-        uses: actions/setup-node@v6
-        with:
-          node-version: "22.18.0"
-
      - name: Enable corepack
        run: corepack enable

-      - name: Generate cache key
-        id: cache-key
-        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
-
-      - name: Cache dependencies
-        uses: actions/cache@v5
+      - name: Set up Node
+        uses: actions/setup-node@v6
        with:
-          path: ~/.pnpm-store
-          key: ${{ steps.cache-key.outputs.key }}
-          restore-keys: |
-            ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
-            ${{ runner.os }}-pnpm-
+          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml

-      - name: Install dependencies
+      - name: Install dependencies to populate cache
        run: pnpm install --frozen-lockfile

-  types:
-    runs-on: big-boi
+  check-api-types:
+    name: check API types
+    runs-on: ubuntu-latest
    needs: setup
-    strategy:
-      fail-fast: false

    steps:
      - name: Checkout repository
@@ -67,70 +57,256 @@ jobs:
        with:
          submodules: recursive

-      - name: Set up Node.js
+      # ------------------------ Backend setup ------------------------
+
+      - name: Set up Backend - Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Set up Backend - Install Poetry
+        working-directory: autogpt_platform/backend
+        run: |
+          POETRY_VERSION=$(python ../../.github/workflows/scripts/get_package_version_from_lockfile.py poetry)
+          echo "Installing Poetry version ${POETRY_VERSION}"
+          curl -sSL https://install.python-poetry.org | POETRY_VERSION=$POETRY_VERSION python3 -
+
+      - name: Set up Backend - Set up dependency cache
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
+
+      - name: Set up Backend - Install dependencies
+        working-directory: autogpt_platform/backend
+        run: poetry install
+
+      - name: Set up Backend - Generate Prisma client
+        working-directory: autogpt_platform/backend
+        run: poetry run prisma generate && poetry run gen-prisma-stub
+
+      - name: Set up Frontend - Export OpenAPI schema from Backend
+        working-directory: autogpt_platform/backend
+        run: poetry run export-api-schema --output ../frontend/src/app/api/openapi.json
+
+      # ------------------------ Frontend setup ------------------------
+
+      - name: Set up Frontend - Enable corepack
+        run: corepack enable
+
+      - name: Set up Frontend - Set up Node
        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml

-      - name: Enable corepack
-        run: corepack enable
-
-      - name: Copy default supabase .env
-        run: |
-          cp ../.env.default ../.env
-
-      - name: Copy backend .env
-        run: |
-          cp ../backend/.env.default ../backend/.env
-
-      - name: Run docker compose
-        run: |
-          docker compose -f ../docker-compose.yml --profile local up -d deps_backend
-
-      - name: Restore dependencies cache
-        uses: actions/cache@v5
-        with:
-          path: ~/.pnpm-store
-          key: ${{ needs.setup.outputs.cache-key }}
-          restore-keys: |
-            ${{ runner.os }}-pnpm-
-
-      - name: Install dependencies
+      - name: Set up Frontend - Install dependencies
        run: pnpm install --frozen-lockfile

-      - name: Setup .env
-        run: cp .env.default .env
-
-      - name: Wait for services to be ready
-        run: |
-          echo "Waiting for rest_server to be ready..."
-          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
-          echo "Waiting for database to be ready..."
-          timeout 60 sh -c 'until docker compose -f ../docker-compose.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done' || echo "Database ready check timeout, continuing..."
-
-      - name: Generate API queries
-        run: pnpm generate:api:force
+      - name: Set up Frontend - Format OpenAPI schema
+        id: format-schema
+        run: pnpm prettier --write ./src/app/api/openapi.json

      - name: Check for API schema changes
        run: |
          if ! git diff --exit-code src/app/api/openapi.json; then
            echo "❌ API schema changes detected in src/app/api/openapi.json"
            echo ""
-            echo "The openapi.json file has been modified after running 'pnpm generate:api-all'."
+            echo "The openapi.json file has been modified after exporting the API schema."
            echo "This usually means changes have been made in the BE endpoints without updating the Frontend."
            echo "The API schema is now out of sync with the Front-end queries."
            echo ""
            echo "To fix this:"
-            echo "1. Pull the backend 'docker compose pull && docker compose up -d --build --force-recreate'"
-            echo "2. Run 'pnpm generate:api' locally"
-            echo "3. Run 'pnpm types' locally"
-            echo "4. Fix any TypeScript errors that may have been introduced"
-            echo "5. Commit and push your changes"
+            printf '\nIn the backend directory:\n'
+            echo "1. Run 'poetry run export-api-schema --output ../frontend/src/app/api/openapi.json'"
+            printf '\nIn the frontend directory:\n'
+            echo "2. Run 'pnpm prettier --write src/app/api/openapi.json'"
+            echo "3. Run 'pnpm generate:api'"
+            echo "4. Run 'pnpm types'"
+            echo "5. Fix any TypeScript errors that may have been introduced"
+            echo "6. Commit and push your changes"
            echo ""
            exit 1
          else
            echo "✅ No API schema changes detected"
          fi

-      - name: Run Typescript checks
+      - name: Set up Frontend - Generate API client
+        id: generate-api-client
+        run: pnpm orval --config ./orval.config.ts
+        # Continue with type generation & check even if there are schema changes
+        if: success() || (steps.format-schema.outcome == 'success')
+
+      - name: Check for TypeScript errors
        run: pnpm types
+        if: success() || (steps.generate-api-client.outcome == 'success')
+
+  e2e_test:
+    name: end-to-end tests
+    runs-on: big-boi
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+        with:
+          submodules: recursive
+
+      - name: Set up Platform - Copy default supabase .env
+        run: |
+          cp ../.env.default ../.env
+
+      - name: Set up Platform - Copy backend .env and set OpenAI API key
+        run: |
+          cp ../backend/.env.default ../backend/.env
+          echo "OPENAI_INTERNAL_API_KEY=${OPENAI_API_KEY}" >> ../backend/.env
+        env:
+          # Used by E2E test data script to generate embeddings for approved store agents
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      - name: Set up Platform - Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          driver: docker-container
+          driver-opts: network=host
+
+      - name: Set up Platform - Expose GHA cache to docker buildx CLI
+        uses: crazy-max/ghaction-github-runtime@v4
+
+      - name: Set up Platform - Build Docker images (with cache)
+        working-directory: autogpt_platform
+        run: |
+          pip install pyyaml
+
+          # Resolve extends and generate a flat compose file that bake can understand
+          docker compose -f docker-compose.yml config > docker-compose.resolved.yml
+
+          # Add cache configuration to the resolved compose file
+          python ../.github/workflows/scripts/docker-ci-fix-compose-build-cache.py \
+            --source docker-compose.resolved.yml \
+            --cache-from "type=gha" \
+            --cache-to "type=gha,mode=max" \
+            --backend-hash "${{ hashFiles('autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/poetry.lock', 'autogpt_platform/backend/backend/**') }}" \
+            --frontend-hash "${{ hashFiles('autogpt_platform/frontend/Dockerfile', 'autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/src/**') }}" \
+            --git-ref "${{ github.ref }}"
+
+          # Build with bake using the resolved compose file (now includes cache config)
+          docker buildx bake --allow=fs.read=.. -f docker-compose.resolved.yml --load
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Cache E2E test data
+        id: e2e-data-cache
+        uses: actions/cache@v5
+        with:
+          path: /tmp/e2e_test_data.sql
+          key: e2e-test-data-${{ hashFiles('autogpt_platform/backend/test/e2e_test_data.py', 'autogpt_platform/backend/migrations/**', '.github/workflows/platform-fullstack-ci.yml') }}
+
+      - name: Set up Platform - Start Supabase DB + Auth
+        run: |
+          docker compose -f ../docker-compose.resolved.yml up -d db auth --no-build
+          echo "Waiting for database to be ready..."
+          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done'
+          echo "Waiting for auth service to be ready..."
+          timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -c "SELECT 1 FROM auth.users LIMIT 1" 2>/dev/null; do sleep 2; done' || echo "Auth schema check timeout, continuing..."
+
+      - name: Set up Platform - Run migrations
+        run: |
+          echo "Running migrations..."
+          docker compose -f ../docker-compose.resolved.yml run --rm migrate
+          echo "✅ Migrations completed"
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Load cached E2E test data
+        if: steps.e2e-data-cache.outputs.cache-hit == 'true'
+        run: |
+          echo "✅ Found cached E2E test data, restoring..."
+          {
+            echo "SET session_replication_role = 'replica';"
+            cat /tmp/e2e_test_data.sql
+            echo "SET session_replication_role = 'origin';"
+          } | docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -b
+          # Refresh materialized views after restore
+          docker compose -f ../docker-compose.resolved.yml exec -T db \
+            psql -U postgres -d postgres -b -c "SET search_path TO platform; SELECT refresh_store_materialized_views();" || true
+
+          echo "✅ E2E test data restored from cache"
+
+      - name: Set up Platform - Start (all other services)
+        run: |
+          docker compose -f ../docker-compose.resolved.yml up -d --no-build
+          echo "Waiting for rest_server to be ready..."
+          timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
+        env:
+          NEXT_PUBLIC_PW_TEST: true
+
+      - name: Set up tests - Create E2E test data
+        if: steps.e2e-data-cache.outputs.cache-hit != 'true'
+        run: |
+          echo "Creating E2E test data..."
+          docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.resolved.yml ps -q rest_server):/tmp/e2e_test_data.py
+          docker compose -f ../docker-compose.resolved.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
+            echo "❌ E2E test data creation failed!"
+            docker compose -f ../docker-compose.resolved.yml logs --tail=50 rest_server
+            exit 1
+          }
+
+          # Dump auth.users + platform schema for cache (two separate dumps)
+          echo "Dumping database for cache..."
+          {
+            docker compose -f ../docker-compose.resolved.yml exec -T db \
+              pg_dump -U postgres --data-only --column-inserts \
+              --table='auth.users' postgres
+            docker compose -f ../docker-compose.resolved.yml exec -T db \
+              pg_dump -U postgres --data-only --column-inserts \
+              --schema=platform \
+              --exclude-table='platform._prisma_migrations' \
+              --exclude-table='platform.apscheduler_jobs' \
+              --exclude-table='platform.apscheduler_jobs_batched_notifications' \
+              postgres
+          } > /tmp/e2e_test_data.sql
+
+          echo "✅ Database dump created for caching ($(wc -l < /tmp/e2e_test_data.sql) lines)"
+
+      - name: Set up tests - Enable corepack
+        run: corepack enable
+
+      - name: Set up tests - Set up Node
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22.18.0"
+          cache: "pnpm"
+          cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
+
+      - name: Set up tests - Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Set up tests - Install browser 'chromium'
+        run: pnpm playwright install --with-deps chromium
+
+      - name: Run Playwright tests
+        run: pnpm test:no-build
+        continue-on-error: false
+
+      - name: Upload Playwright report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-report
+          path: playwright-report
+          if-no-files-found: ignore
+          retention-days: 3
+
+      - name: Upload Playwright test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-test-results
+          path: test-results
+          if-no-files-found: ignore
+          retention-days: 3
+
+      - name: Print Final Docker Compose logs
+        if: always()
+        run: docker compose -f ../docker-compose.resolved.yml logs
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -8,7 +8,7 @@ from typing import Annotated
 from uuid import uuid4

 from autogpt_libs import auth
-from fastapi import APIRouter, Depends, HTTPException, Query, Response, Security
+from fastapi import APIRouter, HTTPException, Query, Response, Security
 from fastapi.responses import StreamingResponse
 from prisma.models import UserWorkspaceFile
 from pydantic import BaseModel, Field, field_validator
@@ -27,6 +27,12 @@ from backend.copilot.model import (
    get_user_sessions,
    update_session_title,
 )
+from backend.copilot.rate_limit import (
+    CoPilotUsageStatus,
+    RateLimitExceeded,
+    check_rate_limit,
+    get_usage_status,
+)
 from backend.copilot.response_model import StreamError, StreamFinish, StreamHeartbeat
 from backend.copilot.tools.e2b_sandbox import kill_sandbox
 from backend.copilot.tools.models import (
@@ -120,6 +126,8 @@ class SessionDetailResponse(BaseModel):
    user_id: str | None
    messages: list[dict]
    active_stream: ActiveStreamInfo | None = None  # Present if stream is still active
+    total_prompt_tokens: int = 0
+    total_completion_tokens: int = 0


 class SessionSummaryResponse(BaseModel):
@@ -207,7 +215,7 @@ async def list_sessions(
            }
        except Exception:
            logger.warning(
-                "Failed to fetch processing status from Redis; " "defaulting to empty"
+                "Failed to fetch processing status from Redis; defaulting to empty"
            )

    return ListSessionsResponse(
@@ -229,7 +237,7 @@ async def list_sessions(
    "/sessions",
 )
 async def create_session(
-    user_id: Annotated[str, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> CreateSessionResponse:
    """
    Create a new chat session.
@@ -348,7 +356,7 @@ async def update_session_title_route(
 )
 async def get_session(
    session_id: str,
-    user_id: Annotated[str | None, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> SessionDetailResponse:
    """
    Retrieve the details of a specific chat session.
@@ -389,6 +397,10 @@ async def get_session(
            last_message_id=last_message_id,
        )

+    # Sum token usage from session
+    total_prompt = sum(u.prompt_tokens for u in session.usage)
+    total_completion = sum(u.completion_tokens for u in session.usage)
+
    return SessionDetailResponse(
        id=session.session_id,
        created_at=session.started_at.isoformat(),
@@ -396,6 +408,25 @@ async def get_session(
        user_id=session.user_id or None,
        messages=messages,
        active_stream=active_stream_info,
+        total_prompt_tokens=total_prompt,
+        total_completion_tokens=total_completion,
+    )
+
+
+@router.get(
+    "/usage",
+)
+async def get_copilot_usage(
+    user_id: Annotated[str, Security(auth.get_user_id)],
+) -> CoPilotUsageStatus:
+    """Get CoPilot usage status for the authenticated user.
+
+    Returns current token usage vs limits for daily and weekly windows.
+    """
+    return await get_usage_status(
+        user_id=user_id,
+        daily_token_limit=config.daily_token_limit,
+        weekly_token_limit=config.weekly_token_limit,
    )


@@ -405,7 +436,7 @@ async def get_session(
 )
 async def cancel_session_task(
    session_id: str,
-    user_id: Annotated[str | None, Depends(auth.get_user_id)],
+    user_id: Annotated[str, Security(auth.get_user_id)],
 ) -> CancelSessionResponse:
    """Cancel the active streaming task for a session.

@@ -450,7 +481,7 @@ async def cancel_session_task(
 async def stream_chat_post(
    session_id: str,
    request: StreamChatRequest,
-    user_id: str | None = Depends(auth.get_user_id),
+    user_id: str = Security(auth.get_user_id),
 ):
    """
    Stream chat responses for a session (POST with context support).
@@ -467,7 +498,7 @@ async def stream_chat_post(
    Args:
        session_id: The chat session identifier to associate with the streamed messages.
        request: Request body containing message, is_user_message, and optional context.
-        user_id: Optional authenticated user ID.
+        user_id: Authenticated user ID.
    Returns:
        StreamingResponse: SSE-formatted response chunks.

@@ -476,9 +507,7 @@ async def stream_chat_post(
    import time

    stream_start_time = time.perf_counter()
-    log_meta = {"component": "ChatStream", "session_id": session_id}
-    if user_id:
-        log_meta["user_id"] = user_id
+    log_meta = {"component": "ChatStream", "session_id": session_id, "user_id": user_id}

    logger.info(
        f"[TIMING] stream_chat_post STARTED, session={session_id}, "
@@ -496,6 +525,18 @@ async def stream_chat_post(
        },
    )

+    # Pre-turn rate limit check (token-based).
+    # check_rate_limit short-circuits internally when both limits are 0.
+    if user_id:
+        try:
+            await check_rate_limit(
+                user_id=user_id,
+                daily_token_limit=config.daily_token_limit,
+                weekly_token_limit=config.weekly_token_limit,
+            )
+        except RateLimitExceeded as e:
+            raise HTTPException(status_code=429, detail=str(e)) from e
+
    # Enrich message with file metadata if file_ids are provided.
    # Also sanitise file_ids so only validated, workspace-scoped IDs are
    # forwarded downstream (e.g. to the executor via enqueue_copilot_turn).
@@ -730,7 +771,7 @@ async def stream_chat_post(
 )
 async def resume_session_stream(
    session_id: str,
-    user_id: str | None = Depends(auth.get_user_id),
+    user_id: str = Security(auth.get_user_id),
 ):
    """
    Resume an active stream for a session.
--- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py
@@ -1,5 +1,6 @@
-"""Tests for chat API routes: session title update, file attachment validation, and suggested prompts."""
+"""Tests for chat API routes: session title update, file attachment validation, usage, rate limiting, and suggested prompts."""

+from datetime import UTC, datetime, timedelta
 from unittest.mock import AsyncMock, MagicMock

 import fastapi
@@ -251,6 +252,156 @@ def test_file_ids_scoped_to_workspace(mocker: pytest_mock.MockFixture):
    assert call_kwargs["where"]["isDeleted"] is False


+# ─── Rate limit → 429 ─────────────────────────────────────────────────
+
+
+def test_stream_chat_returns_429_on_daily_rate_limit(mocker: pytest_mock.MockFixture):
+    """A daily RateLimitExceeded from check_rate_limit surfaces as HTTP 429."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    # Pin non-zero limits; the patched check_rate_limit raises regardless.
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    daily_exceeded = RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1))
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=daily_exceeded,
+    )
+
+    payload = {"message": "hello"}
+    response = client.post("/sessions/sess-1/stream", json=payload)
+
+    assert response.status_code == 429
+    assert "daily" in response.json()["detail"].lower()
+
+
+def test_stream_chat_returns_429_on_weekly_rate_limit(mocker: pytest_mock.MockFixture):
+    """A weekly RateLimitExceeded from check_rate_limit surfaces as HTTP 429."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    # The weekly window is reported as resetting three days from now.
+    weekly_exceeded = RateLimitExceeded(
+        "weekly", datetime.now(UTC) + timedelta(days=3)
+    )
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=weekly_exceeded,
+    )
+
+    response = client.post("/sessions/sess-1/stream", json={"message": "hello"})
+    assert response.status_code == 429
+    detail = response.json()["detail"].lower()
+    assert "weekly" in detail
+    assert "resets in" in detail
+
+
+def test_stream_chat_429_includes_reset_time(mocker: pytest_mock.MockFixture):
+    """The 429 detail carries the exception's human-readable reset countdown."""
+    from backend.copilot.rate_limit import RateLimitExceeded
+
+    _mock_stream_internals(mocker)
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+    # Two and a half hours out, so the formatted message contains "2h".
+    resets_at = datetime.now(UTC) + timedelta(hours=2, minutes=30)
+    mocker.patch(
+        "backend.api.features.chat.routes.check_rate_limit",
+        side_effect=RateLimitExceeded("daily", resets_at),
+    )
+
+    response = client.post(
+        "/sessions/sess-1/stream",
+        json={"message": "hello"},
+    )
+    assert response.status_code == 429
+    detail = response.json()["detail"]
+    assert "2h" in detail
+    assert "Resets in" in detail
+
+
+# ─── Usage endpoint ───────────────────────────────────────────────────
+
+
+def _mock_usage(
+    mocker: pytest_mock.MockerFixture,
+    *,
+    daily_used: int = 500,
+    weekly_used: int = 2000,
+) -> AsyncMock:
+    """Patch the route's get_usage_status with a canned CoPilotUsageStatus."""
+    from backend.copilot.rate_limit import CoPilotUsageStatus, UsageWindow
+
+    # Both windows share one reset instant; limits are fixed at 10k / 50k.
+    tomorrow = datetime.now(UTC) + timedelta(days=1)
+    daily = UsageWindow(used=daily_used, limit=10000, resets_at=tomorrow)
+    weekly = UsageWindow(used=weekly_used, limit=50000, resets_at=tomorrow)
+
+    return mocker.patch(
+        "backend.api.features.chat.routes.get_usage_status",
+        new_callable=AsyncMock,
+        return_value=CoPilotUsageStatus(daily=daily, weekly=weekly),
+    )
+
+
+def test_usage_returns_daily_and_weekly(
+    mocker: pytest_mock.MockerFixture,
+    test_user_id: str,
+) -> None:
+    """GET /usage reports the mocked daily and weekly token usage."""
+    usage_mock = _mock_usage(mocker, daily_used=500, weekly_used=2000)
+    mocker.patch.object(chat_routes.config, "daily_token_limit", 10000)
+    mocker.patch.object(chat_routes.config, "weekly_token_limit", 50000)
+
+    response = client.get("/usage")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["daily"]["used"] == 500
+    assert body["weekly"]["used"] == 2000
+    # The route must pass on the test user's id and the configured limits.
+    expected_call = {
+        "user_id": test_user_id,
+        "daily_token_limit": 10000,
+        "weekly_token_limit": 50000,
+    }
+    usage_mock.assert_called_once_with(**expected_call)
+
+
+def test_usage_uses_config_limits(
+    mocker: pytest_mock.MockerFixture,
+    test_user_id: str,
+) -> None:
+    """GET /usage passes the configured daily and weekly limits through."""
+    usage_mock = _mock_usage(mocker)
+    # Distinctive values so a hard-coded limit in the route would be caught.
+    for attr, value in (("daily_token_limit", 99999), ("weekly_token_limit", 77777)):
+        mocker.patch.object(chat_routes.config, attr, value)
+
+    response = client.get("/usage")
+
+    assert response.status_code == 200
+    usage_mock.assert_called_once_with(
+        user_id=test_user_id,
+        daily_token_limit=99999,
+        weekly_token_limit=77777,
+    )
+
+
+def test_usage_rejects_unauthenticated_request() -> None:
+    """GET /usage responds with 401 when the request carries no valid JWT."""
+    # Fresh app with only the chat router mounted.
+    bare_app = fastapi.FastAPI()
+    bare_app.include_router(chat_routes.router)
+    bare_client = fastapi.testclient.TestClient(bare_app)
+
+    response = bare_client.get("/usage")
+    assert response.status_code == 401
+
+
 # ─── Suggested prompts endpoint ──────────────────────────────────────


--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -36,13 +36,15 @@ from backend.copilot.response_model import (
    StreamToolInputAvailable,
    StreamToolInputStart,
    StreamToolOutputAvailable,
+    StreamUsage,
 )
 from backend.copilot.service import (
    _build_system_prompt,
    _generate_session_title,
-    client,
+    _get_openai_client,
    config,
 )
+from backend.copilot.token_tracking import persist_and_record_usage
 from backend.copilot.tools import execute_tool, get_available_tools
 from backend.copilot.tracking import track_user_message
 from backend.util.exceptions import NotFoundError
@@ -89,7 +91,7 @@ async def _compress_session_messages(
        result = await compress_context(
            messages=messages_dict,
            model=config.model,
-            client=client,
+            client=_get_openai_client(),
        )
    except Exception as e:
        logger.warning("[Baseline] Context compression with LLM failed: %s", e)
@@ -221,6 +223,10 @@ async def stream_chat_completion_baseline(
    text_block_id = str(uuid.uuid4())
    text_started = False
    step_open = False
+    # Token usage accumulators — populated from streaming chunks
+    turn_prompt_tokens = 0
+    turn_completion_tokens = 0
+    _stream_error = False  # Track whether an error occurred during streaming
    try:
        for _round in range(_MAX_TOOL_ROUNDS):
            # Open a new step for each LLM round
@@ -232,16 +238,31 @@ async def stream_chat_completion_baseline(
                model=config.model,
                messages=openai_messages,
                stream=True,
+                stream_options={"include_usage": True},
            )
            if tools:
                create_kwargs["tools"] = tools
-            response = await client.chat.completions.create(**create_kwargs)  # type: ignore[arg-type]  # dynamic kwargs
+            response = await _get_openai_client().chat.completions.create(**create_kwargs)  # type: ignore[arg-type]  # dynamic kwargs

            # Accumulate streamed response (text + tool calls)
            round_text = ""
            tool_calls_by_index: dict[int, dict[str, str]] = {}

            async for chunk in response:
+                # Capture token usage from the streaming chunk.
+                # OpenRouter normalises all providers into OpenAI format
+                # where prompt_tokens already includes cached tokens
+                # (unlike Anthropic's native API). Use += to sum all
+                # tool-call rounds since each API call is independent.
+                # NOTE: stream_options={"include_usage": True} is not
+                # universally supported — some providers (Mistral, Llama
+                # via OpenRouter) always return chunk.usage=None. When
+                # that happens, tokens stay 0 and the tiktoken fallback
+                # below activates. Fail-open: one round is estimated.
+                if chunk.usage:
+                    turn_prompt_tokens += chunk.usage.prompt_tokens or 0
+                    turn_completion_tokens += chunk.usage.completion_tokens or 0
+
                delta = chunk.choices[0].delta if chunk.choices else None
                if not delta:
                    continue
@@ -394,6 +415,7 @@ async def stream_chat_completion_baseline(
            )

    except Exception as e:
+        _stream_error = True
        error_msg = str(e) or type(e).__name__
        logger.error("[Baseline] Streaming error: %s", error_msg, exc_info=True)
        # Close any open text/step before emitting error
@@ -411,6 +433,49 @@ async def stream_chat_completion_baseline(
            except Exception:
                logger.warning("[Baseline] Langfuse trace context teardown failed")

+        # Fallback: estimate tokens via tiktoken when the provider does
+        # not honour stream_options={"include_usage": True}.
+        # Count the full message list (system + history + turn) since
+        # each API call sends the complete context window.
+        # NOTE: This estimates one round's prompt tokens. Multi-round tool-calling
+        # turns consume prompt tokens on each API call, so the total is underestimated.
+        # Skip fallback when an error occurred and no output was produced —
+        # charging rate-limit tokens for completely failed requests is unfair.
+        if (
+            turn_prompt_tokens == 0
+            and turn_completion_tokens == 0
+            and not (_stream_error and not assistant_text)
+        ):
+            from backend.util.prompt import (
+                estimate_token_count,
+                estimate_token_count_str,
+            )
+
+            turn_prompt_tokens = max(
+                estimate_token_count(openai_messages, model=config.model), 1
+            )
+            turn_completion_tokens = estimate_token_count_str(
+                assistant_text, model=config.model
+            )
+            logger.info(
+                "[Baseline] No streaming usage reported; estimated tokens: "
+                "prompt=%d, completion=%d",
+                turn_prompt_tokens,
+                turn_completion_tokens,
+            )
+
+        # Persist token usage to session and record for rate limiting.
+        # NOTE: OpenRouter folds cached tokens into prompt_tokens, so we
+        # cannot break out cache_read/cache_creation weights. Users on the
+        # baseline path may be slightly over-counted vs the SDK path.
+        await persist_and_record_usage(
+            session=session,
+            user_id=user_id,
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            log_prefix="[Baseline]",
+        )
+
        # Persist assistant response
        if assistant_text:
            session.messages.append(
@@ -421,4 +486,16 @@ async def stream_chat_completion_baseline(
        except Exception as persist_err:
            logger.error("[Baseline] Failed to persist session: %s", persist_err)

+    # Yield usage and finish AFTER try/finally (not inside finally).
+    # PEP 525 prohibits yielding from finally in async generators during
+    # aclose() — doing so raises RuntimeError on client disconnect.
+    # On GeneratorExit the client is already gone, so unreachable yields
+    # are harmless; on normal completion they reach the SSE stream.
+    if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
+        yield StreamUsage(
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            total_tokens=turn_prompt_tokens + turn_completion_tokens,
+        )
+
    yield StreamFinish()
--- a/autogpt_platform/backend/backend/copilot/config.py
+++ b/autogpt_platform/backend/backend/copilot/config.py
@@ -70,6 +70,27 @@ class ChatConfig(BaseSettings):
        description="Cache TTL in seconds for Langfuse prompt (0 to disable caching)",
    )

+    # Rate limiting — token-based limits per day and per week.
+    # Per-turn token cost varies with context size: ~10-15K for early turns,
+    # ~30-50K mid-session, up to ~100K pre-compaction. Average across a
+    # session with compaction cycles is ~25-35K tokens/turn, so 2.5M daily
+    # allows ~70-100 turns/day.
+    # Checked at the HTTP layer (routes.py) before each turn.
+    #
+    # TODO: These are deploy-time constants applied identically to every user.
+    #  If per-user or per-plan limits are needed (e.g., free tier vs paid), these
+    #  must move to the database (e.g., a UserPlan table) and get_usage_status /
+    #  check_rate_limit would look up each user's specific limits instead of
+    #  reading config.daily_token_limit / config.weekly_token_limit.
+    daily_token_limit: int = Field(
+        default=2_500_000,
+        description="Max tokens per day, resets at midnight UTC (0 = unlimited)",
+    )
+    weekly_token_limit: int = Field(
+        default=12_500_000,
+        description="Max tokens per week, resets Monday 00:00 UTC (0 = unlimited)",
+    )
+
    # Claude Agent SDK Configuration
    use_claude_agent_sdk: bool = Field(
        default=True,
--- a/autogpt_platform/backend/backend/copilot/model.py
+++ b/autogpt_platform/backend/backend/copilot/model.py
@@ -73,6 +73,9 @@ class Usage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
+    # Cache breakdown (Anthropic-specific; zero for non-Anthropic models)
+    cache_read_tokens: int = 0
+    cache_creation_tokens: int = 0


 class ChatSessionInfo(BaseModel):
@@ -98,7 +101,10 @@ class ChatSessionInfo(BaseModel):
            prisma_session.successfulAgentSchedules, default={}
        )

-        # Calculate usage from token counts
+        # Calculate usage from token counts.
+        # NOTE: Per-turn cache_read_tokens / cache_creation_tokens breakdown
+        # is lost after persistence — the DB only stores aggregate prompt and
+        # completion totals. This is a known limitation.
        usage = []
        if prisma_session.totalPromptTokens or prisma_session.totalCompletionTokens:
            usage.append(
--- a/autogpt_platform/backend/backend/copilot/prompting.py
+++ b/autogpt_platform/backend/backend/copilot/prompting.py
@@ -11,18 +11,34 @@ from backend.copilot.tools import TOOL_REGISTRY
 # Shared technical notes that apply to both SDK and baseline modes
 _SHARED_TOOL_NOTES = """\

-### Sharing files
-After `write_workspace_file`, embed the `download_url` in Markdown:
- File: `[report.csv](workspace://file_id#text/csv)`
- Image: `![chart](workspace://file_id#image/png)`
- Video: `![recording](workspace://file_id#video/mp4)`
+### Sharing files with the user
+After saving a file to the persistent workspace with `write_workspace_file`,
+share it with the user by embedding the `download_url` from the response in
+your message as a Markdown link or image:

-### File references — @@agptfile:
-Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
- `/absolute/path` — local/sandbox files
- `[start-end]` — optional 1-indexed line range
- Multiple refs per argument supported. Only `workspace://` and absolute paths are expanded.
+- **Any file** — shows as a clickable download link:
+  `[report.csv](workspace://file_id#text/csv)`
+- **Image** — renders inline in chat:
+  `![chart](workspace://file_id#image/png)`
+- **Video** — renders inline in chat with player controls:
+  `![recording](workspace://file_id#video/mp4)`
+
+The `download_url` field in the `write_workspace_file` response is already
+in the correct format — paste it directly after the `(` in the Markdown.
+
+### Passing file content to tools — @@agptfile: references
+Instead of copying large file contents into a tool argument, pass a file
+reference and the platform will load the content for you.
+
+Syntax: `@@agptfile:<uri>[<start>-<end>]`
+
+- `<uri>` **must** start with `workspace://` or `/` (absolute path):
+  - `workspace://<file_id>` — workspace file by ID
+  - `workspace:///<path>` — workspace file by virtual path
+  - `/absolute/local/path` — ephemeral or sdk_cwd file
+  - E2B sandbox absolute path (e.g. `/home/user/script.py`)
+- `[<start>-<end>]` is an optional 1-indexed inclusive line range.
+- URIs that do not start with `workspace://` or `/` are **not** expanded.

 Examples:
 ```
@@ -33,16 +49,50 @@ Examples:
@@agptfile:/home/user/script.py
 ```

-**Structured data**: When the entire argument is a single file reference, the platform auto-parses by extension/MIME. Supported: JSON, JSONL, CSV, TSV, YAML, TOML, Parquet, Excel (.xlsx only). Unrecognised formats return plain string.
+You can embed a reference inside any string argument, or use it as the entire
+value.  Multiple references in one argument are all expanded.

-**Type coercion**: The platform auto-coerces expanded string values to match block input types (e.g. JSON string → `list[list[str]]`).
+**Structured data**: When the **entire** argument value is a single file
+reference (no surrounding text), the platform automatically parses the file
+content based on its extension or MIME type.  Supported formats: JSON, JSONL,
+CSV, TSV, YAML, TOML, Parquet, and Excel (.xlsx — first sheet only).
+For example, pass `@@agptfile:workspace://<id>` where the file is a `.csv` and
+the rows will be parsed into `list[list[str]]` automatically.  If the format is
+unrecognised or parsing fails, the content is returned as a plain string.
+Legacy `.xls` files are **not** supported — only the modern `.xlsx` format.
+
+**Type coercion**: The platform also coerces expanded values to match the
+block's expected input types.  For example, if a block expects `list[list[str]]`
+and the expanded value is a JSON string, it will be parsed into the correct type.

 ### Media file inputs (format: "file")
-Inputs with `"format": "file"` accept `workspace://<file_id>` or `data:<mime>;base64,<payload>`.
-Pass the `workspace://` URI directly (do NOT wrap in `@@agptfile:`). This avoids large payloads and preserves binary content.
+Some block inputs accept media files — their schema shows `"format": "file"`.
+These fields accept:
+- **`workspace://<file_id>`** or **`workspace://<file_id>#<mime>`** — preferred
+  for large files (images, videos, PDFs). The platform passes the reference
+  directly to the block without reading the content into memory.
+- **`data:<mime>;base64,<payload>`** — inline base64 data URI, suitable for
+  small files only.
+
+When a block input has `format: "file"`, **pass the `workspace://` URI
+directly as the value** (do NOT wrap it in `@@agptfile:`). This avoids large
+payloads in tool arguments and preserves binary content (images, videos)
+that would be corrupted by text encoding.
+
+Example — committing an image file to GitHub:
+```json
+{
+  "files": [{
+    "path": "docs/hero.png",
+    "content": "workspace://abc123#image/png",
+    "operation": "upsert"
+  }]
+}
+```

 ### Sub-agent tasks
- Task tool: NEVER set `run_in_background` to true.
+- When using the Task tool, NEVER set `run_in_background` to true.
+  All tasks must run in the foreground.
 """


@@ -78,18 +128,30 @@ def _build_storage_supplement(

 ## Tool notes

-### Shell & filesystem
- Use `bash_exec` for shell commands ({sandbox_type}). Working dir: `{working_dir}`
- All file tools share the same filesystem. Use relative or absolute paths under this dir.
+### Shell commands
+- The SDK built-in Bash tool is NOT available.  Use the `bash_exec` MCP tool
+  for shell commands — it runs {sandbox_type}.
+
+### Working directory
+- Your working directory is: `{working_dir}`
+- All SDK file tools AND `bash_exec` operate on the same filesystem
+- Use relative paths or absolute paths under `{working_dir}` for all file operations
+
+### Two storage systems — CRITICAL to understand

-### Storage — important
 1. **{storage_system_1_name}** (`{working_dir}`):
 {characteristics}
 {persistence}
-2. **Persistent workspace** (cloud) — survives across sessions.
-   - {file_move_name_1_to_2}: use `write_workspace_file`
-   - {file_move_name_2_to_1}: use `read_workspace_file` with save_to_path
-   - Save important files to workspace for persistence.
+
+2. **Persistent workspace** (cloud storage):
+   - Files here **survive across sessions indefinitely**
+
+### Moving files between storages
+- **{file_move_name_1_to_2}**: Copy to persistent workspace
+- **{file_move_name_2_to_1}**: Download for processing
+
+### File persistence
+Important files (code, configs, outputs) should be saved to workspace to ensure they persist.
 {_SHARED_TOOL_NOTES}"""


--- a/autogpt_platform/backend/backend/copilot/rate_limit.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit.py
@@ -0,0 +1,266 @@
+"""CoPilot rate limiting based on token usage.
+
+Uses Redis fixed-window counters to track per-user token consumption
+with configurable daily and weekly limits. Daily windows reset at
+midnight UTC; weekly windows reset at ISO week boundary (Monday 00:00
+UTC). Fails open when Redis is unavailable to avoid blocking users.
+"""
+
+import asyncio
+import logging
+from datetime import UTC, datetime, timedelta
+
+from pydantic import BaseModel, Field
+from redis.exceptions import RedisError
+
+from backend.data.redis_client import get_redis_async
+
+logger = logging.getLogger(__name__)
+
+# Redis key prefixes
+_USAGE_KEY_PREFIX = "copilot:usage"
+
+
+class UsageWindow(BaseModel):
+    """Token usage measured against the limit of one rate-limit window."""
+
+    used: int  # tokens counted so far in this window
+    limit: int = Field(
+        description="Maximum tokens allowed in this window. 0 means unlimited."
+    )
+    resets_at: datetime  # when this window ends
+
+
+class CoPilotUsageStatus(BaseModel):
+    """A user's current CoPilot usage in the daily and weekly windows."""
+
+    daily: UsageWindow  # usage within the current day
+    weekly: UsageWindow  # usage within the current week
+
+
+class RateLimitExceeded(Exception):
+    """Signals that a user has used up a CoPilot usage window."""
+
+    def __init__(self, window: str, resets_at: datetime):
+        self.window = window
+        self.resets_at = resets_at
+        remaining = (resets_at - datetime.now(UTC)).total_seconds()
+        if remaining > 0:
+            # Whole hours and leftover whole minutes until the reset.
+            hours, leftover = divmod(int(remaining), 3600)
+            minutes = leftover // 60
+            time_str = f"{minutes}m" if hours <= 0 else f"{hours}h {minutes}m"
+        else:
+            time_str = "now"
+        super().__init__(
+            f"You've reached your {window} usage limit. Resets in {time_str}."
+        )
+
+
+async def get_usage_status(
+    user_id: str,
+    daily_token_limit: int,
+    weekly_token_limit: int,
+) -> CoPilotUsageStatus:
+    """Report a user's token usage for the current daily and weekly windows.
+
+    Both Redis counters are fetched concurrently. If Redis is unavailable
+    a warning is logged and both windows report zero usage.
+
+    Args:
+        user_id: The user's ID.
+        daily_token_limit: Max tokens per day (0 = unlimited).
+        weekly_token_limit: Max tokens per week (0 = unlimited).
+
+    Returns:
+        CoPilotUsageStatus with usage, limit and reset time per window.
+    """
+    now = datetime.now(UTC)
+    # Zero usage is the fallback when the counters cannot be read.
+    used = {"daily": 0, "weekly": 0}
+    try:
+        redis = await get_redis_async()
+        raw_daily, raw_weekly = await asyncio.gather(
+            redis.get(_daily_key(user_id, now=now)),
+            redis.get(_weekly_key(user_id, now=now)),
+        )
+        used = {"daily": int(raw_daily or 0), "weekly": int(raw_weekly or 0)}
+    except (RedisError, ConnectionError, OSError):
+        logger.warning("Redis unavailable for usage status, returning zeros")
+
+    daily_resets_at = _daily_reset_time(now=now)
+    weekly_resets_at = _weekly_reset_time(now=now)
+    return CoPilotUsageStatus(
+        daily=UsageWindow(
+            used=used["daily"], limit=daily_token_limit, resets_at=daily_resets_at
+        ),
+        weekly=UsageWindow(
+            used=used["weekly"], limit=weekly_token_limit, resets_at=weekly_resets_at
+        ),
+    )
+
+
+async def check_rate_limit(
+    user_id: str,
+    daily_token_limit: int,
+    weekly_token_limit: int,
+) -> None:
+    """Raise RateLimitExceeded when the user has used up a token window.
+
+    Pre-turn soft check only: ``record_token_usage()`` updates the counters
+    after a turn completes, so parallel turns can all pass against the same
+    snapshot and overshoot the limit. That is accepted because a turn's
+    token count is unknown until generation has finished.
+
+    A limit of 0 (or less) disables that window. Fails open: if Redis is
+    unavailable the request is allowed.
+    """
+    daily_enabled = daily_token_limit > 0
+    weekly_enabled = weekly_token_limit > 0
+    if not (daily_enabled or weekly_enabled):
+        # Nothing to enforce, so skip the Redis round-trip entirely.
+        return
+
+    now = datetime.now(UTC)
+    try:
+        redis = await get_redis_async()
+        raw_daily, raw_weekly = await asyncio.gather(
+            redis.get(_daily_key(user_id, now=now)),
+            redis.get(_weekly_key(user_id, now=now)),
+        )
+        daily_used, weekly_used = int(raw_daily or 0), int(raw_weekly or 0)
+    except (RedisError, ConnectionError, OSError):
+        logger.warning("Redis unavailable for rate limit check, allowing request")
+        return
+
+    # The daily window is tested first, so it wins when both are exhausted.
+    if daily_enabled and daily_used >= daily_token_limit:
+        raise RateLimitExceeded("daily", _daily_reset_time(now=now))
+
+    if weekly_enabled and weekly_used >= weekly_token_limit:
+        raise RateLimitExceeded("weekly", _weekly_reset_time(now=now))
+
+
+async def record_token_usage(
+    user_id: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+    *,
+    cache_read_tokens: int = 0,
+    cache_creation_tokens: int = 0,
+) -> None:
+    """Record token usage for a user across all windows.
+
+    Uses weighted counting so cached tokens don't unfairly penalise
+    multi-turn conversations. Weight applied to each token class:
+      - uncached input 100%, output 100%
+      - cache creation 25%, cache read 10%
+    NOTE(review): Anthropic bills cache *writes* above the base input rate,
+    so the 25% weight looks like a policy choice, not pricing — confirm.
+
+    ``prompt_tokens`` should be the *uncached* input count (``input_tokens``
+    from the API response). Cache counts are passed separately.
+
+    Args:
+        user_id: The user's ID.
+        prompt_tokens: Uncached input tokens.
+        completion_tokens: Output tokens.
+        cache_read_tokens: Tokens served from prompt cache (counted at 10%).
+        cache_creation_tokens: Tokens written to prompt cache (counted at 25%).
+    """
+    prompt_tokens = max(0, prompt_tokens)  # negative counts are clamped to zero
+    completion_tokens = max(0, completion_tokens)
+    cache_read_tokens = max(0, cache_read_tokens)
+    cache_creation_tokens = max(0, cache_creation_tokens)
+
+    weighted_input = (
+        prompt_tokens
+        + round(cache_creation_tokens * 0.25)
+        + round(cache_read_tokens * 0.1)
+    )
+    total = weighted_input + completion_tokens
+    if total <= 0:
+        return  # nothing to count, so Redis is not touched
+
+    raw_total = (
+        prompt_tokens + cache_read_tokens + cache_creation_tokens + completion_tokens
+    )
+    logger.info(
+        "Recording token usage for %s: raw=%d, weighted=%d "
+        "(uncached=%d, cache_read=%d@10%%, cache_create=%d@25%%, output=%d)",
+        user_id[:8],  # only a short prefix of the user id is logged
+        raw_total,
+        total,
+        prompt_tokens,
+        cache_read_tokens,
+        cache_creation_tokens,
+        completion_tokens,
+    )
+
+    now = datetime.now(UTC)
+    try:
+        redis = await get_redis_async()
+        # transaction=False: these are independent INCRBY+EXPIRE pairs on
+        # separate keys — no cross-key atomicity needed.  Skipping
+        # MULTI/EXEC avoids the overhead.  If the connection drops between
+        # INCRBY and EXPIRE, the next write to that key re-applies the TTL;
+        # only the last write of a window can leave a key without expiry.
+        pipe = redis.pipeline(transaction=False)
+
+        # Daily counter (expires at next midnight UTC)
+        d_key = _daily_key(user_id, now=now)
+        pipe.incrby(d_key, total)
+        seconds_until_daily_reset = int(
+            (_daily_reset_time(now=now) - now).total_seconds()
+        )
+        pipe.expire(d_key, max(seconds_until_daily_reset, 1))
+
+        # Weekly counter (expires end of week)
+        w_key = _weekly_key(user_id, now=now)
+        pipe.incrby(w_key, total)
+        seconds_until_weekly_reset = int(
+            (_weekly_reset_time(now=now) - now).total_seconds()
+        )
+        pipe.expire(w_key, max(seconds_until_weekly_reset, 1))
+
+        await pipe.execute()
+    except (RedisError, ConnectionError, OSError):
+        logger.warning(
+            "Redis unavailable for recording token usage (tokens=%d)",
+            total,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+def _daily_key(user_id: str, now: datetime | None = None) -> str:
+    """Redis key of ``user_id``'s daily counter for the date of ``now``."""
+    stamp = (now if now is not None else datetime.now(UTC)).strftime("%Y-%m-%d")
+    return f"{_USAGE_KEY_PREFIX}:daily:{user_id}:{stamp}"
+
+
+def _weekly_key(user_id: str, now: datetime | None = None) -> str:
+    """Redis key of ``user_id``'s weekly counter for the ISO week of ``now``."""
+    iso = (now if now is not None else datetime.now(UTC)).isocalendar()
+    # iso[0] is the ISO year and iso[1] the ISO week number.
+    return f"{_USAGE_KEY_PREFIX}:weekly:{user_id}:{iso[0]}-W{iso[1]:02d}"
+
+
+def _daily_reset_time(now: datetime | None = None) -> datetime:
+    """Return the midnight that follows ``now`` (default: current UTC time)."""
+    moment = datetime.now(UTC) if now is None else now
+    midnight = moment.replace(hour=0, minute=0, second=0, microsecond=0)
+    return midnight + timedelta(days=1)
+
+
+def _weekly_reset_time(now: datetime | None = None) -> datetime:
+    """Return 00:00 of the Monday strictly after ``now`` (default: UTC now)."""
+    moment = datetime.now(UTC) if now is None else now
+    midnight = moment.replace(hour=0, minute=0, second=0, microsecond=0)
+    # weekday() is 0 on Monday, so this is always 1..7: a Monday rolls over
+    # to the following Monday rather than to itself.
+    days_ahead = 7 - moment.weekday()
+    return midnight + timedelta(days=days_ahead)
--- a/autogpt_platform/backend/backend/copilot/rate_limit_test.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit_test.py
@@ -0,0 +1,334 @@
+"""Unit tests for CoPilot rate limiting."""
+
+from datetime import UTC, datetime, timedelta
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from redis.exceptions import RedisError
+
+from .rate_limit import (
+    CoPilotUsageStatus,
+    RateLimitExceeded,
+    check_rate_limit,
+    get_usage_status,
+    record_token_usage,
+)
+
+_USER = "test-user-rl"
+
+
+# ---------------------------------------------------------------------------
+# RateLimitExceeded
+# ---------------------------------------------------------------------------
+
+
+class TestRateLimitExceeded:
+    def test_message_contains_window_name(self):
+        exc = RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1))
+        assert "daily" in str(exc)
+
+    def test_message_contains_reset_time(self):
+        exc = RateLimitExceeded(
+            "weekly", datetime.now(UTC) + timedelta(hours=2, minutes=30)
+        )
+        msg = str(exc)
+        # Only the hour part is asserted: the minutes may render as 29m or 30m
+        assert "2h " in msg
+        assert "Resets in" in msg
+
+    def test_message_minutes_only_when_under_one_hour(self):
+        exc = RateLimitExceeded("daily", datetime.now(UTC) + timedelta(minutes=15))
+        msg = str(exc)
+        assert "Resets in" in msg
+        # Under one hour the hour component must be omitted, not shown as "0h"
+        assert "0h" not in msg
+
+    def test_message_says_now_when_resets_at_is_in_the_past(self):
+        """Negative delta (clock skew / stale TTL) should say 'now', not '-1h -30m'."""
+        exc = RateLimitExceeded("daily", datetime.now(UTC) - timedelta(minutes=5))
+        assert "Resets in now" in str(exc)
+
+
+# ---------------------------------------------------------------------------
+# get_usage_status
+# ---------------------------------------------------------------------------
+
+
+class TestGetUsageStatus:
+    @pytest.mark.asyncio
+    async def test_returns_redis_values(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["500", "2000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert isinstance(status, CoPilotUsageStatus)
+        assert status.daily.used == 500
+        assert status.daily.limit == 10000
+        assert status.weekly.used == 2000
+        assert status.weekly.limit == 50000
+
+    @pytest.mark.asyncio
+    async def test_returns_zeros_when_redis_unavailable(self):
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 0
+        assert status.weekly.used == 0
+
+    @pytest.mark.asyncio
+    async def test_partial_none_daily_counter(self):
+        """Daily counter is None (new day), weekly has usage."""
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=[None, "3000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 0
+        assert status.weekly.used == 3000
+
+    @pytest.mark.asyncio
+    async def test_partial_none_weekly_counter(self):
+        """Weekly counter is None (start of week), daily has usage."""
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["500", None])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        assert status.daily.used == 500
+        assert status.weekly.used == 0
+
+    @pytest.mark.asyncio
+    async def test_resets_at_daily_is_next_midnight_utc(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["0", "0"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            status = await get_usage_status(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+        now = datetime.now(UTC)
+        # Daily reset should be within 24h
+        assert status.daily.resets_at > now
+        assert status.daily.resets_at <= now + timedelta(hours=24, seconds=5)
+
+
+# ---------------------------------------------------------------------------
+# check_rate_limit
+# ---------------------------------------------------------------------------
+
+
+class TestCheckRateLimit:
+    @pytest.mark.asyncio
+    async def test_allows_when_under_limit(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["100", "200"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise
+            await check_rate_limit(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+    @pytest.mark.asyncio
+    async def test_raises_when_daily_limit_exceeded(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["10000", "200"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            with pytest.raises(RateLimitExceeded) as exc_info:
+                await check_rate_limit(
+                    _USER, daily_token_limit=10000, weekly_token_limit=50000
+                )
+            assert exc_info.value.window == "daily"
+
+    @pytest.mark.asyncio
+    async def test_raises_when_weekly_limit_exceeded(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["100", "50000"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            with pytest.raises(RateLimitExceeded) as exc_info:
+                await check_rate_limit(
+                    _USER, daily_token_limit=10000, weekly_token_limit=50000
+                )
+            assert exc_info.value.window == "weekly"
+
+    @pytest.mark.asyncio
+    async def test_allows_when_redis_unavailable(self):
+        """Fail-open: allow requests when Redis is down."""
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            # Should not raise
+            await check_rate_limit(
+                _USER, daily_token_limit=10000, weekly_token_limit=50000
+            )
+
+    @pytest.mark.asyncio
+    async def test_skips_check_when_limit_is_zero(self):
+        mock_redis = AsyncMock()
+        mock_redis.get = AsyncMock(side_effect=["999999", "999999"])
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise — limits of 0 mean unlimited
+            await check_rate_limit(_USER, daily_token_limit=0, weekly_token_limit=0)
+
+
+# ---------------------------------------------------------------------------
+# record_token_usage
+# ---------------------------------------------------------------------------
+
+
+class TestRecordTokenUsage:
+    @staticmethod
+    def _make_pipeline_mock() -> MagicMock:
+        """Create a pipeline mock with sync methods and async execute."""
+        pipe = MagicMock()
+        pipe.execute = AsyncMock(return_value=[])
+        return pipe
+
+    @pytest.mark.asyncio
+    async def test_increments_redis_counters(self):
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+        # Should call incrby twice (daily + weekly) with total=150
+        incrby_calls = mock_pipe.incrby.call_args_list
+        assert len(incrby_calls) == 2
+        assert incrby_calls[0].args[1] == 150  # daily
+        assert incrby_calls[1].args[1] == 150  # weekly
+
+    @pytest.mark.asyncio
+    async def test_skips_when_zero_tokens(self):
+        mock_redis = AsyncMock()
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=0, completion_tokens=0)
+
+        # Should not call pipeline at all
+        mock_redis.pipeline.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_sets_expire_on_both_keys(self):
+        """Pipeline should call expire for both keys with TTLs bounded by the window."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+        expire_calls = mock_pipe.expire.call_args_list
+        assert len(expire_calls) == 2
+
+        # Daily key TTL: at least 1s and never longer than one day (next midnight)
+        daily_ttl = expire_calls[0].args[1]
+        assert 1 <= daily_ttl <= 24 * 60 * 60
+
+        # Weekly key TTL: at least 1s and never longer than 7 days (next Monday)
+        weekly_ttl = expire_calls[1].args[1]
+        assert 1 <= weekly_ttl <= 7 * 24 * 60 * 60
+
+    @pytest.mark.asyncio
+    async def test_handles_redis_failure_gracefully(self):
+        """Should not raise when Redis is unavailable."""
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            side_effect=ConnectionError("Redis down"),
+        ):
+            # Should not raise
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
+
+    @pytest.mark.asyncio
+    async def test_cost_weighted_counting(self):
+        """Cached tokens should be weighted: cache_read=10%, cache_create=25%."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            await record_token_usage(
+                _USER,
+                prompt_tokens=100,  # uncached → 100
+                completion_tokens=50,  # output → 50
+                cache_read_tokens=10000,  # 10% → 1000
+                cache_creation_tokens=400,  # 25% → 100
+            )
+
+        # Expected weighted total: 100 + 1000 + 100 + 50 = 1250
+        incrby_calls = mock_pipe.incrby.call_args_list
+        assert len(incrby_calls) == 2
+        assert incrby_calls[0].args[1] == 1250  # daily
+        assert incrby_calls[1].args[1] == 1250  # weekly
+
+    @pytest.mark.asyncio
+    async def test_handles_redis_error_during_pipeline_execute(self):
+        """Should not raise when pipeline.execute() fails with RedisError."""
+        mock_pipe = self._make_pipeline_mock()
+        mock_pipe.execute = AsyncMock(side_effect=RedisError("Pipeline failed"))
+        mock_redis = AsyncMock()
+        mock_redis.pipeline = lambda **_kw: mock_pipe
+
+        with patch(
+            "backend.copilot.rate_limit.get_redis_async",
+            return_value=mock_redis,
+        ):
+            # Should not raise — fail-open
+            await record_token_usage(_USER, prompt_tokens=100, completion_tokens=50)
--- a/autogpt_platform/backend/backend/copilot/response_model.py
+++ b/autogpt_platform/backend/backend/copilot/response_model.py
@@ -186,12 +186,43 @@ class StreamToolOutputAvailable(StreamBaseResponse):


 class StreamUsage(StreamBaseResponse):
-    """Token usage statistics."""
+    """Token usage statistics.
+
+    Emitted as an SSE comment so the Vercel AI SDK parser ignores it
+    (it uses z.strictObject() and rejects unknown event types).
+    Usage data is recorded server-side (session DB + Redis counters).
+    """

    type: ResponseType = ResponseType.USAGE
-    promptTokens: int = Field(..., description="Number of prompt tokens")
-    completionTokens: int = Field(..., description="Number of completion tokens")
-    totalTokens: int = Field(..., description="Total number of tokens")
+    prompt_tokens: int = Field(
+        ...,
+        serialization_alias="promptTokens",
+        description="Number of uncached prompt tokens",
+    )
+    completion_tokens: int = Field(
+        ...,
+        serialization_alias="completionTokens",
+        description="Number of completion tokens",
+    )
+    total_tokens: int = Field(
+        ...,
+        serialization_alias="totalTokens",
+        description="Total number of tokens (raw, not weighted)",
+    )
+    cache_read_tokens: int = Field(
+        default=0,
+        serialization_alias="cacheReadTokens",
+        description="Prompt tokens served from cache (10% cost)",
+    )
+    cache_creation_tokens: int = Field(
+        default=0,
+        serialization_alias="cacheCreationTokens",
+        description="Prompt tokens written to cache (25% cost)",
+    )
+
+    def to_sse(self) -> str:
+        """Emit as SSE comment so the AI SDK parser ignores it."""
+        return f": usage {self.model_dump_json(exclude_none=True, by_alias=True)}\n\n"


 class StreamError(StreamBaseResponse):
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -55,12 +55,14 @@ from ..response_model import (
    StreamTextDelta,
    StreamToolInputAvailable,
    StreamToolOutputAvailable,
+    StreamUsage,
 )
 from ..service import (
    _build_system_prompt,
    _generate_session_title,
    _is_langfuse_configured,
 )
+from ..token_tracking import persist_and_record_usage
 from ..tools.e2b_sandbox import get_or_create_sandbox, pause_sandbox_direct
 from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path
 from ..tracking import track_user_message
@@ -736,6 +738,13 @@ async def stream_chat_completion_sdk(
    _otel_ctx: Any = None

    # Make sure there is no more code between the lock acquisition and try-block.
+    # Usage accumulators (plain assignments, cannot raise); set from ResultMessage
+    turn_prompt_tokens = 0  # uncached input tokens only
+    turn_completion_tokens = 0
+    turn_cache_read_tokens = 0
+    turn_cache_creation_tokens = 0
+    turn_cost_usd: float | None = None
+
    try:
        # Build system prompt (reuses non-SDK path with Langfuse support).
        # Pre-compute the cwd here so the exact working directory path can be
@@ -1112,7 +1121,7 @@ async def stream_chat_completion_sdk(
                                - len(adapter.resolved_tool_calls),
                            )

-                    # Log ResultMessage details for debugging
+                    # Log ResultMessage details and capture token usage
                    if isinstance(sdk_msg, ResultMessage):
                        logger.info(
                            "%s Received: ResultMessage %s "
@@ -1131,6 +1140,33 @@ async def stream_chat_completion_sdk(
                                sdk_msg.result or "(no error message provided)",
                            )

+                        # Capture token usage from ResultMessage.
+                        # Anthropic reports cached tokens separately:
+                        #   input_tokens = uncached only
+                        #   cache_read_input_tokens = served from cache
+                        #   cache_creation_input_tokens = written to cache
+                        if sdk_msg.usage:
+                            turn_prompt_tokens += sdk_msg.usage.get("input_tokens", 0)
+                            turn_cache_read_tokens += sdk_msg.usage.get(
+                                "cache_read_input_tokens", 0
+                            )
+                            turn_cache_creation_tokens += sdk_msg.usage.get(
+                                "cache_creation_input_tokens", 0
+                            )
+                            turn_completion_tokens += sdk_msg.usage.get(
+                                "output_tokens", 0
+                            )
+                            logger.info(
+                                "%s Token usage: uncached=%d, cache_read=%d, cache_create=%d, output=%d",
+                                log_prefix,
+                                turn_prompt_tokens,
+                                turn_cache_read_tokens,
+                                turn_cache_creation_tokens,
+                                turn_completion_tokens,
+                            )
+                        if sdk_msg.total_cost_usd is not None:
+                            turn_cost_usd = sdk_msg.total_cost_usd
+
                    # Emit compaction end if SDK finished compacting.
                    # When compaction ends, sync TranscriptBuilder with the
                    # CLI's active context so they stay identical.
@@ -1347,6 +1383,26 @@ async def stream_chat_completion_sdk(
            ) and not has_appended_assistant:
                session.messages.append(assistant_response)

+        # Emit token usage to the client (must be in try to reach SSE stream).
+        # Session persistence of usage is in finally to stay consistent with
+        # rate-limit recording even if an exception interrupts between here
+        # and the finally block.
+        if turn_prompt_tokens > 0 or turn_completion_tokens > 0:
+            # total_tokens = prompt (uncached input) + completion (output).
+            # Cache tokens are tracked separately and excluded from total
+            # so that the semantics match the baseline path (OpenRouter)
+            # which folds cache into prompt_tokens. Keeping total_tokens
+            # = prompt + completion everywhere makes cross-path comparisons
+            # and session-level aggregation consistent.
+            total_tokens = turn_prompt_tokens + turn_completion_tokens
+            yield StreamUsage(
+                prompt_tokens=turn_prompt_tokens,
+                completion_tokens=turn_completion_tokens,
+                total_tokens=total_tokens,
+                cache_read_tokens=turn_cache_read_tokens,
+                cache_creation_tokens=turn_cache_creation_tokens,
+            )
+
        # Transcript upload is handled exclusively in the finally block
        # to avoid double-uploads (the success path used to upload the
        # old resume file, then the finally block overwrote it with the
@@ -1411,6 +1467,20 @@ async def stream_chat_completion_sdk(
            except Exception:
                logger.warning("OTEL context teardown failed", exc_info=True)

+        # --- Persist token usage to session + rate-limit counters ---
+        # Both must live in finally so they stay consistent even when an
+        # exception interrupts the try block after StreamUsage was yielded.
+        await persist_and_record_usage(
+            session=session,
+            user_id=user_id,
+            prompt_tokens=turn_prompt_tokens,
+            completion_tokens=turn_completion_tokens,
+            cache_read_tokens=turn_cache_read_tokens,
+            cache_creation_tokens=turn_cache_creation_tokens,
+            log_prefix=log_prefix,
+            cost_usd=turn_cost_usd,
+        )
+
        # --- Persist session messages ---
        # This MUST run in finally to persist messages even when the generator
        # is stopped early (e.g., user clicks stop, processor breaks stream loop).
--- a/autogpt_platform/backend/backend/copilot/service.py
+++ b/autogpt_platform/backend/backend/copilot/service.py
@@ -28,10 +28,24 @@ logger = logging.getLogger(__name__)

 config = ChatConfig()
 settings = Settings()
-client = LangfuseAsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+
+_client: LangfuseAsyncOpenAI | None = None
+_langfuse = None


-langfuse = get_client()
+def _get_openai_client() -> LangfuseAsyncOpenAI:
+    global _client
+    if _client is None:  # first call: build the shared client, then reuse it
+        _client = LangfuseAsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
+    return _client
+
+
+def _get_langfuse():
+    global _langfuse
+    if _langfuse is None:  # first call: fetch the Langfuse client, then reuse it
+        _langfuse = get_client()
+    return _langfuse
+

 # Default system prompt used when Langfuse is not configured
 # Provides minimal baseline tone and personality - all workflow, tools, and
@@ -84,7 +98,7 @@ async def _get_system_prompt_template(context: str) -> str:
                else "latest"
            )
            prompt = await asyncio.to_thread(
-                langfuse.get_prompt,
+                _get_langfuse().get_prompt,
                config.langfuse_prompt_name,
                label=label,
                cache_ttl_seconds=config.langfuse_prompt_cache_ttl,
@@ -158,7 +172,7 @@ async def _generate_session_title(
            "environment": settings.config.app_env.value,
        }

-        response = await client.chat.completions.create(
+        response = await _get_openai_client().chat.completions.create(
            model=config.title_model,
            messages=[
                {
--- a/autogpt_platform/backend/backend/copilot/token_tracking.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking.py
@@ -0,0 +1,93 @@
+"""Shared token-usage persistence and rate-limit recording.
+
+Both the baseline (OpenRouter) and SDK (Anthropic) service layers need to:
+  1. Append a ``Usage`` record to the session.
+  2. Log the turn's token counts.
+  3. Record weighted usage in Redis for rate-limiting.
+
+This module extracts that common logic so both paths stay in sync.
+"""
+
+import logging
+
+from .model import ChatSession, Usage
+from .rate_limit import record_token_usage
+
+logger = logging.getLogger(__name__)
+
+
+async def persist_and_record_usage(
+    *,
+    session: ChatSession | None,
+    user_id: str | None,
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read_tokens: int = 0,
+    cache_creation_tokens: int = 0,
+    log_prefix: str = "",
+    cost_usd: float | str | None = None,
+) -> int:
+    """Persist token usage to session and record for rate limiting.
+
+    Args:
+        session: The chat session to append usage to (may be None on error).
+        user_id: User ID for rate-limit counters (skipped if None or empty).
+        prompt_tokens: Uncached input tokens (negative counts are clamped to 0).
+        completion_tokens: Output tokens.
+        cache_read_tokens: Tokens served from prompt cache (Anthropic only).
+        cache_creation_tokens: Tokens written to prompt cache (Anthropic only).
+        log_prefix: Prefix for log messages (e.g. "[SDK]", "[Baseline]").
+        cost_usd: Cost (float from SDK, str otherwise); logged only with cache tokens.
+
+    Returns:
+        total_tokens (prompt + completion; cache excluded); 0 if nothing was recorded.
+    """
+    prompt_tokens = max(0, prompt_tokens)
+    completion_tokens = max(0, completion_tokens)
+    cache_read_tokens = max(0, cache_read_tokens)
+    cache_creation_tokens = max(0, cache_creation_tokens)
+
+    if prompt_tokens <= 0 and completion_tokens <= 0:  # also skips cache-only turns
+        return 0
+
+    # total_tokens = prompt + completion. Cache tokens are tracked
+    # separately and excluded from total so both baseline and SDK
+    # paths share the same semantics.
+    total_tokens = prompt_tokens + completion_tokens
+
+    if session is not None:
+        session.usage.append(
+            Usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_creation_tokens=cache_creation_tokens,
+            )
+        )
+
+    if cache_read_tokens or cache_creation_tokens:
+        logger.info(
+            f"{log_prefix} Turn usage: uncached={prompt_tokens}, "
+            f"cache_read={cache_read_tokens}, cache_create={cache_creation_tokens}, "
+            f"output={completion_tokens}, total={total_tokens}, cost_usd={cost_usd}"
+        )
+    else:
+        logger.info(
+            f"{log_prefix} Turn usage: prompt={prompt_tokens}, "
+            f"completion={completion_tokens}, total={total_tokens}"
+        )
+
+    if user_id:
+        try:
+            await record_token_usage(
+                user_id=user_id,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_creation_tokens=cache_creation_tokens,
+            )
+        except Exception as usage_err:  # best-effort: never fail the turn over usage
+            logger.warning(f"{log_prefix} Failed to record token usage: {usage_err}")
+
+    return total_tokens
--- a/autogpt_platform/backend/backend/copilot/token_tracking_test.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking_test.py
@@ -0,0 +1,281 @@
+"""Unit tests for token_tracking.persist_and_record_usage.
+
+Covers both the baseline (prompt+completion only) and SDK (with cache breakdown)
+calling conventions, session persistence, and rate-limit recording.
+"""
+
+from datetime import UTC, datetime
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from .model import ChatSession, Usage
+from .token_tracking import persist_and_record_usage
+
+
+def _make_session() -> ChatSession:
+    """Return a minimal in-memory ChatSession for testing."""
+    return ChatSession(
+        session_id="sess-test",
+        user_id="user-test",
+        title=None,
+        messages=[],
+        usage=[],
+        started_at=datetime.now(UTC),
+        updated_at=datetime.now(UTC),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Return value / total_tokens semantics
+# ---------------------------------------------------------------------------
+
+
+class TestTotalTokens:
+    @pytest.mark.asyncio
+    async def test_returns_prompt_plus_completion(self):
+        """total_tokens = prompt + completion (cache excluded from total)."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=300,
+                completion_tokens=200,
+            )
+        assert total == 500
+
+    @pytest.mark.asyncio
+    async def test_returns_zero_when_no_tokens(self):
+        """Returns 0 early when both prompt and completion are zero."""
+        total = await persist_and_record_usage(
+            session=None,
+            user_id=None,
+            prompt_tokens=0,
+            completion_tokens=0,
+        )
+        assert total == 0
+
+    @pytest.mark.asyncio
+    async def test_cache_tokens_excluded_from_total(self):
+        """Cache tokens are stored separately and not added to total_tokens."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+                cache_read_tokens=5000,
+                cache_creation_tokens=200,
+            )
+        # total = prompt + completion only (5000 + 200 cache excluded)
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_baseline_path_no_cache(self):
+        """Baseline (OpenRouter) path passes no cache tokens; total = prompt + completion."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="u1",
+                prompt_tokens=1000,
+                completion_tokens=400,
+                log_prefix="[Baseline]",
+            )
+        assert total == 1400
+
+    @pytest.mark.asyncio
+    async def test_sdk_path_with_cache(self):
+        """SDK (Anthropic) path passes cache tokens; total still = prompt + completion."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="u2",
+                prompt_tokens=200,
+                completion_tokens=100,
+                cache_read_tokens=8000,
+                cache_creation_tokens=400,
+                log_prefix="[SDK]",
+                cost_usd=0.0015,
+            )
+        assert total == 300
+
+
+# ---------------------------------------------------------------------------
+# Session persistence
+# ---------------------------------------------------------------------------
+
+
+class TestSessionPersistence:
+    @pytest.mark.asyncio
+    async def test_appends_usage_to_session(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert len(session.usage) == 1
+        usage: Usage = session.usage[0]
+        assert usage.prompt_tokens == 100
+        assert usage.completion_tokens == 50
+        assert usage.total_tokens == 150
+        assert usage.cache_read_tokens == 0
+        assert usage.cache_creation_tokens == 0
+
+    @pytest.mark.asyncio
+    async def test_appends_cache_breakdown_to_session(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session,
+                user_id=None,
+                prompt_tokens=200,
+                completion_tokens=80,
+                cache_read_tokens=3000,
+                cache_creation_tokens=500,
+            )
+        usage: Usage = session.usage[0]
+        assert usage.cache_read_tokens == 3000
+        assert usage.cache_creation_tokens == 500
+
+    @pytest.mark.asyncio
+    async def test_multiple_turns_append_multiple_records(self):
+        session = _make_session()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            await persist_and_record_usage(
+                session=session, user_id=None, prompt_tokens=100, completion_tokens=50
+            )
+            await persist_and_record_usage(
+                session=session, user_id=None, prompt_tokens=200, completion_tokens=70
+            )
+        assert len(session.usage) == 2
+
+    @pytest.mark.asyncio
+    async def test_none_session_does_not_raise(self):
+        """When session is None (e.g. error path), no exception should be raised."""
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new_callable=AsyncMock,
+        ):
+            total = await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_no_append_when_zero_tokens(self):
+        """When tokens are zero, function returns early — session unchanged."""
+        session = _make_session()
+        total = await persist_and_record_usage(
+            session=session,
+            user_id=None,
+            prompt_tokens=0,
+            completion_tokens=0,
+        )
+        assert total == 0
+        assert len(session.usage) == 0
+
+
+# ---------------------------------------------------------------------------
+# Rate-limit recording
+# ---------------------------------------------------------------------------
+
+
+class TestRateLimitRecording:
+    @pytest.mark.asyncio
+    async def test_calls_record_token_usage_when_user_id_present(self):
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id="user-abc",
+                prompt_tokens=100,
+                completion_tokens=50,
+                cache_read_tokens=1000,
+                cache_creation_tokens=200,
+            )
+        mock_record.assert_awaited_once_with(
+            user_id="user-abc",
+            prompt_tokens=100,
+            completion_tokens=50,
+            cache_read_tokens=1000,
+            cache_creation_tokens=200,
+        )
+
+    @pytest.mark.asyncio
+    async def test_skips_record_when_user_id_is_none(self):
+        """Anonymous sessions should not create Redis keys."""
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id=None,
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        mock_record.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_record_failure_does_not_raise(self):
+        """A Redis error in record_token_usage should be swallowed (fail-open)."""
+        mock_record = AsyncMock(side_effect=ConnectionError("Redis down"))
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            # Should not raise
+            total = await persist_and_record_usage(
+                session=None,
+                user_id="user-xyz",
+                prompt_tokens=100,
+                completion_tokens=50,
+            )
+        assert total == 150
+
+    @pytest.mark.asyncio
+    async def test_skips_record_when_zero_tokens(self):
+        """Returns 0 before calling record_token_usage when tokens are zero."""
+        mock_record = AsyncMock()
+        with patch(
+            "backend.copilot.token_tracking.record_token_usage",
+            new=mock_record,
+        ):
+            await persist_and_record_usage(
+                session=None,
+                user_id="user-abc",
+                prompt_tokens=0,
+                completion_tokens=0,
+            )
+        mock_record.assert_not_awaited()
--- a/autogpt_platform/backend/backend/copilot/tools/add_understanding.py
+++ b/autogpt_platform/backend/backend/copilot/tools/add_understanding.py
@@ -22,11 +22,13 @@ class AddUnderstandingTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Store user's business context, workflows, pain points, and automation goals. "
-            "Call whenever the user shares business info. Each call incrementally merges "
-            "with existing data — provide only the fields you have."
-        )
+        return """Capture and store information about the user's business context,
+workflows, pain points, and automation goals. Call this tool whenever the user
+shares information about their business. Each call incrementally adds to the
+existing understanding - you don't need to provide all fields at once.
+
+Use this to build a comprehensive profile that helps recommend better agents
+and automations for the user's specific needs."""

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/agent_browser.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_browser.py
@@ -408,11 +408,18 @@ class BrowserNavigateTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Navigate to a URL in a real browser. Returns accessibility tree with @ref IDs "
-            "for browser_act. Session persists (cookies/auth carry over). "
-            "For static pages, prefer web_fetch. "
-            "For SPAs, elements may load late — use browser_act with wait + browser_screenshot to verify. "
-            "For auth: navigate to login, fill creds with browser_act, then navigate to target."
+            "Navigate to a URL using a real browser. Returns an accessibility "
+            "tree snapshot listing the page's interactive elements with @ref IDs "
+            "(e.g. @e3) that can be used with browser_act. "
+            "Session persists — cookies and login state carry over between calls. "
+            "Use this (with browser_act) for multi-step interaction: login flows, "
+            "form filling, button clicks, or anything requiring page interaction. "
+            "For plain static pages, prefer web_fetch — no browser overhead. "
+            "For authenticated pages: navigate to the login page first, use browser_act "
+            "to fill credentials and submit, then navigate to the target page. "
+            "Note: for slow SPAs, the returned snapshot may reflect a partially-loaded "
+            "state. If elements seem missing, use browser_act with action='wait' and a "
+            "CSS selector or millisecond delay, then take a browser_screenshot to verify."
        )

    @property
@@ -422,13 +429,13 @@ class BrowserNavigateTool(BaseTool):
            "properties": {
                "url": {
                    "type": "string",
-                    "description": "HTTP/HTTPS URL to navigate to.",
+                    "description": "The HTTP/HTTPS URL to navigate to.",
                },
                "wait_for": {
                    "type": "string",
                    "enum": ["networkidle", "load", "domcontentloaded"],
                    "default": "networkidle",
-                    "description": "Navigation completion strategy (default: networkidle).",
+                    "description": "When to consider navigation complete. Use 'networkidle' for SPAs (default).",
                },
            },
            "required": ["url"],
@@ -547,12 +554,14 @@ class BrowserActTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Interact with the current browser page using @ref IDs from the snapshot. "
-            "Actions: click, dblclick, fill, type, scroll, hover, press, "
+            "Interact with the current browser page. Use @ref IDs from the "
+            "snapshot (e.g. '@e3') to target elements. Returns an updated snapshot. "
+            "Supported actions: click, dblclick, fill, type, scroll, hover, press, "
            "check, uncheck, select, wait, back, forward, reload. "
-            "fill clears field first; type appends. "
-            "wait accepts CSS selector or milliseconds (e.g. '1000'). "
-            "Returns updated snapshot."
+            "fill clears the field before typing; type appends without clearing. "
+            "wait accepts a CSS selector (waits for element) or milliseconds string (e.g. '1000'). "
+            "Example login flow: fill @e1 with email → fill @e2 with password → "
+            "click @e3 (submit) → browser_navigate to the target page."
        )

    @property
@@ -578,21 +587,30 @@ class BrowserActTool(BaseTool):
                        "forward",
                        "reload",
                    ],
-                    "description": "Action to perform.",
+                    "description": "The action to perform.",
                },
                "target": {
                    "type": "string",
-                    "description": "@ref ID (e.g. '@e3'), CSS selector, or text description.",
+                    "description": (
+                        "Element to target. Use @ref from snapshot (e.g. '@e3'), "
+                        "a CSS selector, or a text description. "
+                        "Required for: click, dblclick, fill, type, hover, check, uncheck, select. "
+                        "For wait: a CSS selector to wait for, or milliseconds as a string (e.g. '1000')."
+                    ),
                },
                "value": {
                    "type": "string",
-                    "description": "Text for fill/type, key for press (e.g. 'Enter'), option for select.",
+                    "description": (
+                        "For fill/type: the text to enter. "
+                        "For press: key name (e.g. 'Enter', 'Tab', 'Control+a'). "
+                        "For select: the option value to select."
+                    ),
                },
                "direction": {
                    "type": "string",
                    "enum": ["up", "down", "left", "right"],
                    "default": "down",
-                    "description": "Scroll direction (default: down).",
+                    "description": "For scroll: direction to scroll.",
                },
            },
            "required": ["action"],
@@ -739,10 +757,12 @@ class BrowserScreenshotTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Screenshot the current browser page and save to workspace. "
-            "annotate=true overlays @ref labels on elements. "
-            "IMPORTANT: After calling, you MUST immediately call read_workspace_file with the "
-            "returned file_id to display the image inline."
+            "Take a screenshot of the current browser page and save it to the workspace. "
+            "IMPORTANT: After calling this tool, immediately call read_workspace_file "
+            "with the returned file_id to display the image inline to the user — "
+            "the screenshot is not visible until you do this. "
+            "With annotate=true (default), @ref labels are overlaid on interactive "
+            "elements, making it easy to see which @ref ID maps to which element on screen."
        )

    @property
@@ -753,12 +773,12 @@ class BrowserScreenshotTool(BaseTool):
                "annotate": {
                    "type": "boolean",
                    "default": True,
-                    "description": "Overlay @ref labels (default: true).",
+                    "description": "Overlay @ref labels on interactive elements (default: true).",
                },
                "filename": {
                    "type": "string",
                    "default": "screenshot.png",
-                    "description": "Workspace filename (default: screenshot.png).",
+                    "description": "Filename to save in the workspace.",
                },
            },
        }
--- a/autogpt_platform/backend/backend/copilot/tools/agent_output.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_output.py
@@ -108,12 +108,22 @@ class AgentOutputTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Retrieve execution outputs from a library agent. "
-            "Identify by agent_name, library_agent_id, or store_slug. "
-            "Filter by execution_id or run_time. "
-            "Optionally wait for running executions."
-        )
+        return """Retrieve execution outputs from agents in the user's library.
+
+        Identify the agent using one of:
+        - agent_name: Fuzzy search in user's library
+        - library_agent_id: Exact library agent ID
+        - store_slug: Marketplace format 'username/agent-name'
+
+        Select which run to retrieve using:
+        - execution_id: Specific execution ID
+        - run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
+
+        Wait for completion (optional):
+        - wait_if_running: Max seconds to wait if execution is still running (0-300).
+          If the execution is running/queued, waits up to this many seconds for completion.
+          Returns current status on timeout. If already finished, returns immediately.
+        """

    @property
    def parameters(self) -> dict[str, Any]:
@@ -122,27 +132,32 @@ class AgentOutputTool(BaseTool):
            "properties": {
                "agent_name": {
                    "type": "string",
-                    "description": "Agent name (fuzzy match).",
+                    "description": "Agent name to search for in user's library (fuzzy match)",
                },
                "library_agent_id": {
                    "type": "string",
-                    "description": "Library agent ID.",
+                    "description": "Exact library agent ID",
                },
                "store_slug": {
                    "type": "string",
-                    "description": "Marketplace 'username/agent-slug'.",
+                    "description": "Marketplace identifier: 'username/agent-slug'",
                },
                "execution_id": {
                    "type": "string",
-                    "description": "Specific execution ID.",
+                    "description": "Specific execution ID to retrieve",
                },
                "run_time": {
                    "type": "string",
-                    "description": "Time filter: 'latest', today/yesterday/last week/last 7 days/last month/last 30 days, 'YYYY-MM-DD', or ISO datetime.",
+                    "description": (
+                        "Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
+                    ),
                },
                "wait_if_running": {
                    "type": "integer",
-                    "description": "Max seconds to wait if still running (0-300). Returns current state on timeout.",
+                    "description": (
+                        "Max seconds to wait if execution is still running (0-300). "
+                        "If running, waits for completion. Returns current state on timeout."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
+++ b/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
@@ -41,9 +41,15 @@ class BashExecTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Execute a Bash command or script. Shares filesystem with SDK file tools. "
-            "Useful for scripts, data processing, and package installation. "
-            "Killed after timeout (default 30s, max 120s)."
+            "Execute a Bash command or script. "
+            "Full Bash scripting is supported (loops, conditionals, pipes, "
+            "functions, etc.). "
+            "The working directory is shared with the SDK Read/Write/Edit/Glob/Grep "
+            "tools — files created by either are immediately visible to both. "
+            "Execution is killed after the timeout (default 30s, max 120s). "
+            "Returns stdout and stderr. "
+            "Useful for file manipulation, data processing, running scripts, "
+            "and installing packages."
        )

    @property
@@ -53,11 +59,13 @@ class BashExecTool(BaseTool):
            "properties": {
                "command": {
                    "type": "string",
-                    "description": "Bash command or script.",
+                    "description": "Bash command or script to execute.",
                },
                "timeout": {
                    "type": "integer",
-                    "description": "Max seconds (default 30, max 120).",
+                    "description": (
+                        "Max execution time in seconds (default 30, max 120)."
+                    ),
                    "default": 30,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/continue_run_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/continue_run_block.py
@@ -30,7 +30,12 @@ class ContinueRunBlockTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Resume block execution after human review approval. Pass the review_id."
+        return (
+            "Continue executing a block after human review approval. "
+            "Use this after a run_block call returned review_required. "
+            "Pass the review_id from the review_required response. "
+            "The block will execute with the original pre-approved input data."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -39,7 +44,10 @@ class ContinueRunBlockTool(BaseTool):
            "properties": {
                "review_id": {
                    "type": "string",
-                    "description": "review_id from the review_required response.",
+                    "description": (
+                        "The review_id from a previous review_required response. "
+                        "This resumes execution with the pre-approved input data."
+                    ),
                },
            },
            "required": ["review_id"],
--- a/autogpt_platform/backend/backend/copilot/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/create_agent.py
@@ -23,8 +23,12 @@ class CreateAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Create a new agent from JSON (nodes + links). Validates, auto-fixes, and saves. "
-            "Before calling, search for existing agents with find_library_agent."
+            "Create a new agent workflow. Pass `agent_json` with the complete "
+            "agent graph JSON you generated using block schemas from find_block. "
+            "The tool validates, auto-fixes, and saves.\n\n"
+            "IMPORTANT: Before calling this tool, search for relevant existing agents "
+            "using find_library_agent that could be used as building blocks. "
+            "Pass their IDs in the library_agent_ids parameter."
        )

    @property
@@ -38,21 +42,34 @@ class CreateAgentTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Agent graph with 'nodes' and 'links' arrays.",
+                    "description": (
+                        "The agent JSON to validate and save. "
+                        "Must contain 'nodes' and 'links' arrays, and optionally "
+                        "'name' and 'description'."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save the agent (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the agent. Default is true. "
+                        "Set to false for preview only."
+                    ),
                    "default": True,
                },
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID to save into (default: root).",
+                    "description": (
+                        "Optional folder ID to save the agent into. "
+                        "If not provided, the agent is saved at root level. "
+                        "Use list_folders to find available folders."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
@@ -23,7 +23,9 @@ class CustomizeAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Customize a marketplace/template agent. Validates, auto-fixes, and saves."
+            "Customize a marketplace or template agent. Pass `agent_json` "
+            "with the complete customized agent JSON. The tool validates, "
+            "auto-fixes, and saves."
        )

    @property
@@ -37,21 +39,32 @@ class CustomizeAgentTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Customized agent JSON with nodes and links.",
+                    "description": (
+                        "Complete customized agent JSON to validate and save. "
+                        "Optionally include 'name' and 'description'."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save the agent (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the customized agent. Default is true."
+                    ),
                    "default": True,
                },
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID to save into (default: root).",
+                    "description": (
+                        "Optional folder ID to save the agent into. "
+                        "If not provided, the agent is saved at root level. "
+                        "Use list_folders to find available folders."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
@@ -23,8 +23,12 @@ class EditAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Edit an existing agent. Validates, auto-fixes, and saves. "
-            "Before calling, search for existing agents with find_library_agent."
+            "Edit an existing agent. Pass `agent_json` with the complete "
+            "updated agent JSON you generated. The tool validates, auto-fixes, "
+            "and saves.\n\n"
+            "IMPORTANT: Before calling this tool, if the changes involve adding new "
+            "functionality, search for relevant existing agents using find_library_agent "
+            "that could be used as building blocks."
        )

    @property
@@ -38,20 +42,33 @@ class EditAgentTool(BaseTool):
            "properties": {
                "agent_id": {
                    "type": "string",
-                    "description": "Graph ID or library agent ID to edit.",
+                    "description": (
+                        "The ID of the agent to edit. "
+                        "Can be a graph ID or library agent ID."
+                    ),
                },
                "agent_json": {
                    "type": "object",
-                    "description": "Updated agent JSON with nodes and links.",
+                    "description": (
+                        "Complete updated agent JSON to validate and save. "
+                        "Must contain 'nodes' and 'links'. "
+                        "Include 'name' and/or 'description' if they need "
+                        "to be updated."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks for the changes."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save changes (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the changes. "
+                        "Default is true. Set to false for preview only."
+                    ),
                    "default": True,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/feature_requests.py
+++ b/autogpt_platform/backend/backend/copilot/tools/feature_requests.py
@@ -134,7 +134,11 @@ class SearchFeatureRequestsTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search existing feature requests. Check before creating a new one."
+        return (
+            "Search existing feature requests to check if a similar request "
+            "already exists before creating a new one. Returns matching feature "
+            "requests with their ID, title, and description."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -230,9 +234,14 @@ class CreateFeatureRequestTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Create a feature request or add need to existing one. "
-            "Search first to avoid duplicates. Pass existing_issue_id to add to existing. "
-            "Never include PII (names, emails, phone numbers, company names) in title/description."
+            "Create a new feature request or add a customer need to an existing one. "
+            "Always search first with search_feature_requests to avoid duplicates. "
+            "If a matching request exists, pass its ID as existing_issue_id to add "
+            "the user's need to it instead of creating a duplicate. "
+            "IMPORTANT: Never include personally identifiable information (PII) in "
+            "the title or description — no names, emails, phone numbers, company "
+            "names, or other identifying details. Write titles and descriptions in "
+            "generic, feature-focused language."
        )

    @property
@@ -242,15 +251,28 @@ class CreateFeatureRequestTool(BaseTool):
            "properties": {
                "title": {
                    "type": "string",
-                    "description": "Feature request title. No PII.",
+                    "description": (
+                        "Title for the feature request. Must be generic and "
+                        "feature-focused — do not include any user names, emails, "
+                        "company names, or other PII."
+                    ),
                },
                "description": {
                    "type": "string",
-                    "description": "What the user wants and why. No PII.",
+                    "description": (
+                        "Detailed description of what the user wants and why. "
+                        "Must not contain any personally identifiable information "
+                        "(PII) — describe the feature need generically without "
+                        "referencing specific users, companies, or contact details."
+                    ),
                },
                "existing_issue_id": {
                    "type": "string",
-                    "description": "Linear issue ID to add need to (from search results).",
+                    "description": (
+                        "If adding a need to an existing feature request, "
+                        "provide its Linear issue ID (from search results). "
+                        "Omit to create a new feature request."
+                    ),
                },
            },
            "required": ["title", "description"],
--- a/autogpt_platform/backend/backend/copilot/tools/find_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_agent.py
@@ -18,7 +18,9 @@ class FindAgentTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search marketplace agents by capability."
+        return (
+            "Discover agents from the marketplace based on capabilities and user needs."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -27,7 +29,7 @@ class FindAgentTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search keywords (single keywords work best).",
+                    "description": "Search query describing what the user wants to accomplish. Use single keywords for best results.",
                },
            },
            "required": ["query"],
--- a/autogpt_platform/backend/backend/copilot/tools/find_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_block.py
@@ -51,7 +51,14 @@ class FindBlockTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search blocks by name or description. Returns block IDs for run_block. Always call this FIRST to get block IDs before using run_block."
+        return (
+            "Search for available blocks by name or description. "
+            "Blocks are reusable components that perform specific tasks like "
+            "sending emails, making API calls, processing text, etc. "
+            "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
+            "The response includes each block's id, name, and description. "
+            "Call run_block with the block's id **with no inputs** to see detailed inputs/outputs and execute it."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -60,11 +67,18 @@ class FindBlockTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search keywords (e.g. 'email', 'http', 'ai').",
+                    "description": (
+                        "Search query to find blocks by name or description. "
+                        "Use keywords like 'email', 'http', 'text', 'ai', etc."
+                    ),
                },
                "include_schemas": {
                    "type": "boolean",
-                    "description": "Include full input/output schemas (for agent JSON generation).",
+                    "description": (
+                        "If true, include full input_schema and output_schema "
+                        "for each block. Use when generating agent JSON that "
+                        "needs block schemas. Default is false."
+                    ),
                    "default": False,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/find_library_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_library_agent.py
@@ -19,8 +19,13 @@ class FindLibraryAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Search user's library agents. Returns graph_id, schemas for sub-agent composition. "
-            "Omit query to list all."
+            "Search for or list agents in the user's library. Use this to find "
+            "agents the user has already added to their library, including agents "
+            "they created or added from the marketplace. "
+            "When creating agents with sub-agent composition, use this to get "
+            "the agent's graph_id, graph_version, input_schema, and output_schema "
+            "needed for AgentExecutorBlock nodes. "
+            "Omit the query to list all agents."
        )

    @property
@@ -30,7 +35,10 @@ class FindLibraryAgentTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search by name/description. Omit to list all.",
+                    "description": (
+                        "Search query to find agents by name or description. "
+                        "Omit to list all agents in the library."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/fix_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/fix_agent.py
@@ -22,8 +22,20 @@ class FixAgentGraphTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Auto-fix common agent JSON issues (UUIDs, types, credentials, spacing, etc.). "
-            "Returns fixed JSON and list of fixes applied."
+            "Auto-fix common issues in an agent JSON graph. Applies fixes for:\n"
+            "- Missing or invalid UUIDs on nodes and links\n"
+            "- StoreValueBlock prerequisites for ConditionBlock\n"
+            "- Double curly brace escaping in prompt templates\n"
+            "- AddToList/AddToDictionary prerequisite blocks\n"
+            "- CodeExecutionBlock output field naming\n"
+            "- Missing credentials configuration\n"
+            "- Node X coordinate spacing (800+ units apart)\n"
+            "- AI model default parameters\n"
+            "- Link static properties based on input schema\n"
+            "- Type mismatches (inserts conversion blocks)\n\n"
+            "Returns the fixed agent JSON plus a list of fixes applied. "
+            "After fixing, the agent is re-validated. If still invalid, "
+            "the remaining errors are included in the response."
        )

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/get_agent_building_guide.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_agent_building_guide.py
@@ -42,7 +42,12 @@ class GetAgentBuildingGuideTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Get the agent JSON building guide (nodes, links, AgentExecutorBlock, MCPToolBlock usage). Call before generating agent JSON."
+        return (
+            "Returns the complete guide for building agent JSON graphs, including "
+            "block IDs, link structure, AgentInputBlock, AgentOutputBlock, "
+            "AgentExecutorBlock (for sub-agent composition), and MCPToolBlock usage. "
+            "Call this before generating agent JSON to ensure correct structure."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/get_doc_page.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_doc_page.py
@@ -25,7 +25,8 @@ class GetDocPageTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Read full documentation page content by path (from search_docs results)."
+            "Get the full content of a documentation page by its path. "
+            "Use this after search_docs to read the complete content of a relevant page."
        )

    @property
@@ -35,7 +36,10 @@ class GetDocPageTool(BaseTool):
            "properties": {
                "path": {
                    "type": "string",
-                    "description": "Doc file path (e.g. 'platform/block-sdk-guide.md').",
+                    "description": (
+                        "The path to the documentation file, as returned by search_docs. "
+                        "Example: 'platform/block-sdk-guide.md'"
+                    ),
                },
            },
            "required": ["path"],
--- a/autogpt_platform/backend/backend/copilot/tools/get_mcp_guide.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_mcp_guide.py
@@ -38,7 +38,11 @@ class GetMCPGuideTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Get MCP server URLs and auth guide."
+        return (
+            "Returns the MCP tool guide: known hosted server URLs (Notion, Linear, "
+            "Stripe, Intercom, Cloudflare, Atlassian) and authentication workflow. "
+            "Call before using run_mcp_tool if you need a server URL or auth info."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -8,11 +8,13 @@ from pydantic_core import PydanticUndefined

 from backend.blocks._base import AnyBlockSchema
 from backend.copilot.constants import COPILOT_NODE_PREFIX, COPILOT_SESSION_PREFIX
-from backend.data.db_accessors import workspace_db
+from backend.data.credit import UsageTransactionMetadata
+from backend.data.db_accessors import credit_db, workspace_db
 from backend.data.execution import ExecutionContext
 from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
+from backend.executor.utils import block_usage_cost
 from backend.integrations.creds_manager import IntegrationCredentialsManager
-from backend.util.exceptions import BlockError
+from backend.util.exceptions import BlockError, InsufficientBalanceError
 from backend.util.type import coerce_inputs_to_schema

 from .models import BlockOutputResponse, ErrorResponse, ToolResponseBase
@@ -115,6 +117,21 @@ async def execute_block(
        # Coerce non-matching data types to the expected input schema.
        coerce_inputs_to_schema(input_data, block.input_schema)

+        # Pre-execution credit check (courtesy; spend_credits is atomic)
+        cost, cost_filter = block_usage_cost(block, input_data)
+        has_cost = cost > 0
+        _credit_db = credit_db()
+        if has_cost:
+            balance = await _credit_db.get_credits(user_id)
+            if balance < cost:
+                return ErrorResponse(
+                    message=(
+                        f"Insufficient credits to run '{block.name}'. "
+                        "Please top up your credits to continue."
+                    ),
+                    session_id=session_id,
+                )
+
        # Execute the block and collect outputs
        outputs: dict[str, list[Any]] = defaultdict(list)
        async for output_name, output_data in block.execute(
@@ -123,6 +140,51 @@ async def execute_block(
        ):
            outputs[output_name].append(output_data)

+        # Charge credits for block execution
+        if has_cost:
+            try:
+                await _credit_db.spend_credits(
+                    user_id=user_id,
+                    cost=cost,
+                    metadata=UsageTransactionMetadata(
+                        graph_exec_id=synthetic_graph_id,
+                        graph_id=synthetic_graph_id,
+                        node_id=synthetic_node_id,
+                        node_exec_id=node_exec_id,
+                        block_id=block_id,
+                        block=block.name,
+                        input=cost_filter,
+                        reason="copilot_block_execution",
+                    ),
+                )
+            except Exception as e:
+                # Block already executed (with possible side effects). Never
+                # return ErrorResponse here — the user received output and
+                # deserves it. Log the billing failure for reconciliation.
+                leak_type = (
+                    "INSUFFICIENT_BALANCE"
+                    if isinstance(e, InsufficientBalanceError)
+                    else "UNEXPECTED_ERROR"
+                )
+                logger.error(
+                    "BILLING_LEAK[%s]: block executed but credit charge failed — "
+                    "user_id=%s, block_id=%s, node_exec_id=%s, cost=%s: %s",
+                    leak_type,
+                    user_id,
+                    block_id,
+                    node_exec_id,
+                    cost,
+                    e,
+                    extra={
+                        "json_fields": {
+                            "billing_leak": True,
+                            "leak_type": leak_type,
+                            "user_id": user_id,
+                            "cost": str(cost),
+                        }
+                    },
+                )
+
        return BlockOutputResponse(
            message=f"Block '{block.name}' executed successfully",
            block_id=block_id,
@@ -133,14 +195,14 @@ async def execute_block(
        )

    except BlockError as e:
-        logger.warning(f"Block execution failed: {e}")
+        logger.warning("Block execution failed: %s", e)
        return ErrorResponse(
            message=f"Block execution failed: {e}",
            error=str(e),
            session_id=session_id,
        )
    except Exception as e:
-        logger.error(f"Unexpected error executing block: {e}", exc_info=True)
+        logger.error("Unexpected error executing block: %s", e, exc_info=True)
        return ErrorResponse(
            message=f"Failed to execute block: {str(e)}",
            error=str(e),
--- a/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
@@ -1,18 +1,197 @@
-"""Tests for execute_block type coercion in helpers.py.
-
-Verifies that execute_block() coerces string input values to match the block's
-expected input types, mirroring the executor's validate_exec() logic.
-This is critical for @@agptfile: expansion, where file content is always a string
-but the block may expect structured types (e.g. list[list[str]]).
-"""
+"""Tests for execute_block — credit charging and type coercion."""

+from collections.abc import AsyncIterator
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

+from backend.blocks._base import BlockType
 from backend.copilot.tools.helpers import execute_block
-from backend.copilot.tools.models import BlockOutputResponse
+from backend.copilot.tools.models import BlockOutputResponse, ErrorResponse
+
+_USER = "test-user-helpers"
+_SESSION = "test-session-helpers"
+
+
+def _make_block(block_id: str = "block-1", name: str = "TestBlock"):
+    """Build a MagicMock stand-in block whose execute() yields ("result", "ok")."""
+
+    async def _yield_ok(
+        input_data: dict, **kwargs: Any
+    ) -> AsyncIterator[tuple[str, Any]]:
+        yield "result", "ok"
+
+    schema = MagicMock()
+    schema.get_credentials_fields_info.return_value = {}
+
+    fake_block = MagicMock()
+    # Set after construction: MagicMock(name=...) would label the mock instead.
+    fake_block.id, fake_block.name = block_id, name
+    fake_block.block_type = BlockType.STANDARD
+    fake_block.input_schema, fake_block.execute = schema, _yield_ok
+    return fake_block
+
+
+def _patch_workspace():
+    """Build a patcher that makes helpers.workspace_db() hand back a stub accessor."""
+    stub_workspace = MagicMock(id="ws-1")
+    lookup = AsyncMock(return_value=stub_workspace)
+    stub_accessor = MagicMock(get_or_create_workspace=lookup)
+    target = "backend.copilot.tools.helpers.workspace_db"
+    return patch(target, return_value=stub_accessor)
+
+
+def _patch_credit_db(
+    get_credits_return: int = 100,
+    spend_credits_side_effect: Any = None,
+):
+    """Return ``(patcher, adapter)`` for the helpers.credit_db accessor.
+
+    ``get_credits`` resolves to ``get_credits_return``; ``spend_credits`` applies
+    ``spend_credits_side_effect`` (``None``, AsyncMock's default, means no effect).
+    """
+    adapter = MagicMock(
+        get_credits=AsyncMock(return_value=get_credits_return),
+        spend_credits=AsyncMock(side_effect=spend_credits_side_effect),
+    )
+    patcher = patch(
+        "backend.copilot.tools.helpers.credit_db",
+        return_value=adapter,
+    )
+    return patcher, adapter
+
+
+# ---------------------------------------------------------------------------
+# Credit charging tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio(loop_scope="session")
+class TestExecuteBlockCreditCharging:
+    """Credit pre-check and post-execution charging performed by execute_block()."""
+
+    # Patch target for the cost lookup that execute_block() performs.
+    _COST_FN = "backend.copilot.tools.helpers.block_usage_cost"
+
+    async def test_charges_credits_when_cost_is_positive(self):
+        """Block with cost > 0 should call spend_credits after execution."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(get_credits_return=100)
+
+        with (
+            _patch_workspace(),
+            patch(self._COST_FN, return_value=(10, {"key": "val"})),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={"text": "hello"},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+        mock_credit.spend_credits.assert_awaited_once()
+        call_kwargs = mock_credit.spend_credits.call_args.kwargs
+        # The charge must target the calling user and the executed block.
+        assert call_kwargs["user_id"] == _USER
+        assert call_kwargs["cost"] == 10
+        assert call_kwargs["metadata"].block_id == "block-1"
+        assert call_kwargs["metadata"].reason == "copilot_block_execution"
+
+    async def test_returns_error_when_insufficient_credits_before_exec(self):
+        """Pre-execution check should return ErrorResponse when balance < cost."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(get_credits_return=5)
+
+        with (
+            _patch_workspace(),
+            patch(self._COST_FN, return_value=(10, {})),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, ErrorResponse)
+        assert "Insufficient credits" in result.message
+        # Rejected up front: the balance was read once and nothing was charged.
+        mock_credit.get_credits.assert_awaited_once_with(_USER)
+        mock_credit.spend_credits.assert_not_awaited()
+
+    async def test_no_charge_when_cost_is_zero(self):
+        """Block with cost 0 should not call spend_credits."""
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db()
+
+        with (
+            _patch_workspace(),
+            patch(self._COST_FN, return_value=(0, {})),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+        # Credit functions should not be called at all for zero-cost blocks
+        mock_credit.get_credits.assert_not_awaited()
+        mock_credit.spend_credits.assert_not_awaited()
+
+    async def test_returns_output_on_post_exec_insufficient_balance(self):
+        """If charging fails after execution, output is still returned (block already ran)."""
+        from backend.util.exceptions import InsufficientBalanceError
+
+        block = _make_block()
+        credit_patch, mock_credit = _patch_credit_db(
+            get_credits_return=15,
+            spend_credits_side_effect=InsufficientBalanceError(
+                "Low balance", _USER, 5, 10
+            ),
+        )
+
+        with (
+            _patch_workspace(),
+            patch(self._COST_FN, return_value=(10, {})),
+            credit_patch,
+        ):
+            result = await execute_block(
+                block=block,
+                block_id="block-1",
+                input_data={},
+                user_id=_USER,
+                session_id=_SESSION,
+                node_exec_id="exec-1",
+                matched_credentials={},
+            )
+
+        # The charge was attempted and failed, yet the block already ran: output wins.
+        mock_credit.spend_credits.assert_awaited_once()
+        assert isinstance(result, BlockOutputResponse)
+        assert result.success is True
+
+
+# ---------------------------------------------------------------------------
+# Type coercion tests
+# ---------------------------------------------------------------------------


 def _make_block_schema(annotations: dict[str, Any]) -> MagicMock:
@@ -28,7 +207,7 @@ def _make_block_schema(annotations: dict[str, Any]) -> MagicMock:
    return schema


-def _make_block(
+def _make_coerce_block(
    block_id: str,
    name: str,
    annotations: dict[str, Any],
@@ -60,7 +239,7 @@ _TEST_USER_ID = "test-user-coerce"
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_nested_list():
    """JSON string → list[list[str]] (Google Sheets CSV import case)."""
-    block = _make_block(
+    block = _make_coerce_block(
        "sheets-write",
        "Google Sheets Write",
        {"values": list[list[str]], "spreadsheet_id": str},
@@ -103,7 +282,7 @@ async def test_coerce_json_string_to_nested_list():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_list():
    """JSON string → list[str]."""
-    block = _make_block(
+    block = _make_coerce_block(
        "list-block",
        "List Block",
        {"items": list[str]},
@@ -135,7 +314,7 @@ async def test_coerce_json_string_to_list():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_json_string_to_dict():
    """JSON string → dict[str, str]."""
-    block = _make_block(
+    block = _make_coerce_block(
        "dict-block",
        "Dict Block",
        {"config": dict[str, str]},
@@ -167,7 +346,7 @@ async def test_coerce_json_string_to_dict():
@pytest.mark.asyncio(loop_scope="session")
 async def test_no_coercion_when_type_matches():
    """Already-correct types pass through without coercion."""
-    block = _make_block(
+    block = _make_coerce_block(
        "pass-through",
        "Pass Through",
        {"values": list[list[str]], "name": str},
@@ -201,7 +380,7 @@ async def test_no_coercion_when_type_matches():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_string_to_int():
    """String number → int."""
-    block = _make_block(
+    block = _make_coerce_block(
        "int-block",
        "Int Block",
        {"count": int},
@@ -234,7 +413,7 @@ async def test_coerce_string_to_int():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_skips_none_values():
    """None values are not coerced (they may be optional fields)."""
-    block = _make_block(
+    block = _make_coerce_block(
        "optional-block",
        "Optional Block",
        {"data": list[str], "label": str},
@@ -267,7 +446,7 @@ async def test_coerce_skips_none_values():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_union_type_preserves_valid_member():
    """Union-typed fields should not be coerced when the value matches a member."""
-    block = _make_block(
+    block = _make_coerce_block(
        "union-block",
        "Union Block",
        {"content": str | list[str]},
@@ -301,7 +480,7 @@ async def test_coerce_union_type_preserves_valid_member():
@pytest.mark.asyncio(loop_scope="session")
 async def test_coerce_inner_elements_of_generic():
    """Inner elements of generic containers are recursively coerced."""
-    block = _make_block(
+    block = _make_coerce_block(
        "inner-coerce",
        "Inner Coerce",
        {"values": list[str]},
--- a/autogpt_platform/backend/backend/copilot/tools/manage_folders.py
+++ b/autogpt_platform/backend/backend/copilot/tools/manage_folders.py
@@ -88,7 +88,10 @@ class CreateFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Create a library folder. Use parent_id to nest inside another folder."
+        return (
+            "Create a new folder in the user's library to organize agents. "
+            "Optionally nest it inside an existing folder using parent_id."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -101,19 +104,22 @@ class CreateFolderTool(BaseTool):
            "properties": {
                "name": {
                    "type": "string",
-                    "description": "Folder name (max 100 chars).",
+                    "description": "Name for the new folder (max 100 chars).",
                },
                "parent_id": {
                    "type": "string",
-                    "description": "Parent folder ID (omit for root).",
+                    "description": (
+                        "ID of the parent folder to nest inside. "
+                        "Omit to create at root level."
+                    ),
                },
                "icon": {
                    "type": "string",
-                    "description": "Icon identifier.",
+                    "description": "Optional icon identifier for the folder.",
                },
                "color": {
                    "type": "string",
-                    "description": "Hex color (#RRGGBB).",
+                    "description": "Optional hex color code (#RRGGBB).",
                },
            },
            "required": ["name"],
@@ -169,8 +175,13 @@ class ListFoldersTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "List library folders. Omit parent_id for full tree. "
-            "Set include_agents=true when user asks about agents in folders."
+            "List the user's library folders. "
+            "Omit parent_id to get the full folder tree. "
+            "Provide parent_id to list only direct children of that folder. "
+            "Set include_agents=true to also return the agents inside each folder "
+            "and root-level agents not in any folder. Always set include_agents=true "
+            "when the user asks about agents, wants to see what's in their folders, "
+            "or mentions agents alongside folders."
        )

    @property
@@ -184,11 +195,17 @@ class ListFoldersTool(BaseTool):
            "properties": {
                "parent_id": {
                    "type": "string",
-                    "description": "List children of this folder (omit for full tree).",
+                    "description": (
+                        "List children of this folder. "
+                        "Omit to get the full folder tree."
+                    ),
                },
                "include_agents": {
                    "type": "boolean",
-                    "description": "Include agents in each folder (default: false).",
+                    "description": (
+                        "Whether to include the list of agents inside each folder. "
+                        "Defaults to false."
+                    ),
                },
            },
            "required": [],
@@ -340,7 +357,10 @@ class MoveFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Move a folder. Set target_parent_id to null for root."
+        return (
+            "Move a folder to a different parent folder. "
+            "Set target_parent_id to null to move to root level."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -353,11 +373,14 @@ class MoveFolderTool(BaseTool):
            "properties": {
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID.",
+                    "description": "ID of the folder to move.",
                },
                "target_parent_id": {
                    "type": ["string", "null"],
-                    "description": "New parent folder ID (null for root).",
+                    "description": (
+                        "ID of the new parent folder. "
+                        "Use null to move to root level."
+                    ),
                },
            },
            "required": ["folder_id"],
@@ -410,7 +433,10 @@ class DeleteFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Delete a folder. Agents inside move to root (not deleted)."
+        return (
+            "Delete a folder from the user's library. "
+            "Agents inside the folder are moved to root level (not deleted)."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -473,7 +499,10 @@ class MoveAgentsToFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Move agents to a folder. Set folder_id to null for root."
+        return (
+            "Move one or more agents to a folder. "
+            "Set folder_id to null to move agents to root level."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -487,11 +516,13 @@ class MoveAgentsToFolderTool(BaseTool):
                "agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs to move.",
+                    "description": "List of library agent IDs to move.",
                },
                "folder_id": {
                    "type": ["string", "null"],
-                    "description": "Target folder ID (null for root).",
+                    "description": (
+                        "Target folder ID. Use null to move to root level."
+                    ),
                },
            },
            "required": ["agent_ids"],
--- a/autogpt_platform/backend/backend/copilot/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_agent.py
@@ -104,11 +104,19 @@ class RunAgentTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Run or schedule an agent. Automatically checks inputs and credentials. "
-            "Identify by username_agent_slug ('user/agent') or library_agent_id. "
-            "For scheduling, provide schedule_name + cron."
-        )
+        return """Run or schedule an agent from the marketplace or user's library.
+
+        The tool automatically handles the setup flow:
+        - Returns missing inputs if required fields are not provided
+        - Returns missing credentials if user needs to configure them
+        - Executes immediately if all requirements are met
+        - Schedules execution if cron expression is provided
+
+        Identify the agent using either:
+        - username_agent_slug: Marketplace format 'username/agent-name'
+        - library_agent_id: ID of an agent in the user's library
+
+        For scheduled execution, provide: schedule_name, cron, and optionally timezone."""

    @property
    def parameters(self) -> dict[str, Any]:
@@ -117,36 +125,40 @@ class RunAgentTool(BaseTool):
            "properties": {
                "username_agent_slug": {
                    "type": "string",
-                    "description": "Marketplace format 'username/agent-name'.",
+                    "description": "Agent identifier in format 'username/agent-name'",
                },
                "library_agent_id": {
                    "type": "string",
-                    "description": "Library agent ID.",
+                    "description": "Library agent ID from user's library",
                },
                "inputs": {
                    "type": "object",
-                    "description": "Input values for the agent.",
+                    "description": "Input values for the agent",
                    "additionalProperties": True,
                },
                "use_defaults": {
                    "type": "boolean",
-                    "description": "Run with default values (confirm with user first).",
+                    "description": "Set to true to run with default values (user must confirm)",
                },
                "schedule_name": {
                    "type": "string",
-                    "description": "Name for scheduled execution.",
+                    "description": "Name for scheduled execution (triggers scheduling mode)",
                },
                "cron": {
                    "type": "string",
-                    "description": "Cron expression (min hour day month weekday).",
+                    "description": "Cron expression (5 fields: min hour day month weekday)",
                },
                "timezone": {
                    "type": "string",
-                    "description": "IANA timezone (default: UTC).",
+                    "description": "IANA timezone for schedule (default: UTC)",
                },
                "wait_for_result": {
                    "type": "integer",
-                    "description": "Max seconds to wait for completion (0-300).",
+                    "description": (
+                        "Max seconds to wait for execution to complete (0-300). "
+                        "If >0, blocks until the execution finishes or times out. "
+                        "Returns execution outputs when complete."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/run_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_block.py
@@ -45,10 +45,13 @@ class RunBlockTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Execute a block. IMPORTANT: Always get block_id from find_block first "
-            "— do NOT guess or fabricate IDs. "
-            "Call with empty input_data to see schema, then with data to execute. "
-            "If review_required, use continue_run_block."
+            "Execute a specific block with the provided input data. "
+            "IMPORTANT: You MUST call find_block first to get the block's 'id' - "
+            "do NOT guess or make up block IDs. "
+            "On first attempt (without input_data), returns detailed schema showing "
+            "required inputs and outputs. Then call again with proper input_data to execute. "
+            "If a block requires human review, use continue_run_block with the "
+            "review_id after the user approves."
        )

    @property
@@ -58,14 +61,28 @@ class RunBlockTool(BaseTool):
            "properties": {
                "block_id": {
                    "type": "string",
-                    "description": "Block ID from find_block results.",
+                    "description": (
+                        "The block's 'id' field from find_block results. "
+                        "NEVER guess this - always get it from find_block first."
+                    ),
+                },
+                "block_name": {
+                    "type": "string",
+                    "description": (
+                        "The block's human-readable name from find_block results. "
+                        "Used for display purposes in the UI."
+                    ),
                },
                "input_data": {
                    "type": "object",
-                    "description": "Input values. Use {} first to see schema.",
+                    "description": (
+                        "Input values for the block. "
+                        "First call with empty {} to see the block's schema, "
+                        "then call again with proper values to execute."
+                    ),
                },
            },
-            "required": ["block_id", "input_data"],
+            "required": ["block_id", "block_name", "input_data"],
        }

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/run_mcp_tool.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_mcp_tool.py
@@ -57,9 +57,10 @@ class RunMCPToolTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Discover and execute MCP server tools. "
-            "Call with server_url only to list tools, then with tool_name + tool_arguments to execute. "
-            "Call get_mcp_guide first for server URLs and auth."
+            "Connect to an MCP (Model Context Protocol) server to discover and execute its tools. "
+            "Two-step: (1) call with server_url to list available tools, "
+            "(2) call again with server_url + tool_name + tool_arguments to execute. "
+            "Call get_mcp_guide for known server URLs and auth details."
        )

    @property
@@ -69,15 +70,24 @@ class RunMCPToolTool(BaseTool):
            "properties": {
                "server_url": {
                    "type": "string",
-                    "description": "MCP server URL (Streamable HTTP endpoint).",
+                    "description": (
+                        "URL of the MCP server (Streamable HTTP endpoint), "
+                        "e.g. https://mcp.example.com/mcp"
+                    ),
                },
                "tool_name": {
                    "type": "string",
-                    "description": "Tool to execute. Omit to discover available tools.",
+                    "description": (
+                        "Name of the MCP tool to execute. "
+                        "Omit on first call to discover available tools."
+                    ),
                },
                "tool_arguments": {
                    "type": "object",
-                    "description": "Arguments matching the tool's input schema.",
+                    "description": (
+                        "Arguments to pass to the selected tool. "
+                        "Must match the tool's input schema returned during discovery."
+                    ),
                },
            },
            "required": ["server_url"],
--- a/autogpt_platform/backend/backend/copilot/tools/search_docs.py
+++ b/autogpt_platform/backend/backend/copilot/tools/search_docs.py
@@ -38,7 +38,11 @@ class SearchDocsTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search platform documentation by keyword. Use get_doc_page to read full results."
+        return (
+            "Search the AutoGPT platform documentation for information about "
+            "how to use the platform, build agents, configure blocks, and more. "
+            "Returns relevant documentation sections. Use get_doc_page to read full content."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -47,7 +51,10 @@ class SearchDocsTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Documentation search query.",
+                    "description": (
+                        "Search query to find relevant documentation. "
+                        "Use natural language to describe what you're looking for."
+                    ),
                },
            },
            "required": ["query"],
--- a/autogpt_platform/backend/backend/copilot/tools/tool_schema_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/tool_schema_test.py
@@ -1,81 +0,0 @@
-"""Schema regression tests for all registered CoPilot tools.
-
-Validates that every tool in TOOL_REGISTRY produces a well-formed schema:
- description is non-empty
- all `required` fields exist in `properties`
- every property has a `type` and `description`
- total token budget does not regress past 8000 tokens
-"""
-
-import json
-
-import pytest
-import tiktoken
-
-from backend.copilot.tools import TOOL_REGISTRY
-
-_TOKEN_BUDGET = 8_000
-
-
-def _get_all_tool_schemas() -> list[tuple[str, object]]:
-    """Return (tool_name, openai_schema) pairs for every registered tool."""
-    return [(name, tool.as_openai_tool()) for name, tool in TOOL_REGISTRY.items()]
-
-
-_ALL_SCHEMAS = _get_all_tool_schemas()
-
-
-@pytest.mark.parametrize(
-    "tool_name,schema",
-    _ALL_SCHEMAS,
-    ids=[name for name, _ in _ALL_SCHEMAS],
-)
-class TestToolSchema:
-    """Validate schema invariants for every registered tool."""
-
-    def test_description_non_empty(self, tool_name: str, schema: dict) -> None:
-        desc = schema["function"].get("description", "")
-        assert desc, f"Tool '{tool_name}' has an empty description"
-
-    def test_required_fields_exist_in_properties(
-        self, tool_name: str, schema: dict
-    ) -> None:
-        params = schema["function"].get("parameters", {})
-        properties = params.get("properties", {})
-        required = params.get("required", [])
-        for field in required:
-            assert field in properties, (
-                f"Tool '{tool_name}': required field '{field}' "
-                f"not found in properties {list(properties.keys())}"
-            )
-
-    def test_every_property_has_type_and_description(
-        self, tool_name: str, schema: dict
-    ) -> None:
-        params = schema["function"].get("parameters", {})
-        properties = params.get("properties", {})
-        for prop_name, prop_def in properties.items():
-            assert (
-                "type" in prop_def
-            ), f"Tool '{tool_name}', property '{prop_name}' is missing 'type'"
-            assert (
-                "description" in prop_def
-            ), f"Tool '{tool_name}', property '{prop_name}' is missing 'description'"
-
-
-def test_total_schema_token_budget() -> None:
-    """Assert total tool schema size stays under the token budget.
-
-    This locks in the 34% token reduction from #12398 and prevents future
-    description bloat from eroding the gains. Budget is set to 8000 tokens.
-    Note: this measures tool JSON only (not the full system prompt); the actual
-    baseline for tool schemas alone is ~6470 tokens, giving ~19% headroom.
-    """
-    schemas = [tool.as_openai_tool() for tool in TOOL_REGISTRY.values()]
-    serialized = json.dumps(schemas)
-    enc = tiktoken.get_encoding("cl100k_base")
-    total_tokens = len(enc.encode(serialized))
-    assert total_tokens < _TOKEN_BUDGET, (
-        f"Tool schemas use {total_tokens} tokens, exceeding budget of {_TOKEN_BUDGET}. "
-        f"Description bloat detected — trim descriptions or raise the budget intentionally."
-    )
--- a/autogpt_platform/backend/backend/copilot/tools/validate_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/validate_agent.py
@@ -21,7 +21,19 @@ class ValidateAgentGraphTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Validate agent JSON for correctness (block_ids, links, types, schemas). On failure, use fix_agent_graph to auto-fix."
+        return (
+            "Validate an agent JSON graph for correctness. Checks:\n"
+            "- All block_ids reference real blocks\n"
+            "- All links reference valid source/sink nodes and fields\n"
+            "- Required input fields are wired or have defaults\n"
+            "- Data types are compatible across links\n"
+            "- Nested sink links use correct notation\n"
+            "- Prompt templates use proper curly brace escaping\n"
+            "- AgentExecutorBlock configurations are valid\n\n"
+            "Call this after generating agent JSON to verify correctness. "
+            "If validation fails, either fix issues manually based on the error "
+            "descriptions, or call fix_agent_graph to auto-fix common problems."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -34,7 +46,11 @@ class ValidateAgentGraphTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Agent JSON with 'nodes' and 'links' arrays.",
+                    "description": (
+                        "The agent JSON to validate. Must contain 'nodes' and 'links' arrays. "
+                        "Each node needs: id (UUID), block_id, input_default, metadata. "
+                        "Each link needs: id (UUID), source_id, source_name, sink_id, sink_name."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/web_fetch.py
+++ b/autogpt_platform/backend/backend/copilot/tools/web_fetch.py
@@ -59,7 +59,13 @@ class WebFetchTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Fetch a public web page. Public URLs only — internal addresses blocked. Returns readable text from HTML by default."
+        return (
+            "Fetch the content of a public web page by URL. "
+            "Returns readable text extracted from HTML by default. "
+            "Useful for reading documentation, articles, and API responses. "
+            "Only supports HTTP/HTTPS GET requests to public URLs "
+            "(private/internal network addresses are blocked)."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -68,11 +74,14 @@ class WebFetchTool(BaseTool):
            "properties": {
                "url": {
                    "type": "string",
-                    "description": "Public HTTP/HTTPS URL.",
+                    "description": "The public HTTP/HTTPS URL to fetch.",
                },
                "extract_text": {
                    "type": "boolean",
-                    "description": "Extract text from HTML (default: true).",
+                    "description": (
+                        "If true (default), extract readable text from HTML. "
+                        "If false, return raw content."
+                    ),
                    "default": True,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/workspace_files.py
+++ b/autogpt_platform/backend/backend/copilot/tools/workspace_files.py
@@ -321,7 +321,13 @@ class ListWorkspaceFilesTool(BaseTool):

    @property
    def description(self) -> str:
-        return "List persistent workspace files. For ephemeral session files, use SDK Glob/Read instead. Optionally filter by path prefix."
+        return (
+            "List files in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read/Glob tools instead. "
+            "Returns file names, paths, sizes, and metadata. "
+            "Optionally filter by path prefix."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -330,17 +336,24 @@ class ListWorkspaceFilesTool(BaseTool):
            "properties": {
                "path_prefix": {
                    "type": "string",
-                    "description": "Filter by path prefix (e.g. '/documents/').",
+                    "description": (
+                        "Optional path prefix to filter files "
+                        "(e.g., '/documents/' to list only files in documents folder). "
+                        "By default, only files from the current session are listed."
+                    ),
                },
                "limit": {
                    "type": "integer",
-                    "description": "Max files to return (default 50, max 100).",
+                    "description": "Maximum number of files to return (default 50, max 100)",
                    "minimum": 1,
                    "maximum": 100,
                },
                "include_all_sessions": {
                    "type": "boolean",
-                    "description": "Include files from all sessions (default: false).",
+                    "description": (
+                        "If true, list files from all sessions. "
+                        "Default is false (only current session's files)."
+                    ),
                },
            },
            "required": [],
@@ -423,10 +436,18 @@ class ReadWorkspaceFileTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Read a file from persistent workspace. Specify file_id or path. "
-            "Small text/image files return inline; large/binary return metadata+URL. "
-            "Use save_to_path to copy to working dir for processing. "
-            "Use offset/length for paginated reads."
+            "Read a file from the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read tool instead. "
+            "Specify either file_id or path to identify the file. "
+            "For small text files, returns content directly. "
+            "For large or binary files, returns metadata and a download URL. "
+            "Use 'save_to_path' to copy the file to the working directory "
+            "(sandbox or ephemeral) for processing with bash_exec or file tools. "
+            "Use 'offset' and 'length' for paginated reads of large files "
+            "(e.g., persisted tool outputs). "
+            "Paths are scoped to the current session by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
        )

    @property
@@ -436,30 +457,48 @@ class ReadWorkspaceFileTool(BaseTool):
            "properties": {
                "file_id": {
                    "type": "string",
-                    "description": "File ID from list_workspace_files.",
+                    "description": "The file's unique ID (from list_workspace_files)",
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual file path (e.g. '/documents/report.pdf').",
+                    "description": (
+                        "The virtual file path (e.g., '/documents/report.pdf'). "
+                        "Scoped to current session by default."
+                    ),
                },
                "save_to_path": {
                    "type": "string",
-                    "description": "Copy file to this working directory path for processing.",
+                    "description": (
+                        "If provided, save the file to this path in the working "
+                        "directory (cloud sandbox when E2B is active, or "
+                        "ephemeral dir otherwise) so it can be processed with "
+                        "bash_exec or file tools. "
+                        "The file content is still returned in the response."
+                    ),
                },
                "force_download_url": {
                    "type": "boolean",
-                    "description": "Always return metadata+URL instead of inline content.",
+                    "description": (
+                        "If true, always return metadata+URL instead of inline content. "
+                        "Default is false (auto-selects based on file size/type)."
+                    ),
                },
                "offset": {
                    "type": "integer",
-                    "description": "Character offset for paginated reads (0-based).",
+                    "description": (
+                        "Character offset to start reading from (0-based). "
+                        "Use with 'length' for paginated reads of large files."
+                    ),
                },
                "length": {
                    "type": "integer",
-                    "description": "Max characters to return for paginated reads.",
+                    "description": (
+                        "Maximum number of characters to return. "
+                        "Defaults to full file. Use with 'offset' for paginated reads."
+                    ),
                },
            },
-            "required": [],  # At least one of file_id or path must be provided
+            "required": [],  # At least one must be provided
        }

    @property
@@ -614,9 +653,15 @@ class WriteWorkspaceFileTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Write a file to persistent workspace (survives across sessions). "
-            "Provide exactly one of: content (text), content_base64 (binary), "
-            f"or source_path (copy from working dir). Max {Config().max_file_size_mb}MB."
+            "Write or create a file in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Write tool instead. "
+            "Provide content as plain text via 'content', OR base64-encoded via "
+            "'content_base64', OR copy a file from the ephemeral working directory "
+            "via 'source_path'. Exactly one of these three is required. "
+            f"Maximum file size is {Config().max_file_size_mb}MB. "
+            "Files are saved to the current session's folder by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
        )

    @property
@@ -626,31 +671,51 @@ class WriteWorkspaceFileTool(BaseTool):
            "properties": {
                "filename": {
                    "type": "string",
-                    "description": "Filename (e.g. 'report.pdf').",
+                    "description": "Name for the file (e.g., 'report.pdf')",
                },
                "content": {
                    "type": "string",
-                    "description": "Plain text content. Mutually exclusive with content_base64/source_path.",
+                    "description": (
+                        "Plain text content to write. Use this for text files "
+                        "(code, configs, documents, etc.). "
+                        "Mutually exclusive with content_base64 and source_path."
+                    ),
                },
                "content_base64": {
                    "type": "string",
-                    "description": "Base64-encoded binary content. Mutually exclusive with content/source_path.",
+                    "description": (
+                        "Base64-encoded file content. Use this for binary files "
+                        "(images, PDFs, etc.). "
+                        "Mutually exclusive with content and source_path."
+                    ),
                },
                "source_path": {
                    "type": "string",
-                    "description": "Working directory path to copy to workspace. Mutually exclusive with content/content_base64.",
+                    "description": (
+                        "Path to a file in the ephemeral working directory to "
+                        "copy to workspace (e.g., '/tmp/copilot-.../output.csv'). "
+                        "Use this to persist files created by bash_exec or SDK Write. "
+                        "Mutually exclusive with content and content_base64."
+                    ),
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual path (e.g. '/documents/report.pdf'). Defaults to '/{filename}'.",
+                    "description": (
+                        "Optional virtual path where to save the file "
+                        "(e.g., '/documents/report.pdf'). "
+                        "Defaults to '/{filename}'. Scoped to current session."
+                    ),
                },
                "mime_type": {
                    "type": "string",
-                    "description": "MIME type. Auto-detected from filename if omitted.",
+                    "description": (
+                        "Optional MIME type of the file. "
+                        "Auto-detected from filename if not provided."
+                    ),
                },
                "overwrite": {
                    "type": "boolean",
-                    "description": "Overwrite if file exists (default: false).",
+                    "description": "Whether to overwrite if file exists at path (default: false)",
                },
            },
            "required": ["filename"],
@@ -777,7 +842,12 @@ class DeleteWorkspaceFileTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Delete a file from persistent workspace. Specify file_id or path."
+        return (
+            "Delete a file from the user's persistent workspace (cloud storage). "
+            "Specify either file_id or path to identify the file. "
+            "Paths are scoped to the current session by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -786,14 +856,17 @@ class DeleteWorkspaceFileTool(BaseTool):
            "properties": {
                "file_id": {
                    "type": "string",
-                    "description": "File ID from list_workspace_files.",
+                    "description": "The file's unique ID (from list_workspace_files)",
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual file path.",
+                    "description": (
+                        "The virtual file path (e.g., '/documents/report.pdf'). "
+                        "Scoped to current session by default."
+                    ),
                },
            },
-            "required": [],  # At least one of file_id or path must be provided
+            "required": [],  # At least one must be provided
        }

    @property
--- a/autogpt_platform/backend/backend/data/db_accessors.py
+++ b/autogpt_platform/backend/backend/data/db_accessors.py
@@ -129,3 +129,16 @@ def review_db():
        review_db = get_database_manager_async_client()

    return review_db
+
+
+def credit_db():  # picks the credit data-access object for the current process
+    if db.is_connected():
+        from backend.data import db_manager as _credit_db  # imported lazily, only on this path
+
+        credit_db = _credit_db  # the db_manager module itself is returned
+    else:
+        from backend.util.clients import get_database_manager_async_client
+
+        credit_db = get_database_manager_async_client()  # no direct connection: use the async client
+
+    return credit_db  # local name shadows this function only inside its own body
--- a/autogpt_platform/backend/backend/data/db_manager.py
+++ b/autogpt_platform/backend/backend/data/db_manager.py
@@ -148,6 +148,11 @@ async def _get_credits(user_id: str) -> int:
    return await user_credit_model.get_credits(user_id)


+# Public aliases used by db_accessors.credit_db() when Prisma is connected
+get_credits = _get_credits
+spend_credits = _spend_credits
+
+
 class DatabaseManager(AppService):
    """Database connection pooling service.

@@ -512,6 +517,10 @@ class DatabaseManagerAsyncClient(AppServiceClient):
    list_workspace_files = d.list_workspace_files
    soft_delete_workspace_file = d.soft_delete_workspace_file

+    # ============ Credits ============ #
+    spend_credits = d.spend_credits
+    get_credits = d.get_credits
+
    # ============ Understanding ============ #
    get_business_understanding = d.get_business_understanding
    upsert_business_understanding = d.upsert_business_understanding
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
@@ -1,14 +1,8 @@
 "use client";

-import {
-  DropdownMenu,
-  DropdownMenuContent,
-  DropdownMenuItem,
-  DropdownMenuTrigger,
-} from "@/components/molecules/DropdownMenu/DropdownMenu";
 import { SidebarProvider } from "@/components/ui/sidebar";
 import { cn } from "@/lib/utils";
-import { DotsThree, UploadSimple } from "@phosphor-icons/react";
+import { UploadSimple } from "@phosphor-icons/react";
 import { useCallback, useRef, useState } from "react";
 import { ChatContainer } from "./components/ChatContainer/ChatContainer";
 import { ChatSidebar } from "./components/ChatSidebar/ChatSidebar";
@@ -89,10 +83,9 @@ export function CopilotPage() {
    handleDrawerOpenChange,
    handleSelectSession,
    handleNewChat,
-    // Delete functionality
+    // Delete functionality (available via ChatSidebar context menu on all viewports)
    sessionToDelete,
    isDeleting,
-    handleDeleteClick,
    handleConfirmDelete,
    handleCancelDelete,
  } = useCopilotPage();
@@ -148,38 +141,6 @@ export function CopilotPage() {
            isUploadingFiles={isUploadingFiles}
            droppedFiles={droppedFiles}
            onDroppedFilesConsumed={handleDroppedFilesConsumed}
-            headerSlot={
-              isMobile && sessionId ? (
-                <div className="flex justify-end">
-                  <DropdownMenu>
-                    <DropdownMenuTrigger asChild>
-                      <button
-                        className="rounded p-1.5 hover:bg-neutral-100"
-                        aria-label="More actions"
-                      >
-                        <DotsThree className="h-5 w-5 text-neutral-600" />
-                      </button>
-                    </DropdownMenuTrigger>
-                    <DropdownMenuContent align="end">
-                      <DropdownMenuItem
-                        onClick={() => {
-                          const session = sessions.find(
-                            (s) => s.id === sessionId,
-                          );
-                          if (session) {
-                            handleDeleteClick(session.id, session.title);
-                          }
-                        }}
-                        disabled={isDeleting}
-                        className="text-red-600 focus:bg-red-50 focus:text-red-600"
-                      >
-                        Delete chat
-                      </DropdownMenuItem>
-                    </DropdownMenuContent>
-                  </DropdownMenu>
-                </div>
-              ) : undefined
-            }
          />
        </div>
      </div>
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
@@ -2,7 +2,6 @@
 import { ChatInput } from "@/app/(platform)/copilot/components/ChatInput/ChatInput";
 import { UIDataTypes, UIMessage, UITools } from "ai";
 import { LayoutGroup, motion } from "framer-motion";
-import { ReactNode } from "react";
 import { ChatMessagesContainer } from "../ChatMessagesContainer/ChatMessagesContainer";
 import { CopilotChatActionsProvider } from "../CopilotChatActionsProvider/CopilotChatActionsProvider";
 import { EmptySession } from "../EmptySession/EmptySession";
@@ -21,7 +20,6 @@ export interface ChatContainerProps {
  onSend: (message: string, files?: File[]) => void | Promise<void>;
  onStop: () => void;
  isUploadingFiles?: boolean;
-  headerSlot?: ReactNode;
  /** Files dropped onto the chat window. */
  droppedFiles?: File[];
  /** Called after droppedFiles have been consumed by ChatInput. */
@@ -40,7 +38,6 @@ export const ChatContainer = ({
  onSend,
  onStop,
  isUploadingFiles,
-  headerSlot,
  droppedFiles,
  onDroppedFilesConsumed,
 }: ChatContainerProps) => {
@@ -63,7 +60,6 @@ export const ChatContainer = ({
                status={status}
                error={error}
                isLoading={isLoadingSession}
-                headerSlot={headerSlot}
                sessionID={sessionId}
              />
              <motion.div
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
@@ -30,7 +30,6 @@ interface Props {
  status: string;
  error: Error | undefined;
  isLoading: boolean;
-  headerSlot?: React.ReactNode;
  sessionID?: string | null;
 }

@@ -102,7 +101,6 @@ export function ChatMessagesContainer({
  status,
  error,
  isLoading,
-  headerSlot,
  sessionID,
 }: Props) {
  const lastMessage = messages[messages.length - 1];
@@ -135,7 +133,6 @@ export function ChatMessagesContainer({
  return (
    <Conversation className="min-h-0 flex-1">
      <ConversationContent className="flex flex-1 flex-col gap-6 px-3 py-6">
-        {headerSlot}
        {isLoading && messages.length === 0 && (
          <div
            className="flex flex-1 items-center justify-center"
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
@@ -37,6 +37,7 @@ import { useCopilotUIStore } from "../../store";
 import { NotificationToggle } from "./components/NotificationToggle/NotificationToggle";
 import { DeleteChatDialog } from "../DeleteChatDialog/DeleteChatDialog";
 import { PulseLoader } from "../PulseLoader/PulseLoader";
+import { UsageLimits } from "../UsageLimits/UsageLimits";

 export function ChatSidebar() {
  const { state } = useSidebar();
@@ -256,11 +257,10 @@ export function ChatSidebar() {
                <Text variant="h3" size="body-medium">
                  Your chats
                </Text>
-                <div className="relative left-5 flex items-center gap-1">
+                <div className="flex items-center">
+                  <UsageLimits />
                  <NotificationToggle />
-                  <div className="relative left-1">
-                    <SidebarTrigger />
-                  </div>
+                  <SidebarTrigger />
                </div>
              </div>
              {sessionId ? (
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/components/NotificationToggle/NotificationToggle.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/components/NotificationToggle/NotificationToggle.tsx
@@ -7,6 +7,7 @@ import {
  PopoverTrigger,
 } from "@/components/molecules/Popover/Popover";
 import { toast } from "@/components/molecules/Toast/use-toast";
+import { Button } from "@/components/ui/button";
 import { cn } from "@/lib/utils";
 import { Bell, BellRinging, BellSlash } from "@phosphor-icons/react";
 import { useCopilotUIStore } from "../../../../store";
@@ -48,10 +49,7 @@ export function NotificationToggle() {
  return (
    <Popover>
      <PopoverTrigger asChild>
-        <button
-          className="rounded p-1 text-black transition-colors hover:bg-zinc-50"
-          aria-label="Notification settings"
-        >
+        <Button variant="ghost" size="icon" aria-label="Notification settings">
          {!isNotificationsEnabled ? (
            <BellSlash className="!size-5" />
          ) : isSoundEnabled ? (
@@ -59,7 +57,7 @@ export function NotificationToggle() {
          ) : (
            <Bell className="!size-5" />
          )}
-        </button>
+        </Button>
      </PopoverTrigger>
      <PopoverContent align="start" className="w-56 p-3">
        <div className="flex flex-col gap-3">
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
@@ -0,0 +1,38 @@
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/molecules/Popover/Popover";
+import { Button } from "@/components/ui/button";
+import { ChartBar } from "@phosphor-icons/react";
+import { UsagePanelContent } from "./UsagePanelContent";
+
+export { UsagePanelContent, formatResetTime } from "./UsagePanelContent";
+
+export function UsageLimits() {
+  const { data: usage, isLoading } = useGetV2GetCopilotUsage({
+    query: {
+      select: (res) => res.data as CoPilotUsageStatus, // unwrap the response body
+      refetchInterval: 30000, // ms; presumably a 30s polling interval — confirm against the query client
+      staleTime: 10000, // ms; presumably data counts as fresh for 10s — confirm
+    },
+  });
+
+  if (isLoading || !usage) return null; // render nothing until usage data exists
+  if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null; // no positive limit: hide the trigger
+
+  return (
+    <Popover>
+      <PopoverTrigger asChild>
+        <Button variant="ghost" size="icon" aria-label="Usage limits">
+          <ChartBar className="!size-5" weight="light" />
+        </Button>
+      </PopoverTrigger>
+      <PopoverContent align="start" className="w-64 p-3">
+        <UsagePanelContent usage={usage} />
+      </PopoverContent>
+    </Popover>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsagePanelContent.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsagePanelContent.tsx
@@ -0,0 +1,118 @@
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import Link from "next/link";
+
+export function formatResetTime(
+  resetsAt: Date | string, // reset moment; strings are parsed with new Date()
+  now: Date = new Date(), // reference time the remaining duration is measured from
+): string {
+  const resetDate =
+    typeof resetsAt === "string" ? new Date(resetsAt) : resetsAt;
+  const diffMs = resetDate.getTime() - now.getTime(); // NaN if resetsAt did not parse
+  if (diffMs <= 0) return "now"; // reset moment is at or before `now`
+
+  const hours = Math.floor(diffMs / (1000 * 60 * 60)); // whole hours remaining
+
+  // Under 24h: relative time ("in 4h 23m"); under one minute this yields "in 0m"
+  if (hours < 24) {
+    const minutes = Math.floor((diffMs % (1000 * 60 * 60)) / (1000 * 60));
+    if (hours > 0) return `in ${hours}h ${minutes}m`;
+    return `in ${minutes}m`;
+  }
+
+  // 24h or more: show day and time in local timezone ("Mon 12:00 AM PST")
+  return resetDate.toLocaleString(undefined, {
+    weekday: "short",
+    hour: "numeric",
+    minute: "2-digit",
+    timeZoneName: "short",
+  });
+}
+
+function UsageBar({
+  label,
+  used,
+  limit,
+  resetsAt,
+}: {
+  label: string; // caption rendered at the start of the header row
+  used: number; // amount consumed, compared against `limit`
+  limit: number; // non-positive values make the bar render nothing
+  resetsAt: Date | string; // passed straight to formatResetTime
+}) {
+  if (limit <= 0) return null; // nothing to chart; also keeps the division below away from zero
+
+  const rawPercent = (used / limit) * 100;
+  const percent = Math.min(100, Math.round(rawPercent)); // rounded, capped at 100
+  const isHigh = percent >= 80; // switches the fill from blue to orange
+  const percentLabel =
+    used > 0 && percent === 0 ? "<1% used" : `${percent}% used`; // non-zero usage never reads "0%"
+  // The fill width below is at least 1% whenever used > 0, even if percent rounds to 0.
+  return (
+    <div className="flex flex-col gap-1">
+      <div className="flex items-baseline justify-between">
+        <span className="text-xs font-medium text-neutral-700">{label}</span>
+        <span className="text-[11px] tabular-nums text-neutral-500">
+          {percentLabel}
+        </span>
+      </div>
+      <div className="text-[10px] text-neutral-400">
+        Resets {formatResetTime(resetsAt)}
+      </div>
+      <div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
+        <div
+          className={`h-full rounded-full transition-[width] duration-300 ease-out ${
+            isHigh ? "bg-orange-500" : "bg-blue-500"
+          }`}
+          style={{ width: `${Math.max(used > 0 ? 1 : 0, percent)}%` }}
+        />
+      </div>
+    </div>
+  );
+}
+
+export function UsagePanelContent({
+  usage,
+  showBillingLink = true,
+}: {
+  usage: CoPilotUsageStatus; // daily and weekly windows, each read for used / limit / resets_at
+  showBillingLink?: boolean; // when true (the default) the /profile/credits link is rendered
+}) {
+  const hasDailyLimit = usage.daily.limit > 0; // a limit of 0 or less counts as not configured
+  const hasWeeklyLimit = usage.weekly.limit > 0;
+
+  if (!hasDailyLimit && !hasWeeklyLimit) {
+    return (
+      <div className="text-xs text-neutral-500">No usage limits configured</div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col gap-3">
+      <div className="text-xs font-semibold text-neutral-800">Usage limits</div>
+      {hasDailyLimit && (
+        <UsageBar
+          label="Today"
+          used={usage.daily.used}
+          limit={usage.daily.limit}
+          resetsAt={usage.daily.resets_at}
+        />
+      )}
+      {hasWeeklyLimit && (
+        <UsageBar
+          label="This week"
+          used={usage.weekly.used}
+          limit={usage.weekly.limit}
+          resetsAt={usage.weekly.resets_at}
+        />
+      )}
+      {showBillingLink && (
+        <Link
+          href="/profile/credits"
+          className="text-[11px] text-blue-600 hover:underline"
+        >
+          Learn more about usage limits
+        </Link>
+      )}
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/tests/UsageLimits.test.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/tests/UsageLimits.test.tsx
@@ -0,0 +1,124 @@
+import { render, screen, cleanup } from "@/tests/integrations/test-utils";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { UsageLimits } from "../UsageLimits";
+
+// Mock the generated Orval hook
+const mockUseGetV2GetCopilotUsage = vi.fn(); // spy whose return value each test sets via mockReturnValue
+vi.mock("@/app/api/__generated__/endpoints/chat/chat", () => ({ // replaces the module export with a forwarder to the spy
+  useGetV2GetCopilotUsage: (opts: unknown) => mockUseGetV2GetCopilotUsage(opts), // arrow wrapper looks the spy up at call time; NOTE(review): presumably needed because vi.mock is hoisted above the const - confirm
+}));
+
+// Mock Popover to render children directly (Radix portals don't work in happy-dom)
+vi.mock("@/components/molecules/Popover/Popover", () => ({ // all three exports become plain div wrappers around their children
+  Popover: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  PopoverTrigger: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  PopoverContent: ({ children }: { children: React.ReactNode }) => ( // content is always in the DOM, no click on the trigger required
+    <div>{children}</div>
+  ),
+}));
+
+afterEach(() => { // per-test teardown so cases do not leak DOM or mock state into each other
+  cleanup(); // NOTE(review): presumably Testing Library cleanup re-exported by test-utils (unmounts rendered trees) - confirm
+  mockUseGetV2GetCopilotUsage.mockReset(); // forget the return value configured by the previous test
+});
+
+function makeUsage({ // Builds a usage payload with daily and weekly windows; every field can be overridden per test.
+  dailyUsed = 500,
+  dailyLimit = 10000, // defaults give 500 / 10000 for the daily window
+  weeklyUsed = 2000,
+  weeklyLimit = 50000, // defaults give 2000 / 50000 for the weekly window
+}: {
+  dailyUsed?: number;
+  dailyLimit?: number;
+  weeklyUsed?: number;
+  weeklyLimit?: number;
+} = {}) { // the whole options object is optional: makeUsage() yields the defaults
+  const future = new Date(Date.now() + 3600 * 1000); // 1h from now
+  return { // both windows share the same reset timestamp
+    daily: { used: dailyUsed, limit: dailyLimit, resets_at: future },
+    weekly: { used: weeklyUsed, limit: weeklyLimit, resets_at: future },
+  };
+}
+
+describe("UsageLimits", () => { // each case stubs the usage hook result, renders UsageLimits and inspects the DOM
+  it("renders nothing while loading", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: undefined,
+      isLoading: true,
+    });
+    const { container } = render(<UsageLimits />);
+    expect(container.innerHTML).toBe(""); // no markup at all is expected while the query is loading
+  });
+
+  it("renders nothing when no limits are configured", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyLimit: 0, weeklyLimit: 0 }), // both windows have a limit of 0
+      isLoading: false,
+    });
+    const { container } = render(<UsageLimits />);
+    expect(container.innerHTML).toBe("");
+  });
+
+  it("renders the usage button when limits exist", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage(), // default fixture: both windows have positive limits
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+    expect(screen.getByRole("button", { name: /usage limits/i })).toBeDefined();
+  });
+
+  it("displays daily and weekly usage percentages", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyUsed: 5000, dailyLimit: 10000 }), // half of the daily limit; weekly keeps the fixture defaults
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("50% used")).toBeDefined();
+    expect(screen.getByText("Today")).toBeDefined();
+    expect(screen.getByText("This week")).toBeDefined();
+    expect(screen.getByText("Usage limits")).toBeDefined();
+  });
+
+  it("shows only weekly bar when daily limit is 0", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({
+        dailyLimit: 0, // disables the daily window only
+        weeklyUsed: 25000,
+        weeklyLimit: 50000,
+      }),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("This week")).toBeDefined();
+    expect(screen.queryByText("Today")).toBeNull(); // the daily label must be absent from the DOM
+  });
+
+  it("caps percentage at 100% when over limit", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage({ dailyUsed: 15000, dailyLimit: 10000 }), // usage exceeds the limit by half
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    expect(screen.getByText("100% used")).toBeDefined(); // the label is expected to clamp rather than read 150%
+  });
+
+  it("shows learn more link to credits page", () => {
+    mockUseGetV2GetCopilotUsage.mockReturnValue({
+      data: makeUsage(),
+      isLoading: false,
+    });
+    render(<UsageLimits />);
+
+    const link = screen.getByText("Learn more about usage limits");
+    expect(link).toBeDefined();
+    expect(link.closest("a")?.getAttribute("href")).toBe("/profile/credits"); // the text must sit inside an anchor to the credits page
+  });
+});
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts
@@ -1,4 +1,5 @@
 import {
+  getGetV2GetCopilotUsageQueryKey,
  getGetV2GetSessionQueryKey,
  postV2CancelSessionTask,
 } from "@/app/api/__generated__/endpoints/chat/chat";
@@ -177,12 +178,41 @@ export function useCopilotStream({
    onError: (error) => {
      if (!sessionId) return;

+      // Detect rate limit (429) responses and show reset time to the user.
+      // The SDK throws a plain Error whose message is the raw response body
+      // (FastAPI returns {"detail": "...usage limit..."} for 429s).
+      let errorDetail: string = error.message;
+      try {
+        const parsed = JSON.parse(error.message) as unknown;
+        if (
+          typeof parsed === "object" &&
+          parsed !== null &&
+          "detail" in parsed &&
+          typeof (parsed as { detail: unknown }).detail === "string"
+        ) {
+          errorDetail = (parsed as { detail: string }).detail;
+        }
+      } catch {
+        // Not JSON — use message as-is
+      }
+      const isRateLimited = errorDetail.toLowerCase().includes("usage limit");
+      if (isRateLimited) {
+        toast({
+          title: "Usage limit reached",
+          description:
+            errorDetail ||
+            "You've reached your usage limit. Please try again later.",
+          variant: "destructive",
+        });
+        return;
+      }
+
      // Detect authentication failures (from getAuthHeaders or 401 responses)
      const isAuthError =
-        error.message.includes("Authentication failed") ||
-        error.message.includes("Unauthorized") ||
-        error.message.includes("Not authenticated") ||
-        error.message.toLowerCase().includes("401");
+        errorDetail.includes("Authentication failed") ||
+        errorDetail.includes("Unauthorized") ||
+        errorDetail.includes("Not authenticated") ||
+        errorDetail.toLowerCase().includes("401");
      if (isAuthError) {
        toast({
          title: "Authentication error",
@@ -307,6 +337,9 @@ export function useCopilotStream({
      queryClient.invalidateQueries({
        queryKey: getGetV2GetSessionQueryKey(sessionId),
      });
+      queryClient.invalidateQueries({
+        queryKey: getGetV2GetCopilotUsageQueryKey(),
+      });
      if (status === "ready") {
        reconnectAttemptsRef.current = 0;
        hasShownDisconnectToast.current = false;
--- a/autogpt_platform/frontend/src/app/(platform)/profile/(user)/credits/page.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/profile/(user)/credits/page.tsx
@@ -11,6 +11,9 @@ import {

 import { RefundModal } from "./RefundModal";
 import { CreditTransaction } from "@/lib/autogpt-server-api";
+import { UsagePanelContent } from "@/app/(platform)/copilot/components/UsageLimits/UsageLimits";
+import type { CoPilotUsageStatus } from "@/app/api/__generated__/models/coPilotUsageStatus";
+import { useGetV2GetCopilotUsage } from "@/app/api/__generated__/endpoints/chat/chat";

 import {
  Table,
@@ -21,6 +24,32 @@ import {
  TableRow,
 } from "@/components/__legacy__/ui/table";

+function CoPilotUsageSection() { // Credits-page section: CoPilot usage bars plus a button that navigates to /copilot.
+  const router = useRouter();
+  const { data: usage, isLoading } = useGetV2GetCopilotUsage({
+    query: {
+      select: (res) => res.data as CoPilotUsageStatus, // unwrap the response body; the cast is unchecked at runtime
+      refetchInterval: 30000, // NOTE(review): presumably milliseconds (poll every 30 s) - confirm against the query library
+      staleTime: 10000, // NOTE(review): presumably milliseconds (10 s) - confirm
+    },
+  });
+
+  if (isLoading || !usage) return null; // render nothing until usage data is available
+  if (usage.daily.limit <= 0 && usage.weekly.limit <= 0) return null; // hide the whole section when neither window has a limit
+
+  return ( // showBillingLink is false below, so the panel omits its link to /profile/credits
+    <div className="my-6 space-y-4">
+      <h3 className="text-lg font-medium">CoPilot Usage Limits</h3>
+      <div className="rounded-lg border border-neutral-200 p-4">
+        <UsagePanelContent usage={usage} showBillingLink={false} />
+      </div>
+      <Button className="w-full" onClick={() => router.push("/copilot")}>
+        Open CoPilot
+      </Button>
+    </div>
+  );
+}
+
 export default function CreditsPage() {
  const api = useBackendAPI();
  const {
@@ -237,11 +266,13 @@ export default function CreditsPage() {
              </Button>
            )}
          </form>
+
+          {/* CoPilot Usage Limits */}
+          <CoPilotUsageSection />
        </div>

        <div className="my-6 space-y-4">
          {/* Payment Portal */}
-
          <h3 className="text-lg font-medium">Manage Your Payment Methods</h3>
          <p className="text-neutral-600">
            You can manage your cards and see your payment history in the
--- a/autogpt_platform/frontend/src/app/api/openapi.json
+++ b/autogpt_platform/frontend/src/app/api/openapi.json
@@ -1267,7 +1267,7 @@
      "post": {
        "tags": ["v2", "chat", "chat"],
        "summary": "Stream Chat Post",
-        "description": "Stream chat responses for a session (POST with context support).\n\nStreams the AI/completion responses in real time over Server-Sent Events (SSE), including:\n  - Text fragments as they are generated\n  - Tool call UI elements (if invoked)\n  - Tool execution results\n\nThe AI generation runs in a background task that continues even if the client disconnects.\nAll chunks are written to a per-turn Redis stream for reconnection support. If the client\ndisconnects, they can reconnect using GET /sessions/{session_id}/stream to resume.\n\nArgs:\n    session_id: The chat session identifier to associate with the streamed messages.\n    request: Request body containing message, is_user_message, and optional context.\n    user_id: Optional authenticated user ID.\nReturns:\n    StreamingResponse: SSE-formatted response chunks.",
+        "description": "Stream chat responses for a session (POST with context support).\n\nStreams the AI/completion responses in real time over Server-Sent Events (SSE), including:\n  - Text fragments as they are generated\n  - Tool call UI elements (if invoked)\n  - Tool execution results\n\nThe AI generation runs in a background task that continues even if the client disconnects.\nAll chunks are written to a per-turn Redis stream for reconnection support. If the client\ndisconnects, they can reconnect using GET /sessions/{session_id}/stream to resume.\n\nArgs:\n    session_id: The chat session identifier to associate with the streamed messages.\n    request: Request body containing message, is_user_message, and optional context.\n    user_id: Authenticated user ID.\nReturns:\n    StreamingResponse: SSE-formatted response chunks.",
        "operationId": "postV2StreamChatPost",
        "security": [{ "HTTPBearerJWT": [] }],
        "parameters": [
@@ -1382,6 +1382,28 @@
        "security": [{ "HTTPBearerJWT": [] }]
      }
    },
+    "/api/chat/usage": {
+      "get": {
+        "tags": ["v2", "chat", "chat"],
+        "summary": "Get Copilot Usage",
+        "description": "Get CoPilot usage status for the authenticated user.\n\nReturns current token usage vs limits for daily and weekly windows.",
+        "operationId": "getV2GetCopilotUsage",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": { "$ref": "#/components/schemas/CoPilotUsageStatus" }
+              }
+            }
+          },
+          "401": {
+            "$ref": "#/components/responses/HTTP401NotAuthenticatedError"
+          }
+        },
+        "security": [{ "HTTPBearerJWT": [] }]
+      }
+    },
    "/api/credits": {
      "get": {
        "tags": ["v1", "credits"],
@@ -8455,6 +8477,16 @@
        "title": "ClarifyingQuestion",
        "description": "A question that needs user clarification."
      },
+      "CoPilotUsageStatus": {
+        "properties": {
+          "daily": { "$ref": "#/components/schemas/UsageWindow" },
+          "weekly": { "$ref": "#/components/schemas/UsageWindow" }
+        },
+        "type": "object",
+        "required": ["daily", "weekly"],
+        "title": "CoPilotUsageStatus",
+        "description": "Current usage status for a user across all windows."
+      },
      "ContentType": {
        "type": "string",
        "enum": [
@@ -12190,6 +12222,16 @@
              { "$ref": "#/components/schemas/ActiveStreamInfo" },
              { "type": "null" }
            ]
+          },
+          "total_prompt_tokens": {
+            "type": "integer",
+            "title": "Total Prompt Tokens",
+            "default": 0
+          },
+          "total_completion_tokens": {
+            "type": "integer",
+            "title": "Total Completion Tokens",
+            "default": 0
          }
        },
        "type": "object",
@@ -14587,6 +14629,25 @@
        "required": ["timezone"],
        "title": "UpdateTimezoneRequest"
      },
+      "UsageWindow": {
+        "properties": {
+          "used": { "type": "integer", "title": "Used" },
+          "limit": {
+            "type": "integer",
+            "title": "Limit",
+            "description": "Maximum tokens allowed in this window. 0 means unlimited."
+          },
+          "resets_at": {
+            "type": "string",
+            "format": "date-time",
+            "title": "Resets At"
+          }
+        },
+        "type": "object",
+        "required": ["used", "limit", "resets_at"],
+        "title": "UsageWindow",
+        "description": "Usage within a single time window."
+      },
      "UserHistoryResponse": {
        "properties": {
          "history": {
--- a/autogpt_platform/frontend/src/components/ui/sidebar.tsx
+++ b/autogpt_platform/frontend/src/components/ui/sidebar.tsx
@@ -288,6 +288,7 @@ const SidebarTrigger = React.forwardRef<
      ref={ref}
      data-sidebar="trigger"
      variant="ghost"
+      size="icon"
      onClick={(event) => {
        onClick?.(event);
        toggleSidebar();