add entrypoint to pyproject.toml

add copilot executor service to docker compose files
feat(backend/copilot): Copilot Executor Microservice
2026-02-10 23:05:17 -05:00 · 2026-02-10 23:38:21 +01:00 · 2026-02-10 22:55:47 +01:00 · 2026-02-10 22:48:01 +01:00 · 2026-02-10 16:18:05 +01:00 · 2026-02-10 14:43:33 +00:00
252 changed files with 19636 additions and 12922 deletions
--- a/.github/workflows/classic-frontend-ci.yml
+++ b/.github/workflows/classic-frontend-ci.yml
@@ -49,7 +49,7 @@ jobs:

      - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
        if: github.event_name == 'push'
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v8
        with:
          add-paths: classic/frontend/build/web
          base: ${{ github.ref_name }}
--- a/.github/workflows/claude-ci-failure-auto-fix.yml
+++ b/.github/workflows/claude-ci-failure-auto-fix.yml
@@ -42,7 +42,7 @@ jobs:

      - name: Get CI failure details
        id: failure_details
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const run = await github.rest.actions.getWorkflowRun({
--- a/.github/workflows/claude-dependabot.yml
+++ b/.github/workflows/claude-dependabot.yml
@@ -41,7 +41,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -78,7 +78,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -91,7 +91,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -124,7 +124,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
@@ -309,6 +309,7 @@ jobs:
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          allowed_bots: "dependabot[bot]"
          claude_args: |
            --allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
          prompt: |
--- a/.github/workflows/claude.yml
+++ b/.github/workflows/claude.yml
@@ -57,7 +57,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -94,7 +94,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -107,7 +107,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -140,7 +140,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -39,7 +39,7 @@ jobs:
          python-version: "3.11"  # Use standard version matching CI

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -76,7 +76,7 @@ jobs:

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22"

@@ -89,7 +89,7 @@ jobs:
          echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

      - name: Cache frontend dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -132,7 +132,7 @@ jobs:
      # Phase 1: Cache and load Docker images for faster setup
      - name: Set up Docker image cache
        id: docker-cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/docker-cache
          # Use a versioned key for cache invalidation when image list changes
--- a/.github/workflows/docs-block-sync.yml
+++ b/.github/workflows/docs-block-sync.yml
@@ -33,7 +33,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/docs-claude-review.yml
+++ b/.github/workflows/docs-claude-review.yml
@@ -33,7 +33,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/docs-enhance.yml
+++ b/.github/workflows/docs-enhance.yml
@@ -38,7 +38,7 @@ jobs:
          python-version: "3.11"

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/platform-backend-ci.yml
+++ b/.github/workflows/platform-backend-ci.yml
@@ -88,7 +88,7 @@ jobs:
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Set up Python dependency cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
--- a/.github/workflows/platform-dev-deploy-event-dispatcher.yml
+++ b/.github/workflows/platform-dev-deploy-event-dispatcher.yml
@@ -17,7 +17,7 @@ jobs:
      - name: Check comment permissions and deployment status
        id: check_status
        if: github.event_name == 'issue_comment' && github.event.issue.pull_request
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const commentBody = context.payload.comment.body.trim();
@@ -55,7 +55,7 @@ jobs:

      - name: Post permission denied comment
        if: steps.check_status.outputs.permission_denied == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -68,7 +68,7 @@ jobs:
      - name: Get PR details for deployment
        id: pr_details
        if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const pr = await github.rest.pulls.get({
@@ -98,7 +98,7 @@ jobs:

      - name: Post deploy success comment
        if: steps.check_status.outputs.should_deploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -126,7 +126,7 @@ jobs:

      - name: Post undeploy success comment
        if: steps.check_status.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
@@ -139,7 +139,7 @@ jobs:
      - name: Check deployment status on PR close
        id: check_pr_close
        if: github.event_name == 'pull_request' && github.event.action == 'closed'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            const comments = await github.rest.issues.listComments({
@@ -187,7 +187,7 @@ jobs:
          github.event_name == 'pull_request' &&
          github.event.action == 'closed' &&
          steps.check_pr_close.outputs.should_undeploy == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.issues.createComment({
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -42,7 +42,7 @@ jobs:
              - 'autogpt_platform/frontend/src/components/**'

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -54,7 +54,7 @@ jobs:
        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

      - name: Cache dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ steps.cache-key.outputs.key }}
@@ -74,7 +74,7 @@ jobs:
        uses: actions/checkout@v4

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -82,7 +82,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -112,7 +112,7 @@ jobs:
          fetch-depth: 0

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -120,7 +120,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -153,7 +153,7 @@ jobs:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -176,7 +176,7 @@ jobs:
        uses: docker/setup-buildx-action@v3

      - name: Cache Docker layers
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}
@@ -231,7 +231,7 @@ jobs:
          fi

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
@@ -282,7 +282,7 @@ jobs:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -290,7 +290,7 @@ jobs:
        run: corepack enable

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
--- a/.github/workflows/platform-fullstack-ci.yml
+++ b/.github/workflows/platform-fullstack-ci.yml
@@ -32,7 +32,7 @@ jobs:
        uses: actions/checkout@v4

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -44,7 +44,7 @@ jobs:
        run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

      - name: Cache dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ steps.cache-key.outputs.key }}
@@ -56,7 +56,7 @@ jobs:
        run: pnpm install --frozen-lockfile

  types:
-    runs-on: ubuntu-latest
+    runs-on: big-boi
    needs: setup
    strategy:
      fail-fast: false
@@ -68,7 +68,7 @@ jobs:
          submodules: recursive

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"

@@ -85,10 +85,10 @@ jobs:

      - name: Run docker compose
        run: |
-          docker compose -f ../docker-compose.yml --profile local --profile deps_backend up -d
+          docker compose -f ../docker-compose.yml --profile local up -d deps_backend

      - name: Restore dependencies cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: ~/.pnpm-store
          key: ${{ needs.setup.outputs.cache-key }}
--- a/autogpt_platform/autogpt_libs/poetry.lock
+++ b/autogpt_platform/autogpt_libs/poetry.lock
--- a/autogpt_platform/autogpt_libs/pyproject.toml
+++ b/autogpt_platform/autogpt_libs/pyproject.toml
@@ -9,25 +9,25 @@ packages = [{ include = "autogpt_libs" }]
 [tool.poetry.dependencies]
 python = ">=3.10,<4.0"
 colorama = "^0.4.6"
-cryptography = "^45.0"
+cryptography = "^46.0"
 expiringdict = "^1.2.2"
-fastapi = "^0.116.1"
-google-cloud-logging = "^3.12.1"
-launchdarkly-server-sdk = "^9.12.0"
-pydantic = "^2.11.7"
-pydantic-settings = "^2.10.1"
-pyjwt = { version = "^2.10.1", extras = ["crypto"] }
+fastapi = "^0.128.0"
+google-cloud-logging = "^3.13.0"
+launchdarkly-server-sdk = "^9.14.1"
+pydantic = "^2.12.5"
+pydantic-settings = "^2.12.0"
+pyjwt = { version = "^2.11.0", extras = ["crypto"] }
 redis = "^6.2.0"
-supabase = "^2.16.0"
-uvicorn = "^0.35.0"
+supabase = "^2.27.2"
+uvicorn = "^0.40.0"

 [tool.poetry.group.dev.dependencies]
-pyright = "^1.1.404"
+pyright = "^1.1.408"
 pytest = "^8.4.1"
-pytest-asyncio = "^1.1.0"
-pytest-mock = "^3.14.1"
-pytest-cov = "^6.2.1"
-ruff = "^0.12.11"
+pytest-asyncio = "^1.3.0"
+pytest-mock = "^3.15.1"
+pytest-cov = "^7.0.0"
+ruff = "^0.15.0"

 [build-system]
 requires = ["poetry-core"]
--- a/autogpt_platform/backend/backend/api/external/v1/tools.py
+++ b/autogpt_platform/backend/backend/api/external/v1/tools.py
@@ -15,9 +15,9 @@ from prisma.enums import APIKeyPermission
 from pydantic import BaseModel, Field

 from backend.api.external.middleware import require_permission
-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tools import find_agent_tool, run_agent_tool
-from backend.api.features.chat.tools.models import ToolResponseBase
+from backend.copilot.model import ChatSession
+from backend.copilot.tools import find_agent_tool, run_agent_tool
+from backend.copilot.tools.models import ToolResponseBase
 from backend.data.auth.base import APIAuthorizationInfo

 logger = logging.getLogger(__name__)
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -6,19 +6,49 @@ from collections.abc import AsyncGenerator
 from typing import Annotated

 from autogpt_libs import auth
-from fastapi import APIRouter, Depends, Header, HTTPException, Query, Security
+from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, Security
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel

+from backend.copilot import service as chat_service
+from backend.copilot import stream_registry
+from backend.copilot.completion_handler import (
+    process_operation_failure,
+    process_operation_success,
+)
+from backend.copilot.config import ChatConfig
+from backend.copilot.executor.utils import enqueue_copilot_task
+from backend.copilot.model import (
+    ChatSession,
+    create_chat_session,
+    get_chat_session,
+    get_user_sessions,
+)
+from backend.copilot.response_model import StreamFinish, StreamHeartbeat
+from backend.copilot.tools.models import (
+    AgentDetailsResponse,
+    AgentOutputResponse,
+    AgentPreviewResponse,
+    AgentSavedResponse,
+    AgentsFoundResponse,
+    BlockListResponse,
+    BlockOutputResponse,
+    ClarificationNeededResponse,
+    DocPageResponse,
+    DocSearchResultsResponse,
+    ErrorResponse,
+    ExecutionStartedResponse,
+    InputValidationErrorResponse,
+    NeedLoginResponse,
+    NoResultsResponse,
+    OperationInProgressResponse,
+    OperationPendingResponse,
+    OperationStartedResponse,
+    SetupRequirementsResponse,
+    UnderstandingUpdatedResponse,
+)
 from backend.util.exceptions import NotFoundError

-from . import service as chat_service
-from . import stream_registry
-from .completion_handler import process_operation_failure, process_operation_success
-from .config import ChatConfig
-from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
-from .response_model import StreamFinish, StreamHeartbeat, StreamStart
-
 config = ChatConfig()


@@ -266,12 +296,36 @@ async def stream_chat_post(

    """
    import asyncio
+    import time

-    session = await _validate_and_get_session(session_id, user_id)
+    stream_start_time = time.perf_counter()
+    log_meta = {"component": "ChatStream", "session_id": session_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] stream_chat_post STARTED, session={session_id}, "
+        f"user={user_id}, message_len={len(request.message)}",
+        extra={"json_fields": log_meta},
+    )
+
+    _session = await _validate_and_get_session(session_id, user_id)  # noqa: F841
+    logger.info(
+        f"[TIMING] session validated in {(time.perf_counter() - stream_start_time)*1000:.1f}ms",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": (time.perf_counter() - stream_start_time) * 1000,
+            }
+        },
+    )

    # Create a task in the stream registry for reconnection support
    task_id = str(uuid_module.uuid4())
    operation_id = str(uuid_module.uuid4())
+    log_meta["task_id"] = task_id
+
+    task_create_start = time.perf_counter()
    await stream_registry.create_task(
        task_id=task_id,
        session_id=session_id,
@@ -280,40 +334,46 @@ async def stream_chat_post(
        tool_name="chat",
        operation_id=operation_id,
    )
+    logger.info(
+        f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start)*1000:.1f}ms",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": (time.perf_counter() - task_create_start) * 1000,
+            }
+        },
+    )

-    # Background task that runs the AI generation independently of SSE connection
-    async def run_ai_generation():
-        try:
-            # Emit a start event with task_id for reconnection
-            start_chunk = StreamStart(messageId=task_id, taskId=task_id)
-            await stream_registry.publish_chunk(task_id, start_chunk)
+    # Enqueue the task to RabbitMQ for processing by the CoPilot executor
+    await enqueue_copilot_task(
+        task_id=task_id,
+        session_id=session_id,
+        user_id=user_id,
+        operation_id=operation_id,
+        message=request.message,
+        is_user_message=request.is_user_message,
+        context=request.context,
+    )

-            async for chunk in chat_service.stream_chat_completion(
-                session_id,
-                request.message,
-                is_user_message=request.is_user_message,
-                user_id=user_id,
-                session=session,  # Pass pre-fetched session to avoid double-fetch
-                context=request.context,
-            ):
-                # Write to Redis (subscribers will receive via XREAD)
-                await stream_registry.publish_chunk(task_id, chunk)
-
-            # Mark task as completed
-            await stream_registry.mark_task_completed(task_id, "completed")
-        except Exception as e:
-            logger.error(
-                f"Error in background AI generation for session {session_id}: {e}"
-            )
-            await stream_registry.mark_task_completed(task_id, "failed")
-
-    # Start the AI generation in a background task
-    bg_task = asyncio.create_task(run_ai_generation())
-    await stream_registry.set_task_asyncio_task(task_id, bg_task)
+    setup_time = (time.perf_counter() - stream_start_time) * 1000
+    logger.info(
+        f"[TIMING] Task enqueued to RabbitMQ, setup={setup_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
+    )

    # SSE endpoint that subscribes to the task's stream
    async def event_generator() -> AsyncGenerator[str, None]:
+        import time as time_module
+
+        event_gen_start = time_module.perf_counter()
+        logger.info(
+            f"[TIMING] event_generator STARTED, task={task_id}, session={session_id}, "
+            f"user={user_id}",
+            extra={"json_fields": log_meta},
+        )
        subscriber_queue = None
+        first_chunk_yielded = False
+        chunks_yielded = 0
        try:
            # Subscribe to the task stream (this replays existing messages + live updates)
            subscriber_queue = await stream_registry.subscribe_to_task(
@@ -328,22 +388,70 @@ async def stream_chat_post(
                return

            # Read from the subscriber queue and yield to SSE
+            logger.info(
+                "[TIMING] Starting to read from subscriber_queue",
+                extra={"json_fields": log_meta},
+            )
            while True:
                try:
                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
+                    chunks_yielded += 1
+
+                    if not first_chunk_yielded:
+                        first_chunk_yielded = True
+                        elapsed = time_module.perf_counter() - event_gen_start
+                        logger.info(
+                            f"[TIMING] FIRST CHUNK from queue at {elapsed:.2f}s, "
+                            f"type={type(chunk).__name__}",
+                            extra={
+                                "json_fields": {
+                                    **log_meta,
+                                    "chunk_type": type(chunk).__name__,
+                                    "elapsed_ms": elapsed * 1000,
+                                }
+                            },
+                        )
+
                    yield chunk.to_sse()

                    # Check for finish signal
                    if isinstance(chunk, StreamFinish):
+                        total_time = time_module.perf_counter() - event_gen_start
+                        logger.info(
+                            f"[TIMING] StreamFinish received in {total_time:.2f}s; "
+                            f"n_chunks={chunks_yielded}",
+                            extra={
+                                "json_fields": {
+                                    **log_meta,
+                                    "chunks_yielded": chunks_yielded,
+                                    "total_time_ms": total_time * 1000,
+                                }
+                            },
+                        )
                        break
                except asyncio.TimeoutError:
-                    # Send heartbeat to keep connection alive
                    yield StreamHeartbeat().to_sse()

        except GeneratorExit:
+            logger.info(
+                f"[TIMING] GeneratorExit (client disconnected), chunks={chunks_yielded}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "chunks_yielded": chunks_yielded,
+                        "reason": "client_disconnect",
+                    }
+                },
+            )
            pass  # Client disconnected - background task continues
        except Exception as e:
-            logger.error(f"Error in SSE stream for task {task_id}: {e}")
+            elapsed = (time_module.perf_counter() - event_gen_start) * 1000
+            logger.error(
+                f"[TIMING] event_generator ERROR after {elapsed:.1f}ms: {e}",
+                extra={
+                    "json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
+                },
+            )
        finally:
            # Unsubscribe when client disconnects or stream ends to prevent resource leak
            if subscriber_queue is not None:
@@ -357,6 +465,18 @@ async def stream_chat_post(
                        exc_info=True,
                    )
            # AI SDK protocol termination - always yield even if unsubscribe fails
+            total_time = time_module.perf_counter() - event_gen_start
+            logger.info(
+                f"[TIMING] event_generator FINISHED in {total_time:.2f}s; "
+                f"task={task_id}, session={session_id}, n_chunks={chunks_yielded}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time * 1000,
+                        "chunks_yielded": chunks_yielded,
+                    }
+                },
+            )
            yield "data: [DONE]\n\n"

    return StreamingResponse(
@@ -374,63 +494,90 @@ async def stream_chat_post(
@router.get(
    "/sessions/{session_id}/stream",
 )
-async def stream_chat_get(
+async def resume_session_stream(
    session_id: str,
-    message: Annotated[str, Query(min_length=1, max_length=10000)],
    user_id: str | None = Depends(auth.get_user_id),
-    is_user_message: bool = Query(default=True),
 ):
    """
-    Stream chat responses for a session (GET - legacy endpoint).
+    Resume an active stream for a session.

-    Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
-      - Text fragments as they are generated
-      - Tool call UI elements (if invoked)
-      - Tool execution results
+    Called by the AI SDK's ``useChat(resume: true)`` on page load.
+    Checks for an active (in-progress) task on the session and either replays
+    the full SSE stream or returns 204 No Content if nothing is running.

    Args:
-        session_id: The chat session identifier to associate with the streamed messages.
-        message: The user's new message to process.
+        session_id: The chat session identifier.
        user_id: Optional authenticated user ID.
-        is_user_message: Whether the message is a user message.
-    Returns:
-        StreamingResponse: SSE-formatted response chunks.

+    Returns:
+        StreamingResponse (SSE) when an active stream exists,
+        or 204 No Content when there is nothing to resume.
    """
-    session = await _validate_and_get_session(session_id, user_id)
+    import asyncio
+
+    active_task, _last_id = await stream_registry.get_active_task_for_session(
+        session_id, user_id
+    )
+
+    if not active_task:
+        return Response(status_code=204)
+
+    subscriber_queue = await stream_registry.subscribe_to_task(
+        task_id=active_task.task_id,
+        user_id=user_id,
+        last_message_id="0-0",  # Full replay so useChat rebuilds the message
+    )
+
+    if subscriber_queue is None:
+        return Response(status_code=204)

    async def event_generator() -> AsyncGenerator[str, None]:
        chunk_count = 0
        first_chunk_type: str | None = None
-        async for chunk in chat_service.stream_chat_completion(
-            session_id,
-            message,
-            is_user_message=is_user_message,
-            user_id=user_id,
-            session=session,  # Pass pre-fetched session to avoid double-fetch
-        ):
-            if chunk_count < 3:
-                logger.info(
-                    "Chat stream chunk",
-                    extra={
-                        "session_id": session_id,
-                        "chunk_type": str(chunk.type),
-                    },
+        try:
+            while True:
+                try:
+                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
+                    if chunk_count < 3:
+                        logger.info(
+                            "Resume stream chunk",
+                            extra={
+                                "session_id": session_id,
+                                "chunk_type": str(chunk.type),
+                            },
+                        )
+                    if not first_chunk_type:
+                        first_chunk_type = str(chunk.type)
+                    chunk_count += 1
+                    yield chunk.to_sse()
+
+                    if isinstance(chunk, StreamFinish):
+                        break
+                except asyncio.TimeoutError:
+                    yield StreamHeartbeat().to_sse()
+        except GeneratorExit:
+            pass
+        except Exception as e:
+            logger.error(f"Error in resume stream for session {session_id}: {e}")
+        finally:
+            try:
+                await stream_registry.unsubscribe_from_task(
+                    active_task.task_id, subscriber_queue
                )
-            if not first_chunk_type:
-                first_chunk_type = str(chunk.type)
-            chunk_count += 1
-            yield chunk.to_sse()
-        logger.info(
-            "Chat stream completed",
-            extra={
-                "session_id": session_id,
-                "chunk_count": chunk_count,
-                "first_chunk_type": first_chunk_type,
-            },
-        )
-        # AI SDK protocol termination
-        yield "data: [DONE]\n\n"
+            except Exception as unsub_err:
+                logger.error(
+                    f"Error unsubscribing from task {active_task.task_id}: {unsub_err}",
+                    exc_info=True,
+                )
+            logger.info(
+                "Resume stream completed",
+                extra={
+                    "session_id": session_id,
+                    "n_chunks": chunk_count,
+                    "first_chunk_type": first_chunk_type,
+                },
+            )
+            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
@@ -438,8 +585,8 @@ async def stream_chat_get(
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no",  # Disable nginx buffering
-            "x-vercel-ai-ui-message-stream": "v1",  # AI SDK protocol header
+            "X-Accel-Buffering": "no",
+            "x-vercel-ai-ui-message-stream": "v1",
        },
    )

@@ -751,3 +898,42 @@ async def health_check() -> dict:
        "service": "chat",
        "version": "0.1.0",
    }
+
+
+# ========== Schema Export (for OpenAPI / Orval codegen) ==========
+
+ToolResponseUnion = (
+    AgentsFoundResponse
+    | NoResultsResponse
+    | AgentDetailsResponse
+    | SetupRequirementsResponse
+    | ExecutionStartedResponse
+    | NeedLoginResponse
+    | ErrorResponse
+    | InputValidationErrorResponse
+    | AgentOutputResponse
+    | UnderstandingUpdatedResponse
+    | AgentPreviewResponse
+    | AgentSavedResponse
+    | ClarificationNeededResponse
+    | BlockListResponse
+    | BlockOutputResponse
+    | DocSearchResultsResponse
+    | DocPageResponse
+    | OperationStartedResponse
+    | OperationPendingResponse
+    | OperationInProgressResponse
+)
+
+
+@router.get(
+    "/schema/tool-responses",
+    response_model=ToolResponseUnion,
+    include_in_schema=True,
+    summary="[Dummy] Tool response type export for codegen",
+    description="This endpoint is not meant to be called. It exists solely to "
+    "expose tool response models in the OpenAPI schema for frontend codegen.",
+)
+async def _tool_response_schema() -> ToolResponseUnion:  # type: ignore[return]
+    """Never called at runtime. Exists only so Orval generates TS types."""
+    raise HTTPException(status_code=501, detail="Schema-only endpoint")
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -1,193 +0,0 @@
-import logging
-from typing import Any
-
-from prisma.enums import ContentType
-
-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tools.base import BaseTool, ToolResponseBase
-from backend.api.features.chat.tools.models import (
-    BlockInfoSummary,
-    BlockInputFieldInfo,
-    BlockListResponse,
-    ErrorResponse,
-    NoResultsResponse,
-)
-from backend.api.features.store.hybrid_search import unified_hybrid_search
-from backend.data.block import get_block
-
-logger = logging.getLogger(__name__)
-
-
-class FindBlockTool(BaseTool):
-    """Tool for searching available blocks."""
-
-    @property
-    def name(self) -> str:
-        return "find_block"
-
-    @property
-    def description(self) -> str:
-        return (
-            "Search for available blocks by name or description. "
-            "Blocks are reusable components that perform specific tasks like "
-            "sending emails, making API calls, processing text, etc. "
-            "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
-            "The response includes each block's id, required_inputs, and input_schema."
-        )
-
-    @property
-    def parameters(self) -> dict[str, Any]:
-        return {
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": (
-                        "Search query to find blocks by name or description. "
-                        "Use keywords like 'email', 'http', 'text', 'ai', etc."
-                    ),
-                },
-            },
-            "required": ["query"],
-        }
-
-    @property
-    def requires_auth(self) -> bool:
-        return True
-
-    async def _execute(
-        self,
-        user_id: str | None,
-        session: ChatSession,
-        **kwargs,
-    ) -> ToolResponseBase:
-        """Search for blocks matching the query.
-
-        Args:
-            user_id: User ID (required)
-            session: Chat session
-            query: Search query
-
-        Returns:
-            BlockListResponse: List of matching blocks
-            NoResultsResponse: No blocks found
-            ErrorResponse: Error message
-        """
-        query = kwargs.get("query", "").strip()
-        session_id = session.session_id
-
-        if not query:
-            return ErrorResponse(
-                message="Please provide a search query",
-                session_id=session_id,
-            )
-
-        try:
-            # Search for blocks using hybrid search
-            results, total = await unified_hybrid_search(
-                query=query,
-                content_types=[ContentType.BLOCK],
-                page=1,
-                page_size=10,
-            )
-
-            if not results:
-                return NoResultsResponse(
-                    message=f"No blocks found for '{query}'",
-                    suggestions=[
-                        "Try broader keywords like 'email', 'http', 'text', 'ai'",
-                        "Check spelling of technical terms",
-                    ],
-                    session_id=session_id,
-                )
-
-            # Enrich results with full block information
-            blocks: list[BlockInfoSummary] = []
-            for result in results:
-                block_id = result["content_id"]
-                block = get_block(block_id)
-
-                # Skip disabled blocks
-                if block and not block.disabled:
-                    # Get input/output schemas
-                    input_schema = {}
-                    output_schema = {}
-                    try:
-                        input_schema = block.input_schema.jsonschema()
-                    except Exception:
-                        pass
-                    try:
-                        output_schema = block.output_schema.jsonschema()
-                    except Exception:
-                        pass
-
-                    # Get categories from block instance
-                    categories = []
-                    if hasattr(block, "categories") and block.categories:
-                        categories = [cat.value for cat in block.categories]
-
-                    # Extract required inputs for easier use
-                    required_inputs: list[BlockInputFieldInfo] = []
-                    if input_schema:
-                        properties = input_schema.get("properties", {})
-                        required_fields = set(input_schema.get("required", []))
-                        # Get credential field names to exclude from required inputs
-                        credentials_fields = set(
-                            block.input_schema.get_credentials_fields().keys()
-                        )
-
-                        for field_name, field_schema in properties.items():
-                            # Skip credential fields - they're handled separately
-                            if field_name in credentials_fields:
-                                continue
-
-                            required_inputs.append(
-                                BlockInputFieldInfo(
-                                    name=field_name,
-                                    type=field_schema.get("type", "string"),
-                                    description=field_schema.get("description", ""),
-                                    required=field_name in required_fields,
-                                    default=field_schema.get("default"),
-                                )
-                            )
-
-                    blocks.append(
-                        BlockInfoSummary(
-                            id=block_id,
-                            name=block.name,
-                            description=block.description or "",
-                            categories=categories,
-                            input_schema=input_schema,
-                            output_schema=output_schema,
-                            required_inputs=required_inputs,
-                        )
-                    )
-
-            if not blocks:
-                return NoResultsResponse(
-                    message=f"No blocks found for '{query}'",
-                    suggestions=[
-                        "Try broader keywords like 'email', 'http', 'text', 'ai'",
-                    ],
-                    session_id=session_id,
-                )
-
-            return BlockListResponse(
-                message=(
-                    f"Found {len(blocks)} block(s) matching '{query}'. "
-                    "To execute a block, use run_block with the block's 'id' field "
-                    "and provide 'input_data' matching the block's input_schema."
-                ),
-                blocks=blocks,
-                count=len(blocks),
-                query=query,
-                session_id=session_id,
-            )
-
-        except Exception as e:
-            logger.error(f"Error searching blocks: {e}", exc_info=True)
-            return ErrorResponse(
-                message="Failed to search blocks",
-                error=str(e),
-                session_id=session_id,
-            )
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.

 import logging
 import re
+import time
 from dataclasses import dataclass
 from typing import Any, Literal

@@ -362,7 +363,11 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking
@@ -686,7 +691,11 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0

@@ -718,6 +727,87 @@ async def hybrid_search_simple(
    return await hybrid_search(query=query, page=page, page_size=page_size)


+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: only log vector error diagnostics once per this interval
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+_last_vector_diag_time: float = 0
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log diagnostic info when 'type vector does not exist' error occurs.
+
+    Note: Diagnostic queries use query_raw_with_schema which may run on a different
+    pooled connection than the one that failed. Session-level search_path can differ,
+    so results describe the diagnostic connection, not necessarily the failed session.
+
+    Includes rate limiting to avoid log spam - only logs once per minute.
+    Caller should re-raise the error after calling this function.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not (
+        "type" in error_str and "vector" in error_str and "does not exist" in error_str
+    ):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.time()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now
+
+    try:
+        diagnostics: dict[str, object] = {}
+
+        try:
+            search_path_result = await query_raw_with_schema("SHOW search_path")
+            diagnostics["search_path"] = search_path_result
+        except Exception as e:
+            diagnostics["search_path"] = f"Error: {e}"
+
+        try:
+            schema_result = await query_raw_with_schema("SELECT current_schema()")
+            diagnostics["current_schema"] = schema_result
+        except Exception as e:
+            diagnostics["current_schema"] = f"Error: {e}"
+
+        try:
+            user_result = await query_raw_with_schema(
+                "SELECT current_user, session_user, current_database()"
+            )
+            diagnostics["user_info"] = user_result
+        except Exception as e:
+            diagnostics["user_info"] = f"Error: {e}"
+
+        try:
+            # Check pgvector extension installation (database-wide, stable info)
+            ext_result = await query_raw_with_schema(
+                "SELECT extname, extversion, nspname as schema "
+                "FROM pg_extension e "
+                "JOIN pg_namespace n ON e.extnamespace = n.oid "
+                "WHERE extname = 'vector'"
+            )
+            diagnostics["pgvector_extension"] = ext_result
+        except Exception as e:
+            diagnostics["pgvector_extension"] = f"Error: {e}"
+
+        logger.error(
+            f"Vector type error diagnostics:\n"
+            f"  Error: {error}\n"
+            f"  search_path: {diagnostics.get('search_path')}\n"
+            f"  current_schema: {diagnostics.get('current_schema')}\n"
+            f"  user_info: {diagnostics.get('user_info')}\n"
+            f"  pgvector_extension: {diagnostics.get('pgvector_extension')}"
+        )
+    except Exception as diag_error:
+        logger.error(f"Failed to collect vector error diagnostics: {diag_error}")
+
+
 # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
 # for existing code that expects the popularity parameter
 HybridSearchWeights = StoreAgentSearchWeights
--- a/autogpt_platform/backend/backend/api/rest_api.py
+++ b/autogpt_platform/backend/backend/api/rest_api.py
@@ -40,11 +40,11 @@ import backend.data.user
 import backend.integrations.webhooks.utils
 import backend.util.service
 import backend.util.settings
-from backend.api.features.chat.completion_consumer import (
+from backend.blocks.llm import DEFAULT_LLM_MODEL
+from backend.copilot.completion_consumer import (
    start_completion_consumer,
    stop_completion_consumer,
 )
-from backend.blocks.llm import DEFAULT_LLM_MODEL
 from backend.data.model import Credentials
 from backend.integrations.providers import ProviderName
 from backend.monitoring.instrumentation import instrument_fastapi
--- a/autogpt_platform/backend/backend/app.py
+++ b/autogpt_platform/backend/backend/app.py
@@ -38,6 +38,7 @@ def main(**kwargs):

    from backend.api.rest_api import AgentServer
    from backend.api.ws_api import WebsocketServer
+    from backend.copilot.executor.manager import CoPilotExecutor
    from backend.executor import DatabaseManager, ExecutionManager, Scheduler
    from backend.notifications import NotificationManager

@@ -48,6 +49,7 @@ def main(**kwargs):
        WebsocketServer(),
        AgentServer(),
        ExecutionManager(),
+        CoPilotExecutor(),
        **kwargs,
    )

--- a/autogpt_platform/backend/backend/blocks/exa/websets.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets.py
@@ -478,7 +478,7 @@ class ExaCreateOrFindWebsetBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        try:
-            webset = aexa.websets.get(id=input_data.external_id)
+            webset = await aexa.websets.get(id=input_data.external_id)
            webset_result = Webset.model_validate(webset.model_dump(by_alias=True))

            yield "webset", webset_result
@@ -494,7 +494,7 @@ class ExaCreateOrFindWebsetBlock(Block):
                        count=input_data.search_count,
                    )

-                webset = aexa.websets.create(
+                webset = await aexa.websets.create(
                    params=CreateWebsetParameters(
                        search=search_params,
                        external_id=input_data.external_id,
@@ -554,7 +554,7 @@ class ExaUpdateWebsetBlock(Block):
        if input_data.metadata is not None:
            payload["metadata"] = input_data.metadata

-        sdk_webset = aexa.websets.update(id=input_data.webset_id, params=payload)
+        sdk_webset = await aexa.websets.update(id=input_data.webset_id, params=payload)

        status_str = (
            sdk_webset.status.value
@@ -617,7 +617,7 @@ class ExaListWebsetsBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.list(
+        response = await aexa.websets.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
        )
@@ -678,7 +678,7 @@ class ExaGetWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_webset = aexa.websets.get(id=input_data.webset_id)
+        sdk_webset = await aexa.websets.get(id=input_data.webset_id)

        status_str = (
            sdk_webset.status.value
@@ -748,7 +748,7 @@ class ExaDeleteWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_webset = aexa.websets.delete(id=input_data.webset_id)
+        deleted_webset = await aexa.websets.delete(id=input_data.webset_id)

        status_str = (
            deleted_webset.status.value
@@ -798,7 +798,7 @@ class ExaCancelWebsetBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_webset = aexa.websets.cancel(id=input_data.webset_id)
+        canceled_webset = await aexa.websets.cancel(id=input_data.webset_id)

        status_str = (
            canceled_webset.status.value
@@ -968,7 +968,7 @@ class ExaPreviewWebsetBlock(Block):
                entity["description"] = input_data.entity_description
            payload["entity"] = entity

-        sdk_preview = aexa.websets.preview(params=payload)
+        sdk_preview = await aexa.websets.preview(params=payload)

        preview = PreviewWebsetModel.from_sdk(sdk_preview)

@@ -1051,7 +1051,7 @@ class ExaWebsetStatusBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        status = (
            webset.status.value
@@ -1185,7 +1185,7 @@ class ExaWebsetSummaryBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        # Extract basic info
        webset_id = webset.id
@@ -1211,7 +1211,7 @@ class ExaWebsetSummaryBlock(Block):
        total_items = 0

        if input_data.include_sample_items and input_data.sample_size > 0:
-            items_response = aexa.websets.items.list(
+            items_response = await aexa.websets.items.list(
                webset_id=input_data.webset_id, limit=input_data.sample_size
            )
            sample_items_data = [
@@ -1362,7 +1362,7 @@ class ExaWebsetReadyCheckBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get webset details
-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        status = (
            webset.status.value
--- a/autogpt_platform/backend/backend/blocks/exa/websets_enrichment.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_enrichment.py
@@ -202,7 +202,7 @@ class ExaCreateEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_enrichment = aexa.websets.enrichments.create(
+        sdk_enrichment = await aexa.websets.enrichments.create(
            webset_id=input_data.webset_id, params=payload
        )

@@ -223,7 +223,7 @@ class ExaCreateEnrichmentBlock(Block):
            items_enriched = 0

            while time.time() - poll_start < input_data.polling_timeout:
-                current_enrich = aexa.websets.enrichments.get(
+                current_enrich = await aexa.websets.enrichments.get(
                    webset_id=input_data.webset_id, id=enrichment_id
                )
                current_status = (
@@ -234,7 +234,7 @@ class ExaCreateEnrichmentBlock(Block):

                if current_status in ["completed", "failed", "cancelled"]:
                    # Estimate items from webset searches
-                    webset = aexa.websets.get(id=input_data.webset_id)
+                    webset = await aexa.websets.get(id=input_data.webset_id)
                    if webset.searches:
                        for search in webset.searches:
                            if search.progress:
@@ -329,7 +329,7 @@ class ExaGetEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_enrichment = aexa.websets.enrichments.get(
+        sdk_enrichment = await aexa.websets.enrichments.get(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

@@ -474,7 +474,7 @@ class ExaDeleteEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_enrichment = aexa.websets.enrichments.delete(
+        deleted_enrichment = await aexa.websets.enrichments.delete(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

@@ -525,13 +525,13 @@ class ExaCancelEnrichmentBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_enrichment = aexa.websets.enrichments.cancel(
+        canceled_enrichment = await aexa.websets.enrichments.cancel(
            webset_id=input_data.webset_id, id=input_data.enrichment_id
        )

        # Try to estimate how many items were enriched before cancellation
        items_enriched = 0
-        items_response = aexa.websets.items.list(
+        items_response = await aexa.websets.items.list(
            webset_id=input_data.webset_id, limit=100
        )

--- a/autogpt_platform/backend/backend/blocks/exa/websets_import_export.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_import_export.py
@@ -222,7 +222,7 @@ class ExaCreateImportBlock(Block):
    def _create_test_mock():
        """Create test mocks for the AsyncExa SDK."""
        from datetime import datetime
-        from unittest.mock import MagicMock
+        from unittest.mock import AsyncMock, MagicMock

        # Create mock SDK import object
        mock_import = MagicMock()
@@ -247,7 +247,7 @@ class ExaCreateImportBlock(Block):
        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
                websets=MagicMock(
-                    imports=MagicMock(create=lambda *args, **kwargs: mock_import)
+                    imports=MagicMock(create=AsyncMock(return_value=mock_import))
                )
            )
        }
@@ -294,7 +294,7 @@ class ExaCreateImportBlock(Block):
        if input_data.metadata:
            payload["metadata"] = input_data.metadata

-        sdk_import = aexa.websets.imports.create(
+        sdk_import = await aexa.websets.imports.create(
            params=payload, csv_data=input_data.csv_data
        )

@@ -360,7 +360,7 @@ class ExaGetImportBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_import = aexa.websets.imports.get(import_id=input_data.import_id)
+        sdk_import = await aexa.websets.imports.get(import_id=input_data.import_id)

        import_obj = ImportModel.from_sdk(sdk_import)

@@ -426,7 +426,7 @@ class ExaListImportsBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.imports.list(
+        response = await aexa.websets.imports.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
        )
@@ -474,7 +474,9 @@ class ExaDeleteImportBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_import = aexa.websets.imports.delete(import_id=input_data.import_id)
+        deleted_import = await aexa.websets.imports.delete(
+            import_id=input_data.import_id
+        )

        yield "import_id", deleted_import.id
        yield "success", "true"
@@ -573,14 +575,14 @@ class ExaExportWebsetBlock(Block):
            }
        )

-        # Create mock iterator
-        mock_items = [mock_item1, mock_item2]
+        # Create async iterator for list_all
+        async def async_item_iterator(*args, **kwargs):
+            for item in [mock_item1, mock_item2]:
+                yield item

        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
-                websets=MagicMock(
-                    items=MagicMock(list_all=lambda *args, **kwargs: iter(mock_items))
-                )
+                websets=MagicMock(items=MagicMock(list_all=async_item_iterator))
            )
        }

@@ -602,7 +604,7 @@ class ExaExportWebsetBlock(Block):
                webset_id=input_data.webset_id, limit=input_data.max_items
            )

-            for sdk_item in item_iterator:
+            async for sdk_item in item_iterator:
                if len(all_items) >= input_data.max_items:
                    break

--- a/autogpt_platform/backend/backend/blocks/exa/websets_items.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_items.py
@@ -178,7 +178,7 @@ class ExaGetWebsetItemBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_item = aexa.websets.items.get(
+        sdk_item = await aexa.websets.items.get(
            webset_id=input_data.webset_id, id=input_data.item_id
        )

@@ -269,7 +269,7 @@ class ExaListWebsetItemsBlock(Block):
            response = None

            while time.time() - start_time < input_data.wait_timeout:
-                response = aexa.websets.items.list(
+                response = await aexa.websets.items.list(
                    webset_id=input_data.webset_id,
                    cursor=input_data.cursor,
                    limit=input_data.limit,
@@ -282,13 +282,13 @@ class ExaListWebsetItemsBlock(Block):
                interval = min(interval * 1.2, 10)

            if not response:
-                response = aexa.websets.items.list(
+                response = await aexa.websets.items.list(
                    webset_id=input_data.webset_id,
                    cursor=input_data.cursor,
                    limit=input_data.limit,
                )
        else:
-            response = aexa.websets.items.list(
+            response = await aexa.websets.items.list(
                webset_id=input_data.webset_id,
                cursor=input_data.cursor,
                limit=input_data.limit,
@@ -340,7 +340,7 @@ class ExaDeleteWebsetItemBlock(Block):
    ) -> BlockOutput:
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_item = aexa.websets.items.delete(
+        deleted_item = await aexa.websets.items.delete(
            webset_id=input_data.webset_id, id=input_data.item_id
        )

@@ -408,7 +408,7 @@ class ExaBulkWebsetItemsBlock(Block):
            webset_id=input_data.webset_id, limit=input_data.max_items
        )

-        for sdk_item in item_iterator:
+        async for sdk_item in item_iterator:
            if len(all_items) >= input_data.max_items:
                break

@@ -475,7 +475,7 @@ class ExaWebsetItemsSummaryBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        entity_type = "unknown"
        if webset.searches:
@@ -495,7 +495,7 @@ class ExaWebsetItemsSummaryBlock(Block):
        # Get sample items if requested
        sample_items: List[WebsetItemModel] = []
        if input_data.sample_size > 0:
-            items_response = aexa.websets.items.list(
+            items_response = await aexa.websets.items.list(
                webset_id=input_data.webset_id, limit=input_data.sample_size
            )
            # Convert to our stable models
@@ -569,7 +569,7 @@ class ExaGetNewItemsBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get items starting from cursor
-        response = aexa.websets.items.list(
+        response = await aexa.websets.items.list(
            webset_id=input_data.webset_id,
            cursor=input_data.since_cursor,
            limit=input_data.max_items,
--- a/autogpt_platform/backend/backend/blocks/exa/websets_monitor.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_monitor.py
@@ -233,7 +233,7 @@ class ExaCreateMonitorBlock(Block):
    def _create_test_mock():
        """Create test mocks for the AsyncExa SDK."""
        from datetime import datetime
-        from unittest.mock import MagicMock
+        from unittest.mock import AsyncMock, MagicMock

        # Create mock SDK monitor object
        mock_monitor = MagicMock()
@@ -263,7 +263,7 @@ class ExaCreateMonitorBlock(Block):
        return {
            "_get_client": lambda *args, **kwargs: MagicMock(
                websets=MagicMock(
-                    monitors=MagicMock(create=lambda *args, **kwargs: mock_monitor)
+                    monitors=MagicMock(create=AsyncMock(return_value=mock_monitor))
                )
            )
        }
@@ -320,7 +320,7 @@ class ExaCreateMonitorBlock(Block):
        if input_data.metadata:
            payload["metadata"] = input_data.metadata

-        sdk_monitor = aexa.websets.monitors.create(params=payload)
+        sdk_monitor = await aexa.websets.monitors.create(params=payload)

        monitor = MonitorModel.from_sdk(sdk_monitor)

@@ -384,7 +384,7 @@ class ExaGetMonitorBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_monitor = aexa.websets.monitors.get(monitor_id=input_data.monitor_id)
+        sdk_monitor = await aexa.websets.monitors.get(monitor_id=input_data.monitor_id)

        monitor = MonitorModel.from_sdk(sdk_monitor)

@@ -476,7 +476,7 @@ class ExaUpdateMonitorBlock(Block):
        if input_data.metadata is not None:
            payload["metadata"] = input_data.metadata

-        sdk_monitor = aexa.websets.monitors.update(
+        sdk_monitor = await aexa.websets.monitors.update(
            monitor_id=input_data.monitor_id, params=payload
        )

@@ -522,7 +522,9 @@ class ExaDeleteMonitorBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        deleted_monitor = aexa.websets.monitors.delete(monitor_id=input_data.monitor_id)
+        deleted_monitor = await aexa.websets.monitors.delete(
+            monitor_id=input_data.monitor_id
+        )

        yield "monitor_id", deleted_monitor.id
        yield "success", "true"
@@ -579,7 +581,7 @@ class ExaListMonitorsBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        response = aexa.websets.monitors.list(
+        response = await aexa.websets.monitors.list(
            cursor=input_data.cursor,
            limit=input_data.limit,
            webset_id=input_data.webset_id,
--- a/autogpt_platform/backend/backend/blocks/exa/websets_polling.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_polling.py
@@ -121,7 +121,7 @@ class ExaWaitForWebsetBlock(Block):
                WebsetTargetStatus.IDLE,
                WebsetTargetStatus.ANY_COMPLETE,
            ]:
-                final_webset = aexa.websets.wait_until_idle(
+                final_webset = await aexa.websets.wait_until_idle(
                    id=input_data.webset_id,
                    timeout=input_data.timeout,
                    poll_interval=input_data.check_interval,
@@ -164,7 +164,7 @@ class ExaWaitForWebsetBlock(Block):
                interval = input_data.check_interval
                while time.time() - start_time < input_data.timeout:
                    # Get current webset status
-                    webset = aexa.websets.get(id=input_data.webset_id)
+                    webset = await aexa.websets.get(id=input_data.webset_id)
                    current_status = (
                        webset.status.value
                        if hasattr(webset.status, "value")
@@ -209,7 +209,7 @@ class ExaWaitForWebsetBlock(Block):

                # Timeout reached
                elapsed = time.time() - start_time
-                webset = aexa.websets.get(id=input_data.webset_id)
+                webset = await aexa.websets.get(id=input_data.webset_id)
                final_status = (
                    webset.status.value
                    if hasattr(webset.status, "value")
@@ -345,7 +345,7 @@ class ExaWaitForSearchBlock(Block):
        try:
            while time.time() - start_time < input_data.timeout:
                # Get current search status using SDK
-                search = aexa.websets.searches.get(
+                search = await aexa.websets.searches.get(
                    webset_id=input_data.webset_id, id=input_data.search_id
                )

@@ -401,7 +401,7 @@ class ExaWaitForSearchBlock(Block):
            elapsed = time.time() - start_time

            # Get last known status
-            search = aexa.websets.searches.get(
+            search = await aexa.websets.searches.get(
                webset_id=input_data.webset_id, id=input_data.search_id
            )
            final_status = (
@@ -503,7 +503,7 @@ class ExaWaitForEnrichmentBlock(Block):
        try:
            while time.time() - start_time < input_data.timeout:
                # Get current enrichment status using SDK
-                enrichment = aexa.websets.enrichments.get(
+                enrichment = await aexa.websets.enrichments.get(
                    webset_id=input_data.webset_id, id=input_data.enrichment_id
                )

@@ -548,7 +548,7 @@ class ExaWaitForEnrichmentBlock(Block):
            elapsed = time.time() - start_time

            # Get last known status
-            enrichment = aexa.websets.enrichments.get(
+            enrichment = await aexa.websets.enrichments.get(
                webset_id=input_data.webset_id, id=input_data.enrichment_id
            )
            final_status = (
@@ -575,7 +575,7 @@ class ExaWaitForEnrichmentBlock(Block):
    ) -> tuple[list[SampleEnrichmentModel], int]:
        """Get sample enriched data and count."""
        # Get a few items to see enrichment results using SDK
-        response = aexa.websets.items.list(webset_id=webset_id, limit=5)
+        response = await aexa.websets.items.list(webset_id=webset_id, limit=5)

        sample_data: list[SampleEnrichmentModel] = []
        enriched_count = 0
--- a/autogpt_platform/backend/backend/blocks/exa/websets_search.py
+++ b/autogpt_platform/backend/backend/blocks/exa/websets_search.py
@@ -317,7 +317,7 @@ class ExaCreateWebsetSearchBlock(Block):

        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_search = aexa.websets.searches.create(
+        sdk_search = await aexa.websets.searches.create(
            webset_id=input_data.webset_id, params=payload
        )

@@ -350,7 +350,7 @@ class ExaCreateWebsetSearchBlock(Block):
            poll_start = time.time()

            while time.time() - poll_start < input_data.polling_timeout:
-                current_search = aexa.websets.searches.get(
+                current_search = await aexa.websets.searches.get(
                    webset_id=input_data.webset_id, id=search_id
                )
                current_status = (
@@ -442,7 +442,7 @@ class ExaGetWebsetSearchBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        sdk_search = aexa.websets.searches.get(
+        sdk_search = await aexa.websets.searches.get(
            webset_id=input_data.webset_id, id=input_data.search_id
        )

@@ -523,7 +523,7 @@ class ExaCancelWebsetSearchBlock(Block):
        # Use AsyncExa SDK
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

-        canceled_search = aexa.websets.searches.cancel(
+        canceled_search = await aexa.websets.searches.cancel(
            webset_id=input_data.webset_id, id=input_data.search_id
        )

@@ -604,7 +604,7 @@ class ExaFindOrCreateSearchBlock(Block):
        aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())

        # Get webset to check existing searches
-        webset = aexa.websets.get(id=input_data.webset_id)
+        webset = await aexa.websets.get(id=input_data.webset_id)

        # Look for existing search with same query
        existing_search = None
@@ -636,7 +636,7 @@ class ExaFindOrCreateSearchBlock(Block):
            if input_data.entity_type != SearchEntityType.AUTO:
                payload["entity"] = {"type": input_data.entity_type.value}

-            sdk_search = aexa.websets.searches.create(
+            sdk_search = await aexa.websets.searches.create(
                webset_id=input_data.webset_id, params=payload
            )

--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -531,12 +531,12 @@ class LLMResponse(BaseModel):

 def convert_openai_tool_fmt_to_anthropic(
    openai_tools: list[dict] | None = None,
-) -> Iterable[ToolParam] | anthropic.NotGiven:
+) -> Iterable[ToolParam] | anthropic.Omit:
    """
    Convert OpenAI tool format to Anthropic tool format.
    """
    if not openai_tools or len(openai_tools) == 0:
-        return anthropic.NOT_GIVEN
+        return anthropic.omit

    anthropic_tools = []
    for tool in openai_tools:
@@ -596,10 +596,10 @@ def extract_openai_tool_calls(response) -> list[ToolContentBlock] | None:

 def get_parallel_tool_calls_param(
    llm_model: LlmModel, parallel_tool_calls: bool | None
-):
+) -> bool | openai.Omit:
    """Get the appropriate parallel_tool_calls parameter for OpenAI-compatible APIs."""
    if llm_model.startswith("o") or parallel_tool_calls is None:
-        return openai.NOT_GIVEN
+        return openai.omit
    return parallel_tool_calls


--- a/autogpt_platform/backend/backend/copilot/init.py
+++ b/autogpt_platform/backend/backend/copilot/init.py
@@ -0,0 +1,8 @@
+"""CoPilot module - AI assistant for AutoGPT platform.
+
+This module contains the core CoPilot functionality including:
+- AI generation service (LLM calls)
+- Tool execution
+- Session management
+- Stream registry for SSE reconnection
+"""
--- a/autogpt_platform/backend/backend/api/features/chat/completion_consumer.py
+++ b/autogpt_platform/backend/backend/api/features/chat/completion_consumer.py
@@ -119,8 +119,9 @@ class ChatCompletionConsumer:
        """Lazily initialize Prisma client on first use."""
        if self._prisma is None:
            database_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
-            self._prisma = Prisma(datasource={"url": database_url})
-            await self._prisma.connect()
+            prisma = Prisma(datasource={"url": database_url})
+            await prisma.connect()
+            self._prisma = prisma
            logger.info("[COMPLETION] Consumer Prisma client connected (lazy init)")
        return self._prisma

--- a/autogpt_platform/backend/backend/api/features/chat/completion_handler.py
+++ b/autogpt_platform/backend/backend/api/features/chat/completion_handler.py
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -93,6 +93,12 @@ class ChatConfig(BaseSettings):
        description="Name of the prompt in Langfuse to fetch",
    )

+    # Extended thinking configuration for Claude models
+    thinking_enabled: bool = Field(
+        default=True,
+        description="Enable adaptive thinking for Claude models via OpenRouter",
+    )
+
    @field_validator("api_key", mode="before")
    @classmethod
    def get_api_key(cls, v):
--- a/autogpt_platform/backend/backend/api/features/chat/db.py
+++ b/autogpt_platform/backend/backend/api/features/chat/db.py
@@ -45,10 +45,7 @@ async def create_chat_session(
        successfulAgentRuns=SafeJson({}),
        successfulAgentSchedules=SafeJson({}),
    )
-    return await PrismaChatSession.prisma().create(
-        data=data,
-        include={"Messages": True},
-    )
+    return await PrismaChatSession.prisma().create(data=data)


 async def update_chat_session(
--- a/autogpt_platform/backend/backend/copilot/executor/init.py
+++ b/autogpt_platform/backend/backend/copilot/executor/init.py
@@ -0,0 +1,5 @@
+"""CoPilot Executor - Dedicated service for AI generation and tool execution.
+
+This module contains the executor service that processes CoPilot tasks
+from RabbitMQ, following the graph executor pattern.
+"""
--- a/autogpt_platform/backend/backend/copilot/executor/main.py
+++ b/autogpt_platform/backend/backend/copilot/executor/main.py
@@ -0,0 +1,18 @@
+"""Entry point for running the CoPilot Executor service.
+
+Usage:
+    python -m backend.copilot.executor
+"""
+
+from backend.app import run_processes
+
+from .manager import CoPilotExecutor
+
+
+def main():
+    """Run the CoPilot Executor service."""
+    run_processes(CoPilotExecutor())
+
+
+if __name__ == "__main__":
+    main()
--- a/autogpt_platform/backend/backend/copilot/executor/manager.py
+++ b/autogpt_platform/backend/backend/copilot/executor/manager.py
@@ -0,0 +1,462 @@
+"""CoPilot Executor Manager - main service for CoPilot task execution.
+
+This module contains the CoPilotExecutor class that consumes chat tasks from
+RabbitMQ and processes them using a thread pool, following the graph executor pattern.
+"""
+
+import logging
+import os
+import threading
+import time
+import uuid
+from concurrent.futures import Future, ThreadPoolExecutor
+
+from pika.adapters.blocking_connection import BlockingChannel
+from pika.spec import Basic, BasicProperties
+from prometheus_client import Gauge, start_http_server
+
+from backend.data import redis_client as redis
+from backend.data.rabbitmq import SyncRabbitMQ
+from backend.executor.cluster_lock import ClusterLock
+from backend.util.decorator import error_logged
+from backend.util.logging import TruncatedLogger
+from backend.util.process import AppProcess
+from backend.util.retry import continuous_retry, func_retry
+from backend.util.settings import Settings
+
+from .processor import execute_copilot_task, init_worker
+from .utils import (
+    COPILOT_CANCEL_QUEUE_NAME,
+    COPILOT_EXECUTION_QUEUE_NAME,
+    GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS,
+    CancelCoPilotEvent,
+    CoPilotExecutionEntry,
+    create_copilot_queue_config,
+)
+
+logger = TruncatedLogger(logging.getLogger(__name__), prefix="[CoPilotExecutor]")
+settings = Settings()
+
+# Prometheus metrics
+active_tasks_gauge = Gauge(
+    "copilot_executor_active_tasks",
+    "Number of active CoPilot tasks",
+)
+pool_size_gauge = Gauge(
+    "copilot_executor_pool_size",
+    "Maximum number of CoPilot executor workers",
+)
+utilization_gauge = Gauge(
+    "copilot_executor_utilization_ratio",
+    "Ratio of active tasks to pool size",
+)
+
+
+class CoPilotExecutor(AppProcess):
+    """CoPilot Executor service for processing chat generation tasks.
+
+    This service consumes tasks from RabbitMQ, processes them using a thread pool,
+    and publishes results to Redis Streams. It follows the graph executor pattern
+    for reliable message handling and graceful shutdown.
+
+    Key features:
+    - RabbitMQ-based task distribution with manual acknowledgment
+    - Thread pool executor for concurrent task processing
+    - Cluster lock for duplicate prevention across pods
+    - Graceful shutdown with timeout for in-flight tasks
+    - FANOUT exchange for cancellation broadcast
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.pool_size = settings.config.num_copilot_workers
+        self.active_tasks: dict[str, tuple[Future, threading.Event]] = {}
+        self.executor_id = str(uuid.uuid4())
+
+        self._executor = None
+        self._stop_consuming = None
+
+        self._cancel_thread = None
+        self._cancel_client = None
+        self._run_thread = None
+        self._run_client = None
+
+        self._task_locks: dict[str, ClusterLock] = {}
+
+    # ============ Main Entry Points (AppProcess interface) ============ #
+
+    def run(self):
+        """Main service loop - consume from RabbitMQ."""
+        logger.info(f"Pod assigned executor_id: {self.executor_id}")
+        logger.info(f"Spawn max-{self.pool_size} workers...")
+
+        pool_size_gauge.set(self.pool_size)
+        self._update_metrics()
+        start_http_server(settings.config.copilot_executor_port)
+
+        self.cancel_thread.start()
+        self.run_thread.start()
+
+        while True:
+            time.sleep(1e5)
+
+    def cleanup(self):
+        """Graceful shutdown: stop consuming, wait for active tasks, release locks."""
+        pid = os.getpid()
+        logger.info(f"[cleanup {pid}] Starting graceful shutdown...")
+
+        # Signal the consumer thread to stop
+        try:
+            self.stop_consuming.set()
+            run_channel = self.run_client.get_channel()
+            run_channel.connection.add_callback_threadsafe(
+                lambda: run_channel.stop_consuming()
+            )
+            logger.info(f"[cleanup {pid}] Consumer has been signaled to stop")
+        except Exception as e:
+            logger.error(f"[cleanup {pid}] Error stopping consumer: {e}")
+
+        # Wait (bounded by the shutdown timeout) for active executions to complete
+        if self.active_tasks:
+            logger.info(
+                f"[cleanup {pid}] Waiting for {len(self.active_tasks)} active tasks to complete (timeout: {GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS}s)..."
+            )
+
+            start_time = time.monotonic()
+            last_refresh = start_time
+            lock_refresh_interval = settings.config.cluster_lock_timeout / 10
+
+            while (
+                self.active_tasks
+                and (time.monotonic() - start_time) < GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS
+            ):
+                self._cleanup_completed_tasks()
+                if not self.active_tasks:
+                    break
+
+                # Periodically refresh locks (snapshot: done-callbacks delete entries)
+                current_time = time.monotonic()
+                if current_time - last_refresh >= lock_refresh_interval:
+                    for lock in list(self._task_locks.values()):
+                        try:
+                            lock.refresh()
+                        except Exception as e:
+                            logger.warning(
+                                f"[cleanup {pid}] Failed to refresh lock: {e}"
+                            )
+                    last_refresh = current_time
+
+                logger.info(
+                    f"[cleanup {pid}] {len(self.active_tasks)} tasks still active, waiting..."
+                )
+                time.sleep(10.0)
+
+        # Stop message consumers (only those whose thread was actually created)
+        if self._run_thread:
+            self._stop_message_consumers(
+                self._run_thread, self.run_client, "[cleanup][run]"
+            )
+        if self._cancel_thread:
+            self._stop_message_consumers(
+                self._cancel_thread, self.cancel_client, "[cleanup][cancel]"
+            )
+
+        # Shutdown executor without blocking on tasks that are still running
+        if self._executor:
+            logger.info(f"[cleanup {pid}] Shutting down executor...")
+            self._executor.shutdown(wait=False)
+
+        # Release any remaining locks
+        for task_id, lock in list(self._task_locks.items()):
+            try:
+                lock.release()
+                logger.info(f"[cleanup {pid}] Released lock for {task_id}")
+            except Exception as e:
+                logger.error(
+                    f"[cleanup {pid}] Failed to release lock for {task_id}: {e}"
+                )
+
+        logger.info(f"[cleanup {pid}] Graceful shutdown completed")
+
+    # ============ RabbitMQ Consumer Methods ============ #
+
+    @continuous_retry()
+    def _consume_cancel(self):
+        """Consume cancellation messages from FANOUT exchange."""
+        if self.stop_consuming.is_set() and not self.active_tasks:
+            logger.info("Stop reconnecting cancel consumer - service cleaned up")
+            return
+
+        if not self.cancel_client.is_ready:
+            self.cancel_client.disconnect()
+        self.cancel_client.connect()
+        cancel_channel = self.cancel_client.get_channel()
+        cancel_channel.basic_consume(
+            queue=COPILOT_CANCEL_QUEUE_NAME,
+            on_message_callback=self._handle_cancel_message,
+            auto_ack=True,
+        )
+        logger.info("Starting cancel message consumer...")
+        cancel_channel.start_consuming()
+        if not self.stop_consuming.is_set() or self.active_tasks:
+            raise RuntimeError("Cancel message consumer stopped unexpectedly")
+        logger.info("Cancel message consumer stopped gracefully")
+
+    @continuous_retry()
+    def _consume_run(self):
+        """Consume run messages from DIRECT exchange."""
+        if self.stop_consuming.is_set():
+            logger.info("Stop reconnecting run consumer - service cleaned up")
+            return
+
+        if not self.run_client.is_ready:
+            self.run_client.disconnect()
+        self.run_client.connect()
+        run_channel = self.run_client.get_channel()
+        run_channel.basic_qos(prefetch_count=self.pool_size)
+
+        run_channel.basic_consume(
+            queue=COPILOT_EXECUTION_QUEUE_NAME,
+            on_message_callback=self._handle_run_message,
+            auto_ack=False,
+            consumer_tag="copilot_execution_consumer",
+        )
+        run_channel.confirm_delivery()
+        logger.info("Starting to consume run messages...")
+        run_channel.start_consuming()
+        if not self.stop_consuming.is_set():
+            raise RuntimeError("Run message consumer stopped unexpectedly")
+        logger.info("Run message consumer stopped gracefully")
+
+    # ============ Message Handlers ============ #
+
+    @error_logged(swallow=True)
+    def _handle_cancel_message(
+        self,
+        _channel: BlockingChannel,
+        _method: Basic.Deliver,
+        _properties: BasicProperties,
+        body: bytes,
+    ):
+        """Handle a FANOUT cancel message by signalling the task if it runs here."""
+        task_id = CancelCoPilotEvent.model_validate_json(body).task_id
+        if not task_id:
+            logger.warning("Cancel message missing 'task_id'")
+            return
+        # One .get() lookup: a done-callback may pop the entry between "in" and [].
+        active = self.active_tasks.get(task_id)
+        if active is None:
+            logger.debug(f"Cancel received for {task_id} but not active")
+            return
+        _, cancel_event = active
+        logger.info(f"Received cancel for {task_id}")
+        if cancel_event.is_set():
+            logger.debug(f"Cancel already set for {task_id}")
+        else:
+            cancel_event.set()
+
+    def _handle_run_message(
+        self,
+        _channel: BlockingChannel,
+        method: Basic.Deliver,
+        _properties: BasicProperties,
+        body: bytes,
+    ):
+        """Handle run message from DIRECT exchange."""
+        delivery_tag = method.delivery_tag
+
+        @func_retry
+        def ack_message(reject: bool, requeue: bool):
+            """Acknowledge or reject the message."""
+            channel = self.run_client.get_channel()
+            if reject:
+                channel.connection.add_callback_threadsafe(
+                    lambda: channel.basic_nack(delivery_tag, requeue=requeue)
+                )
+            else:
+                channel.connection.add_callback_threadsafe(
+                    lambda: channel.basic_ack(delivery_tag)
+                )
+
+        # Check if we're shutting down
+        if self.stop_consuming.is_set():
+            logger.info("Rejecting new task during shutdown")
+            ack_message(reject=True, requeue=True)
+            return
+
+        # Check if we can accept more tasks
+        self._cleanup_completed_tasks()
+        if len(self.active_tasks) >= self.pool_size:
+            ack_message(reject=True, requeue=True)
+            return
+
+        try:
+            entry = CoPilotExecutionEntry.model_validate_json(body)
+        except Exception as e:
+            logger.error(f"Could not parse run message: {e}, body={body}")
+            ack_message(reject=True, requeue=False)
+            return
+
+        task_id = entry.task_id
+
+        # Check for local duplicate
+        if task_id in self.active_tasks:
+            logger.warning(f"Task {task_id} already running locally")
+            ack_message(reject=True, requeue=True)
+            return
+
+        # Try to acquire cluster-wide lock
+        cluster_lock = ClusterLock(
+            redis=redis.get_redis(),
+            key=f"copilot_lock:{task_id}",
+            owner_id=self.executor_id,
+            timeout=settings.config.cluster_lock_timeout,
+        )
+        current_owner = cluster_lock.try_acquire()
+        if current_owner != self.executor_id:
+            if current_owner is not None:
+                logger.warning(f"Task {task_id} already running on pod {current_owner}")
+                ack_message(reject=True, requeue=False)
+            else:
+                logger.warning(
+                    f"Could not acquire lock for {task_id} - Redis unavailable"
+                )
+                ack_message(reject=True, requeue=True)
+            return
+
+        # Execute the task
+        try:
+            self._task_locks[task_id] = cluster_lock
+
+            logger.info(
+                f"Acquired cluster lock for {task_id}, executor_id={self.executor_id}"
+            )
+
+            cancel_event = threading.Event()
+            future = self.executor.submit(
+                execute_copilot_task, entry, cancel_event, cluster_lock
+            )
+            self.active_tasks[task_id] = (future, cancel_event)
+        except Exception as e:
+            logger.warning(f"Failed to setup execution for {task_id}: {e}")
+            cluster_lock.release()
+            if task_id in self._task_locks:
+                del self._task_locks[task_id]
+            ack_message(reject=True, requeue=True)
+            return
+
+        self._update_metrics()
+
+        def on_run_done(f: Future):
+            logger.info(f"Run completed for {task_id}")
+            try:
+                if exec_error := f.exception():
+                    logger.error(f"Execution for {task_id} failed: {exec_error}")
+                    ack_message(reject=True, requeue=True)
+                else:
+                    ack_message(reject=False, requeue=False)
+            except BaseException as e:
+                logger.exception(f"Error in run completion callback: {e}")
+            finally:
+                # Release the cluster lock
+                if task_id in self._task_locks:
+                    logger.info(f"Releasing cluster lock for {task_id}")
+                    self._task_locks[task_id].release()
+                    del self._task_locks[task_id]
+                self._cleanup_completed_tasks()
+
+        future.add_done_callback(on_run_done)
+
+    # ============ Helper Methods ============ #
+
+    def _cleanup_completed_tasks(self) -> list[str]:
+        """Drop finished futures from active_tasks, update metrics, return their IDs."""
+        # Iterate over a snapshot: done-callbacks on worker threads and the
+        # consumer thread mutate this dict concurrently, and iterating it live
+        # can raise "dictionary changed size during iteration".
+        snapshot = list(self.active_tasks.items())
+        completed_tasks = [tid for tid, (future, _) in snapshot if future.done()]
+
+        for task_id in completed_tasks:
+            logger.info(f"Cleaned up completed task {task_id}")
+            self.active_tasks.pop(task_id, None)
+        self._update_metrics()
+        return completed_tasks
+
+    def _update_metrics(self):
+        """Publish the active-task count and the pool utilization ratio gauges."""
+        active_count = len(self.active_tasks)
+        active_tasks_gauge.set(active_count)
+        # Once consumption has been stopped the pool is reported as fully utilized.
+        if self.stop_consuming.is_set():
+            ratio = 1.0
+        else:
+            ratio = active_count / self.pool_size if self.pool_size > 0 else 0
+        utilization_gauge.set(ratio)
+
+    def _stop_message_consumers(
+        self, thread: threading.Thread, client: SyncRabbitMQ, prefix: str
+    ):
+        """Stop a consumer thread and disconnect its client; errors are only logged."""
+        try:
+            channel = client.get_channel()
+            channel.connection.add_callback_threadsafe(lambda: channel.stop_consuming())
+            # Thread.join() returns None on timeout instead of raising, so the
+            # thread's liveness has to be checked explicitly afterwards.
+            thread.join(timeout=300)
+            if thread.is_alive():
+                logger.error(
+                    f"{prefix} Thread did not finish in time, forcing disconnect"
+                )
+
+            client.disconnect()
+            logger.info(f"{prefix} Client disconnected")
+        except Exception as e:
+            logger.error(f"{prefix} Error disconnecting client: {e}")
+
+    # ============ Lazy-initialized Properties ============ #
+
+    @property
+    def cancel_thread(self) -> threading.Thread:
+        if self._cancel_thread is None:
+            self._cancel_thread = threading.Thread(
+                target=lambda: self._consume_cancel(),
+                daemon=True,
+            )
+        return self._cancel_thread
+
+    @property
+    def run_thread(self) -> threading.Thread:
+        if self._run_thread is None:
+            self._run_thread = threading.Thread(
+                target=lambda: self._consume_run(),
+                daemon=True,
+            )
+        return self._run_thread
+
+    @property
+    def stop_consuming(self) -> threading.Event:
+        if self._stop_consuming is None:
+            self._stop_consuming = threading.Event()
+        return self._stop_consuming
+
+    @property
+    def executor(self) -> ThreadPoolExecutor:
+        if self._executor is None:
+            self._executor = ThreadPoolExecutor(
+                max_workers=self.pool_size,
+                initializer=init_worker,
+            )
+        return self._executor
+
+    @property
+    def cancel_client(self) -> SyncRabbitMQ:
+        if self._cancel_client is None:
+            self._cancel_client = SyncRabbitMQ(create_copilot_queue_config())
+        return self._cancel_client
+
+    @property
+    def run_client(self) -> SyncRabbitMQ:
+        if self._run_client is None:
+            self._run_client = SyncRabbitMQ(create_copilot_queue_config())
+        return self._run_client
--- a/autogpt_platform/backend/backend/copilot/executor/processor.py
+++ b/autogpt_platform/backend/backend/copilot/executor/processor.py
@@ -0,0 +1,252 @@
+"""CoPilot execution processor - per-worker execution logic.
+
+This module contains the processor class that handles CoPilot task execution
+in a thread-local context, following the graph executor pattern.
+"""
+
+import asyncio
+import logging
+import threading
+import time
+
+from backend.copilot import service as copilot_service
+from backend.copilot import stream_registry
+from backend.copilot.response_model import StreamError, StreamFinish, StreamFinishStep
+from backend.executor.cluster_lock import ClusterLock
+from backend.util.decorator import error_logged
+from backend.util.logging import TruncatedLogger, configure_logging
+from backend.util.process import set_service_name
+from backend.util.retry import func_retry
+
+from .utils import CoPilotExecutionEntry, CoPilotLogMetadata
+
+logger = TruncatedLogger(logging.getLogger(__name__), prefix="[CoPilotExecutor]")
+
+
+# ============ Module Entry Points ============ #
+
+# Thread-local storage for processor instances
+_tls = threading.local()
+
+
+def execute_copilot_task(
+    entry: CoPilotExecutionEntry,
+    cancel: threading.Event,
+    cluster_lock: ClusterLock,
+):
+    """Thread-pool entry point: run a CoPilot task on this thread's processor.
+
+    The processor is read from thread-local storage, where ``init_worker``
+    stores it, and its ``execute`` result is returned unchanged.
+
+    Args:
+        entry: The task payload
+        cancel: Threading event to signal cancellation
+        cluster_lock: Distributed lock for this execution
+    """
+    return _tls.processor.execute(entry, cancel, cluster_lock)
+
+
+def init_worker():
+    """Create and start the processor bound to the current worker thread.
+
+    The processor is kept in thread-local storage, so each thread has its own.
+    """
+    processor = CoPilotProcessor()
+    _tls.processor = processor
+    processor.on_executor_start()
+
+
+# ============ Processor Class ============ #
+
+
+class CoPilotProcessor:
+    """Per-worker execution logic for CoPilot tasks.
+
+    This class is instantiated once per worker thread and handles the execution
+    of CoPilot chat generation tasks. It maintains an async event loop for
+    running the async service code.
+
+    The execution flow:
+        1. CoPilot task is picked from RabbitMQ queue
+        2. Manager submits task to thread pool
+        3. Processor executes the task in its event loop
+        4. Results are published to Redis Streams
+    """
+
+    @func_retry
+    def on_executor_start(self):
+        """Initialize the processor when the worker thread starts.
+
+        This method is called once per worker thread to set up the async event
+        loop, connect to Prisma, and initialize any required resources.
+        """
+        configure_logging()
+        set_service_name("CoPilotExecutor")
+        self.tid = threading.get_ident()
+        self.execution_loop = asyncio.new_event_loop()
+        self.execution_thread = threading.Thread(
+            target=self.execution_loop.run_forever, daemon=True
+        )
+        self.execution_thread.start()
+
+        # Connect to Prisma in the worker's event loop
+        # This is required because the CoPilot service uses Prisma directly
+        # TODO: Use DatabaseManager, avoid direct Prisma connection(?)
+        asyncio.run_coroutine_threadsafe(
+            self._connect_prisma(), self.execution_loop
+        ).result(timeout=30.0)
+
+        logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
+
+    async def _connect_prisma(self):
+        """Connect to Prisma via ``backend.data.db`` unless already connected."""
+        from backend.data import db
+        if db.is_connected():
+            return
+        await db.connect()
+        logger.info(f"[CoPilotExecutor] Worker {self.tid} connected to Prisma")
+
+    @error_logged(swallow=False)
+    def execute(
+        self,
+        entry: CoPilotExecutionEntry,
+        cancel: threading.Event,
+        cluster_lock: ClusterLock,
+    ):
+        """Execute a CoPilot task.
+
+        Runs the async execution logic on the worker's event loop and polls it
+        once per second to honour cancellation and refresh the cluster lock.
+
+        Args:
+            entry: The task payload containing session and message info
+            cancel: Threading event to signal cancellation
+            cluster_lock: Distributed lock to prevent duplicate execution
+
+        Raises:
+            Exception: Any failure, re-raised after trying to mark the task failed.
+        """
+        from concurrent.futures import TimeoutError as FutureTimeoutError
+        log = CoPilotLogMetadata(
+            logging.getLogger(__name__),
+            task_id=entry.task_id,
+            session_id=entry.session_id,
+            user_id=entry.user_id,
+        )
+        log.info("Starting execution")
+        start_time = time.monotonic()
+        try:
+            # Run the async execution in our event loop
+            future = asyncio.run_coroutine_threadsafe(
+                self._execute_async(entry, cancel, cluster_lock, log),
+                self.execution_loop,
+            )
+            # Poll for completion. Before Python 3.11 Future.result() times out with
+            # concurrent.futures.TimeoutError, which is not asyncio.TimeoutError.
+            while not future.done():
+                try:
+                    future.result(timeout=1.0)
+                except (FutureTimeoutError, asyncio.TimeoutError):
+                    if cancel.is_set():
+                        log.info("Cancellation requested")
+                        future.cancel()
+                        break
+                    # Refresh cluster lock to maintain ownership
+                    cluster_lock.refresh()
+            if not future.cancelled():
+                future.result()  # propagate any exception raised by the coroutine
+            elapsed = time.monotonic() - start_time
+            log.info(f"Execution completed in {elapsed:.2f}s")
+        except Exception as e:
+            elapsed = time.monotonic() - start_time
+            log.error(f"Execution failed after {elapsed:.2f}s: {e}")
+            try:  # mark the task failed, without masking the original error
+                asyncio.run_coroutine_threadsafe(
+                    self._mark_task_failed(entry.task_id, str(e)),
+                    self.execution_loop,
+                ).result(timeout=10.0)
+            except Exception as mark_error:
+                log.error(f"Could not mark task as failed: {mark_error}")
+            raise
+
+    async def _execute_async(
+        self,
+        entry: CoPilotExecutionEntry,
+        cancel: threading.Event,
+        cluster_lock: ClusterLock,
+        log: CoPilotLogMetadata,
+    ):
+        """Stream one CoPilot task's chunks into the stream registry.
+
+        Iterates `copilot_service.stream_chat_completion` and publishes each
+        chunk under `entry.task_id`, then records the final task status.
+
+        Args:
+            entry: The task payload
+            cancel: Threading event, checked once per received chunk
+            cluster_lock: Distributed lock, refreshed while chunks arrive once
+                30 seconds have passed since the previous refresh
+            log: Structured logger for this task
+        """
+        task_id = entry.task_id
+        lock_refresh_period = 30.0  # seconds between cluster lock refreshes
+        refreshed_at = time.monotonic()
+
+        try:
+            chunk_stream = copilot_service.stream_chat_completion(
+                session_id=entry.session_id,
+                message=entry.message or None,
+                is_user_message=entry.is_user_message,
+                user_id=entry.user_id,
+                context=entry.context,
+                _task_id=task_id,
+            )
+
+            async for chunk in chunk_stream:
+                # A set cancel event ends the stream with an error + finish sequence
+                if cancel.is_set():
+                    log.info("Cancelled during streaming")
+                    await stream_registry.publish_chunk(
+                        task_id, StreamError(errorText="Operation cancelled")
+                    )
+                    await stream_registry.publish_chunk(task_id, StreamFinishStep())
+                    await stream_registry.publish_chunk(task_id, StreamFinish())
+                    await stream_registry.mark_task_completed(task_id, status="failed")
+                    return
+
+                # Keep the cluster lock alive during long streams
+                now = time.monotonic()
+                if now - refreshed_at >= lock_refresh_period:
+                    cluster_lock.refresh()
+                    refreshed_at = now
+
+                # Publish chunk to stream registry
+                await stream_registry.publish_chunk(task_id, chunk)
+
+            # Stream exhausted without cancellation or error
+            await stream_registry.mark_task_completed(task_id, status="completed")
+            log.info("Task completed successfully")
+
+        except asyncio.CancelledError:
+            # Record the outcome, then let the cancellation propagate
+            log.info("Task cancelled")
+            await stream_registry.mark_task_completed(task_id, status="failed")
+            raise
+
+        except Exception as e:
+            log.error(f"Task failed: {e}")
+            await self._mark_task_failed(task_id, str(e))
+            raise
+
+    async def _mark_task_failed(self, task_id: str, error_message: str):
+        """Publish a terminal error sequence for `task_id` and flag it as failed.
+
+        Best-effort: any exception raised while publishing or updating the
+        status is logged and swallowed instead of reaching the caller.
+        """
+        try:
+            # Emitted in order: error, then finish-step, then finish.
+            terminal_chunks = (
+                StreamError(errorText=error_message),
+                StreamFinishStep(),
+                StreamFinish(),
+            )
+            for terminal_chunk in terminal_chunks:
+                await stream_registry.publish_chunk(task_id, terminal_chunk)
+            await stream_registry.mark_task_completed(task_id, status="failed")
+        except Exception as e:
+            logger.error(f"Failed to mark task {task_id} as failed: {e}")
--- a/autogpt_platform/backend/backend/copilot/executor/utils.py
+++ b/autogpt_platform/backend/backend/copilot/executor/utils.py
@@ -0,0 +1,207 @@
+"""RabbitMQ queue configuration for CoPilot executor.
+
+Defines two exchanges and queues following the graph executor pattern:
+- 'copilot_execution' (DIRECT) for chat generation tasks
+- 'copilot_cancel' (FANOUT) for cancellation requests
+"""
+
+import logging
+
+from pydantic import BaseModel
+
+from backend.data.rabbitmq import Exchange, ExchangeType, Queue, RabbitMQConfig
+from backend.util.logging import TruncatedLogger, is_structured_logging_enabled
+
+logger = logging.getLogger(__name__)
+
+
+# ============ Logging Helper ============ #
+
+
+class CoPilotLogMetadata(TruncatedLogger):
+    """`TruncatedLogger` preconfigured with a CoPilot executor prefix and metadata.
+
+    When structured logging is enabled the prefix is the bare
+    "[CoPilotExecutor]" tag. Otherwise the prefix also spells out every
+    metadata pair as "key:value", joined by "|", for easier local debugging.
+    In both modes the metadata dict (plus a "component" entry) is handed to
+    the base class.
+
+    Args:
+        logger: The underlying logger instance
+        max_length: Maximum log message length before truncation
+        **kwargs: Metadata key-value pairs (e.g., task_id="abc", session_id="xyz").
+            Pairs whose value is None are dropped.
+    """
+
+    def __init__(
+        self,
+        logger: logging.Logger,
+        max_length: int = 1000,
+        **kwargs: str | None,
+    ):
+        # Keep only the pairs that actually carry a value
+        metadata = {key: value for key, value in kwargs.items() if value is not None}
+        metadata["component"] = "CoPilotExecutor"
+
+        prefix = "[CoPilotExecutor]"
+        if not is_structured_logging_enabled():
+            # Local mode: inline every pair except the fixed component entry
+            shown_pairs = [
+                f"{key}:{value}" for key, value in metadata.items() if key != "component"
+            ]
+            joined = "|".join(shown_pairs)
+            if joined:
+                prefix = f"[CoPilotExecutor|{joined}]"
+
+        super().__init__(
+            logger,
+            max_length=max_length,
+            prefix=prefix,
+            metadata=metadata,
+        )
+
+
+# ============ Exchange and Queue Configuration ============ #
+
+# Durable DIRECT exchange that chat generation tasks are published to.
+COPILOT_EXECUTION_EXCHANGE = Exchange(
+    name="copilot_execution",
+    type=ExchangeType.DIRECT,
+    durable=True,
+    auto_delete=False,
+)
+# Name of the queue bound to the execution exchange, and the routing key
+# used both for that binding and when publishing tasks.
+COPILOT_EXECUTION_QUEUE_NAME = "copilot_execution_queue"
+COPILOT_EXECUTION_ROUTING_KEY = "copilot.run"
+
+# FANOUT exchange for cancellation requests (no routing key is used).
+COPILOT_CANCEL_EXCHANGE = Exchange(
+    name="copilot_cancel",
+    type=ExchangeType.FANOUT,
+    durable=True,
+    auto_delete=True,
+)
+COPILOT_CANCEL_QUEUE_NAME = "copilot_cancel_queue"
+
+# CoPilot operations can include extended thinking and agent generation
+# which may take 30+ minutes to complete. The value is in seconds; it is
+# converted to milliseconds where the execution queue is declared.
+COPILOT_CONSUMER_TIMEOUT_SECONDS = 60 * 60  # 1 hour
+
+# Graceful shutdown timeout - allow in-flight operations to complete
+GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS = 30 * 60  # 30 minutes
+
+
+def create_copilot_queue_config() -> RabbitMQConfig:
+    """Build the RabbitMQ topology used by the CoPilot executor.
+
+    Two durable queues are declared, one per exchange:
+    - the execution queue, bound to 'copilot_execution' (DIRECT) with the
+      execution routing key, for chat generation tasks
+    - the cancel queue, bound to 'copilot_cancel' (FANOUT), for
+      cancellation requests
+
+    Returns:
+        RabbitMQConfig with exchanges and queues defined
+    """
+    # Extended consumer timeout for long-running LLM operations, passed to the
+    # queue in milliseconds. The default 30-minute timeout is insufficient for
+    # extended thinking and agent generation, which can take 30+ minutes.
+    consumer_timeout_ms = COPILOT_CONSUMER_TIMEOUT_SECONDS * 1000
+
+    return RabbitMQConfig(
+        vhost="/",
+        exchanges=[COPILOT_EXECUTION_EXCHANGE, COPILOT_CANCEL_EXCHANGE],
+        queues=[
+            Queue(
+                name=COPILOT_EXECUTION_QUEUE_NAME,
+                exchange=COPILOT_EXECUTION_EXCHANGE,
+                routing_key=COPILOT_EXECUTION_ROUTING_KEY,
+                durable=True,
+                auto_delete=False,
+                arguments={"x-consumer-timeout": consumer_timeout_ms},
+            ),
+            Queue(
+                name=COPILOT_CANCEL_QUEUE_NAME,
+                exchange=COPILOT_CANCEL_EXCHANGE,
+                routing_key="",  # not used for FANOUT
+                durable=True,
+                auto_delete=False,
+            ),
+        ],
+    )
+
+
+# ============ Message Models ============ #
+
+
+class CoPilotExecutionEntry(BaseModel):
+    """Task payload for CoPilot AI generation.
+
+    This model represents a chat generation task to be processed by the executor.
+    `enqueue_copilot_task` builds it and publishes its JSON form
+    (`model_dump_json()`) to the execution exchange.
+    """
+
+    task_id: str
+    """Unique identifier for this task (used for stream registry)"""
+
+    session_id: str
+    """Chat session ID"""
+
+    user_id: str | None
+    """User ID (may be None for anonymous users)"""
+
+    operation_id: str
+    """Operation ID for webhook callbacks and completion tracking"""
+
+    message: str
+    """User's message to process"""
+
+    is_user_message: bool = True
+    """Whether the message is from the user (vs system/assistant)"""
+
+    context: dict[str, str] | None = None
+    """Optional context for the message (e.g., {url: str, content: str})"""
+
+
+class CancelCoPilotEvent(BaseModel):
+    """Event requesting cancellation of the CoPilot operation named by `task_id`."""
+
+    task_id: str
+    """Task ID to cancel"""
+
+
+# ============ Queue Publishing Helpers ============ #
+
+
+async def enqueue_copilot_task(
+    task_id: str,
+    session_id: str,
+    user_id: str | None,
+    operation_id: str,
+    message: str,
+    is_user_message: bool = True,
+    context: dict[str, str] | None = None,
+) -> None:
+    """Publish a CoPilot task to the execution exchange for the executor service.
+
+    The arguments are packed into a `CoPilotExecutionEntry`, serialized to
+    JSON and published with the execution routing key.
+
+    Args:
+        task_id: Unique identifier for this task (used for stream registry)
+        session_id: Chat session ID
+        user_id: User ID (may be None for anonymous users)
+        operation_id: Operation ID for webhook callbacks and completion tracking
+        message: User's message to process
+        is_user_message: Whether the message is from the user (vs system/assistant)
+        context: Optional context for the message (e.g., {url: str, content: str})
+    """
+    from backend.util.clients import get_async_copilot_queue
+
+    # Validate and package the task before touching the queue client
+    task = CoPilotExecutionEntry(
+        task_id=task_id,
+        session_id=session_id,
+        user_id=user_id,
+        operation_id=operation_id,
+        message=message,
+        is_user_message=is_user_message,
+        context=context,
+    )
+
+    client = await get_async_copilot_queue()
+    payload = task.model_dump_json()
+    await client.publish_message(
+        routing_key=COPILOT_EXECUTION_ROUTING_KEY,
+        message=payload,
+        exchange=COPILOT_EXECUTION_EXCHANGE,
+    )
--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
--- a/autogpt_platform/backend/backend/api/features/chat/model_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model_test.py
--- a/autogpt_platform/backend/backend/api/features/chat/response_model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/response_model.py
@@ -18,6 +18,10 @@ class ResponseType(str, Enum):
    START = "start"
    FINISH = "finish"

+    # Step lifecycle (one LLM API call within a message)
+    START_STEP = "start-step"
+    FINISH_STEP = "finish-step"
+
    # Text streaming
    TEXT_START = "text-start"
    TEXT_DELTA = "text-delta"
@@ -57,6 +61,16 @@ class StreamStart(StreamBaseResponse):
        description="Task ID for SSE reconnection. Clients can reconnect using GET /tasks/{taskId}/stream",
    )

+    def to_sse(self) -> str:
+        """Render as one SSE `data:` event holding only `type` and `messageId`.
+
+        Non-protocol fields such as taskId are left out of the payload.
+        """
+        import json
+
+        payload = json.dumps({"type": self.type.value, "messageId": self.messageId})
+        return f"data: {payload}\n\n"
+

 class StreamFinish(StreamBaseResponse):
    """End of message/stream."""
@@ -64,6 +78,26 @@ class StreamFinish(StreamBaseResponse):
    type: ResponseType = ResponseType.FINISH


+class StreamStartStep(StreamBaseResponse):
+    """Start of a step (one LLM API call within a message); type "start-step".
+
+    The AI SDK uses this to add a step-start boundary to message.parts,
+    enabling visual separation between multiple LLM calls in a single message.
+    """
+
+    type: ResponseType = ResponseType.START_STEP
+
+
+class StreamFinishStep(StreamBaseResponse):
+    """End of a step (one LLM API call within a message); type "finish-step".
+
+    The AI SDK uses this to reset activeTextParts and activeReasoningParts,
+    so the next LLM call in a tool-call continuation starts with clean state.
+    """
+
+    type: ResponseType = ResponseType.FINISH_STEP
+
+
 # ========== Text Streaming ==========


@@ -117,7 +151,7 @@ class StreamToolOutputAvailable(StreamBaseResponse):
    type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
    toolCallId: str = Field(..., description="Tool call ID this responds to")
    output: str | dict[str, Any] = Field(..., description="Tool execution output")
-    # Additional fields for internal use (not part of AI SDK spec but useful)
+    # Keep these for internal backend use
    toolName: str | None = Field(
        default=None, description="Name of the tool that was executed"
    )
@@ -125,6 +159,17 @@ class StreamToolOutputAvailable(StreamBaseResponse):
        default=True, description="Whether the tool execution succeeded"
    )

+    def to_sse(self) -> str:
+        """Render as one SSE `data:` event with `type`, `toolCallId` and `output`.
+
+        Non-spec fields such as toolName are left out of the payload.
+        """
+        import json
+
+        payload = json.dumps(
+            {
+                "type": self.type.value,
+                "toolCallId": self.toolCallId,
+                "output": self.output,
+            }
+        )
+        return f"data: {payload}\n\n"
+

 # ========== Other ==========

--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -52,8 +52,10 @@ from .response_model import (
    StreamBaseResponse,
    StreamError,
    StreamFinish,
+    StreamFinishStep,
    StreamHeartbeat,
    StreamStart,
+    StreamStartStep,
    StreamTextDelta,
    StreamTextEnd,
    StreamTextStart,
@@ -351,6 +353,10 @@ async def stream_chat_completion(
    retry_count: int = 0,
    session: ChatSession | None = None,
    context: dict[str, str] | None = None,  # {url: str, content: str}
+    _continuation_message_id: (
+        str | None
+    ) = None,  # Internal: reuse message ID for tool call continuations
+    _task_id: str | None = None,  # Internal: task ID for SSE reconnection support
 ) -> AsyncGenerator[StreamBaseResponse, None]:
    """Main entry point for streaming chat completions with database handling.

@@ -371,21 +377,45 @@ async def stream_chat_completion(
        ValueError: If max_context_messages is exceeded

    """
+    completion_start = time.monotonic()
+
+    # Build log metadata for structured logging
+    log_meta = {"component": "ChatService", "session_id": session_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
    logger.info(
-        f"Streaming chat completion for session {session_id} for message {message} and user id {user_id}. Message is user message: {is_user_message}"
+        f"[TIMING] stream_chat_completion STARTED, session={session_id}, user={user_id}, "
+        f"message_len={len(message) if message else 0}, is_user={is_user_message}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "message_len": len(message) if message else 0,
+                "is_user_message": is_user_message,
+            }
+        },
    )

    # Only fetch from Redis if session not provided (initial call)
    if session is None:
+        fetch_start = time.monotonic()
        session = await get_chat_session(session_id, user_id)
+        fetch_time = (time.monotonic() - fetch_start) * 1000
        logger.info(
-            f"Fetched session from Redis: {session.session_id if session else 'None'}, "
-            f"message_count={len(session.messages) if session else 0}"
+            f"[TIMING] get_chat_session took {fetch_time:.1f}ms, "
+            f"n_messages={len(session.messages) if session else 0}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "duration_ms": fetch_time,
+                    "n_messages": len(session.messages) if session else 0,
+                }
+            },
        )
    else:
        logger.info(
-            f"Using provided session object: {session.session_id}, "
-            f"message_count={len(session.messages)}"
+            f"[TIMING] Using provided session, messages={len(session.messages)}",
+            extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
        )

    if not session:
@@ -406,17 +436,25 @@ async def stream_chat_completion(

        # Track user message in PostHog
        if is_user_message:
+            posthog_start = time.monotonic()
            track_user_message(
                user_id=user_id,
                session_id=session_id,
                message_length=len(message),
            )
+            posthog_time = (time.monotonic() - posthog_start) * 1000
+            logger.info(
+                f"[TIMING] track_user_message took {posthog_time:.1f}ms",
+                extra={"json_fields": {**log_meta, "duration_ms": posthog_time}},
+            )

-    logger.info(
-        f"Upserting session: {session.session_id} with user id {session.user_id}, "
-        f"message_count={len(session.messages)}"
-    )
+    upsert_start = time.monotonic()
    session = await upsert_chat_session(session)
+    upsert_time = (time.monotonic() - upsert_start) * 1000
+    logger.info(
+        f"[TIMING] upsert_chat_session took {upsert_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": upsert_time}},
+    )
    assert session, "Session not found"

    # Generate title for new sessions on first user message (non-blocking)
@@ -454,7 +492,13 @@ async def stream_chat_completion(
            asyncio.create_task(_update_title())

    # Build system prompt with business understanding
+    prompt_start = time.monotonic()
    system_prompt, understanding = await _build_system_prompt(user_id)
+    prompt_time = (time.monotonic() - prompt_start) * 1000
+    logger.info(
+        f"[TIMING] _build_system_prompt took {prompt_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": prompt_time}},
+    )

    # Initialize variables for streaming
    assistant_response = ChatMessage(
@@ -479,13 +523,27 @@ async def stream_chat_completion(
    # Generate unique IDs for AI SDK protocol
    import uuid as uuid_module

-    message_id = str(uuid_module.uuid4())
+    is_continuation = _continuation_message_id is not None
+    message_id = _continuation_message_id or str(uuid_module.uuid4())
    text_block_id = str(uuid_module.uuid4())

-    # Yield message start
-    yield StreamStart(messageId=message_id)
+    # Only yield message start for the initial call, not for continuations.
+    setup_time = (time.monotonic() - completion_start) * 1000
+    logger.info(
+        f"[TIMING] Setup complete, yielding StreamStart at {setup_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
+    )
+    if not is_continuation:
+        yield StreamStart(messageId=message_id, taskId=_task_id)
+
+    # Emit start-step before each LLM call (AI SDK uses this to add step boundaries)
+    yield StreamStartStep()

    try:
+        logger.info(
+            "[TIMING] Calling _stream_chat_chunks",
+            extra={"json_fields": log_meta},
+        )
        async for chunk in _stream_chat_chunks(
            session=session,
            tools=tools,
@@ -585,6 +643,10 @@ async def stream_chat_completion(
                    )
                yield chunk
            elif isinstance(chunk, StreamFinish):
+                if has_done_tool_call:
+                    # Tool calls happened — close the step but don't send message-level finish.
+                    # The continuation will open a new step, and finish will come at the end.
+                    yield StreamFinishStep()
                if not has_done_tool_call:
                    # Emit text-end before finish if we received text but haven't closed it
                    if has_received_text and not text_streaming_ended:
@@ -616,6 +678,8 @@ async def stream_chat_completion(
                            has_saved_assistant_message = True

                    has_yielded_end = True
+                    # Emit finish-step before finish (resets AI SDK text/reasoning state)
+                    yield StreamFinishStep()
                    yield chunk
            elif isinstance(chunk, StreamError):
                has_yielded_error = True
@@ -665,6 +729,10 @@ async def stream_chat_completion(
            logger.info(
                f"Retryable error encountered. Attempt {retry_count + 1}/{config.max_retries}"
            )
+            # Close the current step before retrying so the recursive call's
+            # StreamStartStep doesn't produce unbalanced step events.
+            if not has_yielded_end:
+                yield StreamFinishStep()
            should_retry = True
        else:
            # Non-retryable error or max retries exceeded
@@ -700,6 +768,7 @@ async def stream_chat_completion(
                error_response = StreamError(errorText=error_message)
                yield error_response
            if not has_yielded_end:
+                yield StreamFinishStep()
                yield StreamFinish()
            return

@@ -714,6 +783,8 @@ async def stream_chat_completion(
            retry_count=retry_count + 1,
            session=session,
            context=context,
+            _continuation_message_id=message_id,  # Reuse message ID since start was already sent
+            _task_id=_task_id,
        ):
            yield chunk
        return  # Exit after retry to avoid double-saving in finally block
@@ -783,6 +854,8 @@ async def stream_chat_completion(
            session=session,  # Pass session object to avoid Redis refetch
            context=context,
            tool_call_response=str(tool_response_messages),
+            _continuation_message_id=message_id,  # Reuse message ID to avoid duplicates
+            _task_id=_task_id,
        ):
            yield chunk

@@ -893,9 +966,21 @@ async def _stream_chat_chunks(
        SSE formatted JSON response objects

    """
+    import time as time_module
+
+    stream_chunks_start = time_module.perf_counter()
    model = config.model

-    logger.info("Starting pure chat stream")
+    # Build log metadata for structured logging
+    log_meta = {"component": "ChatService", "session_id": session.session_id}
+    if session.user_id:
+        log_meta["user_id"] = session.user_id
+
+    logger.info(
+        f"[TIMING] _stream_chat_chunks STARTED, session={session.session_id}, "
+        f"user={session.user_id}, n_messages={len(session.messages)}",
+        extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
+    )

    messages = session.to_openai_messages()
    if system_prompt:
@@ -906,12 +991,18 @@ async def _stream_chat_chunks(
        messages = [system_message] + messages

    # Apply context window management
+    context_start = time_module.perf_counter()
    context_result = await _manage_context_window(
        messages=messages,
        model=model,
        api_key=config.api_key,
        base_url=config.base_url,
    )
+    context_time = (time_module.perf_counter() - context_start) * 1000
+    logger.info(
+        f"[TIMING] _manage_context_window took {context_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": context_time}},
+    )

    if context_result.error:
        if "System prompt dropped" in context_result.error:
@@ -946,9 +1037,19 @@ async def _stream_chat_chunks(

        while retry_count <= MAX_RETRIES:
            try:
+                elapsed = (time_module.perf_counter() - stream_chunks_start) * 1000
+                retry_info = (
+                    f" (retry {retry_count}/{MAX_RETRIES})" if retry_count > 0 else ""
+                )
                logger.info(
-                    f"Creating OpenAI chat completion stream..."
-                    f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
+                    f"[TIMING] Creating OpenAI stream at {elapsed:.1f}ms{retry_info}",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "elapsed_ms": elapsed,
+                            "retry_count": retry_count,
+                        }
+                    },
                )

                # Build extra_body for OpenRouter tracing and PostHog analytics
@@ -965,6 +1066,11 @@ async def _stream_chat_chunks(
                        :128
                    ]  # OpenRouter limit

+                # Enable adaptive thinking for Anthropic models via OpenRouter
+                if config.thinking_enabled and "anthropic" in model.lower():
+                    extra_body["reasoning"] = {"enabled": True}
+
+                api_call_start = time_module.perf_counter()
                stream = await client.chat.completions.create(
                    model=model,
                    messages=cast(list[ChatCompletionMessageParam], messages),
@@ -974,6 +1080,11 @@ async def _stream_chat_chunks(
                    stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
                    extra_body=extra_body,
                )
+                api_init_time = (time_module.perf_counter() - api_call_start) * 1000
+                logger.info(
+                    f"[TIMING] OpenAI stream object returned in {api_init_time:.1f}ms",
+                    extra={"json_fields": {**log_meta, "duration_ms": api_init_time}},
+                )

                # Variables to accumulate tool calls
                tool_calls: list[dict[str, Any]] = []
@@ -984,10 +1095,13 @@ async def _stream_chat_chunks(

                # Track if we've started the text block
                text_started = False
+                first_content_chunk = True
+                chunk_count = 0

                # Process the stream
                chunk: ChatCompletionChunk
                async for chunk in stream:
+                    chunk_count += 1
                    if chunk.usage:
                        yield StreamUsage(
                            promptTokens=chunk.usage.prompt_tokens,
@@ -1010,6 +1124,23 @@ async def _stream_chat_chunks(
                            if not text_started and text_block_id:
                                yield StreamTextStart(id=text_block_id)
                                text_started = True
+                            # Log timing for first content chunk
+                            if first_content_chunk:
+                                first_content_chunk = False
+                                ttfc = (
+                                    time_module.perf_counter() - api_call_start
+                                ) * 1000
+                                logger.info(
+                                    f"[TIMING] FIRST CONTENT CHUNK at {ttfc:.1f}ms "
+                                    f"(since API call), n_chunks={chunk_count}",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "time_to_first_chunk_ms": ttfc,
+                                            "n_chunks": chunk_count,
+                                        }
+                                    },
+                                )
                            # Stream the text delta
                            text_response = StreamTextDelta(
                                id=text_block_id or "",
@@ -1066,7 +1197,21 @@ async def _stream_chat_chunks(
                                        toolName=tool_calls[idx]["function"]["name"],
                                    )
                                    emitted_start_for_idx.add(idx)
-                logger.info(f"Stream complete. Finish reason: {finish_reason}")
+                stream_duration = time_module.perf_counter() - api_call_start
+                logger.info(
+                    f"[TIMING] OpenAI stream COMPLETE, finish_reason={finish_reason}, "
+                    f"duration={stream_duration:.2f}s, "
+                    f"n_chunks={chunk_count}, n_tool_calls={len(tool_calls)}",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "stream_duration_ms": stream_duration * 1000,
+                            "finish_reason": finish_reason,
+                            "n_chunks": chunk_count,
+                            "n_tool_calls": len(tool_calls),
+                        }
+                    },
+                )

                # Yield all accumulated tool calls after the stream is complete
                # This ensures all tool call arguments have been fully received
@@ -1086,6 +1231,12 @@ async def _stream_chat_chunks(
                        # Re-raise to trigger retry logic in the parent function
                        raise

+                total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
+                logger.info(
+                    f"[TIMING] _stream_chat_chunks COMPLETED in {total_time/1000:.1f}s; "
+                    f"session={session.session_id}, user={session.user_id}",
+                    extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
+                )
                yield StreamFinish()
                return
            except Exception as e:
@@ -1565,6 +1716,7 @@ async def _execute_long_running_tool_with_streaming(
            task_id,
            StreamError(errorText=str(e)),
        )
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())
        await stream_registry.publish_chunk(task_id, StreamFinish())

        await _update_pending_operation(
@@ -1681,6 +1833,10 @@ async def _generate_llm_continuation(
        if session_id:
            extra_body["session_id"] = session_id[:128]

+        # Enable adaptive thinking for Anthropic models via OpenRouter
+        if config.thinking_enabled and "anthropic" in config.model.lower():
+            extra_body["reasoning"] = {"enabled": True}
+
        retry_count = 0
        last_error: Exception | None = None
        response = None
@@ -1811,6 +1967,10 @@ async def _generate_llm_continuation_with_streaming(
        if session_id:
            extra_body["session_id"] = session_id[:128]

+        # Enable adaptive thinking for Anthropic models via OpenRouter
+        if config.thinking_enabled and "anthropic" in config.model.lower():
+            extra_body["reasoning"] = {"enabled": True}
+
        # Make streaming LLM call (no tools - just text response)
        from typing import cast

@@ -1822,6 +1982,7 @@ async def _generate_llm_continuation_with_streaming(

        # Publish start event
        await stream_registry.publish_chunk(task_id, StreamStart(messageId=message_id))
+        await stream_registry.publish_chunk(task_id, StreamStartStep())
        await stream_registry.publish_chunk(task_id, StreamTextStart(id=text_block_id))

        # Stream the response
@@ -1845,6 +2006,7 @@ async def _generate_llm_continuation_with_streaming(

        # Publish end events
        await stream_registry.publish_chunk(task_id, StreamTextEnd(id=text_block_id))
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())

        if assistant_content:
            # Reload session from DB to avoid race condition with user messages
@@ -1886,4 +2048,5 @@ async def _generate_llm_continuation_with_streaming(
            task_id,
            StreamError(errorText=f"Failed to generate response: {e}"),
        )
+        await stream_registry.publish_chunk(task_id, StreamFinishStep())
        await stream_registry.publish_chunk(task_id, StreamFinish())
--- a/autogpt_platform/backend/backend/api/features/chat/service_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service_test.py
--- a/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
+++ b/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
@@ -104,6 +104,24 @@ async def create_task(
    Returns:
        The created ActiveTask instance (metadata only)
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Build log metadata for structured logging
+    log_meta = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "session_id": session_id,
+    }
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] create_task STARTED, task={task_id}, session={session_id}, user={user_id}",
+        extra={"json_fields": log_meta},
+    )
+
    task = ActiveTask(
        task_id=task_id,
        session_id=session_id,
@@ -114,10 +132,18 @@ async def create_task(
    )

    # Store metadata in Redis
+    redis_start = time.perf_counter()
    redis = await get_redis_async()
+    redis_time = (time.perf_counter() - redis_start) * 1000
+    logger.info(
+        f"[TIMING] get_redis_async took {redis_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": redis_time}},
+    )
+
    meta_key = _get_task_meta_key(task_id)
    op_key = _get_operation_mapping_key(operation_id)

+    hset_start = time.perf_counter()
    await redis.hset(  # type: ignore[misc]
        meta_key,
        mapping={
@@ -131,12 +157,22 @@ async def create_task(
            "created_at": task.created_at.isoformat(),
        },
    )
+    hset_time = (time.perf_counter() - hset_start) * 1000
+    logger.info(
+        f"[TIMING] redis.hset took {hset_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": hset_time}},
+    )
+
    await redis.expire(meta_key, config.stream_ttl)

    # Create operation_id -> task_id mapping for webhook lookups
    await redis.set(op_key, task_id, ex=config.stream_ttl)

-    logger.debug(f"Created task {task_id} for session {session_id}")
+    total_time = (time.perf_counter() - start_time) * 1000
+    logger.info(
+        f"[TIMING] create_task COMPLETED in {total_time:.1f}ms; task={task_id}, session={session_id}",
+        extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
+    )

    return task

@@ -156,26 +192,60 @@ async def publish_chunk(
    Returns:
        The Redis Stream message ID
    """
+    import time
+
+    start_time = time.perf_counter()
+    chunk_type = type(chunk).__name__
    chunk_json = chunk.model_dump_json()
    message_id = "0-0"

+    # Build log metadata
+    log_meta = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "chunk_type": chunk_type,
+    }
+
    try:
        redis = await get_redis_async()
        stream_key = _get_task_stream_key(task_id)

        # Write to Redis Stream for persistence and real-time delivery
+        xadd_start = time.perf_counter()
        raw_id = await redis.xadd(
            stream_key,
            {"data": chunk_json},
            maxlen=config.stream_max_length,
        )
+        xadd_time = (time.perf_counter() - xadd_start) * 1000
        message_id = raw_id if isinstance(raw_id, str) else raw_id.decode()

        # Set TTL on stream to match task metadata TTL
        await redis.expire(stream_key, config.stream_ttl)
+
+        total_time = (time.perf_counter() - start_time) * 1000
+        # Only log timing for significant chunks or slow operations
+        if (
+            chunk_type
+            in ("StreamStart", "StreamFinish", "StreamTextStart", "StreamTextEnd")
+            or total_time > 50
+        ):
+            logger.info(
+                f"[TIMING] publish_chunk {chunk_type} in {total_time:.1f}ms (xadd={xadd_time:.1f}ms)",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time,
+                        "xadd_time_ms": xadd_time,
+                        "message_id": message_id,
+                    }
+                },
+            )
    except Exception as e:
+        elapsed = (time.perf_counter() - start_time) * 1000
        logger.error(
-            f"Failed to publish chunk for task {task_id}: {e}",
+            f"[TIMING] Failed to publish chunk {chunk_type} after {elapsed:.1f}ms: {e}",
+            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
            exc_info=True,
        )

@@ -200,24 +270,61 @@ async def subscribe_to_task(
        An asyncio Queue that will receive stream chunks, or None if task not found
        or user doesn't have access
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Build log metadata
+    log_meta = {"component": "StreamRegistry", "task_id": task_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] subscribe_to_task STARTED, task={task_id}, user={user_id}, last_msg={last_message_id}",
+        extra={"json_fields": {**log_meta, "last_message_id": last_message_id}},
+    )
+
+    redis_start = time.perf_counter()
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)
    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
+    hgetall_time = (time.perf_counter() - redis_start) * 1000
+    logger.info(
+        f"[TIMING] Redis hgetall took {hgetall_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": hgetall_time}},
+    )

    if not meta:
-        logger.debug(f"Task {task_id} not found in Redis")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] Task not found in Redis after {elapsed:.1f}ms",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "reason": "task_not_found",
+                }
+            },
+        )
        return None

    # Note: Redis client uses decode_responses=True, so keys are strings
    task_status = meta.get("status", "")
    task_user_id = meta.get("user_id", "") or None
+    log_meta["session_id"] = meta.get("session_id", "")

    # Validate ownership - if task has an owner, requester must match
    if task_user_id:
        if user_id != task_user_id:
            logger.warning(
-                f"User {user_id} denied access to task {task_id} "
-                f"owned by {task_user_id}"
+                f"[TIMING] Access denied: user {user_id} tried to access task owned by {task_user_id}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "task_owner": task_user_id,
+                        "reason": "access_denied",
+                    }
+                },
            )
            return None

@@ -225,7 +332,19 @@ async def subscribe_to_task(
    stream_key = _get_task_stream_key(task_id)

    # Step 1: Replay messages from Redis Stream
+    xread_start = time.perf_counter()
    messages = await redis.xread({stream_key: last_message_id}, block=0, count=1000)
+    xread_time = (time.perf_counter() - xread_start) * 1000
+    logger.info(
+        f"[TIMING] Redis xread (replay) took {xread_time:.1f}ms, status={task_status}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": xread_time,
+                "task_status": task_status,
+            }
+        },
+    )

    replayed_count = 0
    replay_last_id = last_message_id
@@ -244,19 +363,48 @@ async def subscribe_to_task(
                    except Exception as e:
                        logger.warning(f"Failed to replay message: {e}")

-    logger.debug(f"Task {task_id}: replayed {replayed_count} messages")
+    logger.info(
+        f"[TIMING] Replayed {replayed_count} messages, last_id={replay_last_id}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "n_messages_replayed": replayed_count,
+                "replay_last_id": replay_last_id,
+            }
+        },
+    )

    # Step 2: If task is still running, start stream listener for live updates
    if task_status == "running":
+        logger.info(
+            "[TIMING] Task still running, starting _stream_listener",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
        listener_task = asyncio.create_task(
-            _stream_listener(task_id, subscriber_queue, replay_last_id)
+            _stream_listener(task_id, subscriber_queue, replay_last_id, log_meta)
        )
        # Track listener task for cleanup on unsubscribe
        _listener_tasks[id(subscriber_queue)] = (task_id, listener_task)
    else:
        # Task is completed/failed - add finish marker
+        logger.info(
+            f"[TIMING] Task already {task_status}, adding StreamFinish",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
        await subscriber_queue.put(StreamFinish())

+    total_time = (time.perf_counter() - start_time) * 1000
+    logger.info(
+        f"[TIMING] subscribe_to_task COMPLETED in {total_time:.1f}ms; task={task_id}, "
+        f"n_messages_replayed={replayed_count}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "total_time_ms": total_time,
+                "n_messages_replayed": replayed_count,
+            }
+        },
+    )
    return subscriber_queue


@@ -264,6 +412,7 @@ async def _stream_listener(
    task_id: str,
    subscriber_queue: asyncio.Queue[StreamBaseResponse],
    last_replayed_id: str,
+    log_meta: dict | None = None,
 ) -> None:
    """Listen to Redis Stream for new messages using blocking XREAD.

@@ -274,10 +423,27 @@ async def _stream_listener(
        task_id: Task ID to listen for
        subscriber_queue: Queue to deliver messages to
        last_replayed_id: Last message ID from replay (continue from here)
+        log_meta: Structured logging metadata
    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Use provided log_meta or build minimal one
+    if log_meta is None:
+        log_meta = {"component": "StreamRegistry", "task_id": task_id}
+
+    logger.info(
+        f"[TIMING] _stream_listener STARTED, task={task_id}, last_id={last_replayed_id}",
+        extra={"json_fields": {**log_meta, "last_replayed_id": last_replayed_id}},
+    )
+
    queue_id = id(subscriber_queue)
    # Track the last successfully delivered message ID for recovery hints
    last_delivered_id = last_replayed_id
+    messages_delivered = 0
+    first_message_time = None
+    xread_count = 0

    try:
        redis = await get_redis_async()
@@ -287,9 +453,39 @@ async def _stream_listener(
        while True:
            # Block for up to 30 seconds waiting for new messages
            # This allows periodic checking if task is still running
+            xread_start = time.perf_counter()
+            xread_count += 1
            messages = await redis.xread(
                {stream_key: current_id}, block=30000, count=100
            )
+            xread_time = (time.perf_counter() - xread_start) * 1000
+
+            if messages:
+                msg_count = sum(len(msgs) for _, msgs in messages)
+                logger.info(
+                    f"[TIMING] xread #{xread_count} returned {msg_count} messages in {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "n_messages": msg_count,
+                            "duration_ms": xread_time,
+                        }
+                    },
+                )
+            elif xread_time > 1000:
+                # Only log timeouts (30s blocking)
+                logger.info(
+                    f"[TIMING] xread #{xread_count} timeout after {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "duration_ms": xread_time,
+                            "reason": "timeout",
+                        }
+                    },
+                )

            if not messages:
                # Timeout - check if task is still running
@@ -326,10 +522,30 @@ async def _stream_listener(
                                )
                                # Update last delivered ID on successful delivery
                                last_delivered_id = current_id
+                                messages_delivered += 1
+                                if first_message_time is None:
+                                    first_message_time = time.perf_counter()
+                                    elapsed = (first_message_time - start_time) * 1000
+                                    logger.info(
+                                        f"[TIMING] FIRST live message at {elapsed:.1f}ms, type={type(chunk).__name__}",
+                                        extra={
+                                            "json_fields": {
+                                                **log_meta,
+                                                "elapsed_ms": elapsed,
+                                                "chunk_type": type(chunk).__name__,
+                                            }
+                                        },
+                                    )
                            except asyncio.TimeoutError:
                                logger.warning(
-                                    f"Subscriber queue full for task {task_id}, "
-                                    f"message delivery timed out after {QUEUE_PUT_TIMEOUT}s"
+                                    f"[TIMING] Subscriber queue full, delivery timed out after {QUEUE_PUT_TIMEOUT}s",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "timeout_s": QUEUE_PUT_TIMEOUT,
+                                            "reason": "queue_full",
+                                        }
+                                    },
                                )
                                # Send overflow error with recovery info
                                try:
@@ -351,15 +567,44 @@ async def _stream_listener(

                            # Stop listening on finish
                            if isinstance(chunk, StreamFinish):
+                                total_time = (time.perf_counter() - start_time) * 1000
+                                logger.info(
+                                    f"[TIMING] StreamFinish received in {total_time/1000:.1f}s; delivered={messages_delivered}",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "total_time_ms": total_time,
+                                            "messages_delivered": messages_delivered,
+                                        }
+                                    },
+                                )
                                return
                    except Exception as e:
-                        logger.warning(f"Error processing stream message: {e}")
+                        logger.warning(
+                            f"Error processing stream message: {e}",
+                            extra={"json_fields": {**log_meta, "error": str(e)}},
+                        )

    except asyncio.CancelledError:
-        logger.debug(f"Stream listener cancelled for task {task_id}")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener CANCELLED after {elapsed:.1f}ms, delivered={messages_delivered}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "messages_delivered": messages_delivered,
+                    "reason": "cancelled",
+                }
+            },
+        )
        raise  # Re-raise to propagate cancellation
    except Exception as e:
-        logger.error(f"Stream listener error for task {task_id}: {e}")
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.error(
+            f"[TIMING] _stream_listener ERROR after {elapsed:.1f}ms: {e}",
+            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
+        )
        # On error, send finish to unblock subscriber
        try:
            await asyncio.wait_for(
@@ -368,10 +613,24 @@ async def _stream_listener(
            )
        except (asyncio.TimeoutError, asyncio.QueueFull):
            logger.warning(
-                f"Could not deliver finish event for task {task_id} after error"
+                "Could not deliver finish event after error",
+                extra={"json_fields": log_meta},
            )
    finally:
        # Clean up listener task mapping on exit
+        total_time = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener FINISHED in {total_time/1000:.1f}s; task={task_id}, "
+            f"delivered={messages_delivered}, xread_count={xread_count}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "total_time_ms": total_time,
+                    "messages_delivered": messages_delivered,
+                    "xread_count": xread_count,
+                }
+            },
+        )
        _listener_tasks.pop(queue_id, None)


@@ -598,8 +857,10 @@ def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
        ResponseType,
        StreamError,
        StreamFinish,
+        StreamFinishStep,
        StreamHeartbeat,
        StreamStart,
+        StreamStartStep,
        StreamTextDelta,
        StreamTextEnd,
        StreamTextStart,
@@ -613,6 +874,8 @@ def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
    type_to_class: dict[str, type[StreamBaseResponse]] = {
        ResponseType.START.value: StreamStart,
        ResponseType.FINISH.value: StreamFinish,
+        ResponseType.START_STEP.value: StreamStartStep,
+        ResponseType.FINISH_STEP.value: StreamFinishStep,
        ResponseType.TEXT_START.value: StreamTextStart,
        ResponseType.TEXT_DELTA.value: StreamTextDelta,
        ResponseType.TEXT_END.value: StreamTextEnd,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/IDEAS.md
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/IDEAS.md
--- a/autogpt_platform/backend/backend/api/features/chat/tools/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/init.py
@@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Any

 from openai.types.chat import ChatCompletionToolParam

-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tracking import track_tool_called
+from backend.copilot.model import ChatSession
+from backend.copilot.tracking import track_tool_called

 from .add_understanding import AddUnderstandingTool
 from .agent_output import AgentOutputTool
@@ -27,7 +27,7 @@ from .workspace_files import (
 )

 if TYPE_CHECKING:
-    from backend.api.features.chat.response_model import StreamToolOutputAvailable
+    from backend.copilot.response_model import StreamToolOutputAvailable

 logger = logging.getLogger(__name__)

--- a/autogpt_platform/backend/backend/api/features/chat/tools/_test_data.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/_test_data.py
@@ -6,11 +6,11 @@ import pytest
 from prisma.types import ProfileCreateInput
 from pydantic import SecretStr

-from backend.api.features.chat.model import ChatSession
 from backend.api.features.store import db as store_db
 from backend.blocks.firecrawl.scrape import FirecrawlScrapeBlock
 from backend.blocks.io import AgentInputBlock, AgentOutputBlock
 from backend.blocks.llm import AITextGeneratorBlock
+from backend.copilot.model import ChatSession
 from backend.data.db import prisma
 from backend.data.graph import Graph, Link, Node, create_graph
 from backend.data.model import APIKeyCredentials
--- a/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Any

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession
 from backend.data.understanding import (
    BusinessUnderstandingInput,
    upsert_business_understanding,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py
@@ -7,9 +7,9 @@ from typing import Any

 from pydantic import BaseModel, field_validator

-from backend.api.features.chat.model import ChatSession
 from backend.api.features.library import db as library_db
 from backend.api.features.library.model import LibraryAgent
+from backend.copilot.model import ChatSession
 from backend.data import execution as execution_db
 from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta

--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/base.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/base.py
@@ -5,8 +5,8 @@ from typing import Any

 from openai.types.chat import ChatCompletionToolParam

-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.response_model import StreamToolOutputAvailable
+from backend.copilot.model import ChatSession
+from backend.copilot.response_model import StreamToolOutputAvailable

 from .models import ErrorResponse, NeedLoginResponse, ToolResponseBase

--- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Any

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/customize_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/customize_agent.py
@@ -3,9 +3,9 @@
 import logging
 from typing import Any

-from backend.api.features.chat.model import ChatSession
 from backend.api.features.store import db as store_db
 from backend.api.features.store.exceptions import AgentNotFoundError
+from backend.copilot.model import ChatSession

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Any

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py
@@ -2,7 +2,7 @@

 from typing import Any

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession

 from .agent_search import search_agents
 from .base import BaseTool
--- a/autogpt_platform/backend/backend/copilot/tools/find_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_block.py
@@ -0,0 +1,245 @@
+import logging
+from typing import Any
+
+from prisma.enums import ContentType
+
+from backend.api.features.store.hybrid_search import unified_hybrid_search
+from backend.copilot.model import ChatSession
+from backend.copilot.tools.base import BaseTool, ToolResponseBase
+from backend.copilot.tools.models import (
+    BlockInfoSummary,
+    BlockInputFieldInfo,
+    BlockListResponse,
+    ErrorResponse,
+    NoResultsResponse,
+)
+from backend.data.block import BlockType, get_block
+
+logger = logging.getLogger(__name__)
+
+_TARGET_RESULTS = 10
+# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
+# 40 is 2x the number currently removed; query speed for 10 vs 40 differs minimally.
+_OVERFETCH_PAGE_SIZE = 40
+
+# Block types that only work within graphs and cannot run standalone in CoPilot.
+COPILOT_EXCLUDED_BLOCK_TYPES = {
+    BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
+    BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
+    BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
+    BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
+    BlockType.NOTE,  # Visual annotation only - no runtime behavior
+    BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
+    BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
+}
+
+# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
+COPILOT_EXCLUDED_BLOCK_IDS = {
+    # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
+    "3b191d9f-356f-482d-8238-ba04b6d18381",
+}
+
+
+class FindBlockTool(BaseTool):
+    """CoPilot tool that searches available blocks, hiding graph-only ones."""
+
+    @property
+    def name(self) -> str:
+        return "find_block"
+
+    @property
+    def description(self) -> str:
+        return (
+            "Search for available blocks by name or description. "
+            "Blocks are reusable components that perform specific tasks like "
+            "sending emails, making API calls, processing text, etc. "
+            "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
+            "The response includes each block's id, required_inputs, and input_schema."
+        )
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        return {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": (
+                        "Search query to find blocks by name or description. "
+                        "Use keywords like 'email', 'http', 'text', 'ai', etc."
+                    ),
+                },
+            },
+            "required": ["query"],
+        }
+
+    @property
+    def requires_auth(self) -> bool:
+        return True
+
+    async def _execute(
+        self,
+        user_id: str | None,
+        session: ChatSession,
+        **kwargs,
+    ) -> ToolResponseBase:
+        """Search blocks for the query, skipping disabled and graph-only ones.
+
+        Args:
+            user_id: ID of the requesting user (not read by this method)
+            session: Chat session; its session_id is echoed in every response
+            query: Search query (kwarg); missing, null, or blank yields ErrorResponse
+
+        Returns:
+            BlockListResponse: Up to _TARGET_RESULTS matching blocks
+            NoResultsResponse: Search returned nothing or every hit was filtered out
+            ErrorResponse: Blank query, or an exception raised while searching
+        """
+        query = (kwargs.get("query") or "").strip()  # tolerate an explicit null
+        session_id = session.session_id
+
+        if not query:
+            return ErrorResponse(
+                message="Please provide a search query",
+                session_id=session_id,
+            )
+
+        try:
+            # Search for blocks using hybrid search
+            results, _total = await unified_hybrid_search(
+                query=query,
+                content_types=[ContentType.BLOCK],
+                page=1,
+                page_size=_OVERFETCH_PAGE_SIZE,
+            )
+
+            if not results:
+                return NoResultsResponse(
+                    message=f"No blocks found for '{query}'",
+                    suggestions=[
+                        "Try broader keywords like 'email', 'http', 'text', 'ai'",
+                        "Check spelling of technical terms",
+                    ],
+                    session_id=session_id,
+                )
+
+            # Enrich results with full block information
+            blocks: list[BlockInfoSummary] = []
+            for result in results:
+                block_id = result["content_id"]
+                block = get_block(block_id)
+
+                # Skip disabled blocks
+                if not block or block.disabled:
+                    continue
+
+                # Skip blocks excluded from CoPilot (graph-only blocks)
+                if (
+                    block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+                    or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+                ):
+                    continue
+
+                # Get input/output schemas
+                input_schema = {}
+                output_schema = {}
+                try:
+                    input_schema = block.input_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate input schema for block %s: %s",
+                        block_id,
+                        e,
+                    )
+                try:
+                    output_schema = block.output_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate output schema for block %s: %s",
+                        block_id,
+                        e,
+                    )
+
+                # Get categories from block instance
+                categories = []
+                if hasattr(block, "categories") and block.categories:
+                    categories = [cat.value for cat in block.categories]
+
+                # Extract required inputs for easier use
+                required_inputs: list[BlockInputFieldInfo] = []
+                if input_schema:
+                    properties = input_schema.get("properties", {})
+                    required_fields = set(input_schema.get("required", []))
+                    # Get credential field names to exclude from required inputs
+                    credentials_fields = set(
+                        block.input_schema.get_credentials_fields().keys()
+                    )
+
+                    for field_name, field_schema in properties.items():
+                        # Skip credential fields - they're handled separately
+                        if field_name in credentials_fields:
+                            continue
+
+                        required_inputs.append(
+                            BlockInputFieldInfo(
+                                name=field_name,
+                                type=field_schema.get("type", "string"),
+                                description=field_schema.get("description", ""),
+                                required=field_name in required_fields,
+                                default=field_schema.get("default"),
+                            )
+                        )
+
+                blocks.append(
+                    BlockInfoSummary(
+                        id=block_id,
+                        name=block.name,
+                        description=block.description or "",
+                        categories=categories,
+                        input_schema=input_schema,
+                        output_schema=output_schema,
+                        required_inputs=required_inputs,
+                    )
+                )
+
+                if len(blocks) >= _TARGET_RESULTS:
+                    break
+
+            if blocks and len(blocks) < _TARGET_RESULTS:
+                logger.debug(
+                    "find_block returned %d/%d results for query '%s' "
+                    "(filtered %d excluded/disabled blocks)",
+                    len(blocks),
+                    _TARGET_RESULTS,
+                    query,
+                    len(results) - len(blocks),
+                )
+
+            if not blocks:
+                return NoResultsResponse(
+                    message=f"No blocks found for '{query}'",
+                    suggestions=[
+                        "Try broader keywords like 'email', 'http', 'text', 'ai'",
+                    ],
+                    session_id=session_id,
+                )
+
+            return BlockListResponse(
+                message=(
+                    f"Found {len(blocks)} block(s) matching '{query}'. "
+                    "To execute a block, use run_block with the block's 'id' field "
+                    "and provide 'input_data' matching the block's input_schema."
+                ),
+                blocks=blocks,
+                count=len(blocks),
+                query=query,
+                session_id=session_id,
+            )
+
+        except Exception as e:
+            logger.error(f"Error searching blocks: {e}", exc_info=True)
+            return ErrorResponse(
+                message="Failed to search blocks",
+                error=str(e),
+                session_id=session_id,
+            )
--- a/autogpt_platform/backend/backend/copilot/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_block_test.py
@@ -0,0 +1,139 @@
+"""Tests for block filtering in FindBlockTool."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from backend.copilot.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+    FindBlockTool,
+)
+from backend.copilot.tools.models import BlockListResponse
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-find-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Create a mock block for testing."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.description = f"{name} description"
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields.return_value = {}
+    mock.output_schema = MagicMock()
+    mock.output_schema.jsonschema.return_value = {}
+    mock.categories = []
+    return mock
+
+
+class TestFindBlockFiltering:
+    """Tests for block filtering in FindBlockTool."""
+
+    def test_excluded_block_types_contains_expected_types(self):
+        """Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
+        assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
+
+    def test_excluded_block_ids_contains_smart_decision_maker(self):
+        """Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
+        assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_filtered_from_results(self):
+        """Verify blocks with excluded BlockTypes are filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        # Mock search returns an INPUT block (excluded) and a STANDARD block (included)
+        search_results = [
+            {"content_id": "input-block-id", "score": 0.9},
+            {"content_id": "standard-block-id", "score": 0.8},
+        ]
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+        standard_block = make_mock_block(
+            "standard-block-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                "input-block-id": input_block,
+                "standard-block-id": standard_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.copilot.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.copilot.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="test"
+                )
+
+        # Should only return the standard block, not the INPUT block
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "standard-block-id"
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_filtered_from_results(self):
+        """Verify SmartDecisionMakerBlock is filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        search_results = [
+            {"content_id": smart_decision_id, "score": 0.9},
+            {"content_id": "normal-block-id", "score": 0.8},
+        ]
+
+        # SmartDecisionMakerBlock has STANDARD type but is excluded by ID
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+        normal_block = make_mock_block(
+            "normal-block-id", "Normal Block", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                smart_decision_id: smart_block,
+                "normal-block-id": normal_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.copilot.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.copilot.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="decision"
+                )
+
+        # Should only return normal block, not SmartDecisionMakerBlock
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "normal-block-id"
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py
@@ -2,7 +2,7 @@

 from typing import Any

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession

 from .agent_search import search_agents
 from .base import BaseTool
--- a/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py
@@ -4,9 +4,9 @@ import logging
 from pathlib import Path
 from typing import Any

-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tools.base import BaseTool
-from backend.api.features.chat.tools.models import (
+from backend.copilot.model import ChatSession
+from backend.copilot.tools.base import BaseTool
+from backend.copilot.tools.models import (
    DocPageResponse,
    ErrorResponse,
    ToolResponseBase,
--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -0,0 +1,29 @@
+"""Shared helpers for chat tools."""
+
+from typing import Any
+
+
+def get_inputs_from_schema(
+    input_schema: dict[str, Any],
+    exclude_fields: set[str] | None = None,
+) -> list[dict[str, Any]]:
+    """List one descriptor dict per schema property, skipping `exclude_fields`."""
+    if not isinstance(input_schema, dict):  # non-dict schema -> no inputs
+        return []
+
+    exclude = exclude_fields or set()
+    properties = input_schema.get("properties", {})
+    required = set(input_schema.get("required", []))  # membership-tested below
+
+    return [
+        {
+            "name": name,
+            "title": schema.get("title", name),  # falls back to the field name
+            "type": schema.get("type", "string"),  # untyped fields -> "string"
+            "description": schema.get("description", ""),
+            "required": name in required,
+            "default": schema.get("default"),  # None when no default is declared
+        }
+        for name, schema in properties.items()
+        if name not in exclude
+    ]
--- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
@@ -5,13 +5,10 @@ from typing import Any

 from pydantic import BaseModel, Field, field_validator

-from backend.api.features.chat.config import ChatConfig
-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tracking import (
-    track_agent_run_success,
-    track_agent_scheduled,
-)
 from backend.api.features.library import db as library_db
+from backend.copilot.config import ChatConfig
+from backend.copilot.model import ChatSession
+from backend.copilot.tracking import track_agent_run_success, track_agent_scheduled
 from backend.data.graph import GraphModel
 from backend.data.model import CredentialsMetaInput
 from backend.data.user import get_user_by_id
@@ -24,6 +21,7 @@ from backend.util.timezone_utils import (
 )

 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
    AgentDetails,
    AgentDetailsResponse,
@@ -261,7 +259,7 @@ class RunAgentTool(BaseTool):
                        ),
                        requirements={
                            "credentials": requirements_creds_list,
-                            "inputs": self._get_inputs_list(graph.input_schema),
+                            "inputs": get_inputs_from_schema(graph.input_schema),
                            "execution_modes": self._get_execution_modes(graph),
                        },
                    ),
@@ -369,22 +367,6 @@ class RunAgentTool(BaseTool):
                session_id=session_id,
            )

-    def _get_inputs_list(self, input_schema: dict[str, Any]) -> list[dict[str, Any]]:
-        """Extract inputs list from schema."""
-        inputs_list = []
-        if isinstance(input_schema, dict) and "properties" in input_schema:
-            for field_name, field_schema in input_schema["properties"].items():
-                inputs_list.append(
-                    {
-                        "name": field_name,
-                        "title": field_schema.get("title", field_name),
-                        "type": field_schema.get("type", "string"),
-                        "description": field_schema.get("description", ""),
-                        "required": field_name in input_schema.get("required", []),
-                    }
-                )
-        return inputs_list
-
    def _get_execution_modes(self, graph: GraphModel) -> list[str]:
        """Get available execution modes for the graph."""
        trigger_info = graph.trigger_setup_info
@@ -398,7 +380,7 @@ class RunAgentTool(BaseTool):
        suffix: str,
    ) -> str:
        """Build a message describing available inputs for an agent."""
-        inputs_list = self._get_inputs_list(graph.input_schema)
+        inputs_list = get_inputs_from_schema(graph.input_schema)
        required_names = [i["name"] for i in inputs_list if i["required"]]
        optional_names = [i["name"] for i in inputs_list if not i["required"]]

--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -7,15 +7,20 @@ from typing import Any

 from pydantic_core import PydanticUndefined

-from backend.api.features.chat.model import ChatSession
-from backend.data.block import get_block
+from backend.copilot.model import ChatSession
+from backend.copilot.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+)
+from backend.data.block import AnyBlockSchema, get_block
 from backend.data.execution import ExecutionContext
-from backend.data.model import CredentialsMetaInput
+from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
 from backend.data.workspace import get_or_create_workspace
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import BlockError

 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
    BlockOutputResponse,
    ErrorResponse,
@@ -24,7 +29,10 @@ from .models import (
    ToolResponseBase,
    UserReadiness,
 )
-from .utils import build_missing_credentials_from_field_info
+from .utils import (
+    build_missing_credentials_from_field_info,
+    match_credentials_to_requirements,
+)

 logger = logging.getLogger(__name__)

@@ -73,91 +81,6 @@ class RunBlockTool(BaseTool):
    def requires_auth(self) -> bool:
        return True

-    async def _check_block_credentials(
-        self,
-        user_id: str,
-        block: Any,
-        input_data: dict[str, Any] | None = None,
-    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
-        """
-        Check if user has required credentials for a block.
-
-        Args:
-            user_id: User ID
-            block: Block to check credentials for
-            input_data: Input data for the block (used to determine provider via discriminator)
-
-        Returns:
-            tuple[matched_credentials, missing_credentials]
-        """
-        matched_credentials: dict[str, CredentialsMetaInput] = {}
-        missing_credentials: list[CredentialsMetaInput] = []
-        input_data = input_data or {}
-
-        # Get credential field info from block's input schema
-        credentials_fields_info = block.input_schema.get_credentials_fields_info()
-
-        if not credentials_fields_info:
-            return matched_credentials, missing_credentials
-
-        # Get user's available credentials
-        creds_manager = IntegrationCredentialsManager()
-        available_creds = await creds_manager.store.get_all_creds(user_id)
-
-        for field_name, field_info in credentials_fields_info.items():
-            effective_field_info = field_info
-            if field_info.discriminator and field_info.discriminator_mapping:
-                # Get discriminator from input, falling back to schema default
-                discriminator_value = input_data.get(field_info.discriminator)
-                if discriminator_value is None:
-                    field = block.input_schema.model_fields.get(
-                        field_info.discriminator
-                    )
-                    if field and field.default is not PydanticUndefined:
-                        discriminator_value = field.default
-
-                if (
-                    discriminator_value
-                    and discriminator_value in field_info.discriminator_mapping
-                ):
-                    effective_field_info = field_info.discriminate(discriminator_value)
-                    logger.debug(
-                        f"Discriminated provider for {field_name}: "
-                        f"{discriminator_value} -> {effective_field_info.provider}"
-                    )
-
-            matching_cred = next(
-                (
-                    cred
-                    for cred in available_creds
-                    if cred.provider in effective_field_info.provider
-                    and cred.type in effective_field_info.supported_types
-                ),
-                None,
-            )
-
-            if matching_cred:
-                matched_credentials[field_name] = CredentialsMetaInput(
-                    id=matching_cred.id,
-                    provider=matching_cred.provider,  # type: ignore
-                    type=matching_cred.type,
-                    title=matching_cred.title,
-                )
-            else:
-                # Create a placeholder for the missing credential
-                provider = next(iter(effective_field_info.provider), "unknown")
-                cred_type = next(iter(effective_field_info.supported_types), "api_key")
-                missing_credentials.append(
-                    CredentialsMetaInput(
-                        id=field_name,
-                        provider=provider,  # type: ignore
-                        type=cred_type,  # type: ignore
-                        title=field_name.replace("_", " ").title(),
-                    )
-                )
-
-        return matched_credentials, missing_credentials
-
    async def _execute(
        self,
        user_id: str | None,
@@ -212,11 +135,24 @@ class RunBlockTool(BaseTool):
                session_id=session_id,
            )

+        # Check if block is excluded from CoPilot (graph-only blocks)
+        if (
+            block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+            or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+        ):
+            return ErrorResponse(
+                message=(
+                    f"Block '{block.name}' cannot be run directly in CoPilot. "
+                    "This block is designed for use within graphs only."
+                ),
+                session_id=session_id,
+            )
+
        logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")

        creds_manager = IntegrationCredentialsManager()
-        matched_credentials, missing_credentials = await self._check_block_credentials(
-            user_id, block, input_data
+        matched_credentials, missing_credentials = (
+            await self._resolve_block_credentials(user_id, block, input_data)
        )

        if missing_credentials:
@@ -345,29 +281,75 @@ class RunBlockTool(BaseTool):
                session_id=session_id,
            )

-    def _get_inputs_list(self, block: Any) -> list[dict[str, Any]]:
+    async def _resolve_block_credentials(
+        self,
+        user_id: str,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any] | None = None,
+    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+        """
+        Resolve credentials for a block by matching user's available credentials.
+
+        Args:
+            user_id: User ID
+            block: Block to resolve credentials for
+            input_data: Input data for the block (used to determine provider via discriminator)
+
+        Returns:
+            tuple of (matched_credentials, missing_credentials) - matched credentials
+            are used for block execution, missing ones indicate setup requirements.
+        """
+        input_data = input_data or {}
+        requirements = self._resolve_discriminated_credentials(block, input_data)
+
+        if not requirements:
+            return {}, []
+
+        return await match_credentials_to_requirements(user_id, requirements)
+
+    def _get_inputs_list(self, block: AnyBlockSchema) -> list[dict[str, Any]]:
        """Extract non-credential inputs from block schema."""
-        inputs_list = []
        schema = block.input_schema.jsonschema()
-        properties = schema.get("properties", {})
-        required_fields = set(schema.get("required", []))
-
-        # Get credential field names to exclude
        credentials_fields = set(block.input_schema.get_credentials_fields().keys())
+        return get_inputs_from_schema(schema, exclude_fields=credentials_fields)

-        for field_name, field_schema in properties.items():
-            # Skip credential fields
-            if field_name in credentials_fields:
-                continue
+    def _resolve_discriminated_credentials(
+        self,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any],
+    ) -> dict[str, CredentialsFieldInfo]:
+        """Resolve credential requirements, applying discriminator logic where needed."""
+        credentials_fields_info = block.input_schema.get_credentials_fields_info()
+        if not credentials_fields_info:
+            return {}

-            inputs_list.append(
-                {
-                    "name": field_name,
-                    "title": field_schema.get("title", field_name),
-                    "type": field_schema.get("type", "string"),
-                    "description": field_schema.get("description", ""),
-                    "required": field_name in required_fields,
-                }
-            )
+        resolved: dict[str, CredentialsFieldInfo] = {}

-        return inputs_list
+        for field_name, field_info in credentials_fields_info.items():
+            effective_field_info = field_info
+
+            if field_info.discriminator and field_info.discriminator_mapping:
+                discriminator_value = input_data.get(field_info.discriminator)
+                if discriminator_value is None:
+                    field = block.input_schema.model_fields.get(
+                        field_info.discriminator
+                    )
+                    if field and field.default is not PydanticUndefined:
+                        discriminator_value = field.default
+
+                if (
+                    discriminator_value
+                    and discriminator_value in field_info.discriminator_mapping
+                ):
+                    effective_field_info = field_info.discriminate(discriminator_value)
+                    # For host-scoped credentials, add the discriminator value
+                    # (e.g., URL) so _credential_is_for_host can match it
+                    effective_field_info.discriminator_values.add(discriminator_value)
+                    logger.debug(
+                        f"Discriminated provider for {field_name}: "
+                        f"{discriminator_value} -> {effective_field_info.provider}"
+                    )
+
+            resolved[field_name] = effective_field_info
+
+        return resolved
--- a/autogpt_platform/backend/backend/copilot/tools/run_block_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_block_test.py
@@ -0,0 +1,106 @@
+"""Tests for block execution guards in RunBlockTool."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from backend.copilot.tools.models import ErrorResponse
+from backend.copilot.tools.run_block import RunBlockTool
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-run-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Create a MagicMock block with no inputs and no credential requirements."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields_info.return_value = {}  # name -> info
+    return mock
+
+
+class TestRunBlockFiltering:
+    """Tests for block execution guards in RunBlockTool."""
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_returns_error(self):
+        """Attempting to execute a block with excluded BlockType returns error."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+
+        with patch(
+            "backend.copilot.tools.run_block.get_block",
+            return_value=input_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="input-block-id",
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+        assert "designed for use within graphs only" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_returns_error(self):
+        """Attempting to execute SmartDecisionMakerBlock returns error."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.copilot.tools.run_block.get_block",
+            return_value=smart_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id=smart_decision_id,
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_non_excluded_block_passes_guard(self):
+        """Non-excluded blocks pass the filtering guard (may fail later for other reasons)."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        standard_block = make_mock_block(
+            "standard-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.copilot.tools.run_block.get_block",
+            return_value=standard_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="standard-id",
+                input_data={},
+            )
+
+        # Should NOT be an ErrorResponse about CoPilot exclusion
+        # (may be other errors like missing credentials, but not the exclusion guard)
+        if isinstance(response, ErrorResponse):
+            assert "cannot be run directly in CoPilot" not in response.message
--- a/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py
@@ -5,16 +5,16 @@ from typing import Any

 from prisma.enums import ContentType

-from backend.api.features.chat.model import ChatSession
-from backend.api.features.chat.tools.base import BaseTool
-from backend.api.features.chat.tools.models import (
+from backend.api.features.store.hybrid_search import unified_hybrid_search
+from backend.copilot.model import ChatSession
+from backend.copilot.tools.base import BaseTool
+from backend.copilot.tools.models import (
    DocSearchResult,
    DocSearchResultsResponse,
    ErrorResponse,
    NoResultsResponse,
    ToolResponseBase,
 )
-from backend.api.features.store.hybrid_search import unified_hybrid_search

 logger = logging.getLogger(__name__)

--- a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
@@ -8,6 +8,7 @@ from backend.api.features.library import model as library_model
 from backend.api.features.store import db as store_db
 from backend.data.graph import GraphModel
 from backend.data.model import (
+    Credentials,
    CredentialsFieldInfo,
    CredentialsMetaInput,
    HostScopedCredentials,
@@ -223,6 +224,99 @@ async def get_or_create_library_agent(
    return library_agents[0]


+async def match_credentials_to_requirements(
+    user_id: str,
+    requirements: dict[str, CredentialsFieldInfo],
+) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+    """
+    Match user's credentials against a dictionary of credential requirements.
+
+    This is the core matching logic shared by both graph and block credential matching.
+    """
+    matched: dict[str, CredentialsMetaInput] = {}
+    missing: list[CredentialsMetaInput] = []
+
+    if not requirements:
+        return matched, missing
+
+    available_creds = await get_user_credentials(user_id)
+
+    for field_name, field_info in requirements.items():
+        matching_cred = find_matching_credential(available_creds, field_info)
+
+        if matching_cred:
+            try:
+                matched[field_name] = create_credential_meta_from_match(matching_cred)
+            except Exception as e:
+                logger.error(
+                    f"Failed to create CredentialsMetaInput for field '{field_name}': "
+                    f"provider={matching_cred.provider}, type={matching_cred.type}, "
+                    f"credential_id={matching_cred.id}",
+                    exc_info=True,
+                )
+                provider = next(iter(field_info.provider), "unknown")
+                cred_type = next(iter(field_info.supported_types), "api_key")
+                missing.append(
+                    CredentialsMetaInput(
+                        id=field_name,
+                        provider=provider,  # type: ignore
+                        type=cred_type,  # type: ignore
+                        title=f"{field_name} (validation failed: {e})",
+                    )
+                )
+        else:
+            provider = next(iter(field_info.provider), "unknown")
+            cred_type = next(iter(field_info.supported_types), "api_key")
+            missing.append(
+                CredentialsMetaInput(
+                    id=field_name,
+                    provider=provider,  # type: ignore
+                    type=cred_type,  # type: ignore
+                    title=field_name.replace("_", " ").title(),
+                )
+            )
+
+    return matched, missing
+
+
+async def get_user_credentials(user_id: str) -> list[Credentials]:
+    """Get all available credentials for a user."""
+    creds_manager = IntegrationCredentialsManager()
+    return await creds_manager.store.get_all_creds(user_id)
+
+
+def find_matching_credential(
+    available_creds: list[Credentials],
+    field_info: CredentialsFieldInfo,
+) -> Credentials | None:
+    """Find a credential that matches the required provider, type, scopes, and host."""
+    for cred in available_creds:
+        if cred.provider not in field_info.provider:
+            continue
+        if cred.type not in field_info.supported_types:
+            continue
+        if cred.type == "oauth2" and not _credential_has_required_scopes(
+            cred, field_info
+        ):
+            continue
+        if cred.type == "host_scoped" and not _credential_is_for_host(cred, field_info):
+            continue
+        return cred
+    return None
+
+
+def create_credential_meta_from_match(
+    matching_cred: Credentials,
+) -> CredentialsMetaInput:
+    """Create a CredentialsMetaInput from a matched credential."""
+    return CredentialsMetaInput(
+        id=matching_cred.id,
+        provider=matching_cred.provider,  # type: ignore
+        type=matching_cred.type,
+        title=matching_cred.title,
+    )
+
+
 async def match_user_credentials_to_graph(
    user_id: str,
    graph: GraphModel,
@@ -331,8 +425,6 @@ def _credential_has_required_scopes(
    # If no scopes are required, any credential matches
    if not requirements.required_scopes:
        return True
-
-    # Check that credential scopes are a superset of required scopes
    return set(credential.scopes).issuperset(requirements.required_scopes)


--- a/autogpt_platform/backend/backend/api/features/chat/tools/workspace_files.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/workspace_files.py
@@ -6,7 +6,7 @@ from typing import Any, Optional

 from pydantic import BaseModel

-from backend.api.features.chat.model import ChatSession
+from backend.copilot.model import ChatSession
 from backend.data.workspace import get_or_create_workspace
 from backend.util.settings import Config
 from backend.util.virus_scanner import scan_content_safe
--- a/autogpt_platform/backend/backend/api/features/chat/tracking.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tracking.py
--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -1,9 +1,8 @@
 import logging
+import queue
 from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from enum import Enum
-from multiprocessing import Manager
-from queue import Empty
 from typing import (
    TYPE_CHECKING,
    Annotated,
@@ -1200,12 +1199,16 @@ class NodeExecutionEntry(BaseModel):

 class ExecutionQueue(Generic[T]):
    """
-    Queue for managing the execution of agents.
-    This will be shared between different processes
+    Thread-safe queue for managing node execution within a single graph execution.
+
+    Note: Uses queue.Queue (not multiprocessing.Queue) since all access is from
+    threads within the same process. If migrating back to ProcessPoolExecutor,
+    replace with multiprocessing.Manager().Queue() for cross-process safety.
    """

    def __init__(self):
-        self.queue = Manager().Queue()
+        # Thread-safe queue (not multiprocessing) — see class docstring
+        self.queue: queue.Queue[T] = queue.Queue()

    def add(self, execution: T) -> T:
        self.queue.put(execution)
@@ -1220,7 +1223,7 @@ class ExecutionQueue(Generic[T]):
    def get_or_none(self) -> T | None:
        try:
            return self.queue.get_nowait()
-        except Empty:
+        except queue.Empty:
            return None


--- a/autogpt_platform/backend/backend/data/execution_queue_test.py
+++ b/autogpt_platform/backend/backend/data/execution_queue_test.py
@@ -0,0 +1,58 @@
+"""Tests for ExecutionQueue thread-safety."""
+
+import queue
+import threading
+
+from backend.data.execution import ExecutionQueue
+
+
+def test_execution_queue_uses_stdlib_queue():
+    """ExecutionQueue must be backed by queue.Queue, not a multiprocessing queue."""
+    backing_queue = ExecutionQueue().queue
+    assert isinstance(backing_queue, queue.Queue)
+
+
+def test_basic_operations():
+    """Exercise add, get, empty and get_or_none on a single-threaded queue."""
+    exec_queue = ExecutionQueue()
+
+    assert exec_queue.empty() is True
+    assert exec_queue.get_or_none() is None
+
+    added = exec_queue.add("item1")
+    assert added == "item1"
+    assert exec_queue.empty() is False
+
+    fetched = exec_queue.get()
+    assert fetched == "item1"
+    assert exec_queue.empty() is True
+
+
+def test_thread_safety():
+    """Test concurrent producer/consumer access delivers every item in order."""
+    q = ExecutionQueue()
+    results = []
+    num_items = 100
+    stop = threading.Event()  # lets the consumer exit even if items go missing
+
+    def producer():
+        for i in range(num_items):
+            q.add(f"item_{i}")
+
+    def consumer():
+        while len(results) < num_items and not stop.is_set():
+            item = q.get_or_none()
+            if item is not None:
+                results.append(item)
+
+    producer_thread = threading.Thread(target=producer, daemon=True)
+    consumer_thread = threading.Thread(target=consumer, daemon=True)
+    producer_thread.start()
+    consumer_thread.start()
+    producer_thread.join(timeout=5)
+    consumer_thread.join(timeout=5)
+    timed_out = producer_thread.is_alive() or consumer_thread.is_alive()
+    stop.set()  # release a consumer that is still polling after the timeout
+
+    assert not timed_out, "worker threads did not finish within the timeout"
+    assert results == [f"item_{i}" for i in range(num_items)]
--- a/autogpt_platform/backend/backend/data/rabbitmq.py
+++ b/autogpt_platform/backend/backend/data/rabbitmq.py
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -225,6 +226,10 @@ class SyncRabbitMQ(RabbitMQBase):
 class AsyncRabbitMQ(RabbitMQBase):
    """Asynchronous RabbitMQ client"""

+    def __init__(self, config: RabbitMQConfig):
+        super().__init__(config)
+        self._reconnect_lock: asyncio.Lock | None = None  # created lazily by _lock
+
    @property
    def is_connected(self) -> bool:
        return bool(self._connection and not self._connection.is_closed)
@@ -235,7 +240,17 @@ class AsyncRabbitMQ(RabbitMQBase):

    @conn_retry("AsyncRabbitMQ", "Acquiring async connection")
    async def connect(self):
-        if self.is_connected:
+        if self.is_connected and self._channel and not self._channel.is_closed:
+            return
+
+        if (
+            self.is_connected
+            and self._connection
+            and (self._channel is None or self._channel.is_closed)
+        ):
+            self._channel = await self._connection.channel()
+            await self._channel.set_qos(prefetch_count=1)
+            await self.declare_infrastructure()
            return

        self._connection = await aio_pika.connect_robust(
@@ -291,24 +306,46 @@ class AsyncRabbitMQ(RabbitMQBase):
                    exchange, routing_key=queue.routing_key or queue.name
                )

-    @func_retry
-    async def publish_message(
+    @property
+    def _lock(self) -> asyncio.Lock:
+        """Reconnect lock, created on first access and reused afterwards."""
+        self._reconnect_lock = self._reconnect_lock or asyncio.Lock()
+        return self._reconnect_lock
+
+    async def _ensure_channel(self) -> aio_pika.abc.AbstractChannel:
+        """Get a valid channel, reconnecting if the current one is stale.
+
+        Uses a lock to prevent concurrent reconnection attempts from racing.
+        """
+        if self.is_ready:
+            return self._channel  # type: ignore  # is_ready guarantees non-None
+
+        async with self._lock:
+            # Re-check: another caller may have reconnected while we awaited the lock
+            if self.is_ready:
+                return self._channel  # type: ignore
+
+            self._channel = None  # drop the stale reference before reconnecting
+            await self.connect()
+
+            if self._channel is None:  # connect() returned without a channel
+                raise RuntimeError("Channel should be established after connect")
+
+            return self._channel
+
+    async def _publish_once(
        self,
        routing_key: str,
        message: str,
        exchange: Optional[Exchange] = None,
        persistent: bool = True,
    ) -> None:
-        if not self.is_ready:
-            await self.connect()
-
-        if self._channel is None:
-            raise RuntimeError("Channel should be established after connect")
+        channel = await self._ensure_channel()

        if exchange:
-            exchange_obj = await self._channel.get_exchange(exchange.name)
+            exchange_obj = await channel.get_exchange(exchange.name)
        else:
-            exchange_obj = self._channel.default_exchange
+            exchange_obj = channel.default_exchange

        await exchange_obj.publish(
            aio_pika.Message(
@@ -322,9 +359,23 @@ class AsyncRabbitMQ(RabbitMQBase):
            routing_key=routing_key,
        )

+    @func_retry
+    async def publish_message(
+        self,
+        routing_key: str,
+        message: str,
+        exchange: Optional[Exchange] = None,
+        persistent: bool = True,
+    ) -> None:
+        try:
+            await self._publish_once(routing_key, message, exchange, persistent)
+        except aio_pika.exceptions.ChannelInvalidStateError:  # one in-place retry
+            logger.warning(
+                "RabbitMQ channel invalid, forcing reconnect and retrying publish"
+            )
+            async with self._lock:  # same lock _ensure_channel reconnects under
+                self._channel = None
+            await self._publish_once(routing_key, message, exchange, persistent)
+
    async def get_channel(self) -> aio_pika.abc.AbstractChannel:
-        if not self.is_ready:
-            await self.connect()
-        if self._channel is None:
-            raise RuntimeError("Channel should be established after connect")
-        return self._channel
+        return await self._ensure_channel()
--- a/autogpt_platform/backend/backend/util/clients.py
+++ b/autogpt_platform/backend/backend/util/clients.py
@@ -11,7 +11,6 @@ settings = Settings()

 if TYPE_CHECKING:
    from openai import AsyncOpenAI
-    from supabase import AClient, Client

    from backend.data.execution import (
        AsyncRedisExecutionEventBus,
@@ -22,6 +21,7 @@ if TYPE_CHECKING:
    from backend.executor.scheduler import SchedulerClient
    from backend.integrations.credentials_store import IntegrationCredentialsStore
    from backend.notifications.notifications import NotificationManagerClient
+    from supabase import AClient, Client


@thread_cached
@@ -106,6 +106,20 @@ async def get_async_execution_queue() -> "AsyncRabbitMQ":
    return client


+# ============ CoPilot Queue Helpers ============ #
+
+
+@thread_cached
+async def get_async_copilot_queue() -> "AsyncRabbitMQ":
+    """Return the thread-cached CoPilot AsyncRabbitMQ client, already connected."""
+    from backend.copilot.executor.utils import create_copilot_queue_config
+    from backend.data.rabbitmq import AsyncRabbitMQ
+
+    copilot_mq = AsyncRabbitMQ(create_copilot_queue_config())
+    await copilot_mq.connect()
+    return copilot_mq
+
+
 # ============ Integration Credentials Store ============ #


--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -342,6 +342,14 @@ async def store_media_file(
        if not target_path.is_file():
            raise ValueError(f"Local file does not exist: {target_path}")

+        # Virus scan the local file before any further processing
+        local_content = target_path.read_bytes()
+        if len(local_content) > MAX_FILE_SIZE_BYTES:
+            raise ValueError(
+                f"File too large: {len(local_content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
+            )
+        await scan_content_safe(local_content, filename=sanitized_file)
+
    # Return based on requested format
    if return_format == "for_local_processing":
        # Use when processing files locally with tools like ffmpeg, MoviePy, PIL
--- a/autogpt_platform/backend/backend/util/file_test.py
+++ b/autogpt_platform/backend/backend/util/file_test.py
@@ -247,3 +247,100 @@ class TestFileCloudIntegration:
                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
                    return_format="for_local_processing",
                )
+
+    @pytest.mark.asyncio
+    async def test_store_media_file_local_path_scanned(self):
+        """Test that local file paths are scanned for viruses."""
+        graph_exec_id = "test-exec-123"
+        local_file = "test_video.mp4"
+        file_content = b"fake video content"
+
+        with patch(
+            "backend.util.file.get_cloud_storage_handler"
+        ) as mock_handler_getter, patch(
+            "backend.util.file.scan_content_safe"
+        ) as mock_scan, patch(
+            "backend.util.file.Path"
+        ) as mock_path_class:
+
+            # Mock cloud storage handler - not a cloud path
+            mock_handler = MagicMock()
+            mock_handler.is_cloud_path.return_value = False
+            mock_handler_getter.return_value = mock_handler
+
+            # Mock virus scanner - clean result (no exception raised)
+            mock_scan.return_value = None
+
+            # Mock file system chain: Path() -> base / x -> target.resolve()
+            mock_base_path = MagicMock()
+            mock_target_path = MagicMock()
+            mock_resolved_path = MagicMock()
+
+            mock_path_class.return_value = mock_base_path
+            mock_base_path.mkdir = MagicMock()
+            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
+            mock_target_path.resolve.return_value = mock_resolved_path
+            mock_resolved_path.is_relative_to.return_value = True
+            mock_resolved_path.is_file.return_value = True
+            mock_resolved_path.read_bytes.return_value = file_content
+            mock_resolved_path.relative_to.return_value = Path(local_file)
+            mock_resolved_path.name = local_file
+
+            result = await store_media_file(
+                file=MediaFileType(local_file),
+                execution_context=make_test_context(graph_exec_id=graph_exec_id),
+                return_format="for_local_processing",
+            )
+
+            # Verify the scan ran exactly once, on the bytes returned by read_bytes()
+            mock_scan.assert_called_once_with(file_content, filename=local_file)
+
+            # Result should be the relative path
+            assert str(result) == local_file
+
+    @pytest.mark.asyncio
+    async def test_store_media_file_local_path_virus_detected(self):
+        """Test that infected local files raise VirusDetectedError."""
+        from backend.api.features.store.exceptions import VirusDetectedError
+
+        graph_exec_id = "test-exec-123"
+        local_file = "infected.exe"
+        file_content = b"malicious content"
+
+        with patch(
+            "backend.util.file.get_cloud_storage_handler"
+        ) as mock_handler_getter, patch(
+            "backend.util.file.scan_content_safe"
+        ) as mock_scan, patch(
+            "backend.util.file.Path"
+        ) as mock_path_class:
+
+            # Mock cloud storage handler - not a cloud path
+            mock_handler = MagicMock()
+            mock_handler.is_cloud_path.return_value = False
+            mock_handler_getter.return_value = mock_handler
+
+            # Mock virus scanner to raise as if a virus was detected
+            mock_scan.side_effect = VirusDetectedError(
+                "EICAR-Test-File", "File rejected due to virus detection"
+            )
+
+            # Mock file system chain: Path() -> base / x -> target.resolve()
+            mock_base_path = MagicMock()
+            mock_target_path = MagicMock()
+            mock_resolved_path = MagicMock()
+
+            mock_path_class.return_value = mock_base_path
+            mock_base_path.mkdir = MagicMock()
+            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
+            mock_target_path.resolve.return_value = mock_resolved_path
+            mock_resolved_path.is_relative_to.return_value = True
+            mock_resolved_path.is_file.return_value = True
+            mock_resolved_path.read_bytes.return_value = file_content
+
+            with pytest.raises(VirusDetectedError):
+                await store_media_file(
+                    file=MediaFileType(local_file),
+                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
+                    return_format="for_local_processing",
+                )
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -211,16 +211,23 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
        description="The port for execution manager daemon to run on",
    )

+    num_copilot_workers: int = Field(
+        default=10,
+        ge=1,
+        le=100,
+        description="Number of concurrent CoPilot executor workers",
+    )
+
+    copilot_executor_port: int = Field(
+        default=8008,
+        description="The port for CoPilot executor daemon to run on",
+    )
+
    execution_scheduler_port: int = Field(
        default=8003,
        description="The port for execution scheduler daemon to run on",
    )

-    agent_server_port: int = Field(
-        default=8004,
-        description="The port for agent server daemon to run on",
-    )
-
    database_api_port: int = Field(
        default=8005,
        description="The port for database server API to run on",
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -12,16 +12,16 @@ python = ">=3.10,<3.14"
 aio-pika = "^9.5.5"
 aiohttp = "^3.10.0"
 aiodns = "^3.5.0"
-anthropic = "^0.59.0"
+anthropic = "^0.79.0"
 apscheduler = "^3.11.1"
 autogpt-libs = { path = "../autogpt_libs", develop = true }
 bleach = { extras = ["css"], version = "^6.2.0" }
 click = "^8.2.0"
-cryptography = "^45.0"
+cryptography = "^46.0"
 discord-py = "^2.5.2"
 e2b-code-interpreter = "^1.5.2"
 elevenlabs = "^1.50.0"
-fastapi = "^0.116.1"
+fastapi = "^0.128.6"
 feedparser = "^6.0.11"
 flake8 = "^7.3.0"
 google-api-python-client = "^2.177.0"
@@ -34,11 +34,11 @@ html2text = "^2024.2.26"
 jinja2 = "^3.1.6"
 jsonref = "^1.1.0"
 jsonschema = "^4.25.0"
-langfuse = "^3.11.0"
-launchdarkly-server-sdk = "^9.12.0"
+langfuse = "^3.14.1"
+launchdarkly-server-sdk = "^9.14.1"
 mem0ai = "^0.1.115"
 moviepy = "^2.1.2"
-ollama = "^0.5.1"
+ollama = "^0.6.1"
 openai = "^1.97.1"
 orjson = "^3.10.0"
 pika = "^1.3.2"
@@ -48,16 +48,16 @@ postmarker = "^1.0"
 praw = "~7.8.1"
 prisma = "^0.15.0"
 rank-bm25 = "^0.2.2"
-prometheus-client = "^0.22.1"
+prometheus-client = "^0.24.1"
 prometheus-fastapi-instrumentator = "^7.0.0"
 psutil = "^7.0.0"
 psycopg2-binary = "^2.9.10"
-pydantic = { extras = ["email"], version = "^2.11.7" }
-pydantic-settings = "^2.10.1"
+pydantic = { extras = ["email"], version = "^2.12.5" }
+pydantic-settings = "^2.12.0"
 pytest = "^8.4.1"
 pytest-asyncio = "^1.1.0"
 python-dotenv = "^1.1.1"
-python-multipart = "^0.0.20"
+python-multipart = "^0.0.22"
 redis = "^6.2.0"
 regex = "^2025.9.18"
 replicate = "^1.0.6"
@@ -65,11 +65,11 @@ sentry-sdk = {extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlal
 sqlalchemy = "^2.0.40"
 strenum = "^0.4.9"
 stripe = "^11.5.0"
-supabase = "2.17.0"
-tenacity = "^9.1.2"
+supabase = "2.27.3"
+tenacity = "^9.1.4"
 todoist-api-python = "^2.1.7"
 tweepy = "^4.16.0"
-uvicorn = { extras = ["standard"], version = "^0.35.0" }
+uvicorn = { extras = ["standard"], version = "^0.40.0" }
 websockets = "^15.0"
 youtube-transcript-api = "^1.2.1"
 yt-dlp = "2025.12.08"
@@ -77,7 +77,7 @@ zerobouncesdk = "^1.1.2"
 # NOTE: please insert new dependencies in their alphabetical location
 pytest-snapshot = "^0.9.0"
 aiofiles = "^24.1.0"
-tiktoken = "^0.9.0"
+tiktoken = "^0.12.0"
 aioclamd = "^1.0.0"
 setuptools = "^80.9.0"
 gcloud-aio-storage = "^9.5.0"
@@ -95,13 +95,13 @@ black = "^24.10.0"
 faker = "^38.2.0"
 httpx = "^0.28.1"
 isort = "^5.13.2"
-poethepoet = "^0.37.0"
+poethepoet = "^0.41.0"
 pre-commit = "^4.4.0"
 pyright = "^1.1.407"
 pytest-mock = "^3.15.1"
-pytest-watcher = "^0.4.2"
+pytest-watcher = "^0.6.3"
 requests = "^2.32.5"
-ruff = "^0.14.5"
+ruff = "^0.15.0"
 # NOTE: please insert new dependencies in their alphabetical location

 [build-system]
@@ -116,6 +116,7 @@ ws = "backend.ws:main"
 scheduler = "backend.scheduler:main"
 notification = "backend.notification:main"
 executor = "backend.exec:main"
+copilot-executor = "backend.copilot.executor.__main__:main"
 cli = "backend.cli:main"
 format = "linter:format"
 lint = "linter:lint"
--- a/autogpt_platform/backend/test/agent_generator/test_core_integration.py
+++ b/autogpt_platform/backend/test/agent_generator/test_core_integration.py
@@ -9,10 +9,8 @@ from unittest.mock import AsyncMock, patch

 import pytest

-from backend.api.features.chat.tools.agent_generator import core
-from backend.api.features.chat.tools.agent_generator.core import (
-    AgentGeneratorNotConfiguredError,
-)
+from backend.copilot.tools.agent_generator import core
+from backend.copilot.tools.agent_generator.core import AgentGeneratorNotConfiguredError


 class TestServiceNotConfigured:
--- a/autogpt_platform/backend/test/agent_generator/test_library_agents.py
+++ b/autogpt_platform/backend/test/agent_generator/test_library_agents.py
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

-from backend.api.features.chat.tools.agent_generator import core
+from backend.copilot.tools.agent_generator import core


 class TestGetLibraryAgentsForGeneration:
--- a/autogpt_platform/backend/test/agent_generator/test_service.py
+++ b/autogpt_platform/backend/test/agent_generator/test_service.py
@@ -10,7 +10,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import httpx
 import pytest

-from backend.api.features.chat.tools.agent_generator import service
+from backend.copilot.tools.agent_generator import service


 class TestServiceConfiguration:
--- a/autogpt_platform/docker-compose.platform.yml
+++ b/autogpt_platform/docker-compose.platform.yml
@@ -158,6 +158,41 @@ services:
        max-size: "10m"
        max-file: "3"

+  copilot_executor:
+    build:
+      context: ../
+      dockerfile: autogpt_platform/backend/Dockerfile
+      target: server
+    command: ["python", "-m", "backend.copilot.executor"]
+    develop:
+      watch:
+        - path: ./
+          target: autogpt_platform/backend/
+          action: rebuild
+    depends_on:
+      redis:
+        condition: service_healthy
+      rabbitmq:
+        condition: service_healthy
+      db:
+        condition: service_healthy
+      migrate:
+        condition: service_completed_successfully
+      database_manager:
+        condition: service_started
+    <<: *backend-env-files
+    environment:
+      <<: *backend-env
+    ports:
+      - "8008:8008"
+    networks:
+      - app-network
+    logging:
+      driver: json-file
+      options:
+        max-size: "10m"
+        max-file: "3"
+
  websocket_server:
    build:
      context: ../
--- a/autogpt_platform/docker-compose.yml
+++ b/autogpt_platform/docker-compose.yml
@@ -53,6 +53,12 @@ services:
      file: ./docker-compose.platform.yml
      service: executor

+  copilot_executor:
+    <<: *agpt-services
+    extends:
+      file: ./docker-compose.platform.yml
+      service: copilot_executor
+
  websocket_server:
    <<: *agpt-services
    extends:
@@ -174,5 +180,6 @@ services:
      - deps
      - rest_server
      - executor
+      - copilot_executor
      - websocket_server
      - database_manager
--- a/autogpt_platform/frontend/Dockerfile
+++ b/autogpt_platform/frontend/Dockerfile
@@ -25,8 +25,12 @@ RUN if [ -f .env.production ]; then \
      cp .env.default .env; \
    fi
 RUN pnpm run generate:api
+# Disable source-map generation in Docker builds to halve webpack memory usage.
+# Source maps are only useful when SENTRY_AUTH_TOKEN is set (Vercel deploys);
+# the Docker image never uploads them, so generating them just wastes RAM.
+ENV NEXT_PUBLIC_SOURCEMAPS="false"
 # In CI, we want NEXT_PUBLIC_PW_TEST=true during build so Next.js inlines it
-RUN if [ "$NEXT_PUBLIC_PW_TEST" = "true" ]; then NEXT_PUBLIC_PW_TEST=true NODE_OPTIONS="--max-old-space-size=4096" pnpm build; else NODE_OPTIONS="--max-old-space-size=4096" pnpm build; fi
+RUN if [ "$NEXT_PUBLIC_PW_TEST" = "true" ]; then NEXT_PUBLIC_PW_TEST=true NODE_OPTIONS="--max-old-space-size=8192" pnpm build; else NODE_OPTIONS="--max-old-space-size=8192" pnpm build; fi

 # Prod stage - based on NextJS reference Dockerfile https://github.com/vercel/next.js/blob/64271354533ed16da51be5dce85f0dbd15f17517/examples/with-docker/Dockerfile
 FROM node:21-alpine AS prod
--- a/autogpt_platform/frontend/next.config.mjs
+++ b/autogpt_platform/frontend/next.config.mjs
@@ -1,8 +1,12 @@
 import { withSentryConfig } from "@sentry/nextjs";

+// Allow Docker builds to skip source-map generation (halves memory usage).
+// Defaults to true so Vercel/local builds are unaffected.
+const enableSourceMaps = process.env.NEXT_PUBLIC_SOURCEMAPS !== "false";
+
 /** @type {import('next').NextConfig} */
 const nextConfig = {
-  productionBrowserSourceMaps: true,
+  productionBrowserSourceMaps: enableSourceMaps,
  // Externalize OpenTelemetry packages to fix Turbopack HMR issues
  serverExternalPackages: [
    "@opentelemetry/instrumentation",
@@ -14,9 +18,37 @@ const nextConfig = {
    serverActions: {
      bodySizeLimit: "256mb",
    },
-    // Increase body size limit for API routes (file uploads) - 256MB to match backend limit
-    proxyClientMaxBodySize: "256mb",
    middlewareClientMaxBodySize: "256mb",
+    // Limit parallel webpack workers to reduce peak memory during builds.
+    cpus: 2,
+  },
+  // Work around cssnano "Invalid array length" bug in Next.js's bundled
+  // cssnano-simple comment parser when processing very large CSS chunks.
+  // CSS is still bundled correctly; gzip handles most of the size savings anyway.
+  webpack: (config, { dev }) => {
+    if (!dev) {
+      // Next.js adds CssMinimizerPlugin internally (after user config), so we
+      // can't filter it from config.plugins. Instead, intercept the webpack
+      // compilation hooks and replace the buggy plugin's tap with a no-op.
+      config.plugins.push({
+        apply(compiler) {
+          compiler.hooks.compilation.tap(
+            "DisableCssMinimizer",
+            (compilation) => {
+              compilation.hooks.processAssets.intercept({
+                register: (tap) => {
+                  if (tap.name === "CssMinimizerPlugin") {
+                    return { ...tap, fn: async () => {} };
+                  }
+                  return tap;
+                },
+              });
+            },
+          );
+        },
+      });
+    }
+    return config;
  },
  images: {
    domains: [
@@ -54,9 +86,16 @@ const nextConfig = {
  transpilePackages: ["geist"],
 };

-const isDevelopmentBuild = process.env.NODE_ENV !== "production";
+// Only run the Sentry webpack plugin when we can actually upload source maps
+// (i.e. on Vercel with SENTRY_AUTH_TOKEN set). The Sentry *runtime* SDK
+// (imported in app code) still captures errors without the plugin.
+// Skipping the plugin saves ~1 GB of peak memory during `next build`.
+const skipSentryPlugin =
+  process.env.NODE_ENV !== "production" ||
+  !enableSourceMaps ||
+  !process.env.SENTRY_AUTH_TOKEN;

-export default isDevelopmentBuild
+export default skipSentryPlugin
  ? nextConfig
  : withSentryConfig(nextConfig, {
      // For all available options, see:
@@ -96,7 +135,7 @@ export default isDevelopmentBuild

      // This helps Sentry with sourcemaps... https://docs.sentry.io/platforms/javascript/guides/nextjs/sourcemaps/
      sourcemaps: {
-        disable: false,
+        disable: !enableSourceMaps,
        assets: [".next/**/*.js", ".next/**/*.js.map"],
        ignore: ["**/node_modules/**"],
        deleteSourcemapsAfterUpload: false, // Source is public anyway :)
--- a/autogpt_platform/frontend/package.json
+++ b/autogpt_platform/frontend/package.json
@@ -7,7 +7,7 @@
  },
  "scripts": {
    "dev": "pnpm run generate:api:force && next dev --turbo",
-    "build": "next build",
+    "build": "cross-env NODE_OPTIONS=--max-old-space-size=16384 next build",
    "start": "next start",
    "start:standalone": "cd .next/standalone && node server.js",
    "lint": "next lint && prettier --check .",
@@ -30,6 +30,7 @@
    "defaults"
  ],
  "dependencies": {
+    "@ai-sdk/react": "3.0.61",
    "@faker-js/faker": "10.0.0",
    "@hookform/resolvers": "5.2.2",
    "@next/third-parties": "15.4.6",
@@ -60,6 +61,10 @@
    "@rjsf/utils": "6.1.2",
    "@rjsf/validator-ajv8": "6.1.2",
    "@sentry/nextjs": "10.27.0",
+    "@streamdown/cjk": "1.0.1",
+    "@streamdown/code": "1.0.1",
+    "@streamdown/math": "1.0.1",
+    "@streamdown/mermaid": "1.0.1",
    "@supabase/ssr": "0.7.0",
    "@supabase/supabase-js": "2.78.0",
    "@tanstack/react-query": "5.90.6",
@@ -68,6 +73,7 @@
    "@vercel/analytics": "1.5.0",
    "@vercel/speed-insights": "1.2.0",
    "@xyflow/react": "12.9.2",
+    "ai": "6.0.59",
    "boring-avatars": "1.11.2",
    "class-variance-authority": "0.7.1",
    "clsx": "2.1.1",
@@ -87,7 +93,6 @@
    "launchdarkly-react-client-sdk": "3.9.0",
    "lodash": "4.17.21",
    "lucide-react": "0.552.0",
-    "moment": "2.30.1",
    "next": "15.4.10",
    "next-themes": "0.4.6",
    "nuqs": "2.7.2",
@@ -102,7 +107,7 @@
    "react-markdown": "9.0.3",
    "react-modal": "3.16.3",
    "react-shepherd": "6.1.9",
-    "react-window": "1.8.11",
+    "react-window": "2.2.0",
    "recharts": "3.3.0",
    "rehype-autolink-headings": "7.1.0",
    "rehype-highlight": "7.0.2",
@@ -112,9 +117,11 @@
    "remark-math": "6.0.0",
    "shepherd.js": "14.5.1",
    "sonner": "2.0.7",
+    "streamdown": "2.1.0",
    "tailwind-merge": "2.6.0",
    "tailwind-scrollbar": "3.1.0",
    "tailwindcss-animate": "1.0.7",
+    "use-stick-to-bottom": "1.1.2",
    "uuid": "11.1.0",
    "vaul": "1.1.2",
    "zod": "3.25.76",
@@ -140,7 +147,7 @@
    "@types/react": "18.3.17",
    "@types/react-dom": "18.3.5",
    "@types/react-modal": "3.16.3",
-    "@types/react-window": "1.8.8",
+    "@types/react-window": "2.0.0",
    "@vitejs/plugin-react": "5.1.2",
    "axe-playwright": "2.2.2",
    "chromatic": "13.3.3",
@@ -172,7 +179,8 @@
  },
  "pnpm": {
    "overrides": {
-      "@opentelemetry/instrumentation": "0.209.0"
+      "@opentelemetry/instrumentation": "0.209.0",
+      "lodash-es": "4.17.23"
    }
  },
  "packageManager": "pnpm@10.20.0+sha512.cf9998222162dd85864d0a8102e7892e7ba4ceadebbf5a31f9c2fce48dfce317a9c53b9f6464d1ef9042cba2e02ae02a9f7c143a2b438cd93c91840f0192b9dd"
--- a/autogpt_platform/frontend/pnpm-lock.yaml
+++ b/autogpt_platform/frontend/pnpm-lock.yaml
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/BlockMenuSearchBar/useBlockMenuSearchBar.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/BlockMenuSearchBar/useBlockMenuSearchBar.ts
@@ -1,4 +1,4 @@
-import { debounce } from "lodash";
+import debounce from "lodash/debounce";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { useBlockMenuStore } from "../../../../stores/blockMenuStore";
 import { getQueryClient } from "@/lib/react-query/queryClient";
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/HorizontalScroll.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/NewControlPanel/NewBlockMenu/HorizontalScroll.tsx
@@ -70,10 +70,10 @@ export const HorizontalScroll: React.FC<HorizontalScrollAreaProps> = ({
          {children}
        </div>
        {canScrollLeft && (
-          <div className="pointer-events-none absolute inset-y-0 left-0 w-8 bg-gradient-to-r from-white via-white/80 to-white/0" />
+          <div className="pointer-events-none absolute inset-y-0 left-0 w-8 bg-gradient-to-r from-background via-background/80 to-background/0" />
        )}
        {canScrollRight && (
-          <div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-white via-white/80 to-white/0" />
+          <div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-background via-background/80 to-background/0" />
        )}
        {canScrollLeft && (
          <button
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/CopilotPage.tsx
@@ -0,0 +1,60 @@
+"use client";
+
+import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
+import { SidebarProvider } from "@/components/ui/sidebar";
+import { ChatContainer } from "./components/ChatContainer/ChatContainer";
+import { ChatSidebar } from "./components/ChatSidebar/ChatSidebar";
+import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer";
+import { MobileHeader } from "./components/MobileHeader/MobileHeader";
+import { useCopilotPage } from "./useCopilotPage";
+
+/**
+ * Top-level Copilot chat page.
+ *
+ * Shows a `LoadingSpinner` while the user is loading or not logged in.
+ * Otherwise renders the chat container, with `ChatSidebar` when not on
+ * mobile and `MobileHeader` plus `MobileDrawer` when on mobile.
+ */
+export function CopilotPage() {
+  const page = useCopilotPage();
+
+  if (page.isUserLoading || !page.isLoggedIn) {
+    return <LoadingSpinner size="large" cover />;
+  }
+
+  return (
+    <SidebarProvider defaultOpen className="h-[calc(100vh-72px)] min-h-0">
+      {!page.isMobile && <ChatSidebar />}
+      <div className="relative flex h-full w-full flex-col overflow-hidden bg-[#f8f8f9] px-0">
+        {page.isMobile && (
+          <MobileHeader onOpenDrawer={page.handleOpenDrawer} />
+        )}
+        <div className="flex-1 overflow-hidden">
+          <ChatContainer
+            messages={page.messages}
+            status={page.status}
+            error={page.error}
+            sessionId={page.sessionId}
+            isLoadingSession={page.isLoadingSession}
+            isCreatingSession={page.isCreatingSession}
+            onCreateSession={page.createSession}
+            onSend={page.onSend}
+            onStop={page.stop}
+          />
+        </div>
+      </div>
+      {page.isMobile && (
+        <MobileDrawer
+          isOpen={page.isDrawerOpen}
+          sessions={page.sessions}
+          currentSessionId={page.sessionId}
+          isLoading={page.isLoadingSessions}
+          onSelectSession={page.handleSelectSession}
+          onNewChat={page.handleNewChat}
+          onClose={page.handleCloseDrawer}
+          onOpenChange={page.handleDrawerOpenChange}
+        />
+      )}
+    </SidebarProvider>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatContainer/ChatContainer.tsx
@@ -0,0 +1,74 @@
+"use client";
+import { ChatInput } from "@/app/(platform)/copilot/components/ChatInput/ChatInput";
+import { UIDataTypes, UIMessage, UITools } from "ai";
+import { LayoutGroup, motion } from "framer-motion";
+import { ChatMessagesContainer } from "../ChatMessagesContainer/ChatMessagesContainer";
+import { CopilotChatActionsProvider } from "../CopilotChatActionsProvider/CopilotChatActionsProvider";
+import { EmptySession } from "../EmptySession/EmptySession";
+
+/** Props for `ChatContainer`. */
+export interface ChatContainerProps {
+  /** Chat messages; forwarded unchanged to `ChatMessagesContainer`. */
+  messages: UIMessage<unknown, UIDataTypes, UITools>[];
+  /**
+   * Chat request status string. Forwarded to `ChatMessagesContainer` and used
+   * to decide whether the session input is disabled / shown as streaming.
+   */
+  status: string;
+  /** Current chat error, if any; forwarded to `ChatMessagesContainer`. */
+  error: Error | undefined;
+  /**
+   * Active session id. When truthy the message list and input are rendered;
+   * otherwise `EmptySession` is rendered instead.
+   */
+  sessionId: string | null;
+  /** Passed to `ChatMessagesContainer` as its `isLoading` prop. */
+  isLoadingSession: boolean;
+  /** Forwarded to `EmptySession`. */
+  isCreatingSession: boolean;
+  /** Forwarded to `EmptySession`; may return a promise resolving to a string. */
+  onCreateSession: () => void | Promise<string>;
+  /**
+   * Sends a message. Handed to `CopilotChatActionsProvider`, to the session
+   * `ChatInput`, and to `EmptySession`.
+   */
+  onSend: (message: string) => void | Promise<void>;
+  /** Passed to the session `ChatInput` as its `onStop` handler. */
+  onStop: () => void;
+}
+/**
+ * Chat area of the Copilot page.
+ *
+ * With a `sessionId`, renders the message list and a `ChatInput` for follow-up
+ * messages; without one, renders `EmptySession`. Everything is wrapped in
+ * `CopilotChatActionsProvider` (given `onSend`) and a framer-motion
+ * `LayoutGroup`.
+ */
+export const ChatContainer = ({
+  messages,
+  status,
+  error,
+  sessionId,
+  isLoadingSession,
+  isCreatingSession,
+  onCreateSession,
+  onSend,
+  onStop,
+}: ChatContainerProps) => {
+  const inputLayoutId = "copilot-2-chat-input";
+
+  // A request is in flight both while it has been sent but no response has
+  // started ("submitted") and while the response is arriving ("streaming").
+  // Treat both as busy so a second message cannot be sent in the gap and the
+  // stop control is available for the whole request.
+  // NOTE(review): assumes `status` carries the AI SDK `useChat` status values
+  // ("submitted" | "streaming" | "ready" | "error") - confirm in useCopilotPage.
+  const isBusy = status === "submitted" || status === "streaming";
+
+  return (
+    <CopilotChatActionsProvider onSend={onSend}>
+      <LayoutGroup id="copilot-2-chat-layout">
+        <div className="flex h-full min-h-0 w-full flex-col bg-[#f8f8f9] px-2 lg:px-0">
+          {sessionId ? (
+            <div className="mx-auto flex h-full min-h-0 w-full max-w-3xl flex-col">
+              <ChatMessagesContainer
+                messages={messages}
+                status={status}
+                error={error}
+                isLoading={isLoadingSession}
+              />
+              <motion.div
+                initial={{ opacity: 0 }}
+                animate={{ opacity: 1 }}
+                transition={{ duration: 0.3 }}
+                className="relative px-3 pb-2 pt-2"
+              >
+                {/* Gradient strip just above the input, fading into the page background. */}
+                <div className="pointer-events-none absolute left-0 right-0 top-[-18px] z-10 h-6 bg-gradient-to-b from-transparent to-[#f8f8f9]" />
+                <ChatInput
+                  inputId="chat-input-session"
+                  onSend={onSend}
+                  disabled={isBusy}
+                  isStreaming={isBusy}
+                  onStop={onStop}
+                  placeholder="What else can I help with?"
+                />
+              </motion.div>
+            </div>
+          ) : (
+            <EmptySession
+              inputLayoutId={inputLayoutId}
+              isCreatingSession={isCreatingSession}
+              onCreateSession={onCreateSession}
+              onSend={onSend}
+            />
+          )}
+        </div>
+      </LayoutGroup>
+    </CopilotChatActionsProvider>
+  );
+};
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
@@ -6,17 +6,19 @@ import {
  MicrophoneIcon,
  StopIcon,
 } from "@phosphor-icons/react";
+import { ChangeEvent, useCallback } from "react";
 import { RecordingIndicator } from "./components/RecordingIndicator";
 import { useChatInput } from "./useChatInput";
 import { useVoiceRecording } from "./useVoiceRecording";

 export interface Props {
-  onSend: (message: string) => void;
+  /** Called with the message text; may return a promise. Passed on to `useChatInput`. */
+  onSend: (message: string) => void | Promise<void>;
   disabled?: boolean;
   isStreaming?: boolean;
   onStop?: () => void;
+  /** Placeholder text; defaults to "Type your message...". */
   placeholder?: string;
+  /** Extra class names merged (via `cn`) into the root `<form>`'s classes. */
   className?: string;
+  /**
+   * DOM id used for the input; defaults to "chat-input". Callers can override
+   * it to give an input its own id.
+   */
+  inputId?: string;
 }

 export function ChatInput({
@@ -26,14 +28,14 @@ export function ChatInput({
  onStop,
  placeholder = "Type your message...",
  className,
+  inputId = "chat-input",
 }: Props) {
-  const inputId = "chat-input";
  const {
    value,
    setValue,
    handleKeyDown: baseHandleKeyDown,
    handleSubmit,
-    handleChange,
+    handleChange: baseHandleChange,
    hasMultipleLines,
  } = useChatInput({
    onSend,
@@ -60,6 +62,15 @@ export function ChatInput({
    inputId,
  });

+  // Wrap the change handler returned by `useChatInput` (`baseHandleChange`):
+  // while `isRecording` is true, change events are dropped so the text cannot
+  // be edited; otherwise they are passed through unchanged.
+  const handleChange = useCallback(
+    (e: ChangeEvent<HTMLTextAreaElement>) => {
+      if (isRecording) return;
+      baseHandleChange(e);
+    },
+    [isRecording, baseHandleChange],
+  );
+
  return (
    <form onSubmit={handleSubmit} className={cn("relative flex-1", className)}>
      <div className="relative">
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts
@@ -21,6 +21,7 @@ export function useChatInput({
 }: Args) {
  const [value, setValue] = useState("");
  const [hasMultipleLines, setHasMultipleLines] = useState(false);
+  const [isSending, setIsSending] = useState(false);

  useEffect(
    function focusOnMount() {
@@ -100,34 +101,40 @@ export function useChatInput({
    }
  }, [value, maxRows, inputId]);

-  const handleSend = () => {
-    if (disabled || !value.trim()) return;
-    onSend(value.trim());
-    setValue("");
-    setHasMultipleLines(false);
-    const textarea = document.getElementById(inputId) as HTMLTextAreaElement;
-    const wrapper = document.getElementById(
-      `${inputId}-wrapper`,
-    ) as HTMLDivElement;
-    if (textarea) {
-      textarea.style.height = "auto";
+  /**
+   * Sends the trimmed draft through `onSend` and, once that resolves, clears
+   * the draft and resets the textarea/wrapper inline heights.
+   *
+   * Does nothing when the input is disabled, a send is already in progress,
+   * or the draft is blank. If `onSend` throws or rejects, the error is logged
+   * and the draft is left in place so the user can retry. Callers invoke this
+   * with `void`, so without the `catch` a failure would surface as an
+   * unhandled promise rejection.
+   */
+  async function handleSend() {
+    const message = value.trim();
+    if (disabled || isSending || !message) return;
+
+    setIsSending(true);
+    try {
+      await onSend(message);
+      setValue("");
+      setHasMultipleLines(false);
+      // Drop the inline sizes so the input collapses back to its default
+      // height. getElementById may return null, hence the guards.
+      const textarea = document.getElementById(inputId);
+      const wrapper = document.getElementById(`${inputId}-wrapper`);
+      if (textarea) {
+        textarea.style.height = "auto";
+      }
+      if (wrapper) {
+        wrapper.style.height = "";
+        wrapper.style.maxHeight = "";
+      }
+    } catch (error: unknown) {
+      console.error("Failed to send chat message:", error);
+    } finally {
+      // Always release the in-flight guard, whether the send succeeded or not.
+      setIsSending(false);
     }
-    if (wrapper) {
-      wrapper.style.height = "";
-      wrapper.style.maxHeight = "";
-    }
-  };
+  }

  function handleKeyDown(event: KeyboardEvent<HTMLTextAreaElement>) {
    if (event.key === "Enter" && !event.shiftKey) {
      event.preventDefault();
-      handleSend();
+      void handleSend();
    }
  }

   function handleSubmit(e: FormEvent<HTMLFormElement>) {
+    // Cancel the form's default submit action, then send the current draft.
+    // `void` marks the promise returned by handleSend as intentionally not awaited.
     e.preventDefault();
-    handleSend();
+    void handleSend();
   }

  function handleChange(e: ChangeEvent<HTMLTextAreaElement>) {
@@ -142,5 +149,6 @@ export function useChatInput({
    handleSubmit,
    handleChange,
    hasMultipleLines,
+    isSending,
  };
 }
--- a/Show More
+++ b/Show More