mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-14 00:35:02 -05:00
Compare commits
173 Commits
pr-11826
...
feat/copit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32e9dda30d | ||
|
|
cb45e7957b | ||
|
|
f1d02fb8f3 | ||
|
|
47de6b6420 | ||
|
|
62cd2eea89 | ||
|
|
ae61ec692e | ||
|
|
9296bd8736 | ||
|
|
308113c03d | ||
|
|
51abf13254 | ||
|
|
54b03d3a29 | ||
|
|
239dff5ebd | ||
|
|
1dd53db21c | ||
|
|
06c16ee2fe | ||
|
|
8d2a649ee5 | ||
|
|
cb166dd6fb | ||
|
|
9589474709 | ||
|
|
3d31f62bf1 | ||
|
|
b8b6c9de23 | ||
|
|
749a78723a | ||
|
|
bec2e1ddee | ||
|
|
ec1ab06e0d | ||
|
|
f31cb49557 | ||
|
|
fd28c386f4 | ||
|
|
3bea584659 | ||
|
|
4f6055f494 | ||
|
|
695a185fa1 | ||
|
|
113e87a23c | ||
|
|
d09f1532a4 | ||
|
|
d7f7a2747f | ||
|
|
68849e197c | ||
|
|
211478bb29 | ||
|
|
0e88dd15b2 | ||
|
|
7f3c227f0a | ||
|
|
40b58807ab | ||
|
|
d0e2e6f013 | ||
|
|
efdc8d73cc | ||
|
|
a34810d8a2 | ||
|
|
038b7d5841 | ||
|
|
cac93b0cc9 | ||
|
|
a78145505b | ||
|
|
2025aaf5f2 | ||
|
|
ae9bce3bae | ||
|
|
3107d889fc | ||
|
|
f174fb6303 | ||
|
|
920a4c5f15 | ||
|
|
e95fadbb86 | ||
|
|
b14b3803ad | ||
|
|
36aeb0b2b3 | ||
|
|
2a189c44c4 | ||
|
|
508759610f | ||
|
|
062fe1aa70 | ||
|
|
82c483d6c8 | ||
|
|
7cffa1895f | ||
|
|
9791bdd724 | ||
|
|
750a674c78 | ||
|
|
960c7980a3 | ||
|
|
e85d437bb2 | ||
|
|
2cd0d4fe0f | ||
|
|
44f9536bd6 | ||
|
|
1c1085a227 | ||
|
|
d7ef70469e | ||
|
|
1926127ddd | ||
|
|
8b509e56de | ||
|
|
1ecae8c87e | ||
|
|
659338f90c | ||
|
|
4df5b7bde7 | ||
|
|
acb2d0bd1b | ||
|
|
51aa369c80 | ||
|
|
017a00af46 | ||
|
|
6403ffe353 | ||
|
|
52650eed1d | ||
|
|
c40a98ba3c | ||
|
|
a31fc8b162 | ||
|
|
81c1524658 | ||
|
|
f2ead70f3d | ||
|
|
0f2d1a6553 | ||
|
|
87d817b83b | ||
|
|
7d4c020a9b | ||
|
|
acf932bf4f | ||
|
|
f562d9a277 | ||
|
|
3c92a96504 | ||
|
|
8b8e1df739 | ||
|
|
e596ea87cb | ||
|
|
602a0a4fb1 | ||
|
|
8d7d531ae0 | ||
|
|
43153a12e0 | ||
|
|
587e11c60a | ||
|
|
57da545e02 | ||
|
|
81f8290f01 | ||
|
|
626980bf27 | ||
|
|
6467f6734f | ||
|
|
5a30d11416 | ||
|
|
1f4105e8f9 | ||
|
|
caf9ff34e6 | ||
|
|
e42b27af3c | ||
|
|
34face15d2 | ||
|
|
e8fc8ee623 | ||
|
|
1a16e203b8 | ||
|
|
7d32c83f95 | ||
|
|
5dae303ce0 | ||
|
|
6e2a45b84e | ||
|
|
32f6532e9c | ||
|
|
6cbfbdd013 | ||
|
|
0c6fa60436 | ||
|
|
b04e916c23 | ||
|
|
1a32ba7d9a | ||
|
|
deccc26f1f | ||
|
|
9e38bd5b78 | ||
|
|
a329831b0b | ||
|
|
98dd1a9480 | ||
|
|
9c7c598c7d | ||
|
|
728c40def5 | ||
|
|
0bbe8a184d | ||
|
|
7592deed63 | ||
|
|
b9c759ce4f | ||
|
|
cd64562e1b | ||
|
|
8fddc9d71f | ||
|
|
3d1cd03fc8 | ||
|
|
e7ebe42306 | ||
|
|
5efb80d47b | ||
|
|
b49d8e2cba | ||
|
|
452544530d | ||
|
|
e0fab7e34e | ||
|
|
32ee7e6cf8 | ||
|
|
670663c406 | ||
|
|
0dbe4cf51e | ||
|
|
29ee85c86f | ||
|
|
85b6520710 | ||
|
|
bfa942e032 | ||
|
|
11256076d8 | ||
|
|
3ca2387631 | ||
|
|
ed07f02738 | ||
|
|
b121030c94 | ||
|
|
c22c18374d | ||
|
|
e40233a3ac | ||
|
|
3ae5eabf9d | ||
|
|
a077ba9f03 | ||
|
|
5401d54eaa | ||
|
|
5ac89d7c0b | ||
|
|
4f908d5cb3 | ||
|
|
c1aa684743 | ||
|
|
7e5b84cc5c | ||
|
|
09cb313211 | ||
|
|
c026485023 | ||
|
|
1eabc60484 | ||
|
|
f4bf492f24 | ||
|
|
81e48c00a4 | ||
|
|
7dc53071e8 | ||
|
|
4878665c66 | ||
|
|
678ddde751 | ||
|
|
aef6f57cfd | ||
|
|
14cee1670a | ||
|
|
d81d1ce024 | ||
|
|
2dd341c369 | ||
|
|
f7350c797a | ||
|
|
1081590384 | ||
|
|
7e37de8e30 | ||
|
|
2abbb7fbc8 | ||
|
|
7ee94d986c | ||
|
|
05b60db554 | ||
|
|
18a1661fa3 | ||
|
|
b72521daa9 | ||
|
|
cc4839bedb | ||
|
|
dbbff04616 | ||
|
|
350ad3591b | ||
|
|
e6438b9a76 | ||
|
|
de0ec3d388 | ||
|
|
e10ff8d37f | ||
|
|
7cb1e588b0 | ||
|
|
9538992eaf | ||
|
|
27b72062f2 | ||
|
|
9a79a8d257 | ||
|
|
a9bf08748b |
2
.github/workflows/classic-frontend-ci.yml
vendored
2
.github/workflows/classic-frontend-ci.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
|
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
|
||||||
if: github.event_name == 'push'
|
if: github.event_name == 'push'
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v8
|
||||||
with:
|
with:
|
||||||
add-paths: classic/frontend/build/web
|
add-paths: classic/frontend/build/web
|
||||||
base: ${{ github.ref_name }}
|
base: ${{ github.ref_name }}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.event.workflow_run.head_branch }}
|
ref: ${{ github.event.workflow_run.head_branch }}
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
@@ -42,7 +42,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get CI failure details
|
- name: Get CI failure details
|
||||||
id: failure_details
|
id: failure_details
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const run = await github.rest.actions.getWorkflowRun({
|
const run = await github.rest.actions.getWorkflowRun({
|
||||||
|
|||||||
11
.github/workflows/claude-dependabot.yml
vendored
11
.github/workflows/claude-dependabot.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
actions: read # Required for CI access
|
actions: read # Required for CI access
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
|
|
||||||
@@ -41,7 +41,7 @@ jobs:
|
|||||||
python-version: "3.11" # Use standard version matching CI
|
python-version: "3.11" # Use standard version matching CI
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
@@ -78,7 +78,7 @@ jobs:
|
|||||||
|
|
||||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22"
|
node-version: "22"
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ jobs:
|
|||||||
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
||||||
@@ -124,7 +124,7 @@ jobs:
|
|||||||
# Phase 1: Cache and load Docker images for faster setup
|
# Phase 1: Cache and load Docker images for faster setup
|
||||||
- name: Set up Docker image cache
|
- name: Set up Docker image cache
|
||||||
id: docker-cache
|
id: docker-cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/docker-cache
|
path: ~/docker-cache
|
||||||
# Use a versioned key for cache invalidation when image list changes
|
# Use a versioned key for cache invalidation when image list changes
|
||||||
@@ -309,6 +309,7 @@ jobs:
|
|||||||
uses: anthropics/claude-code-action@v1
|
uses: anthropics/claude-code-action@v1
|
||||||
with:
|
with:
|
||||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||||
|
allowed_bots: "dependabot[bot]"
|
||||||
claude_args: |
|
claude_args: |
|
||||||
--allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
|
--allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
|
||||||
prompt: |
|
prompt: |
|
||||||
|
|||||||
10
.github/workflows/claude.yml
vendored
10
.github/workflows/claude.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
|||||||
actions: read # Required for CI access
|
actions: read # Required for CI access
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ jobs:
|
|||||||
python-version: "3.11" # Use standard version matching CI
|
python-version: "3.11" # Use standard version matching CI
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
@@ -94,7 +94,7 @@ jobs:
|
|||||||
|
|
||||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22"
|
node-version: "22"
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ jobs:
|
|||||||
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
||||||
@@ -140,7 +140,7 @@ jobs:
|
|||||||
# Phase 1: Cache and load Docker images for faster setup
|
# Phase 1: Cache and load Docker images for faster setup
|
||||||
- name: Set up Docker image cache
|
- name: Set up Docker image cache
|
||||||
id: docker-cache
|
id: docker-cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/docker-cache
|
path: ~/docker-cache
|
||||||
# Use a versioned key for cache invalidation when image list changes
|
# Use a versioned key for cache invalidation when image list changes
|
||||||
|
|||||||
2
.github/workflows/codeql.yml
vendored
2
.github/workflows/codeql.yml
vendored
@@ -58,7 +58,7 @@ jobs:
|
|||||||
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
|
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
|
|||||||
10
.github/workflows/copilot-setup-steps.yml
vendored
10
.github/workflows/copilot-setup-steps.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
|||||||
# If you do not check out your code, Copilot will do this for you.
|
# If you do not check out your code, Copilot will do this for you.
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
submodules: true
|
submodules: true
|
||||||
@@ -39,7 +39,7 @@ jobs:
|
|||||||
python-version: "3.11" # Use standard version matching CI
|
python-version: "3.11" # Use standard version matching CI
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
@@ -76,7 +76,7 @@ jobs:
|
|||||||
|
|
||||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22"
|
node-version: "22"
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ jobs:
|
|||||||
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Cache frontend dependencies
|
- name: Cache frontend dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
|
||||||
@@ -132,7 +132,7 @@ jobs:
|
|||||||
# Phase 1: Cache and load Docker images for faster setup
|
# Phase 1: Cache and load Docker images for faster setup
|
||||||
- name: Set up Docker image cache
|
- name: Set up Docker image cache
|
||||||
id: docker-cache
|
id: docker-cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/docker-cache
|
path: ~/docker-cache
|
||||||
# Use a versioned key for cache invalidation when image list changes
|
# Use a versioned key for cache invalidation when image list changes
|
||||||
|
|||||||
4
.github/workflows/docs-block-sync.yml
vendored
4
.github/workflows/docs-block-sync.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
|
|||||||
4
.github/workflows/docs-claude-review.yml
vendored
4
.github/workflows/docs-claude-review.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
|
|||||||
4
.github/workflows/docs-enhance.yml
vendored
4
.github/workflows/docs-enhance.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
|
|
||||||
@@ -38,7 +38,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.event.inputs.git_ref || github.ref_name }}
|
ref: ${{ github.event.inputs.git_ref || github.ref_name }}
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Trigger deploy workflow
|
- name: Trigger deploy workflow
|
||||||
uses: peter-evans/repository-dispatch@v3
|
uses: peter-evans/repository-dispatch@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.DEPLOY_TOKEN }}
|
token: ${{ secrets.DEPLOY_TOKEN }}
|
||||||
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.ref_name || 'master' }}
|
ref: ${{ github.ref_name || 'master' }}
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Trigger deploy workflow
|
- name: Trigger deploy workflow
|
||||||
uses: peter-evans/repository-dispatch@v3
|
uses: peter-evans/repository-dispatch@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.DEPLOY_TOKEN }}
|
token: ${{ secrets.DEPLOY_TOKEN }}
|
||||||
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
||||||
|
|||||||
4
.github/workflows/platform-backend-ci.yml
vendored
4
.github/workflows/platform-backend-ci.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
submodules: true
|
submodules: true
|
||||||
@@ -88,7 +88,7 @@ jobs:
|
|||||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Set up Python dependency cache
|
- name: Set up Python dependency cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.cache/pypoetry
|
path: ~/.cache/pypoetry
|
||||||
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ jobs:
|
|||||||
- name: Check comment permissions and deployment status
|
- name: Check comment permissions and deployment status
|
||||||
id: check_status
|
id: check_status
|
||||||
if: github.event_name == 'issue_comment' && github.event.issue.pull_request
|
if: github.event_name == 'issue_comment' && github.event.issue.pull_request
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const commentBody = context.payload.comment.body.trim();
|
const commentBody = context.payload.comment.body.trim();
|
||||||
@@ -55,7 +55,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Post permission denied comment
|
- name: Post permission denied comment
|
||||||
if: steps.check_status.outputs.permission_denied == 'true'
|
if: steps.check_status.outputs.permission_denied == 'true'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
await github.rest.issues.createComment({
|
await github.rest.issues.createComment({
|
||||||
@@ -68,7 +68,7 @@ jobs:
|
|||||||
- name: Get PR details for deployment
|
- name: Get PR details for deployment
|
||||||
id: pr_details
|
id: pr_details
|
||||||
if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
|
if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const pr = await github.rest.pulls.get({
|
const pr = await github.rest.pulls.get({
|
||||||
@@ -82,7 +82,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Dispatch Deploy Event
|
- name: Dispatch Deploy Event
|
||||||
if: steps.check_status.outputs.should_deploy == 'true'
|
if: steps.check_status.outputs.should_deploy == 'true'
|
||||||
uses: peter-evans/repository-dispatch@v3
|
uses: peter-evans/repository-dispatch@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.DISPATCH_TOKEN }}
|
token: ${{ secrets.DISPATCH_TOKEN }}
|
||||||
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
||||||
@@ -98,7 +98,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Post deploy success comment
|
- name: Post deploy success comment
|
||||||
if: steps.check_status.outputs.should_deploy == 'true'
|
if: steps.check_status.outputs.should_deploy == 'true'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
await github.rest.issues.createComment({
|
await github.rest.issues.createComment({
|
||||||
@@ -110,7 +110,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Dispatch Undeploy Event (from comment)
|
- name: Dispatch Undeploy Event (from comment)
|
||||||
if: steps.check_status.outputs.should_undeploy == 'true'
|
if: steps.check_status.outputs.should_undeploy == 'true'
|
||||||
uses: peter-evans/repository-dispatch@v3
|
uses: peter-evans/repository-dispatch@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.DISPATCH_TOKEN }}
|
token: ${{ secrets.DISPATCH_TOKEN }}
|
||||||
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
||||||
@@ -126,7 +126,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Post undeploy success comment
|
- name: Post undeploy success comment
|
||||||
if: steps.check_status.outputs.should_undeploy == 'true'
|
if: steps.check_status.outputs.should_undeploy == 'true'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
await github.rest.issues.createComment({
|
await github.rest.issues.createComment({
|
||||||
@@ -139,7 +139,7 @@ jobs:
|
|||||||
- name: Check deployment status on PR close
|
- name: Check deployment status on PR close
|
||||||
id: check_pr_close
|
id: check_pr_close
|
||||||
if: github.event_name == 'pull_request' && github.event.action == 'closed'
|
if: github.event_name == 'pull_request' && github.event.action == 'closed'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const comments = await github.rest.issues.listComments({
|
const comments = await github.rest.issues.listComments({
|
||||||
@@ -168,7 +168,7 @@ jobs:
|
|||||||
github.event_name == 'pull_request' &&
|
github.event_name == 'pull_request' &&
|
||||||
github.event.action == 'closed' &&
|
github.event.action == 'closed' &&
|
||||||
steps.check_pr_close.outputs.should_undeploy == 'true'
|
steps.check_pr_close.outputs.should_undeploy == 'true'
|
||||||
uses: peter-evans/repository-dispatch@v3
|
uses: peter-evans/repository-dispatch@v4
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.DISPATCH_TOKEN }}
|
token: ${{ secrets.DISPATCH_TOKEN }}
|
||||||
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
|
||||||
@@ -187,7 +187,7 @@ jobs:
|
|||||||
github.event_name == 'pull_request' &&
|
github.event_name == 'pull_request' &&
|
||||||
github.event.action == 'closed' &&
|
github.event.action == 'closed' &&
|
||||||
steps.check_pr_close.outputs.should_undeploy == 'true'
|
steps.check_pr_close.outputs.should_undeploy == 'true'
|
||||||
uses: actions/github-script@v7
|
uses: actions/github-script@v8
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
await github.rest.issues.createComment({
|
await github.rest.issues.createComment({
|
||||||
|
|||||||
48
.github/workflows/platform-frontend-ci.yml
vendored
48
.github/workflows/platform-frontend-ci.yml
vendored
@@ -27,13 +27,22 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
cache-key: ${{ steps.cache-key.outputs.key }}
|
cache-key: ${{ steps.cache-key.outputs.key }}
|
||||||
|
components-changed: ${{ steps.filter.outputs.components }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Check for component changes
|
||||||
|
uses: dorny/paths-filter@v3
|
||||||
|
id: filter
|
||||||
|
with:
|
||||||
|
filters: |
|
||||||
|
components:
|
||||||
|
- 'autogpt_platform/frontend/src/components/**'
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -45,7 +54,7 @@ jobs:
|
|||||||
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
|
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ steps.cache-key.outputs.key }}
|
key: ${{ steps.cache-key.outputs.key }}
|
||||||
@@ -62,10 +71,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -73,7 +82,7 @@ jobs:
|
|||||||
run: corepack enable
|
run: corepack enable
|
||||||
|
|
||||||
- name: Restore dependencies cache
|
- name: Restore dependencies cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ needs.setup.outputs.cache-key }}
|
key: ${{ needs.setup.outputs.cache-key }}
|
||||||
@@ -90,17 +99,20 @@ jobs:
|
|||||||
chromatic:
|
chromatic:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: setup
|
needs: setup
|
||||||
# Only run on dev branch pushes or PRs targeting dev
|
# Disabled: to re-enable, remove 'false &&' from the condition below
|
||||||
if: github.ref == 'refs/heads/dev' || github.base_ref == 'dev'
|
if: >-
|
||||||
|
false
|
||||||
|
&& (github.ref == 'refs/heads/dev' || github.base_ref == 'dev')
|
||||||
|
&& needs.setup.outputs.components-changed == 'true'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -108,7 +120,7 @@ jobs:
|
|||||||
run: corepack enable
|
run: corepack enable
|
||||||
|
|
||||||
- name: Restore dependencies cache
|
- name: Restore dependencies cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ needs.setup.outputs.cache-key }}
|
key: ${{ needs.setup.outputs.cache-key }}
|
||||||
@@ -136,12 +148,12 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -164,7 +176,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Cache Docker layers
|
- name: Cache Docker layers
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: /tmp/.buildx-cache
|
path: /tmp/.buildx-cache
|
||||||
key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}
|
key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}
|
||||||
@@ -219,7 +231,7 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Restore dependencies cache
|
- name: Restore dependencies cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ needs.setup.outputs.cache-key }}
|
key: ${{ needs.setup.outputs.cache-key }}
|
||||||
@@ -265,12 +277,12 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -278,7 +290,7 @@ jobs:
|
|||||||
run: corepack enable
|
run: corepack enable
|
||||||
|
|
||||||
- name: Restore dependencies cache
|
- name: Restore dependencies cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ needs.setup.outputs.cache-key }}
|
key: ${{ needs.setup.outputs.cache-key }}
|
||||||
|
|||||||
16
.github/workflows/platform-fullstack-ci.yml
vendored
16
.github/workflows/platform-fullstack-ci.yml
vendored
@@ -29,10 +29,10 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@ jobs:
|
|||||||
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
|
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ steps.cache-key.outputs.key }}
|
key: ${{ steps.cache-key.outputs.key }}
|
||||||
@@ -56,19 +56,19 @@ jobs:
|
|||||||
run: pnpm install --frozen-lockfile
|
run: pnpm install --frozen-lockfile
|
||||||
|
|
||||||
types:
|
types:
|
||||||
runs-on: ubuntu-latest
|
runs-on: big-boi
|
||||||
needs: setup
|
needs: setup
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v6
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: "22.18.0"
|
node-version: "22.18.0"
|
||||||
|
|
||||||
@@ -85,10 +85,10 @@ jobs:
|
|||||||
|
|
||||||
- name: Run docker compose
|
- name: Run docker compose
|
||||||
run: |
|
run: |
|
||||||
docker compose -f ../docker-compose.yml --profile local --profile deps_backend up -d
|
docker compose -f ../docker-compose.yml --profile local up -d deps_backend
|
||||||
|
|
||||||
- name: Restore dependencies cache
|
- name: Restore dependencies cache
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v5
|
||||||
with:
|
with:
|
||||||
path: ~/.pnpm-store
|
path: ~/.pnpm-store
|
||||||
key: ${{ needs.setup.outputs.cache-key }}
|
key: ${{ needs.setup.outputs.cache-key }}
|
||||||
|
|||||||
2
.github/workflows/repo-workflow-checker.yml
vendored
2
.github/workflows/repo-workflow-checker.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
# - name: Wait some time for all actions to start
|
# - name: Wait some time for all actions to start
|
||||||
# run: sleep 30
|
# run: sleep 30
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v6
|
||||||
# with:
|
# with:
|
||||||
# fetch-depth: 0
|
# fetch-depth: 0
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -180,3 +180,4 @@ autogpt_platform/backend/settings.py
|
|||||||
.claude/settings.local.json
|
.claude/settings.local.json
|
||||||
CLAUDE.local.md
|
CLAUDE.local.md
|
||||||
/autogpt_platform/backend/logs
|
/autogpt_platform/backend/logs
|
||||||
|
.next
|
||||||
@@ -54,7 +54,7 @@ Before proceeding with the installation, ensure your system meets the following
|
|||||||
### Updated Setup Instructions:
|
### Updated Setup Instructions:
|
||||||
We've moved to a fully maintained and regularly updated documentation site.
|
We've moved to a fully maintained and regularly updated documentation site.
|
||||||
|
|
||||||
👉 [Follow the official self-hosting guide here](https://docs.agpt.co/platform/getting-started/)
|
👉 [Follow the official self-hosting guide here](https://agpt.co/docs/platform/getting-started/getting-started)
|
||||||
|
|
||||||
|
|
||||||
This tutorial assumes you have Docker, VSCode, git and npm installed.
|
This tutorial assumes you have Docker, VSCode, git and npm installed.
|
||||||
|
|||||||
1862
autogpt_platform/autogpt_libs/poetry.lock
generated
1862
autogpt_platform/autogpt_libs/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -9,25 +9,25 @@ packages = [{ include = "autogpt_libs" }]
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.10,<4.0"
|
python = ">=3.10,<4.0"
|
||||||
colorama = "^0.4.6"
|
colorama = "^0.4.6"
|
||||||
cryptography = "^45.0"
|
cryptography = "^46.0"
|
||||||
expiringdict = "^1.2.2"
|
expiringdict = "^1.2.2"
|
||||||
fastapi = "^0.116.1"
|
fastapi = "^0.128.0"
|
||||||
google-cloud-logging = "^3.12.1"
|
google-cloud-logging = "^3.13.0"
|
||||||
launchdarkly-server-sdk = "^9.12.0"
|
launchdarkly-server-sdk = "^9.14.1"
|
||||||
pydantic = "^2.11.7"
|
pydantic = "^2.12.5"
|
||||||
pydantic-settings = "^2.10.1"
|
pydantic-settings = "^2.12.0"
|
||||||
pyjwt = { version = "^2.10.1", extras = ["crypto"] }
|
pyjwt = { version = "^2.11.0", extras = ["crypto"] }
|
||||||
redis = "^6.2.0"
|
redis = "^6.2.0"
|
||||||
supabase = "^2.16.0"
|
supabase = "^2.27.2"
|
||||||
uvicorn = "^0.35.0"
|
uvicorn = "^0.40.0"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
pyright = "^1.1.404"
|
pyright = "^1.1.408"
|
||||||
pytest = "^8.4.1"
|
pytest = "^8.4.1"
|
||||||
pytest-asyncio = "^1.1.0"
|
pytest-asyncio = "^1.3.0"
|
||||||
pytest-mock = "^3.14.1"
|
pytest-mock = "^3.15.1"
|
||||||
pytest-cov = "^6.2.1"
|
pytest-cov = "^7.0.0"
|
||||||
ruff = "^0.12.11"
|
ruff = "^0.15.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
|
|||||||
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
|
|||||||
REVID_API_KEY=
|
REVID_API_KEY=
|
||||||
SCREENSHOTONE_API_KEY=
|
SCREENSHOTONE_API_KEY=
|
||||||
UNREAL_SPEECH_API_KEY=
|
UNREAL_SPEECH_API_KEY=
|
||||||
|
ELEVENLABS_API_KEY=
|
||||||
|
|
||||||
# Data & Search Services
|
# Data & Search Services
|
||||||
E2B_API_KEY=
|
E2B_API_KEY=
|
||||||
|
|||||||
3
autogpt_platform/backend/.gitignore
vendored
3
autogpt_platform/backend/.gitignore
vendored
@@ -19,3 +19,6 @@ load-tests/*.json
|
|||||||
load-tests/*.log
|
load-tests/*.log
|
||||||
load-tests/node_modules/*
|
load-tests/node_modules/*
|
||||||
migrations/*/rollback*.sql
|
migrations/*/rollback*.sql
|
||||||
|
|
||||||
|
# Workspace files
|
||||||
|
workspaces/
|
||||||
|
|||||||
@@ -62,10 +62,18 @@ ENV POETRY_HOME=/opt/poetry \
|
|||||||
DEBIAN_FRONTEND=noninteractive
|
DEBIAN_FRONTEND=noninteractive
|
||||||
ENV PATH=/opt/poetry/bin:$PATH
|
ENV PATH=/opt/poetry/bin:$PATH
|
||||||
|
|
||||||
# Install Python without upgrading system-managed packages
|
# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use.
|
||||||
|
# bubblewrap provides OS-level sandbox (whitelist-only FS + no network)
|
||||||
|
# for the bash_exec MCP tool.
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
python3.13 \
|
python3.13 \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
|
ffmpeg \
|
||||||
|
imagemagick \
|
||||||
|
jq \
|
||||||
|
ripgrep \
|
||||||
|
tree \
|
||||||
|
bubblewrap \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Copy only necessary files from builder
|
# Copy only necessary files from builder
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from typing_extensions import TypedDict
|
|||||||
|
|
||||||
import backend.api.features.store.cache as store_cache
|
import backend.api.features.store.cache as store_cache
|
||||||
import backend.api.features.store.model as store_model
|
import backend.api.features.store.model as store_model
|
||||||
import backend.data.block
|
import backend.blocks
|
||||||
from backend.api.external.middleware import require_permission
|
from backend.api.external.middleware import require_permission
|
||||||
from backend.data import execution as execution_db
|
from backend.data import execution as execution_db
|
||||||
from backend.data import graph as graph_db
|
from backend.data import graph as graph_db
|
||||||
@@ -67,7 +67,7 @@ async def get_user_info(
|
|||||||
dependencies=[Security(require_permission(APIKeyPermission.READ_BLOCK))],
|
dependencies=[Security(require_permission(APIKeyPermission.READ_BLOCK))],
|
||||||
)
|
)
|
||||||
async def get_graph_blocks() -> Sequence[dict[Any, Any]]:
|
async def get_graph_blocks() -> Sequence[dict[Any, Any]]:
|
||||||
blocks = [block() for block in backend.data.block.get_blocks().values()]
|
blocks = [block() for block in backend.blocks.get_blocks().values()]
|
||||||
return [b.to_dict() for b in blocks if not b.disabled]
|
return [b.to_dict() for b in blocks if not b.disabled]
|
||||||
|
|
||||||
|
|
||||||
@@ -83,7 +83,7 @@ async def execute_graph_block(
|
|||||||
require_permission(APIKeyPermission.EXECUTE_BLOCK)
|
require_permission(APIKeyPermission.EXECUTE_BLOCK)
|
||||||
),
|
),
|
||||||
) -> CompletedBlockOutput:
|
) -> CompletedBlockOutput:
|
||||||
obj = backend.data.block.get_block(block_id)
|
obj = backend.blocks.get_block(block_id)
|
||||||
if not obj:
|
if not obj:
|
||||||
raise HTTPException(status_code=404, detail=f"Block #{block_id} not found.")
|
raise HTTPException(status_code=404, detail=f"Block #{block_id} not found.")
|
||||||
if obj.disabled:
|
if obj.disabled:
|
||||||
|
|||||||
@@ -10,10 +10,15 @@ import backend.api.features.library.db as library_db
|
|||||||
import backend.api.features.library.model as library_model
|
import backend.api.features.library.model as library_model
|
||||||
import backend.api.features.store.db as store_db
|
import backend.api.features.store.db as store_db
|
||||||
import backend.api.features.store.model as store_model
|
import backend.api.features.store.model as store_model
|
||||||
import backend.data.block
|
|
||||||
from backend.blocks import load_all_blocks
|
from backend.blocks import load_all_blocks
|
||||||
|
from backend.blocks._base import (
|
||||||
|
AnyBlockSchema,
|
||||||
|
BlockCategory,
|
||||||
|
BlockInfo,
|
||||||
|
BlockSchema,
|
||||||
|
BlockType,
|
||||||
|
)
|
||||||
from backend.blocks.llm import LlmModel
|
from backend.blocks.llm import LlmModel
|
||||||
from backend.data.block import AnyBlockSchema, BlockCategory, BlockInfo, BlockSchema
|
|
||||||
from backend.data.db import query_raw_with_schema
|
from backend.data.db import query_raw_with_schema
|
||||||
from backend.integrations.providers import ProviderName
|
from backend.integrations.providers import ProviderName
|
||||||
from backend.util.cache import cached
|
from backend.util.cache import cached
|
||||||
@@ -22,7 +27,7 @@ from backend.util.models import Pagination
|
|||||||
from .model import (
|
from .model import (
|
||||||
BlockCategoryResponse,
|
BlockCategoryResponse,
|
||||||
BlockResponse,
|
BlockResponse,
|
||||||
BlockType,
|
BlockTypeFilter,
|
||||||
CountResponse,
|
CountResponse,
|
||||||
FilterType,
|
FilterType,
|
||||||
Provider,
|
Provider,
|
||||||
@@ -88,7 +93,7 @@ def get_block_categories(category_blocks: int = 3) -> list[BlockCategoryResponse
|
|||||||
def get_blocks(
|
def get_blocks(
|
||||||
*,
|
*,
|
||||||
category: str | None = None,
|
category: str | None = None,
|
||||||
type: BlockType | None = None,
|
type: BlockTypeFilter | None = None,
|
||||||
provider: ProviderName | None = None,
|
provider: ProviderName | None = None,
|
||||||
page: int = 1,
|
page: int = 1,
|
||||||
page_size: int = 50,
|
page_size: int = 50,
|
||||||
@@ -669,9 +674,9 @@ async def get_suggested_blocks(count: int = 5) -> list[BlockInfo]:
|
|||||||
for block_type in load_all_blocks().values():
|
for block_type in load_all_blocks().values():
|
||||||
block: AnyBlockSchema = block_type()
|
block: AnyBlockSchema = block_type()
|
||||||
if block.disabled or block.block_type in (
|
if block.disabled or block.block_type in (
|
||||||
backend.data.block.BlockType.INPUT,
|
BlockType.INPUT,
|
||||||
backend.data.block.BlockType.OUTPUT,
|
BlockType.OUTPUT,
|
||||||
backend.data.block.BlockType.AGENT,
|
BlockType.AGENT,
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
# Find the execution count for this block
|
# Find the execution count for this block
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
import backend.api.features.library.model as library_model
|
import backend.api.features.library.model as library_model
|
||||||
import backend.api.features.store.model as store_model
|
import backend.api.features.store.model as store_model
|
||||||
from backend.data.block import BlockInfo
|
from backend.blocks._base import BlockInfo
|
||||||
from backend.integrations.providers import ProviderName
|
from backend.integrations.providers import ProviderName
|
||||||
from backend.util.models import Pagination
|
from backend.util.models import Pagination
|
||||||
|
|
||||||
@@ -15,7 +15,7 @@ FilterType = Literal[
|
|||||||
"my_agents",
|
"my_agents",
|
||||||
]
|
]
|
||||||
|
|
||||||
BlockType = Literal["all", "input", "action", "output"]
|
BlockTypeFilter = Literal["all", "input", "action", "output"]
|
||||||
|
|
||||||
|
|
||||||
class SearchEntry(BaseModel):
|
class SearchEntry(BaseModel):
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ async def get_block_categories(
|
|||||||
)
|
)
|
||||||
async def get_blocks(
|
async def get_blocks(
|
||||||
category: Annotated[str | None, fastapi.Query()] = None,
|
category: Annotated[str | None, fastapi.Query()] = None,
|
||||||
type: Annotated[builder_model.BlockType | None, fastapi.Query()] = None,
|
type: Annotated[builder_model.BlockTypeFilter | None, fastapi.Query()] = None,
|
||||||
provider: Annotated[ProviderName | None, fastapi.Query()] = None,
|
provider: Annotated[ProviderName | None, fastapi.Query()] = None,
|
||||||
page: Annotated[int, fastapi.Query()] = 1,
|
page: Annotated[int, fastapi.Query()] = 1,
|
||||||
page_size: Annotated[int, fastapi.Query()] = 50,
|
page_size: Annotated[int, fastapi.Query()] = 50,
|
||||||
|
|||||||
@@ -0,0 +1,368 @@
|
|||||||
|
"""Redis Streams consumer for operation completion messages.
|
||||||
|
|
||||||
|
This module provides a consumer (ChatCompletionConsumer) that listens for
|
||||||
|
completion notifications (OperationCompleteMessage) from external services
|
||||||
|
(like Agent Generator) and triggers the appropriate stream registry and
|
||||||
|
chat service updates via process_operation_success/process_operation_failure.
|
||||||
|
|
||||||
|
Why Redis Streams instead of RabbitMQ?
|
||||||
|
--------------------------------------
|
||||||
|
While the project typically uses RabbitMQ for async task queues (e.g., execution
|
||||||
|
queue), Redis Streams was chosen for chat completion notifications because:
|
||||||
|
|
||||||
|
1. **Unified Infrastructure**: The SSE reconnection feature already uses Redis
|
||||||
|
Streams (via stream_registry) for message persistence and replay. Using Redis
|
||||||
|
Streams for completion notifications keeps all chat streaming infrastructure
|
||||||
|
in one system, simplifying operations and reducing cross-system coordination.
|
||||||
|
|
||||||
|
2. **Message Replay**: Redis Streams support XREAD with arbitrary message IDs,
|
||||||
|
allowing consumers to replay missed messages after reconnection. This aligns
|
||||||
|
with the SSE reconnection pattern where clients can resume from last_message_id.
|
||||||
|
|
||||||
|
3. **Consumer Groups with XAUTOCLAIM**: Redis consumer groups provide automatic
|
||||||
|
load balancing across pods with explicit message claiming (XAUTOCLAIM) for
|
||||||
|
recovering from dead consumers - ideal for the completion callback pattern.
|
||||||
|
|
||||||
|
4. **Lower Latency**: For real-time SSE updates, Redis (already in-memory for
|
||||||
|
stream_registry) provides lower latency than an additional RabbitMQ hop.
|
||||||
|
|
||||||
|
5. **Atomicity with Task State**: Completion processing often needs to update
|
||||||
|
task metadata stored in Redis. Keeping both in Redis enables simpler
|
||||||
|
transactional semantics without distributed coordination.
|
||||||
|
|
||||||
|
The consumer uses Redis Streams with consumer groups for reliable message
|
||||||
|
processing across multiple platform pods, with XAUTOCLAIM for reclaiming
|
||||||
|
stale pending messages from dead consumers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
from prisma import Prisma
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from redis.exceptions import ResponseError
|
||||||
|
|
||||||
|
from backend.data.redis_client import get_redis_async
|
||||||
|
|
||||||
|
from . import stream_registry
|
||||||
|
from .completion_handler import process_operation_failure, process_operation_success
|
||||||
|
from .config import ChatConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
config = ChatConfig()
|
||||||
|
|
||||||
|
|
||||||
|
class OperationCompleteMessage(BaseModel):
|
||||||
|
"""Message format for operation completion notifications."""
|
||||||
|
|
||||||
|
operation_id: str
|
||||||
|
task_id: str
|
||||||
|
success: bool
|
||||||
|
result: dict | str | None = None
|
||||||
|
error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class ChatCompletionConsumer:
|
||||||
|
"""Consumer for chat operation completion messages from Redis Streams.
|
||||||
|
|
||||||
|
This consumer initializes its own Prisma client in start() to ensure
|
||||||
|
database operations work correctly within this async context.
|
||||||
|
|
||||||
|
Uses Redis consumer groups to allow multiple platform pods to consume
|
||||||
|
messages reliably with automatic redelivery on failure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._consumer_task: asyncio.Task | None = None
|
||||||
|
self._running = False
|
||||||
|
self._prisma: Prisma | None = None
|
||||||
|
self._consumer_name = f"consumer-{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
async def start(self) -> None:
|
||||||
|
"""Start the completion consumer."""
|
||||||
|
if self._running:
|
||||||
|
logger.warning("Completion consumer already running")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create consumer group if it doesn't exist
|
||||||
|
try:
|
||||||
|
redis = await get_redis_async()
|
||||||
|
await redis.xgroup_create(
|
||||||
|
config.stream_completion_name,
|
||||||
|
config.stream_consumer_group,
|
||||||
|
id="0",
|
||||||
|
mkstream=True,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Created consumer group '{config.stream_consumer_group}' "
|
||||||
|
f"on stream '{config.stream_completion_name}'"
|
||||||
|
)
|
||||||
|
except ResponseError as e:
|
||||||
|
if "BUSYGROUP" in str(e):
|
||||||
|
logger.debug(
|
||||||
|
f"Consumer group '{config.stream_consumer_group}' already exists"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
self._running = True
|
||||||
|
self._consumer_task = asyncio.create_task(self._consume_messages())
|
||||||
|
logger.info(
|
||||||
|
f"Chat completion consumer started (consumer: {self._consumer_name})"
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _ensure_prisma(self) -> Prisma:
|
||||||
|
"""Lazily initialize Prisma client on first use."""
|
||||||
|
if self._prisma is None:
|
||||||
|
database_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
|
||||||
|
self._prisma = Prisma(datasource={"url": database_url})
|
||||||
|
await self._prisma.connect()
|
||||||
|
logger.info("[COMPLETION] Consumer Prisma client connected (lazy init)")
|
||||||
|
return self._prisma
|
||||||
|
|
||||||
|
async def stop(self) -> None:
|
||||||
|
"""Stop the completion consumer."""
|
||||||
|
self._running = False
|
||||||
|
|
||||||
|
if self._consumer_task:
|
||||||
|
self._consumer_task.cancel()
|
||||||
|
try:
|
||||||
|
await self._consumer_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
self._consumer_task = None
|
||||||
|
|
||||||
|
if self._prisma:
|
||||||
|
await self._prisma.disconnect()
|
||||||
|
self._prisma = None
|
||||||
|
logger.info("[COMPLETION] Consumer Prisma client disconnected")
|
||||||
|
|
||||||
|
logger.info("Chat completion consumer stopped")
|
||||||
|
|
||||||
|
async def _consume_messages(self) -> None:
|
||||||
|
"""Main message consumption loop with retry logic."""
|
||||||
|
max_retries = 10
|
||||||
|
retry_delay = 5 # seconds
|
||||||
|
retry_count = 0
|
||||||
|
block_timeout = 5000 # milliseconds
|
||||||
|
|
||||||
|
while self._running and retry_count < max_retries:
|
||||||
|
try:
|
||||||
|
redis = await get_redis_async()
|
||||||
|
|
||||||
|
# Reset retry count on successful connection
|
||||||
|
retry_count = 0
|
||||||
|
|
            while self._running:
                # First, claim any stale pending messages from dead consumers.
                # Redis does NOT auto-redeliver pending messages; we must explicitly
                # claim them using XAUTOCLAIM.
                try:
                    claimed_result = await redis.xautoclaim(
                        name=config.stream_completion_name,
                        groupname=config.stream_consumer_group,
                        consumername=self._consumer_name,
                        min_idle_time=config.stream_claim_min_idle_ms,
                        start_id="0-0",
                        count=10,
                    )
                    # xautoclaim returns: (next_start_id, [(id, data), ...], [deleted_ids])
                    if claimed_result and len(claimed_result) >= 2:
                        claimed_entries = claimed_result[1]
                        if claimed_entries:
                            logger.info(
                                f"Claimed {len(claimed_entries)} stale pending messages"
                            )
                            for entry_id, data in claimed_entries:
                                if not self._running:
                                    return
                                await self._process_entry(redis, entry_id, data)
                except Exception as e:
                    logger.warning(f"XAUTOCLAIM failed (non-fatal): {e}")

                # Read new messages from the stream
                messages = await redis.xreadgroup(
                    groupname=config.stream_consumer_group,
                    consumername=self._consumer_name,
                    streams={config.stream_completion_name: ">"},
                    block=block_timeout,
                    count=10,
                )

                if not messages:
                    continue

                for stream_name, entries in messages:
                    for entry_id, data in entries:
                        if not self._running:
                            return
                        await self._process_entry(redis, entry_id, data)

        except asyncio.CancelledError:
            logger.info("Consumer cancelled")
            return
        except Exception as e:
            retry_count += 1
            logger.error(
                f"Consumer error (retry {retry_count}/{max_retries}): {e}",
                exc_info=True,
            )
            if self._running and retry_count < max_retries:
                await asyncio.sleep(retry_delay)
            else:
                logger.error("Max retries reached, stopping consumer")
                return

    async def _process_entry(
        self, redis: Any, entry_id: str, data: dict[str, Any]
    ) -> None:
        """Process a single stream entry and acknowledge it on success.

        Args:
            redis: Redis client connection
            entry_id: The stream entry ID
            data: The entry data dict
        """
        try:
            # Handle the message
            message_data = data.get("data")
            if message_data:
                await self._handle_message(
                    message_data.encode()
                    if isinstance(message_data, str)
                    else message_data
                )

            # Acknowledge the message after successful processing
            await redis.xack(
                config.stream_completion_name,
                config.stream_consumer_group,
                entry_id,
            )
        except Exception as e:
            logger.error(
                f"Error processing completion message {entry_id}: {e}",
                exc_info=True,
            )
            # Message remains in pending state and will be claimed by
            # XAUTOCLAIM after min_idle_time expires

    async def _handle_message(self, body: bytes) -> None:
        """Handle a completion message using our own Prisma client."""
        try:
            data = orjson.loads(body)
            message = OperationCompleteMessage(**data)
        except Exception as e:
            logger.error(f"Failed to parse completion message: {e}")
            return

        logger.info(
            f"[COMPLETION] Received completion for operation {message.operation_id} "
            f"(task_id={message.task_id}, success={message.success})"
        )

        # Find task in registry
        task = await stream_registry.find_task_by_operation_id(message.operation_id)
        if task is None:
            task = await stream_registry.get_task(message.task_id)

        if task is None:
            logger.warning(
                f"[COMPLETION] Task not found for operation {message.operation_id} "
                f"(task_id={message.task_id})"
            )
            return

        logger.info(
            f"[COMPLETION] Found task: task_id={task.task_id}, "
            f"session_id={task.session_id}, tool_call_id={task.tool_call_id}"
        )

        # Guard against empty task fields
        if not task.task_id or not task.session_id or not task.tool_call_id:
            logger.error(
                f"[COMPLETION] Task has empty critical fields! "
                f"task_id={task.task_id!r}, session_id={task.session_id!r}, "
                f"tool_call_id={task.tool_call_id!r}"
            )
            return

        if message.success:
            await self._handle_success(task, message)
        else:
            await self._handle_failure(task, message)

    async def _handle_success(
        self,
        task: stream_registry.ActiveTask,
        message: OperationCompleteMessage,
    ) -> None:
        """Handle successful operation completion."""
        prisma = await self._ensure_prisma()
        await process_operation_success(task, message.result, prisma)

    async def _handle_failure(
        self,
        task: stream_registry.ActiveTask,
        message: OperationCompleteMessage,
    ) -> None:
        """Handle failed operation completion."""
        prisma = await self._ensure_prisma()
        await process_operation_failure(task, message.error, prisma)


# Module-level consumer instance
_consumer: ChatCompletionConsumer | None = None


async def start_completion_consumer() -> None:
    """Start the global completion consumer."""
    global _consumer
    if _consumer is None:
        _consumer = ChatCompletionConsumer()
        await _consumer.start()


async def stop_completion_consumer() -> None:
    """Stop the global completion consumer."""
    global _consumer
    if _consumer:
        await _consumer.stop()
        _consumer = None


async def publish_operation_complete(
    operation_id: str,
    task_id: str,
    success: bool,
    result: dict | str | None = None,
    error: str | None = None,
) -> None:
    """Publish an operation completion message to Redis Streams.

    Args:
        operation_id: The operation ID that completed.
        task_id: The task ID associated with the operation.
        success: Whether the operation succeeded.
        result: The result data (for success).
        error: The error message (for failure).
    """
    message = OperationCompleteMessage(
        operation_id=operation_id,
        task_id=task_id,
        success=success,
        result=result,
        error=error,
    )

    redis = await get_redis_async()
    await redis.xadd(
        config.stream_completion_name,
        {"data": message.model_dump_json()},
        maxlen=config.stream_max_length,
    )
    logger.info(f"Published completion for operation {operation_id}")
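A minimal producer-side sketch of how a worker that finishes a long-running tool might report back through this stream. Only `publish_operation_complete` and its signature come from the module above; `run_long_tool`, `report_tool_result`, and the argument values are illustrative assumptions.

async def run_long_tool(operation_id: str) -> dict:
    """Placeholder for the actual long-running work (assumed, not part of this change)."""
    return {"status": "completed", "operation_id": operation_id}


async def report_tool_result(operation_id: str, task_id: str) -> None:
    """Publish the outcome of a long-running operation to the completion stream."""
    try:
        result = await run_long_tool(operation_id)
        await publish_operation_complete(
            operation_id=operation_id, task_id=task_id, success=True, result=result
        )
    except Exception as e:
        await publish_operation_complete(
            operation_id=operation_id, task_id=task_id, success=False, error=str(e)
        )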
@@ -0,0 +1,344 @@
"""Shared completion handling for operation success and failure.

This module provides common logic for handling operation completion from both:
- The Redis Streams consumer (completion_consumer.py)
- The HTTP webhook endpoint (routes.py)
"""

import logging
from typing import Any

import orjson
from prisma import Prisma

from . import service as chat_service
from . import stream_registry
from .response_model import StreamError, StreamToolOutputAvailable
from .tools.models import ErrorResponse

logger = logging.getLogger(__name__)

# Tools that produce agent_json that needs to be saved to library
AGENT_GENERATION_TOOLS = {"create_agent", "edit_agent"}

# Keys that should be stripped from agent_json when returning in error responses
SENSITIVE_KEYS = frozenset(
    {
        "api_key",
        "apikey",
        "api_secret",
        "password",
        "secret",
        "credentials",
        "credential",
        "token",
        "access_token",
        "refresh_token",
        "private_key",
        "privatekey",
        "auth",
        "authorization",
    }
)


def _sanitize_agent_json(obj: Any) -> Any:
    """Recursively sanitize agent_json by removing sensitive keys.

    Args:
        obj: The object to sanitize (dict, list, or primitive)

    Returns:
        Sanitized copy with sensitive keys removed/redacted
    """
    if isinstance(obj, dict):
        return {
            k: "[REDACTED]" if k.lower() in SENSITIVE_KEYS else _sanitize_agent_json(v)
            for k, v in obj.items()
        }
    elif isinstance(obj, list):
        return [_sanitize_agent_json(item) for item in obj]
    else:
        return obj


class ToolMessageUpdateError(Exception):
    """Raised when updating a tool message in the database fails."""

    pass


async def _update_tool_message(
    session_id: str,
    tool_call_id: str,
    content: str,
    prisma_client: Prisma | None,
) -> None:
    """Update tool message in database.

    Args:
        session_id: The session ID
        tool_call_id: The tool call ID to update
        content: The new content for the message
        prisma_client: Optional Prisma client. If None, uses chat_service.

    Raises:
        ToolMessageUpdateError: If the database update fails. The caller should
            handle this to avoid marking the task as completed with inconsistent state.
    """
    try:
        if prisma_client:
            # Use provided Prisma client (for consumer with its own connection)
            updated_count = await prisma_client.chatmessage.update_many(
                where={
                    "sessionId": session_id,
                    "toolCallId": tool_call_id,
                },
                data={"content": content},
            )
            # Check if any rows were updated - 0 means message not found
            if updated_count == 0:
                raise ToolMessageUpdateError(
                    f"No message found with tool_call_id={tool_call_id} in session {session_id}"
                )
        else:
            # Use service function (for webhook endpoint)
            await chat_service._update_pending_operation(
                session_id=session_id,
                tool_call_id=tool_call_id,
                result=content,
            )
    except ToolMessageUpdateError:
        raise
    except Exception as e:
        logger.error(f"[COMPLETION] Failed to update tool message: {e}", exc_info=True)
        raise ToolMessageUpdateError(
            f"Failed to update tool message for tool_call_id={tool_call_id}: {e}"
        ) from e


def serialize_result(result: dict | list | str | int | float | bool | None) -> str:
    """Serialize result to JSON string with sensible defaults.

    Args:
        result: The result to serialize. Can be a dict, list, string,
            number, boolean, or None.

    Returns:
        JSON string representation of the result. Returns '{"status": "completed"}'
        only when result is explicitly None.
    """
    if isinstance(result, str):
        return result
    if result is None:
        return '{"status": "completed"}'
    return orjson.dumps(result).decode("utf-8")


async def _save_agent_from_result(
    result: dict[str, Any],
    user_id: str | None,
    tool_name: str,
) -> dict[str, Any]:
    """Save agent to library if result contains agent_json.

    Args:
        result: The result dict that may contain agent_json
        user_id: The user ID to save the agent for
        tool_name: The tool name (create_agent or edit_agent)

    Returns:
        Updated result dict with saved agent details, or original result if no agent_json
    """
    if not user_id:
        logger.warning("[COMPLETION] Cannot save agent: no user_id in task")
        return result

    agent_json = result.get("agent_json")
    if not agent_json:
        logger.warning(
            f"[COMPLETION] {tool_name} completed but no agent_json in result"
        )
        return result

    try:
        from .tools.agent_generator import save_agent_to_library

        is_update = tool_name == "edit_agent"
        created_graph, library_agent = await save_agent_to_library(
            agent_json, user_id, is_update=is_update
        )

        logger.info(
            f"[COMPLETION] Saved agent '{created_graph.name}' to library "
            f"(graph_id={created_graph.id}, library_agent_id={library_agent.id})"
        )

        # Return a response similar to AgentSavedResponse
        return {
            "type": "agent_saved",
            "message": f"Agent '{created_graph.name}' has been saved to your library!",
            "agent_id": created_graph.id,
            "agent_name": created_graph.name,
            "library_agent_id": library_agent.id,
            "library_agent_link": f"/library/agents/{library_agent.id}",
            "agent_page_link": f"/build?flowID={created_graph.id}",
        }
    except Exception as e:
        logger.error(
            f"[COMPLETION] Failed to save agent to library: {e}",
            exc_info=True,
        )
        # Return error but don't fail the whole operation
        # Sanitize agent_json to remove sensitive keys before returning
        return {
            "type": "error",
            "message": f"Agent was generated but failed to save: {str(e)}",
            "error": str(e),
            "agent_json": _sanitize_agent_json(agent_json),
        }


async def process_operation_success(
    task: stream_registry.ActiveTask,
    result: dict | str | None,
    prisma_client: Prisma | None = None,
) -> None:
    """Handle successful operation completion.

    Publishes the result to the stream registry, updates the database,
    generates LLM continuation, and marks the task as completed.

    Args:
        task: The active task that completed
        result: The result data from the operation
        prisma_client: Optional Prisma client for database operations.
            If None, uses chat_service._update_pending_operation instead.

    Raises:
        ToolMessageUpdateError: If the database update fails. The task will be
            marked as failed instead of completed to avoid inconsistent state.
    """
    # For agent generation tools, save the agent to library
    if task.tool_name in AGENT_GENERATION_TOOLS and isinstance(result, dict):
        result = await _save_agent_from_result(result, task.user_id, task.tool_name)

    # Serialize result for output (only substitute default when result is exactly None)
    result_output = result if result is not None else {"status": "completed"}
    output_str = (
        result_output
        if isinstance(result_output, str)
        else orjson.dumps(result_output).decode("utf-8")
    )

    # Publish result to stream registry
    await stream_registry.publish_chunk(
        task.task_id,
        StreamToolOutputAvailable(
            toolCallId=task.tool_call_id,
            toolName=task.tool_name,
            output=output_str,
            success=True,
        ),
    )

    # Update pending operation in database
    # If this fails, we must not continue to mark the task as completed
    result_str = serialize_result(result)
    try:
        await _update_tool_message(
            session_id=task.session_id,
            tool_call_id=task.tool_call_id,
            content=result_str,
            prisma_client=prisma_client,
        )
    except ToolMessageUpdateError:
        # DB update failed - mark task as failed to avoid inconsistent state
        logger.error(
            f"[COMPLETION] DB update failed for task {task.task_id}, "
            "marking as failed instead of completed"
        )
        await stream_registry.publish_chunk(
            task.task_id,
            StreamError(errorText="Failed to save operation result to database"),
        )
        await stream_registry.mark_task_completed(task.task_id, status="failed")
        raise

    # Generate LLM continuation with streaming
    try:
        await chat_service._generate_llm_continuation_with_streaming(
            session_id=task.session_id,
            user_id=task.user_id,
            task_id=task.task_id,
        )
    except Exception as e:
        logger.error(
            f"[COMPLETION] Failed to generate LLM continuation: {e}",
            exc_info=True,
        )

    # Mark task as completed and release Redis lock
    await stream_registry.mark_task_completed(task.task_id, status="completed")
    try:
        await chat_service._mark_operation_completed(task.tool_call_id)
    except Exception as e:
        logger.error(f"[COMPLETION] Failed to mark operation completed: {e}")

    logger.info(
        f"[COMPLETION] Successfully processed completion for task {task.task_id}"
    )


async def process_operation_failure(
    task: stream_registry.ActiveTask,
    error: str | None,
    prisma_client: Prisma | None = None,
) -> None:
    """Handle failed operation completion.

    Publishes the error to the stream registry, updates the database with
    the error response, and marks the task as failed.

    Args:
        task: The active task that failed
        error: The error message from the operation
        prisma_client: Optional Prisma client for database operations.
            If None, uses chat_service._update_pending_operation instead.
    """
    error_msg = error or "Operation failed"

    # Publish error to stream registry
    await stream_registry.publish_chunk(
        task.task_id,
        StreamError(errorText=error_msg),
    )

    # Update pending operation with error
    # If this fails, we still continue to mark the task as failed
    error_response = ErrorResponse(
        message=error_msg,
        error=error,
    )
    try:
        await _update_tool_message(
            session_id=task.session_id,
            tool_call_id=task.tool_call_id,
            content=error_response.model_dump_json(),
            prisma_client=prisma_client,
        )
    except ToolMessageUpdateError:
        # DB update failed - log but continue with cleanup
        logger.error(
            f"[COMPLETION] DB update failed while processing failure for task {task.task_id}, "
            "continuing with cleanup"
        )

    # Mark task as failed and release Redis lock
    await stream_registry.mark_task_completed(task.task_id, status="failed")
    try:
        await chat_service._mark_operation_completed(task.tool_call_id)
    except Exception as e:
        logger.error(f"[COMPLETION] Failed to mark operation completed: {e}")

    logger.info(f"[COMPLETION] Processed failure for task {task.task_id}: {error_msg}")
@@ -11,7 +11,7 @@ class ChatConfig(BaseSettings):
 
     # OpenAI API Configuration
     model: str = Field(
-        default="anthropic/claude-opus-4.5", description="Default model to use"
+        default="anthropic/claude-opus-4.6", description="Default model to use"
     )
     title_model: str = Field(
         default="openai/gpt-4o-mini",
@@ -27,12 +27,11 @@ class ChatConfig(BaseSettings):
     session_ttl: int = Field(default=43200, description="Session TTL in seconds")
 
     # Streaming Configuration
-    max_context_messages: int = Field(
-        default=50, ge=1, le=200, description="Maximum context messages"
-    )
-
     stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
-    max_retries: int = Field(default=3, description="Maximum number of retries")
+    max_retries: int = Field(
+        default=3,
+        description="Max retries for fallback path (SDK handles retries internally)",
+    )
     max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
     max_agent_schedules: int = Field(
         default=30, description="Maximum number of agent schedules"
@@ -44,6 +43,48 @@ class ChatConfig(BaseSettings):
         description="TTL in seconds for long-running operation tracking in Redis (safety net if pod dies)",
     )
 
+    # Stream registry configuration for SSE reconnection
+    stream_ttl: int = Field(
+        default=3600,
+        description="TTL in seconds for stream data in Redis (1 hour)",
+    )
+    stream_max_length: int = Field(
+        default=10000,
+        description="Maximum number of messages to store per stream",
+    )
+
+    # Redis Streams configuration for completion consumer
+    stream_completion_name: str = Field(
+        default="chat:completions",
+        description="Redis Stream name for operation completions",
+    )
+    stream_consumer_group: str = Field(
+        default="chat_consumers",
+        description="Consumer group name for completion stream",
+    )
+    stream_claim_min_idle_ms: int = Field(
+        default=60000,
+        description="Minimum idle time in milliseconds before claiming pending messages from dead consumers",
+    )
+
+    # Redis key prefixes for stream registry
+    task_meta_prefix: str = Field(
+        default="chat:task:meta:",
+        description="Prefix for task metadata hash keys",
+    )
+    task_stream_prefix: str = Field(
+        default="chat:stream:",
+        description="Prefix for task message stream keys",
+    )
+    task_op_prefix: str = Field(
+        default="chat:task:op:",
+        description="Prefix for operation ID to task ID mapping keys",
+    )
+    internal_api_key: str | None = Field(
+        default=None,
+        description="API key for internal webhook callbacks (env: CHAT_INTERNAL_API_KEY)",
+    )
+
     # Langfuse Prompt Management Configuration
     # Note: Langfuse credentials are in Settings().secrets (settings.py)
     langfuse_prompt_name: str = Field(
@@ -51,6 +92,32 @@ class ChatConfig(BaseSettings):
         description="Name of the prompt in Langfuse to fetch",
     )
 
+    # Claude Agent SDK Configuration
+    use_claude_agent_sdk: bool = Field(
+        default=True,
+        description="Use Claude Agent SDK for chat completions",
+    )
+    claude_agent_model: str | None = Field(
+        default=None,
+        description="Model for the Claude Agent SDK path. If None, derives from "
+        "the `model` field by stripping the OpenRouter provider prefix.",
+    )
+    claude_agent_max_buffer_size: int = Field(
+        default=10 * 1024 * 1024,  # 10MB (default SDK is 1MB)
+        description="Max buffer size in bytes for Claude Agent SDK JSON message parsing. "
+        "Increase if tool outputs exceed the limit.",
+    )
+    claude_agent_max_subtasks: int = Field(
+        default=10,
+        description="Max number of sub-agent Tasks the SDK can spawn per session.",
+    )
+
+    # Extended thinking configuration for Claude models
+    thinking_enabled: bool = Field(
+        default=True,
+        description="Enable adaptive thinking for Claude models via OpenRouter",
+    )
+
     @field_validator("api_key", mode="before")
     @classmethod
     def get_api_key(cls, v):
@@ -82,6 +149,25 @@ class ChatConfig(BaseSettings):
             v = "https://openrouter.ai/api/v1"
         return v
 
+    @field_validator("internal_api_key", mode="before")
+    @classmethod
+    def get_internal_api_key(cls, v):
+        """Get internal API key from environment if not provided."""
+        if v is None:
+            v = os.getenv("CHAT_INTERNAL_API_KEY")
+        return v
+
+    @field_validator("use_claude_agent_sdk", mode="before")
+    @classmethod
+    def get_use_claude_agent_sdk(cls, v):
+        """Get use_claude_agent_sdk from environment if not provided."""
+        # Check environment variable - default to True if not set
+        env_val = os.getenv("CHAT_USE_CLAUDE_AGENT_SDK", "").lower()
+        if env_val:
+            return env_val in ("true", "1", "yes", "on")
+        # Default to True (SDK enabled by default)
+        return True if v is None else v
+
     # Prompt paths for different contexts
     PROMPT_PATHS: dict[str, str] = {
         "default": "prompts/chat_system.md",
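A minimal sketch of how the SDK model fallback described above would be resolved at runtime. It assumes `ChatConfig` is imported from this config module; the `resolve_sdk_model` helper is illustrative and not part of the change.

config = ChatConfig()

# claude_agent_model falls back to `model` with the OpenRouter provider prefix
# stripped, per the field description above; this helper is an assumption.
def resolve_sdk_model(cfg: ChatConfig) -> str:
    return cfg.claude_agent_model or cfg.model.split("/", 1)[-1]

print(resolve_sdk_model(config))  # "claude-opus-4.6" with the defaults above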
@@ -45,10 +45,7 @@ async def create_chat_session(
         successfulAgentRuns=SafeJson({}),
         successfulAgentSchedules=SafeJson({}),
     )
-    return await PrismaChatSession.prisma().create(
-        data=data,
-        include={"Messages": True},
-    )
+    return await PrismaChatSession.prisma().create(data=data)
 
 
 async def update_chat_session(
@@ -2,7 +2,7 @@ import asyncio
 import logging
 import uuid
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, cast
 from weakref import WeakValueDictionary
 
 from openai.types.chat import (
@@ -104,6 +104,26 @@ class ChatSession(BaseModel):
     successful_agent_runs: dict[str, int] = {}
     successful_agent_schedules: dict[str, int] = {}
 
+    def add_tool_call_to_current_turn(self, tool_call: dict) -> None:
+        """Attach a tool_call to the current turn's assistant message.
+
+        Searches backwards for the most recent assistant message (stopping at
+        any user message boundary). If found, appends the tool_call to it.
+        Otherwise creates a new assistant message with the tool_call.
+        """
+        for msg in reversed(self.messages):
+            if msg.role == "user":
+                break
+            if msg.role == "assistant":
+                if not msg.tool_calls:
+                    msg.tool_calls = []
+                msg.tool_calls.append(tool_call)
+                return
+
+        self.messages.append(
+            ChatMessage(role="assistant", content="", tool_calls=[tool_call])
+        )
+
     @staticmethod
     def new(user_id: str) -> "ChatSession":
         return ChatSession(
@@ -172,6 +192,47 @@ class ChatSession(BaseModel):
             successful_agent_schedules=successful_agent_schedules,
         )
 
+    @staticmethod
+    def _merge_consecutive_assistant_messages(
+        messages: list[ChatCompletionMessageParam],
+    ) -> list[ChatCompletionMessageParam]:
+        """Merge consecutive assistant messages into single messages.
+
+        Long-running tool flows can create split assistant messages: one with
+        text content and another with tool_calls. Anthropic's API requires
+        tool_result blocks to reference a tool_use in the immediately preceding
+        assistant message, so these splits cause 400 errors via OpenRouter.
+        """
+        if len(messages) < 2:
+            return messages
+
+        result: list[ChatCompletionMessageParam] = [messages[0]]
+        for msg in messages[1:]:
+            prev = result[-1]
+            if prev.get("role") != "assistant" or msg.get("role") != "assistant":
+                result.append(msg)
+                continue
+
+            prev = cast(ChatCompletionAssistantMessageParam, prev)
+            curr = cast(ChatCompletionAssistantMessageParam, msg)
+
+            curr_content = curr.get("content") or ""
+            if curr_content:
+                prev_content = prev.get("content") or ""
+                prev["content"] = (
+                    f"{prev_content}\n{curr_content}" if prev_content else curr_content
+                )
+
+            curr_tool_calls = curr.get("tool_calls")
+            if curr_tool_calls:
+                prev_tool_calls = prev.get("tool_calls")
+                prev["tool_calls"] = (
+                    list(prev_tool_calls) + list(curr_tool_calls)
+                    if prev_tool_calls
+                    else list(curr_tool_calls)
+                )
+        return result
+
     def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
         messages = []
         for message in self.messages:
@@ -258,7 +319,7 @@ class ChatSession(BaseModel):
                     name=message.name or "",
                 )
             )
-        return messages
+        return self._merge_consecutive_assistant_messages(messages)
 
 
 async def _get_session_from_cache(session_id: str) -> ChatSession | None:
@@ -273,9 +334,8 @@ async def _get_session_from_cache(session_id: str) -> ChatSession | None:
     try:
         session = ChatSession.model_validate_json(raw_session)
         logger.info(
-            f"Loading session {session_id} from cache: "
-            f"message_count={len(session.messages)}, "
-            f"roles={[m.role for m in session.messages]}"
+            f"[CACHE] Loaded session {session_id}: {len(session.messages)} messages, "
+            f"last_roles={[m.role for m in session.messages[-3:]]}"  # Last 3 roles
         )
         return session
     except Exception as e:
@@ -317,11 +377,9 @@ async def _get_session_from_db(session_id: str) -> ChatSession | None:
         return None
 
     messages = prisma_session.Messages
-    logger.info(
-        f"Loading session {session_id} from DB: "
-        f"has_messages={messages is not None}, "
-        f"message_count={len(messages) if messages else 0}, "
-        f"roles={[m.role for m in messages] if messages else []}"
+    logger.debug(
+        f"[DB] Loaded session {session_id}: {len(messages) if messages else 0} messages, "
+        f"roles={[m.role for m in messages[-3:]] if messages else []}"  # Last 3 roles
     )
 
     return ChatSession.from_db(prisma_session, messages)
@@ -372,10 +430,9 @@ async def _save_session_to_db(
                 "function_call": msg.function_call,
             }
         )
-    logger.info(
-        f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
-        f"roles={[m['role'] for m in messages_data]}, "
-        f"start_sequence={existing_message_count}"
+    logger.debug(
+        f"[DB] Saving {len(new_messages)} messages to session {session.session_id}, "
+        f"roles={[m['role'] for m in messages_data]}"
    )
    await chat_db.add_chat_messages_batch(
        session_id=session.session_id,
@@ -415,7 +472,7 @@ async def get_chat_session(
         logger.warning(f"Unexpected cache error for session {session_id}: {e}")
 
     # Fall back to database
-    logger.info(f"Session {session_id} not in cache, checking database")
+    logger.debug(f"Session {session_id} not in cache, checking database")
     session = await _get_session_from_db(session_id)
 
     if session is None:
@@ -432,7 +489,6 @@ async def get_chat_session(
     # Cache the session from DB
     try:
         await _cache_session(session)
-        logger.info(f"Cached session {session_id} from database")
     except Exception as e:
         logger.warning(f"Failed to cache session {session_id}: {e}")
 
@@ -497,6 +553,40 @@ async def upsert_chat_session(
     return session
 
 
+async def append_and_save_message(session_id: str, message: ChatMessage) -> ChatSession:
+    """Atomically append a message to a session and persist it.
+
+    Acquires the session lock, re-fetches the latest session state,
+    appends the message, and saves — preventing message loss when
+    concurrent requests modify the same session.
+    """
+    lock = await _get_session_lock(session_id)
+
+    async with lock:
+        session = await get_chat_session(session_id)
+        if session is None:
+            raise ValueError(f"Session {session_id} not found")
+
+        session.messages.append(message)
+        existing_message_count = await chat_db.get_chat_session_message_count(
+            session_id
+        )
+
+        try:
+            await _save_session_to_db(session, existing_message_count)
+        except Exception as e:
+            raise DatabaseError(
+                f"Failed to persist message to session {session_id}"
+            ) from e
+
+        try:
+            await _cache_session(session)
+        except Exception as e:
+            logger.warning(f"Cache write failed for session {session_id}: {e}")
+
+        return session
+
+
 async def create_chat_session(user_id: str) -> ChatSession:
     """Create a new chat session and persist it.
 
@@ -603,13 +693,19 @@ async def update_session_title(session_id: str, title: str) -> bool:
             logger.warning(f"Session {session_id} not found for title update")
             return False
 
-        # Invalidate cache so next fetch gets updated title
+        # Update title in cache if it exists (instead of invalidating).
+        # This prevents race conditions where cache invalidation causes
+        # the frontend to see stale DB data while streaming is still in progress.
         try:
-            redis_key = _get_session_cache_key(session_id)
-            async_redis = await get_redis_async()
-            await async_redis.delete(redis_key)
+            cached = await _get_session_from_cache(session_id)
+            if cached:
+                cached.title = title
+                await _cache_session(cached)
         except Exception as e:
-            logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
+            # Not critical - title will be correct on next full cache refresh
+            logger.warning(
+                f"Failed to update title in cache for session {session_id}: {e}"
+            )
 
         return True
     except Exception as e:
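A minimal usage sketch for the new atomic append helper, assuming `ChatMessage` and `append_and_save_message` are imported from this model module; `record_user_turn` is an illustrative caller, not part of the change.

async def record_user_turn(session_id: str, text: str) -> None:
    # The lock inside append_and_save_message re-fetches the session before
    # appending, so two concurrent callers cannot overwrite each other's messages.
    await append_and_save_message(session_id, ChatMessage(role="user", content=text))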
|||||||
@@ -1,4 +1,16 @@
|
|||||||
|
from typing import cast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from openai.types.chat import (
|
||||||
|
ChatCompletionAssistantMessageParam,
|
||||||
|
ChatCompletionMessageParam,
|
||||||
|
ChatCompletionToolMessageParam,
|
||||||
|
ChatCompletionUserMessageParam,
|
||||||
|
)
|
||||||
|
from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||||
|
ChatCompletionMessageToolCallParam,
|
||||||
|
Function,
|
||||||
|
)
|
||||||
|
|
||||||
from .model import (
|
from .model import (
|
||||||
ChatMessage,
|
ChatMessage,
|
||||||
@@ -117,3 +129,205 @@ async def test_chatsession_db_storage(setup_test_user, test_user_id):
|
|||||||
loaded.tool_calls is not None
|
loaded.tool_calls is not None
|
||||||
), f"Tool calls missing for {orig.role} message"
|
), f"Tool calls missing for {orig.role} message"
|
||||||
assert len(orig.tool_calls) == len(loaded.tool_calls)
|
assert len(orig.tool_calls) == len(loaded.tool_calls)
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# _merge_consecutive_assistant_messages #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
_tc = ChatCompletionMessageToolCallParam(
|
||||||
|
id="tc1", type="function", function=Function(name="do_stuff", arguments="{}")
|
||||||
|
)
|
||||||
|
_tc2 = ChatCompletionMessageToolCallParam(
|
||||||
|
id="tc2", type="function", function=Function(name="other", arguments="{}")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_noop_when_no_consecutive_assistants():
|
||||||
|
"""Messages without consecutive assistants are returned unchanged."""
|
||||||
|
msgs = [
|
||||||
|
ChatCompletionUserMessageParam(role="user", content="hi"),
|
||||||
|
ChatCompletionAssistantMessageParam(role="assistant", content="hello"),
|
||||||
|
ChatCompletionUserMessageParam(role="user", content="bye"),
|
||||||
|
]
|
||||||
|
merged = ChatSession._merge_consecutive_assistant_messages(msgs)
|
||||||
|
assert len(merged) == 3
|
||||||
|
assert [m["role"] for m in merged] == ["user", "assistant", "user"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_splits_text_and_tool_calls():
|
||||||
|
"""The exact bug scenario: text-only assistant followed by tool_calls-only assistant."""
|
||||||
|
msgs = [
|
||||||
|
ChatCompletionUserMessageParam(role="user", content="build agent"),
|
||||||
|
ChatCompletionAssistantMessageParam(
|
||||||
|
role="assistant", content="Let me build that"
|
||||||
|
),
|
||||||
|
ChatCompletionAssistantMessageParam(
|
||||||
|
role="assistant", content="", tool_calls=[_tc]
|
||||||
|
),
|
||||||
|
ChatCompletionToolMessageParam(role="tool", content="ok", tool_call_id="tc1"),
|
||||||
|
]
|
||||||
|
merged = ChatSession._merge_consecutive_assistant_messages(msgs)
|
||||||
|
|
||||||
|
assert len(merged) == 3
|
||||||
|
assert merged[0]["role"] == "user"
|
||||||
|
assert merged[2]["role"] == "tool"
|
||||||
|
a = cast(ChatCompletionAssistantMessageParam, merged[1])
|
||||||
|
assert a["role"] == "assistant"
|
||||||
|
assert a.get("content") == "Let me build that"
|
||||||
|
assert a.get("tool_calls") == [_tc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_combines_tool_calls_from_both():
|
||||||
|
"""Both consecutive assistants have tool_calls — they get merged."""
|
||||||
|
msgs: list[ChatCompletionAssistantMessageParam] = [
|
||||||
|
ChatCompletionAssistantMessageParam(
|
||||||
|
role="assistant", content="text", tool_calls=[_tc]
|
||||||
|
),
|
||||||
|
ChatCompletionAssistantMessageParam(
|
||||||
|
role="assistant", content="", tool_calls=[_tc2]
|
||||||
|
),
|
||||||
|
]
|
||||||
|
merged = ChatSession._merge_consecutive_assistant_messages(msgs) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
assert len(merged) == 1
|
||||||
|
a = cast(ChatCompletionAssistantMessageParam, merged[0])
|
||||||
|
assert a.get("tool_calls") == [_tc, _tc2]
|
||||||
|
assert a.get("content") == "text"
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_three_consecutive_assistants():
|
||||||
|
"""Three consecutive assistants collapse into one."""
|
||||||
|
msgs: list[ChatCompletionAssistantMessageParam] = [
|
||||||
|
ChatCompletionAssistantMessageParam(role="assistant", content="a"),
|
||||||
|
ChatCompletionAssistantMessageParam(role="assistant", content="b"),
|
||||||
|
ChatCompletionAssistantMessageParam(
|
||||||
|
role="assistant", content="", tool_calls=[_tc]
|
||||||
|
),
|
||||||
|
]
|
||||||
|
merged = ChatSession._merge_consecutive_assistant_messages(msgs) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
assert len(merged) == 1
|
||||||
|
a = cast(ChatCompletionAssistantMessageParam, merged[0])
|
||||||
|
assert a.get("content") == "a\nb"
|
||||||
|
assert a.get("tool_calls") == [_tc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_empty_and_single_message():
|
||||||
|
"""Edge cases: empty list and single message."""
|
||||||
|
assert ChatSession._merge_consecutive_assistant_messages([]) == []
|
||||||
|
|
||||||
|
single: list[ChatCompletionMessageParam] = [
|
||||||
|
ChatCompletionUserMessageParam(role="user", content="hi")
|
||||||
|
]
|
||||||
|
assert ChatSession._merge_consecutive_assistant_messages(single) == single
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
# add_tool_call_to_current_turn #
|
||||||
|
# --------------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
_raw_tc = {
|
||||||
|
"id": "tc1",
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": "f", "arguments": "{}"},
|
||||||
|
}
|
||||||
|
_raw_tc2 = {
|
||||||
|
"id": "tc2",
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": "g", "arguments": "{}"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tool_call_appends_to_existing_assistant():
|
||||||
|
"""When the last assistant is from the current turn, tool_call is added to it."""
|
||||||
|
session = ChatSession.new(user_id="u")
|
||||||
|
session.messages = [
|
||||||
|
ChatMessage(role="user", content="hi"),
|
||||||
|
ChatMessage(role="assistant", content="working on it"),
|
||||||
|
]
|
||||||
|
session.add_tool_call_to_current_turn(_raw_tc)
|
||||||
|
|
||||||
|
assert len(session.messages) == 2 # no new message created
|
||||||
|
assert session.messages[1].tool_calls == [_raw_tc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tool_call_creates_assistant_when_none_exists():
|
||||||
|
"""When there's no current-turn assistant, a new one is created."""
|
||||||
|
session = ChatSession.new(user_id="u")
|
||||||
|
session.messages = [
|
||||||
|
ChatMessage(role="user", content="hi"),
|
||||||
|
]
|
||||||
|
session.add_tool_call_to_current_turn(_raw_tc)
|
||||||
|
|
||||||
|
assert len(session.messages) == 2
|
||||||
|
assert session.messages[1].role == "assistant"
|
||||||
|
assert session.messages[1].tool_calls == [_raw_tc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tool_call_does_not_cross_user_boundary():
|
||||||
|
"""A user message acts as a boundary — previous assistant is not modified."""
|
||||||
|
session = ChatSession.new(user_id="u")
|
||||||
|
session.messages = [
|
||||||
|
ChatMessage(role="assistant", content="old turn"),
|
||||||
|
ChatMessage(role="user", content="new message"),
|
||||||
|
]
|
||||||
|
session.add_tool_call_to_current_turn(_raw_tc)
|
||||||
|
|
||||||
|
assert len(session.messages) == 3 # new assistant was created
|
||||||
|
assert session.messages[0].tool_calls is None # old assistant untouched
|
||||||
|
assert session.messages[2].role == "assistant"
|
||||||
|
assert session.messages[2].tool_calls == [_raw_tc]
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tool_call_multiple_times():
|
||||||
|
"""Multiple long-running tool calls accumulate on the same assistant."""
|
||||||
|
session = ChatSession.new(user_id="u")
|
||||||
|
session.messages = [
|
||||||
|
ChatMessage(role="user", content="hi"),
|
||||||
|
ChatMessage(role="assistant", content="doing stuff"),
|
||||||
|
]
|
||||||
|
session.add_tool_call_to_current_turn(_raw_tc)
|
||||||
|
# Simulate a pending tool result in between (like _yield_tool_call does)
|
||||||
|
session.messages.append(
|
||||||
|
ChatMessage(role="tool", content="pending", tool_call_id="tc1")
|
||||||
|
)
|
||||||
|
session.add_tool_call_to_current_turn(_raw_tc2)
|
||||||
|
|
||||||
|
assert len(session.messages) == 3 # user, assistant, tool — no extra assistant
|
||||||
|
assert session.messages[1].tool_calls == [_raw_tc, _raw_tc2]
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_openai_messages_merges_split_assistants():
|
||||||
|
"""End-to-end: session with split assistants produces valid OpenAI messages."""
|
||||||
|
session = ChatSession.new(user_id="u")
|
||||||
|
session.messages = [
|
||||||
|
ChatMessage(role="user", content="build agent"),
|
||||||
|
ChatMessage(role="assistant", content="Let me build that"),
|
||||||
|
ChatMessage(
|
||||||
|
role="assistant",
|
||||||
|
content="",
|
||||||
|
tool_calls=[
|
||||||
|
{
|
||||||
|
"id": "tc1",
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": "create_agent", "arguments": "{}"},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
),
|
||||||
|
ChatMessage(role="tool", content="done", tool_call_id="tc1"),
|
||||||
|
ChatMessage(role="assistant", content="Saved!"),
|
||||||
|
ChatMessage(role="user", content="show me an example run"),
|
||||||
|
]
|
||||||
|
openai_msgs = session.to_openai_messages()
|
||||||
|
|
||||||
|
# The two consecutive assistants at index 1,2 should be merged
|
||||||
|
roles = [m["role"] for m in openai_msgs]
|
||||||
|
assert roles == ["user", "assistant", "tool", "assistant", "user"]
|
||||||
|
|
||||||
|
# The merged assistant should have both content and tool_calls
|
||||||
|
merged = cast(ChatCompletionAssistantMessageParam, openai_msgs[1])
|
||||||
|
assert merged.get("content") == "Let me build that"
|
||||||
|
tc_list = merged.get("tool_calls")
|
||||||
|
assert tc_list is not None and len(list(tc_list)) == 1
|
||||||
|
assert list(tc_list)[0]["id"] == "tc1"
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ from typing import Any
|
|||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from backend.util.json import dumps as json_dumps
|
||||||
|
|
||||||
|
|
||||||
class ResponseType(str, Enum):
|
class ResponseType(str, Enum):
|
||||||
"""Types of streaming responses following AI SDK protocol."""
|
"""Types of streaming responses following AI SDK protocol."""
|
||||||
@@ -18,6 +20,10 @@ class ResponseType(str, Enum):
|
|||||||
START = "start"
|
START = "start"
|
||||||
FINISH = "finish"
|
FINISH = "finish"
|
||||||
|
|
||||||
|
# Step lifecycle (one LLM API call within a message)
|
||||||
|
START_STEP = "start-step"
|
||||||
|
FINISH_STEP = "finish-step"
|
||||||
|
|
||||||
# Text streaming
|
# Text streaming
|
||||||
TEXT_START = "text-start"
|
TEXT_START = "text-start"
|
||||||
TEXT_DELTA = "text-delta"
|
TEXT_DELTA = "text-delta"
|
||||||
@@ -52,6 +58,20 @@ class StreamStart(StreamBaseResponse):
|
|||||||
|
|
||||||
type: ResponseType = ResponseType.START
|
type: ResponseType = ResponseType.START
|
||||||
messageId: str = Field(..., description="Unique message ID")
|
messageId: str = Field(..., description="Unique message ID")
|
||||||
|
taskId: str | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Task ID for SSE reconnection. Clients can reconnect using GET /tasks/{taskId}/stream",
|
||||||
|
)
|
||||||
|
|
||||||
|
def to_sse(self) -> str:
|
||||||
|
"""Convert to SSE format, excluding non-protocol fields like taskId."""
|
||||||
|
import json
|
||||||
|
|
||||||
|
data: dict[str, Any] = {
|
||||||
|
"type": self.type.value,
|
||||||
|
"messageId": self.messageId,
|
||||||
|
}
|
||||||
|
return f"data: {json.dumps(data)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
class StreamFinish(StreamBaseResponse):
|
class StreamFinish(StreamBaseResponse):
|
||||||
@@ -60,6 +80,26 @@ class StreamFinish(StreamBaseResponse):
|
|||||||
type: ResponseType = ResponseType.FINISH
|
type: ResponseType = ResponseType.FINISH
|
||||||
|
|
||||||
|
|
||||||
|
class StreamStartStep(StreamBaseResponse):
|
||||||
|
"""Start of a step (one LLM API call within a message).
|
||||||
|
|
||||||
|
The AI SDK uses this to add a step-start boundary to message.parts,
|
||||||
|
enabling visual separation between multiple LLM calls in a single message.
|
||||||
|
"""
|
||||||
|
|
||||||
|
type: ResponseType = ResponseType.START_STEP
|
||||||
|
|
||||||
|
|
||||||
|
class StreamFinishStep(StreamBaseResponse):
|
||||||
|
"""End of a step (one LLM API call within a message).
|
||||||
|
|
||||||
|
The AI SDK uses this to reset activeTextParts and activeReasoningParts,
|
||||||
|
so the next LLM call in a tool-call continuation starts with clean state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
type: ResponseType = ResponseType.FINISH_STEP
|
||||||
|
|
||||||
|
|
||||||
# ========== Text Streaming ==========
|
# ========== Text Streaming ==========
|
||||||
|
|
||||||
|
|
||||||
@@ -113,7 +153,7 @@ class StreamToolOutputAvailable(StreamBaseResponse):
|
|||||||
type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
|
type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
|
||||||
toolCallId: str = Field(..., description="Tool call ID this responds to")
|
toolCallId: str = Field(..., description="Tool call ID this responds to")
|
||||||
output: str | dict[str, Any] = Field(..., description="Tool execution output")
|
output: str | dict[str, Any] = Field(..., description="Tool execution output")
|
||||||
# Additional fields for internal use (not part of AI SDK spec but useful)
|
# Keep these for internal backend use
|
||||||
toolName: str | None = Field(
|
toolName: str | None = Field(
|
||||||
default=None, description="Name of the tool that was executed"
|
default=None, description="Name of the tool that was executed"
|
||||||
)
|
)
|
||||||
@@ -121,6 +161,17 @@ class StreamToolOutputAvailable(StreamBaseResponse):
|
|||||||
default=True, description="Whether the tool execution succeeded"
|
default=True, description="Whether the tool execution succeeded"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def to_sse(self) -> str:
|
||||||
|
"""Convert to SSE format, excluding non-spec fields."""
|
||||||
|
import json
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"type": self.type.value,
|
||||||
|
"toolCallId": self.toolCallId,
|
||||||
|
"output": self.output,
|
||||||
|
}
|
||||||
|
return f"data: {json.dumps(data)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
# ========== Other ==========
|
# ========== Other ==========
|
||||||
|
|
||||||
@@ -144,6 +195,18 @@ class StreamError(StreamBaseResponse):
|
|||||||
default=None, description="Additional error details"
|
default=None, description="Additional error details"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def to_sse(self) -> str:
|
||||||
|
"""Convert to SSE format, only emitting fields required by AI SDK protocol.
|
||||||
|
|
||||||
|
The AI SDK uses z.strictObject({type, errorText}) which rejects
|
||||||
|
any extra fields like `code` or `details`.
|
||||||
|
"""
|
||||||
|
data = {
|
||||||
|
"type": self.type.value,
|
||||||
|
"errorText": self.errorText,
|
||||||
|
}
|
||||||
|
return f"data: {json_dumps(data)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
class StreamHeartbeat(StreamBaseResponse):
|
class StreamHeartbeat(StreamBaseResponse):
|
||||||
"""Heartbeat to keep SSE connection alive during long-running operations.
|
"""Heartbeat to keep SSE connection alive during long-running operations.
|
||||||
|
|||||||
@@ -1,19 +1,56 @@
|
|||||||
"""Chat API routes for chat session management and streaming via SSE."""
|
"""Chat API routes for chat session management and streaming via SSE."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import uuid as uuid_module
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
from autogpt_libs import auth
|
from autogpt_libs import auth
|
||||||
from fastapi import APIRouter, Depends, Query, Security
|
from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, Security
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from backend.util.exceptions import NotFoundError
|
from backend.util.exceptions import NotFoundError
|
||||||
|
from backend.util.feature_flag import Flag, is_feature_enabled
|
||||||
|
|
||||||
from . import service as chat_service
|
from . import service as chat_service
|
||||||
|
from . import stream_registry
|
||||||
|
from .completion_handler import process_operation_failure, process_operation_success
|
||||||
from .config import ChatConfig
|
from .config import ChatConfig
|
||||||
from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
|
from .model import (
|
||||||
|
ChatMessage,
|
||||||
|
ChatSession,
|
||||||
|
append_and_save_message,
|
||||||
|
create_chat_session,
|
||||||
|
get_chat_session,
|
||||||
|
get_user_sessions,
|
||||||
|
)
|
||||||
|
from .response_model import StreamError, StreamFinish, StreamHeartbeat, StreamStart
|
||||||
|
from .sdk import service as sdk_service
|
||||||
|
from .tools.models import (
|
||||||
|
AgentDetailsResponse,
|
||||||
|
AgentOutputResponse,
|
||||||
|
AgentPreviewResponse,
|
||||||
|
AgentSavedResponse,
|
||||||
|
AgentsFoundResponse,
|
||||||
|
BlockListResponse,
|
||||||
|
BlockOutputResponse,
|
||||||
|
ClarificationNeededResponse,
|
||||||
|
DocPageResponse,
|
||||||
|
DocSearchResultsResponse,
|
||||||
|
ErrorResponse,
|
||||||
|
ExecutionStartedResponse,
|
||||||
|
InputValidationErrorResponse,
|
||||||
|
NeedLoginResponse,
|
||||||
|
NoResultsResponse,
|
||||||
|
OperationInProgressResponse,
|
||||||
|
OperationPendingResponse,
|
||||||
|
OperationStartedResponse,
|
||||||
|
SetupRequirementsResponse,
|
||||||
|
UnderstandingUpdatedResponse,
|
||||||
|
)
|
||||||
|
from .tracking import track_user_message
|
||||||
|
|
||||||
config = ChatConfig()
|
config = ChatConfig()
|
||||||
|
|
||||||
@@ -55,6 +92,15 @@ class CreateSessionResponse(BaseModel):
|
|||||||
user_id: str | None
|
user_id: str | None
|
||||||
|
|
||||||
|
|
||||||
|
class ActiveStreamInfo(BaseModel):
|
||||||
|
"""Information about an active stream for reconnection."""
|
||||||
|
|
||||||
|
task_id: str
|
||||||
|
last_message_id: str # Redis Stream message ID for resumption
|
||||||
|
operation_id: str # Operation ID for completion tracking
|
||||||
|
tool_name: str # Name of the tool being executed
|
||||||
|
|
||||||
|
|
||||||
class SessionDetailResponse(BaseModel):
|
class SessionDetailResponse(BaseModel):
|
||||||
"""Response model providing complete details for a chat session, including messages."""
|
"""Response model providing complete details for a chat session, including messages."""
|
||||||
|
|
||||||
@@ -63,6 +109,7 @@ class SessionDetailResponse(BaseModel):
|
|||||||
updated_at: str
|
updated_at: str
|
||||||
user_id: str | None
|
user_id: str | None
|
||||||
messages: list[dict]
|
messages: list[dict]
|
||||||
|
active_stream: ActiveStreamInfo | None = None # Present if stream is still active
|
||||||
|
|
||||||
|
|
||||||
class SessionSummaryResponse(BaseModel):
|
class SessionSummaryResponse(BaseModel):
|
||||||
@@ -81,6 +128,14 @@ class ListSessionsResponse(BaseModel):
|
|||||||
total: int
|
total: int
|
||||||
|
|
||||||
|
|
||||||
|
class OperationCompleteRequest(BaseModel):
|
||||||
|
"""Request model for external completion webhook."""
|
||||||
|
|
||||||
|
success: bool
|
||||||
|
result: dict | str | None = None
|
||||||
|
error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
# ========== Routes ==========
|
# ========== Routes ==========
|
||||||
|
|
||||||
|
|
||||||
@@ -166,13 +221,14 @@ async def get_session(
|
|||||||
Retrieve the details of a specific chat session.
|
Retrieve the details of a specific chat session.
|
||||||
|
|
||||||
Looks up a chat session by ID for the given user (if authenticated) and returns all session data including messages.
|
Looks up a chat session by ID for the given user (if authenticated) and returns all session data including messages.
|
||||||
|
If there's an active stream for this session, returns the task_id for reconnection.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
session_id: The unique identifier for the desired chat session.
|
session_id: The unique identifier for the desired chat session.
|
||||||
user_id: The optional authenticated user ID, or None for anonymous access.
|
user_id: The optional authenticated user ID, or None for anonymous access.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
SessionDetailResponse: Details for the requested session, or None if not found.
|
SessionDetailResponse: Details for the requested session, including active_stream info if applicable.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
session = await get_chat_session(session_id, user_id)
|
session = await get_chat_session(session_id, user_id)
|
||||||
@@ -180,11 +236,32 @@ async def get_session(
|
|||||||
raise NotFoundError(f"Session {session_id} not found.")
|
raise NotFoundError(f"Session {session_id} not found.")
|
||||||
|
|
||||||
messages = [message.model_dump() for message in session.messages]
|
messages = [message.model_dump() for message in session.messages]
|
||||||
logger.info(
|
|
||||||
f"Returning session {session_id}: "
|
# Check if there's an active stream for this session
|
||||||
f"message_count={len(messages)}, "
|
active_stream_info = None
|
||||||
f"roles={[m.get('role') for m in messages]}"
|
active_task, last_message_id = await stream_registry.get_active_task_for_session(
|
||||||
|
session_id, user_id
|
||||||
)
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[GET_SESSION] session={session_id}, active_task={active_task is not None}, "
|
||||||
|
f"msg_count={len(messages)}, last_role={messages[-1].get('role') if messages else 'none'}"
|
||||||
|
)
|
||||||
|
if active_task:
|
||||||
|
# Filter out the in-progress assistant message from the session response.
|
||||||
|
# The client will receive the complete assistant response through the SSE
|
||||||
|
# stream replay instead, preventing duplicate content.
|
||||||
|
if messages and messages[-1].get("role") == "assistant":
|
||||||
|
messages = messages[:-1]
|
||||||
|
|
||||||
|
# Use "0-0" as last_message_id to replay the stream from the beginning.
|
||||||
|
# Since we filtered out the cached assistant message, the client needs
|
||||||
|
# the full stream to reconstruct the response.
|
||||||
|
active_stream_info = ActiveStreamInfo(
|
||||||
|
task_id=active_task.task_id,
|
||||||
|
last_message_id="0-0",
|
||||||
|
operation_id=active_task.operation_id,
|
||||||
|
tool_name=active_task.tool_name,
|
||||||
|
)
|
||||||
|
|
||||||
return SessionDetailResponse(
|
return SessionDetailResponse(
|
||||||
id=session.session_id,
|
id=session.session_id,
|
||||||
@@ -192,6 +269,7 @@ async def get_session(
|
|||||||
updated_at=session.updated_at.isoformat(),
|
updated_at=session.updated_at.isoformat(),
|
||||||
user_id=session.user_id or None,
|
user_id=session.user_id or None,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
active_stream=active_stream_info,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -211,49 +289,331 @@ async def stream_chat_post(
|
|||||||
- Tool call UI elements (if invoked)
|
- Tool call UI elements (if invoked)
|
||||||
- Tool execution results
|
- Tool execution results
|
||||||
|
|
||||||
|
The AI generation runs in a background task that continues even if the client disconnects.
|
||||||
|
All chunks are written to Redis for reconnection support. If the client disconnects,
|
||||||
|
they can reconnect using GET /tasks/{task_id}/stream to resume from where they left off.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
session_id: The chat session identifier to associate with the streamed messages.
|
session_id: The chat session identifier to associate with the streamed messages.
|
||||||
request: Request body containing message, is_user_message, and optional context.
|
request: Request body containing message, is_user_message, and optional context.
|
||||||
user_id: Optional authenticated user ID.
|
user_id: Optional authenticated user ID.
|
||||||
Returns:
|
Returns:
|
||||||
StreamingResponse: SSE-formatted response chunks.
|
StreamingResponse: SSE-formatted response chunks. First chunk is a "start" event
|
||||||
|
containing the task_id for reconnection.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
session = await _validate_and_get_session(session_id, user_id)
|
import asyncio
|
||||||
|
import time
|
||||||
|
|
||||||
async def event_generator() -> AsyncGenerator[str, None]:
|
stream_start_time = time.perf_counter()
|
||||||
chunk_count = 0
|
log_meta = {"component": "ChatStream", "session_id": session_id}
|
||||||
first_chunk_type: str | None = None
|
if user_id:
|
||||||
async for chunk in chat_service.stream_chat_completion(
|
log_meta["user_id"] = user_id
|
||||||
session_id,
|
|
||||||
request.message,
|
logger.info(
|
||||||
is_user_message=request.is_user_message,
|
f"[TIMING] stream_chat_post STARTED, session={session_id}, "
|
||||||
user_id=user_id,
|
f"user={user_id}, message_len={len(request.message)}",
|
||||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
extra={"json_fields": log_meta},
|
||||||
context=request.context,
|
)
|
||||||
):
|
session = await _validate_and_get_session(session_id, user_id)
|
||||||
if chunk_count < 3:
|
logger.info(
|
||||||
logger.info(
|
f"[TIMING] session validated in {(time.perf_counter() - stream_start_time) * 1000:.1f}ms",
|
||||||
"Chat stream chunk",
|
extra={
|
||||||
extra={
|
"json_fields": {
|
||||||
"session_id": session_id,
|
**log_meta,
|
||||||
"chunk_type": str(chunk.type),
|
"duration_ms": (time.perf_counter() - stream_start_time) * 1000,
|
||||||
},
|
}
|
||||||
)
|
},
|
||||||
if not first_chunk_type:
|
)
|
||||||
first_chunk_type = str(chunk.type)
|
|
||||||
chunk_count += 1
|
# Atomically append user message to session BEFORE creating task to avoid
|
||||||
yield chunk.to_sse()
|
# race condition where GET_SESSION sees task as "running" but message isn't
|
||||||
logger.info(
|
# saved yet. append_and_save_message re-fetches inside a lock to prevent
|
||||||
"Chat stream completed",
|
# message loss from concurrent requests.
|
||||||
extra={
|
if request.message:
|
||||||
"session_id": session_id,
|
message = ChatMessage(
|
||||||
"chunk_count": chunk_count,
|
role="user" if request.is_user_message else "assistant",
|
||||||
"first_chunk_type": first_chunk_type,
|
content=request.message,
|
||||||
},
|
|
||||||
)
|
)
|
||||||
# AI SDK protocol termination
|
if request.is_user_message:
|
||||||
yield "data: [DONE]\n\n"
|
track_user_message(
|
||||||
|
user_id=user_id,
|
||||||
|
session_id=session_id,
|
||||||
|
message_length=len(request.message),
|
||||||
|
)
|
||||||
|
logger.info(f"[STREAM] Saving user message to session {session_id}")
|
||||||
|
session = await append_and_save_message(session_id, message)
|
||||||
|
logger.info(f"[STREAM] User message saved for session {session_id}")
|
||||||
|
|
||||||
|
# Create a task in the stream registry for reconnection support
|
||||||
|
task_id = str(uuid_module.uuid4())
|
||||||
|
operation_id = str(uuid_module.uuid4())
|
||||||
|
log_meta["task_id"] = task_id
|
||||||
|
|
||||||
|
task_create_start = time.perf_counter()
|
||||||
|
await stream_registry.create_task(
|
||||||
|
task_id=task_id,
|
||||||
|
session_id=session_id,
|
||||||
|
user_id=user_id,
|
||||||
|
tool_call_id="chat_stream", # Not a tool call, but needed for the model
|
||||||
|
tool_name="chat",
|
||||||
|
operation_id=operation_id,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start) * 1000:.1f}ms",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"duration_ms": (time.perf_counter() - task_create_start) * 1000,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Background task that runs the AI generation independently of SSE connection
|
||||||
|
async def run_ai_generation():
|
||||||
|
import time as time_module
|
||||||
|
|
||||||
|
gen_start_time = time_module.perf_counter()
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] run_ai_generation STARTED, task={task_id}, session={session_id}, user={user_id}",
|
||||||
|
extra={"json_fields": log_meta},
|
||||||
|
)
|
||||||
|
first_chunk_time, ttfc = None, None
|
||||||
|
chunk_count = 0
|
||||||
|
try:
|
||||||
|
# Emit a start event with task_id for reconnection
|
||||||
|
start_chunk = StreamStart(messageId=task_id, taskId=task_id)
|
||||||
|
await stream_registry.publish_chunk(task_id, start_chunk)
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] StreamStart published at {(time_module.perf_counter() - gen_start_time) * 1000:.1f}ms",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"elapsed_ms": (time_module.perf_counter() - gen_start_time)
|
||||||
|
* 1000,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Choose service based on LaunchDarkly flag (falls back to config default)
|
||||||
|
use_sdk = await is_feature_enabled(
|
||||||
|
Flag.COPILOT_SDK,
|
||||||
|
user_id or "anonymous",
|
||||||
|
default=config.use_claude_agent_sdk,
|
||||||
|
)
|
||||||
|
stream_fn = (
|
||||||
|
sdk_service.stream_chat_completion_sdk
|
||||||
|
if use_sdk
|
||||||
|
else chat_service.stream_chat_completion
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] Calling {'sdk' if use_sdk else 'standard'} stream_chat_completion",
|
||||||
|
extra={"json_fields": log_meta},
|
||||||
|
)
|
||||||
|
# Pass message=None since we already added it to the session above
|
||||||
|
async for chunk in stream_fn(
|
||||||
|
session_id,
|
||||||
|
None, # Message already in session
|
||||||
|
is_user_message=request.is_user_message,
|
||||||
|
user_id=user_id,
|
||||||
|
session=session, # Pass session with message already added
|
||||||
|
context=request.context,
|
||||||
|
):
|
||||||
|
# Skip duplicate StreamStart — we already published one above
|
||||||
|
if isinstance(chunk, StreamStart):
|
||||||
|
continue
|
||||||
|
chunk_count += 1
|
||||||
|
if first_chunk_time is None:
|
||||||
|
first_chunk_time = time_module.perf_counter()
|
||||||
|
ttfc = first_chunk_time - gen_start_time
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] FIRST AI CHUNK at {ttfc:.2f}s, type={type(chunk).__name__}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"chunk_type": type(chunk).__name__,
|
||||||
|
"time_to_first_chunk_ms": ttfc * 1000,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Write to Redis (subscribers will receive via XREAD)
|
||||||
|
await stream_registry.publish_chunk(task_id, chunk)
|
||||||
|
|
||||||
|
gen_end_time = time_module.perf_counter()
|
||||||
|
total_time = (gen_end_time - gen_start_time) * 1000
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] run_ai_generation FINISHED in {total_time / 1000:.1f}s; "
|
||||||
|
f"task={task_id}, session={session_id}, "
|
||||||
|
f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"total_time_ms": total_time,
|
||||||
|
"time_to_first_chunk_ms": (
|
||||||
|
ttfc * 1000 if ttfc is not None else None
|
||||||
|
),
|
||||||
|
"n_chunks": chunk_count,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
await stream_registry.mark_task_completed(task_id, "completed")
|
||||||
|
except Exception as e:
|
||||||
|
elapsed = time_module.perf_counter() - gen_start_time
|
||||||
|
logger.error(
|
||||||
|
f"[TIMING] run_ai_generation ERROR after {elapsed:.2f}s: {e}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"elapsed_ms": elapsed * 1000,
|
||||||
|
"error": str(e),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Publish a StreamError so the frontend can display an error message
|
||||||
|
try:
|
||||||
|
await stream_registry.publish_chunk(
|
||||||
|
task_id,
|
||||||
|
StreamError(
|
||||||
|
errorText="An error occurred. Please try again.",
|
||||||
|
code="stream_error",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass # Best-effort; mark_task_completed will publish StreamFinish
|
||||||
|
await stream_registry.mark_task_completed(task_id, "failed")
|
||||||
|
|
||||||
|
# Start the AI generation in a background task
|
||||||
|
bg_task = asyncio.create_task(run_ai_generation())
|
||||||
|
await stream_registry.set_task_asyncio_task(task_id, bg_task)
|
||||||
|
setup_time = (time.perf_counter() - stream_start_time) * 1000
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] Background task started, setup={setup_time:.1f}ms",
|
||||||
|
extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
|
||||||
|
)
|
||||||
|
|
||||||
|
# SSE endpoint that subscribes to the task's stream
|
||||||
|
async def event_generator() -> AsyncGenerator[str, None]:
|
||||||
|
import time as time_module
|
||||||
|
|
||||||
|
event_gen_start = time_module.perf_counter()
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] event_generator STARTED, task={task_id}, session={session_id}, "
|
||||||
|
f"user={user_id}",
|
||||||
|
extra={"json_fields": log_meta},
|
||||||
|
)
|
||||||
|
subscriber_queue = None
|
||||||
|
first_chunk_yielded = False
|
||||||
|
chunks_yielded = 0
|
||||||
|
try:
|
||||||
|
# Subscribe to the task stream (this replays existing messages + live updates)
|
||||||
|
subscriber_queue = await stream_registry.subscribe_to_task(
|
||||||
|
task_id=task_id,
|
||||||
|
user_id=user_id,
|
||||||
|
last_message_id="0-0", # Get all messages from the beginning
|
||||||
|
)
|
||||||
|
|
||||||
|
if subscriber_queue is None:
|
||||||
|
yield StreamFinish().to_sse()
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
|
return
|
||||||
|
|
||||||
|
# Read from the subscriber queue and yield to SSE
|
||||||
|
logger.info(
|
||||||
|
"[TIMING] Starting to read from subscriber_queue",
|
||||||
|
extra={"json_fields": log_meta},
|
||||||
|
)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
|
||||||
|
chunks_yielded += 1
|
||||||
|
|
||||||
|
if not first_chunk_yielded:
|
||||||
|
first_chunk_yielded = True
|
||||||
|
elapsed = time_module.perf_counter() - event_gen_start
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] FIRST CHUNK from queue at {elapsed:.2f}s, "
|
||||||
|
f"type={type(chunk).__name__}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"chunk_type": type(chunk).__name__,
|
||||||
|
"elapsed_ms": elapsed * 1000,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
yield chunk.to_sse()
|
||||||
|
|
||||||
|
# Check for finish signal
|
||||||
|
if isinstance(chunk, StreamFinish):
|
||||||
|
total_time = time_module.perf_counter() - event_gen_start
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] StreamFinish received in {total_time:.2f}s; "
|
||||||
|
f"n_chunks={chunks_yielded}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"chunks_yielded": chunks_yielded,
|
||||||
|
"total_time_ms": total_time * 1000,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
break
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
yield StreamHeartbeat().to_sse()
|
||||||
|
|
||||||
|
except GeneratorExit:
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] GeneratorExit (client disconnected), chunks={chunks_yielded}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"chunks_yielded": chunks_yielded,
|
||||||
|
"reason": "client_disconnect",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
pass # Client disconnected - background task continues
|
||||||
|
except Exception as e:
|
||||||
|
elapsed = (time_module.perf_counter() - event_gen_start) * 1000
|
||||||
|
logger.error(
|
||||||
|
f"[TIMING] event_generator ERROR after {elapsed:.1f}ms: {e}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Surface error to frontend so it doesn't appear stuck
|
||||||
|
yield StreamError(
|
||||||
|
errorText="An error occurred. Please try again.",
|
||||||
|
code="stream_error",
|
||||||
|
).to_sse()
|
||||||
|
yield StreamFinish().to_sse()
|
||||||
|
finally:
|
||||||
|
# Unsubscribe when client disconnects or stream ends
|
||||||
|
if subscriber_queue is not None:
|
||||||
|
try:
|
||||||
|
await stream_registry.unsubscribe_from_task(
|
||||||
|
task_id, subscriber_queue
|
||||||
|
)
|
||||||
|
except Exception as unsub_err:
|
||||||
|
logger.error(
|
||||||
|
f"Error unsubscribing from task {task_id}: {unsub_err}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
# AI SDK protocol termination - always yield even if unsubscribe fails
|
||||||
|
total_time = time_module.perf_counter() - event_gen_start
|
||||||
|
logger.info(
|
||||||
|
f"[TIMING] event_generator FINISHED in {total_time:.2f}s; "
|
||||||
|
f"task={task_id}, session={session_id}, n_chunks={chunks_yielded}",
|
||||||
|
extra={
|
||||||
|
"json_fields": {
|
||||||
|
**log_meta,
|
||||||
|
"total_time_ms": total_time * 1000,
|
||||||
|
"chunks_yielded": chunks_yielded,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
event_generator(),
|
event_generator(),
|
||||||
@@ -270,63 +630,90 @@ async def stream_chat_post(
|
|||||||
@router.get(
|
@router.get(
|
||||||
"/sessions/{session_id}/stream",
|
"/sessions/{session_id}/stream",
|
||||||
)
|
)
|
||||||
async def stream_chat_get(
|
async def resume_session_stream(
|
||||||
session_id: str,
|
session_id: str,
|
||||||
message: Annotated[str, Query(min_length=1, max_length=10000)],
|
|
||||||
user_id: str | None = Depends(auth.get_user_id),
|
user_id: str | None = Depends(auth.get_user_id),
|
||||||
is_user_message: bool = Query(default=True),
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Stream chat responses for a session (GET - legacy endpoint).
|
Resume an active stream for a session.
|
||||||
|
|
||||||
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
|
Called by the AI SDK's ``useChat(resume: true)`` on page load.
|
||||||
- Text fragments as they are generated
|
Checks for an active (in-progress) task on the session and either replays
|
||||||
- Tool call UI elements (if invoked)
|
the full SSE stream or returns 204 No Content if nothing is running.
|
||||||
- Tool execution results
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
session_id: The chat session identifier to associate with the streamed messages.
|
session_id: The chat session identifier.
|
||||||
message: The user's new message to process.
|
|
||||||
user_id: Optional authenticated user ID.
|
user_id: Optional authenticated user ID.
|
||||||
is_user_message: Whether the message is a user message.
|
|
||||||
Returns:
|
|
||||||
StreamingResponse: SSE-formatted response chunks.
|
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
StreamingResponse (SSE) when an active stream exists,
|
||||||
|
or 204 No Content when there is nothing to resume.
|
||||||
"""
|
"""
|
||||||
session = await _validate_and_get_session(session_id, user_id)
|
import asyncio
|
||||||
|
|
||||||
|
active_task, _last_id = await stream_registry.get_active_task_for_session(
|
||||||
|
session_id, user_id
|
||||||
|
)
|
||||||
|
|
||||||
|
if not active_task:
|
||||||
|
return Response(status_code=204)
|
||||||
|
|
||||||
|
subscriber_queue = await stream_registry.subscribe_to_task(
|
||||||
|
task_id=active_task.task_id,
|
||||||
|
user_id=user_id,
|
||||||
|
last_message_id="0-0", # Full replay so useChat rebuilds the message
|
||||||
|
)
|
||||||
|
|
||||||
|
if subscriber_queue is None:
|
||||||
|
return Response(status_code=204)
|
||||||
|
|
||||||
async def event_generator() -> AsyncGenerator[str, None]:
|
async def event_generator() -> AsyncGenerator[str, None]:
|
||||||
chunk_count = 0
|
chunk_count = 0
|
||||||
first_chunk_type: str | None = None
|
first_chunk_type: str | None = None
|
||||||
async for chunk in chat_service.stream_chat_completion(
|
try:
|
||||||
session_id,
|
while True:
|
||||||
message,
|
try:
|
||||||
is_user_message=is_user_message,
|
chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
|
||||||
user_id=user_id,
|
if chunk_count < 3:
|
||||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
logger.info(
|
||||||
):
|
"Resume stream chunk",
|
||||||
if chunk_count < 3:
|
extra={
|
||||||
logger.info(
|
"session_id": session_id,
|
||||||
"Chat stream chunk",
|
"chunk_type": str(chunk.type),
|
||||||
extra={
|
},
|
||||||
"session_id": session_id,
|
)
|
||||||
"chunk_type": str(chunk.type),
|
if not first_chunk_type:
|
||||||
},
|
first_chunk_type = str(chunk.type)
|
||||||
|
chunk_count += 1
|
||||||
|
yield chunk.to_sse()
|
||||||
|
|
||||||
|
if isinstance(chunk, StreamFinish):
|
||||||
|
break
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
yield StreamHeartbeat().to_sse()
|
||||||
|
except GeneratorExit:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in resume stream for session {session_id}: {e}")
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
await stream_registry.unsubscribe_from_task(
|
||||||
|
active_task.task_id, subscriber_queue
|
||||||
)
|
)
|
||||||
if not first_chunk_type:
|
except Exception as unsub_err:
|
||||||
first_chunk_type = str(chunk.type)
|
logger.error(
|
||||||
chunk_count += 1
|
f"Error unsubscribing from task {active_task.task_id}: {unsub_err}",
|
||||||
yield chunk.to_sse()
|
exc_info=True,
|
||||||
logger.info(
|
)
|
||||||
"Chat stream completed",
|
logger.info(
|
||||||
extra={
|
"Resume stream completed",
|
||||||
"session_id": session_id,
|
extra={
|
||||||
"chunk_count": chunk_count,
|
"session_id": session_id,
|
||||||
"first_chunk_type": first_chunk_type,
|
"n_chunks": chunk_count,
|
||||||
},
|
"first_chunk_type": first_chunk_type,
|
||||||
)
|
},
|
||||||
# AI SDK protocol termination
|
)
|
||||||
yield "data: [DONE]\n\n"
|
yield "data: [DONE]\n\n"
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
event_generator(),
|
event_generator(),
|
||||||
@@ -334,8 +721,8 @@ async def stream_chat_get(
|
|||||||
headers={
|
headers={
|
||||||
"Cache-Control": "no-cache",
|
"Cache-Control": "no-cache",
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
"X-Accel-Buffering": "no",
|
||||||
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
|
"x-vercel-ai-ui-message-stream": "v1",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -366,6 +753,249 @@ async def session_assign_user(
|
|||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
# ========== Task Streaming (SSE Reconnection) ==========
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/tasks/{task_id}/stream",
|
||||||
|
)
|
||||||
|
async def stream_task(
|
||||||
|
task_id: str,
|
||||||
|
user_id: str | None = Depends(auth.get_user_id),
|
||||||
|
last_message_id: str = Query(
|
||||||
|
default="0-0",
|
||||||
|
description="Last Redis Stream message ID received (e.g., '1706540123456-0'). Use '0-0' for full replay.",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Reconnect to a long-running task's SSE stream.
|
||||||
|
|
||||||
|
When a long-running operation (like agent generation) starts, the client
|
||||||
|
receives a task_id. If the connection drops, the client can reconnect
|
||||||
|
using this endpoint to resume receiving updates.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
task_id: The task ID from the operation_started response.
|
||||||
|
user_id: Authenticated user ID for ownership validation.
|
||||||
|
last_message_id: Last Redis Stream message ID received ("0-0" for full replay).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
StreamingResponse: SSE-formatted response chunks starting after last_message_id.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
HTTPException: 404 if task not found, 410 if task expired, 403 if access denied.
|
||||||
|
"""
|
||||||
|
# Check task existence and expiry before subscribing
|
||||||
|
task, error_code = await stream_registry.get_task_with_expiry_info(task_id)
|
||||||
|
|
||||||
|
if error_code == "TASK_EXPIRED":
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=410,
|
||||||
|
detail={
|
||||||
|
"code": "TASK_EXPIRED",
|
||||||
|
"message": "This operation has expired. Please try again.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if error_code == "TASK_NOT_FOUND":
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail={
|
||||||
|
"code": "TASK_NOT_FOUND",
|
||||||
|
"message": f"Task {task_id} not found.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate ownership if task has an owner
|
||||||
|
if task and task.user_id and user_id != task.user_id:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=403,
|
||||||
|
detail={
|
||||||
|
"code": "ACCESS_DENIED",
|
||||||
|
"message": "You do not have access to this task.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get subscriber queue from stream registry
|
||||||
|
subscriber_queue = await stream_registry.subscribe_to_task(
|
||||||
|
task_id=task_id,
|
||||||
|
user_id=user_id,
|
||||||
|
last_message_id=last_message_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
if subscriber_queue is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail={
|
||||||
|
"code": "TASK_NOT_FOUND",
|
||||||
|
"message": f"Task {task_id} not found or access denied.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
async def event_generator() -> AsyncGenerator[str, None]:
|
||||||
|
heartbeat_interval = 15.0 # Send heartbeat every 15 seconds
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
# Wait for next chunk with timeout for heartbeats
|
||||||
|
chunk = await asyncio.wait_for(
|
||||||
|
subscriber_queue.get(), timeout=heartbeat_interval
|
||||||
|
)
|
||||||
|
yield chunk.to_sse()
|
||||||
|
|
||||||
|
# Check for finish signal
|
||||||
|
if isinstance(chunk, StreamFinish):
|
||||||
|
break
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
# Send heartbeat to keep connection alive
|
||||||
|
yield StreamHeartbeat().to_sse()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in task stream {task_id}: {e}", exc_info=True)
|
||||||
|
finally:
|
||||||
|
# Unsubscribe when client disconnects or stream ends
|
||||||
|
try:
|
||||||
|
await stream_registry.unsubscribe_from_task(task_id, subscriber_queue)
|
||||||
|
except Exception as unsub_err:
|
||||||
|
logger.error(
|
||||||
|
f"Error unsubscribing from task {task_id}: {unsub_err}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
# AI SDK protocol termination - always yield even if unsubscribe fails
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
event_generator(),
|
||||||
|
media_type="text/event-stream",
|
||||||
|
headers={
|
||||||
|
"Cache-Control": "no-cache",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"X-Accel-Buffering": "no",
|
||||||
|
"x-vercel-ai-ui-message-stream": "v1",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/tasks/{task_id}",
|
||||||
|
)
|
||||||
|
async def get_task_status(
|
||||||
|
task_id: str,
|
||||||
|
user_id: str | None = Depends(auth.get_user_id),
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Get the status of a long-running task.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
task_id: The task ID to check.
|
||||||
|
user_id: Authenticated user ID for ownership validation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Task status including task_id, status, tool_name, and operation_id.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
NotFoundError: If task_id is not found or user doesn't have access.
|
||||||
|
"""
|
||||||
|
task = await stream_registry.get_task(task_id)
|
||||||
|
|
||||||
|
if task is None:
|
||||||
|
raise NotFoundError(f"Task {task_id} not found.")
|
||||||
|
|
||||||
|
# Validate ownership - if task has an owner, requester must match
|
||||||
|
if task.user_id and user_id != task.user_id:
|
||||||
|
raise NotFoundError(f"Task {task_id} not found.")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"task_id": task.task_id,
|
||||||
|
"session_id": task.session_id,
|
||||||
|
"status": task.status,
|
||||||
|
"tool_name": task.tool_name,
|
||||||
|
"operation_id": task.operation_id,
|
||||||
|
"created_at": task.created_at.isoformat(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ========== External Completion Webhook ==========
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/operations/{operation_id}/complete",
|
||||||
|
status_code=200,
|
||||||
|
)
|
||||||
|
async def complete_operation(
|
||||||
|
operation_id: str,
|
||||||
|
request: OperationCompleteRequest,
|
||||||
|
x_api_key: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
External completion webhook for long-running operations.
|
||||||
|
|
||||||
|
Called by Agent Generator (or other services) when an operation completes.
|
||||||
|
This triggers the stream registry to publish completion and continue LLM generation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
operation_id: The operation ID to complete.
|
||||||
|
request: Completion payload with success status and result/error.
|
||||||
|
x_api_key: Internal API key for authentication.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Status of the completion.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
HTTPException: If API key is invalid or operation not found.
|
||||||
|
"""
|
||||||
|
# Validate internal API key - reject if not configured or invalid
|
||||||
|
if not config.internal_api_key:
|
||||||
|
logger.error(
|
||||||
|
"Operation complete webhook rejected: CHAT_INTERNAL_API_KEY not configured"
|
||||||
|
)
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=503,
|
||||||
|
detail="Webhook not available: internal API key not configured",
|
||||||
|
)
|
||||||
|
if x_api_key != config.internal_api_key:
|
||||||
|
raise HTTPException(status_code=401, detail="Invalid API key")
|
||||||
|
|
||||||
|
# Find task by operation_id
|
||||||
|
task = await stream_registry.find_task_by_operation_id(operation_id)
|
||||||
|
if task is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Operation {operation_id} not found",
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Received completion webhook for operation {operation_id} "
|
||||||
|
f"(task_id={task.task_id}, success={request.success})"
|
||||||
|
)
|
||||||
|
|
||||||
|
if request.success:
|
||||||
|
await process_operation_success(task, request.result)
|
||||||
|
else:
|
||||||
|
await process_operation_failure(task, request.error)
|
||||||
|
|
||||||
|
return {"status": "ok", "task_id": task.task_id}
|
||||||
|
|
||||||
|
|
||||||
|
# ========== Configuration ==========
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/config/ttl", status_code=200)
|
||||||
|
async def get_ttl_config() -> dict:
|
||||||
|
"""
|
||||||
|
Get the stream TTL configuration.
|
||||||
|
|
||||||
|
Returns the Time-To-Live settings for chat streams, which determines
|
||||||
|
how long clients can reconnect to an active stream.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: TTL configuration with seconds and milliseconds values.
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"stream_ttl_seconds": config.stream_ttl,
|
||||||
|
"stream_ttl_ms": config.stream_ttl * 1000,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ========== Health Check ==========
|
# ========== Health Check ==========
|
||||||
|
|
||||||
|
|
||||||
@@ -402,3 +1032,42 @@ async def health_check() -> dict:
|
|||||||
"service": "chat",
|
"service": "chat",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ========== Schema Export (for OpenAPI / Orval codegen) ==========
|
||||||
|
|
||||||
|
ToolResponseUnion = (
|
||||||
|
AgentsFoundResponse
|
||||||
|
| NoResultsResponse
|
||||||
|
| AgentDetailsResponse
|
||||||
|
| SetupRequirementsResponse
|
||||||
|
| ExecutionStartedResponse
|
||||||
|
| NeedLoginResponse
|
||||||
|
| ErrorResponse
|
||||||
|
| InputValidationErrorResponse
|
||||||
|
| AgentOutputResponse
|
||||||
|
| UnderstandingUpdatedResponse
|
||||||
|
| AgentPreviewResponse
|
||||||
|
| AgentSavedResponse
|
||||||
|
| ClarificationNeededResponse
|
||||||
|
| BlockListResponse
|
||||||
|
| BlockOutputResponse
|
||||||
|
| DocSearchResultsResponse
|
||||||
|
| DocPageResponse
|
||||||
|
| OperationStartedResponse
|
||||||
|
| OperationPendingResponse
|
||||||
|
| OperationInProgressResponse
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/schema/tool-responses",
|
||||||
|
response_model=ToolResponseUnion,
|
||||||
|
include_in_schema=True,
|
||||||
|
summary="[Dummy] Tool response type export for codegen",
|
||||||
|
description="This endpoint is not meant to be called. It exists solely to "
|
||||||
|
"expose tool response models in the OpenAPI schema for frontend codegen.",
|
||||||
|
)
|
||||||
|
async def _tool_response_schema() -> ToolResponseUnion: # type: ignore[return]
|
||||||
|
"""Never called at runtime. Exists only so Orval generates TS types."""
|
||||||
|
raise HTTPException(status_code=501, detail="Schema-only endpoint")
|
||||||
|
|||||||
@@ -0,0 +1,14 @@
|
|||||||
|
"""Claude Agent SDK integration for CoPilot.
|
||||||
|
|
||||||
|
This module provides the integration layer between the Claude Agent SDK
|
||||||
|
and the existing CoPilot tool system, enabling drop-in replacement of
|
||||||
|
the current LLM orchestration with the battle-tested Claude Agent SDK.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .service import stream_chat_completion_sdk
|
||||||
|
from .tool_adapter import create_copilot_mcp_server
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"stream_chat_completion_sdk",
|
||||||
|
"create_copilot_mcp_server",
|
||||||
|
]
|
||||||
@@ -0,0 +1,198 @@
|
|||||||
|
"""Response adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
|
||||||
|
|
||||||
|
This module provides the adapter layer that converts streaming messages from
|
||||||
|
the Claude Agent SDK into the Vercel AI SDK UI Stream Protocol format that
|
||||||
|
the frontend expects.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from claude_agent_sdk import (
|
||||||
|
AssistantMessage,
|
||||||
|
Message,
|
||||||
|
ResultMessage,
|
||||||
|
SystemMessage,
|
||||||
|
TextBlock,
|
||||||
|
ToolResultBlock,
|
||||||
|
ToolUseBlock,
|
||||||
|
UserMessage,
|
||||||
|
)
|
||||||
|
|
||||||
|
from backend.api.features.chat.response_model import (
|
||||||
|
StreamBaseResponse,
|
||||||
|
StreamError,
|
||||||
|
StreamFinish,
|
||||||
|
StreamFinishStep,
|
||||||
|
StreamStart,
|
||||||
|
StreamStartStep,
|
||||||
|
StreamTextDelta,
|
||||||
|
StreamTextEnd,
|
||||||
|
StreamTextStart,
|
||||||
|
StreamToolInputAvailable,
|
||||||
|
StreamToolInputStart,
|
||||||
|
StreamToolOutputAvailable,
|
||||||
|
)
|
||||||
|
from backend.api.features.chat.sdk.tool_adapter import (
|
||||||
|
MCP_TOOL_PREFIX,
|
||||||
|
pop_pending_tool_output,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SDKResponseAdapter:
|
||||||
|
"""Adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
|
||||||
|
|
||||||
|
This class maintains state during a streaming session to properly track
|
||||||
|
text blocks, tool calls, and message lifecycle.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, message_id: str | None = None):
|
||||||
|
self.message_id = message_id or str(uuid.uuid4())
|
||||||
|
self.text_block_id = str(uuid.uuid4())
|
||||||
|
self.has_started_text = False
|
||||||
|
self.has_ended_text = False
|
||||||
|
self.current_tool_calls: dict[str, dict[str, str]] = {}
|
||||||
|
self.task_id: str | None = None
|
||||||
|
self.step_open = False
|
||||||
|
|
||||||
|
def set_task_id(self, task_id: str) -> None:
|
||||||
|
"""Set the task ID for reconnection support."""
|
||||||
|
self.task_id = task_id
|
||||||
|
|
||||||
|
def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
|
||||||
|
"""Convert a single SDK message to Vercel AI SDK format."""
|
||||||
|
responses: list[StreamBaseResponse] = []
|
||||||
|
|
||||||
|
if isinstance(sdk_message, SystemMessage):
|
||||||
|
if sdk_message.subtype == "init":
|
||||||
|
responses.append(
|
||||||
|
StreamStart(messageId=self.message_id, taskId=self.task_id)
|
||||||
|
)
|
||||||
|
# Open the first step (matches non-SDK: StreamStart then StreamStartStep)
|
||||||
|
responses.append(StreamStartStep())
|
||||||
|
self.step_open = True
|
||||||
|
|
||||||
|
elif isinstance(sdk_message, AssistantMessage):
|
||||||
|
# After tool results, the SDK sends a new AssistantMessage for the
|
||||||
|
# next LLM turn. Open a new step if the previous one was closed.
|
||||||
|
if not self.step_open:
|
||||||
|
responses.append(StreamStartStep())
|
||||||
|
self.step_open = True
|
||||||
|
|
||||||
|
for block in sdk_message.content:
|
||||||
|
if isinstance(block, TextBlock):
|
||||||
|
if block.text:
|
||||||
|
self._ensure_text_started(responses)
|
||||||
|
responses.append(
|
||||||
|
StreamTextDelta(id=self.text_block_id, delta=block.text)
|
||||||
|
)
|
||||||
|
|
||||||
|
elif isinstance(block, ToolUseBlock):
|
||||||
|
self._end_text_if_open(responses)
|
||||||
|
|
||||||
|
# Strip MCP prefix so frontend sees "find_block"
|
||||||
|
# instead of "mcp__copilot__find_block".
|
||||||
|
tool_name = block.name.removeprefix(MCP_TOOL_PREFIX)
|
||||||
|
|
||||||
|
responses.append(
|
||||||
|
StreamToolInputStart(toolCallId=block.id, toolName=tool_name)
|
||||||
|
)
|
||||||
|
responses.append(
|
||||||
|
StreamToolInputAvailable(
|
||||||
|
toolCallId=block.id,
|
||||||
|
toolName=tool_name,
|
||||||
|
input=block.input,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.current_tool_calls[block.id] = {"name": tool_name}
|
||||||
|
|
||||||
|
elif isinstance(sdk_message, UserMessage):
|
||||||
|
# UserMessage carries tool results back from tool execution.
|
||||||
|
content = sdk_message.content
|
||||||
|
blocks = content if isinstance(content, list) else []
|
||||||
|
for block in blocks:
|
||||||
|
if isinstance(block, ToolResultBlock) and block.tool_use_id:
|
||||||
|
tool_info = self.current_tool_calls.get(block.tool_use_id, {})
|
||||||
|
tool_name = tool_info.get("name", "unknown")
|
||||||
|
|
||||||
|
# Prefer the stashed full output over the SDK's
|
||||||
|
# (potentially truncated) ToolResultBlock content.
|
||||||
|
# The SDK truncates large results, writing them to disk,
|
||||||
|
# which breaks frontend widget parsing.
|
||||||
|
output = pop_pending_tool_output(tool_name) or (
|
||||||
|
_extract_tool_output(block.content)
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.append(
|
||||||
|
StreamToolOutputAvailable(
|
||||||
|
toolCallId=block.tool_use_id,
|
||||||
|
toolName=tool_name,
|
||||||
|
output=output,
|
||||||
|
success=not (block.is_error or False),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Close the current step after tool results — the next
|
||||||
|
# AssistantMessage will open a new step for the continuation.
|
||||||
|
if self.step_open:
|
||||||
|
responses.append(StreamFinishStep())
|
||||||
|
self.step_open = False
|
||||||
|
|
||||||
|
elif isinstance(sdk_message, ResultMessage):
|
||||||
|
self._end_text_if_open(responses)
|
||||||
|
# Close the step before finishing.
|
||||||
|
if self.step_open:
|
||||||
|
responses.append(StreamFinishStep())
|
||||||
|
self.step_open = False
|
||||||
|
|
||||||
|
if sdk_message.subtype == "success":
|
||||||
|
responses.append(StreamFinish())
|
||||||
|
elif sdk_message.subtype in ("error", "error_during_execution"):
|
||||||
|
error_msg = getattr(sdk_message, "result", None) or "Unknown error"
|
||||||
|
responses.append(
|
||||||
|
StreamError(errorText=str(error_msg), code="sdk_error")
|
||||||
|
)
|
||||||
|
responses.append(StreamFinish())
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.debug(f"Unhandled SDK message type: {type(sdk_message).__name__}")
|
||||||
|
|
||||||
|
return responses
|
||||||
|
|
||||||
|
def _ensure_text_started(self, responses: list[StreamBaseResponse]) -> None:
|
||||||
|
"""Start (or restart) a text block if needed."""
|
||||||
|
if not self.has_started_text or self.has_ended_text:
|
||||||
|
if self.has_ended_text:
|
||||||
|
self.text_block_id = str(uuid.uuid4())
|
||||||
|
self.has_ended_text = False
|
||||||
|
responses.append(StreamTextStart(id=self.text_block_id))
|
||||||
|
self.has_started_text = True
|
||||||
|
|
||||||
|
def _end_text_if_open(self, responses: list[StreamBaseResponse]) -> None:
|
||||||
|
"""End the current text block if one is open."""
|
||||||
|
if self.has_started_text and not self.has_ended_text:
|
||||||
|
responses.append(StreamTextEnd(id=self.text_block_id))
|
||||||
|
self.has_ended_text = True
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
|
||||||
|
"""Extract a string output from a ToolResultBlock's content field."""
|
||||||
|
if isinstance(content, str):
|
||||||
|
return content
|
||||||
|
if isinstance(content, list):
|
||||||
|
parts = [item.get("text", "") for item in content if item.get("type") == "text"]
|
||||||
|
if parts:
|
||||||
|
return "".join(parts)
|
||||||
|
try:
|
||||||
|
return json.dumps(content)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return str(content)
|
||||||
|
if content is None:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
return json.dumps(content)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return str(content)
|
||||||
@@ -0,0 +1,366 @@
|
|||||||
|
"""Unit tests for the SDK response adapter."""
|
||||||
|
|
||||||
|
from claude_agent_sdk import (
|
||||||
|
AssistantMessage,
|
||||||
|
ResultMessage,
|
||||||
|
SystemMessage,
|
||||||
|
TextBlock,
|
||||||
|
ToolResultBlock,
|
||||||
|
ToolUseBlock,
|
||||||
|
UserMessage,
|
||||||
|
)
|
||||||
|
|
||||||
|
from backend.api.features.chat.response_model import (
|
||||||
|
StreamBaseResponse,
|
||||||
|
StreamError,
|
||||||
|
StreamFinish,
|
||||||
|
StreamFinishStep,
|
||||||
|
StreamStart,
|
||||||
|
StreamStartStep,
|
||||||
|
StreamTextDelta,
|
||||||
|
StreamTextEnd,
|
||||||
|
StreamTextStart,
|
||||||
|
StreamToolInputAvailable,
|
||||||
|
StreamToolInputStart,
|
||||||
|
StreamToolOutputAvailable,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .response_adapter import SDKResponseAdapter
|
||||||
|
from .tool_adapter import MCP_TOOL_PREFIX
|
||||||
|
|
||||||
|
|
||||||
|
def _adapter() -> SDKResponseAdapter:
|
||||||
|
a = SDKResponseAdapter(message_id="msg-1")
|
||||||
|
a.set_task_id("task-1")
|
||||||
|
return a
|
||||||
|
|
||||||
|
|
||||||
|
# -- SystemMessage -----------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_system_init_emits_start_and_step():
|
||||||
|
adapter = _adapter()
|
||||||
|
results = adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||||
|
assert len(results) == 2
|
||||||
|
assert isinstance(results[0], StreamStart)
|
||||||
|
assert results[0].messageId == "msg-1"
|
||||||
|
assert results[0].taskId == "task-1"
|
||||||
|
assert isinstance(results[1], StreamStartStep)
|
||||||
|
|
||||||
|
|
||||||
|
def test_system_non_init_emits_nothing():
|
||||||
|
adapter = _adapter()
|
||||||
|
results = adapter.convert_message(SystemMessage(subtype="other", data={}))
|
||||||
|
assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
# -- AssistantMessage with TextBlock -----------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_block_emits_step_start_and_delta():
|
||||||
|
adapter = _adapter()
|
||||||
|
msg = AssistantMessage(content=[TextBlock(text="hello")], model="test")
|
||||||
|
results = adapter.convert_message(msg)
|
||||||
|
assert len(results) == 3
|
||||||
|
assert isinstance(results[0], StreamStartStep)
|
||||||
|
assert isinstance(results[1], StreamTextStart)
|
||||||
|
assert isinstance(results[2], StreamTextDelta)
|
||||||
|
assert results[2].delta == "hello"
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_text_block_emits_only_step():
|
||||||
|
adapter = _adapter()
|
||||||
|
msg = AssistantMessage(content=[TextBlock(text="")], model="test")
|
||||||
|
results = adapter.convert_message(msg)
|
||||||
|
# Empty text skipped, but step still opens
|
||||||
|
assert len(results) == 1
|
||||||
|
assert isinstance(results[0], StreamStartStep)
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiple_text_deltas_reuse_block_id():
|
||||||
|
adapter = _adapter()
|
||||||
|
msg1 = AssistantMessage(content=[TextBlock(text="a")], model="test")
|
||||||
|
msg2 = AssistantMessage(content=[TextBlock(text="b")], model="test")
|
||||||
|
r1 = adapter.convert_message(msg1)
|
||||||
|
r2 = adapter.convert_message(msg2)
|
||||||
|
# First gets step+start+delta, second only delta (block & step already started)
|
||||||
|
assert len(r1) == 3
|
||||||
|
assert isinstance(r1[0], StreamStartStep)
|
||||||
|
assert isinstance(r1[1], StreamTextStart)
|
||||||
|
assert len(r2) == 1
|
||||||
|
assert isinstance(r2[0], StreamTextDelta)
|
||||||
|
assert r1[1].id == r2[0].id # same block ID
|
||||||
|
|
||||||
|
|
||||||
|
# -- AssistantMessage with ToolUseBlock --------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_use_emits_input_start_and_available():
|
||||||
|
"""Tool names arrive with MCP prefix and should be stripped for the frontend."""
|
||||||
|
adapter = _adapter()
|
||||||
|
msg = AssistantMessage(
|
||||||
|
content=[
|
||||||
|
ToolUseBlock(
|
||||||
|
id="tool-1",
|
||||||
|
name=f"{MCP_TOOL_PREFIX}find_agent",
|
||||||
|
input={"q": "x"},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(msg)
|
||||||
|
assert len(results) == 3
|
||||||
|
assert isinstance(results[0], StreamStartStep)
|
||||||
|
assert isinstance(results[1], StreamToolInputStart)
|
||||||
|
assert results[1].toolCallId == "tool-1"
|
||||||
|
assert results[1].toolName == "find_agent" # prefix stripped
|
||||||
|
assert isinstance(results[2], StreamToolInputAvailable)
|
||||||
|
assert results[2].toolName == "find_agent" # prefix stripped
|
||||||
|
assert results[2].input == {"q": "x"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_then_tool_ends_text_block():
|
||||||
|
adapter = _adapter()
|
||||||
|
text_msg = AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
|
||||||
|
tool_msg = AssistantMessage(
|
||||||
|
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
adapter.convert_message(text_msg) # opens step + text
|
||||||
|
results = adapter.convert_message(tool_msg)
|
||||||
|
# Step already open, so: TextEnd, ToolInputStart, ToolInputAvailable
|
||||||
|
assert len(results) == 3
|
||||||
|
assert isinstance(results[0], StreamTextEnd)
|
||||||
|
assert isinstance(results[1], StreamToolInputStart)
|
||||||
|
|
||||||
|
|
||||||
|
# -- UserMessage with ToolResultBlock ----------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_result_emits_output_and_finish_step():
|
||||||
|
adapter = _adapter()
|
||||||
|
# First register the tool call (opens step) — SDK sends prefixed name
|
||||||
|
tool_msg = AssistantMessage(
|
||||||
|
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}find_agent", input={})],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
adapter.convert_message(tool_msg)
|
||||||
|
|
||||||
|
# Now send tool result
|
||||||
|
result_msg = UserMessage(
|
||||||
|
content=[ToolResultBlock(tool_use_id="t1", content="found 3 agents")]
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(result_msg)
|
||||||
|
assert len(results) == 2
|
||||||
|
assert isinstance(results[0], StreamToolOutputAvailable)
|
||||||
|
assert results[0].toolCallId == "t1"
|
||||||
|
assert results[0].toolName == "find_agent" # prefix stripped
|
||||||
|
assert results[0].output == "found 3 agents"
|
||||||
|
assert results[0].success is True
|
||||||
|
assert isinstance(results[1], StreamFinishStep)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_result_error():
|
||||||
|
adapter = _adapter()
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(
|
||||||
|
content=[
|
||||||
|
ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}run_agent", input={})
|
||||||
|
],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
result_msg = UserMessage(
|
||||||
|
content=[ToolResultBlock(tool_use_id="t1", content="timeout", is_error=True)]
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(result_msg)
|
||||||
|
assert isinstance(results[0], StreamToolOutputAvailable)
|
||||||
|
assert results[0].success is False
|
||||||
|
assert isinstance(results[1], StreamFinishStep)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tool_result_list_content():
|
||||||
|
adapter = _adapter()
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(
|
||||||
|
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
result_msg = UserMessage(
|
||||||
|
content=[
|
||||||
|
ToolResultBlock(
|
||||||
|
tool_use_id="t1",
|
||||||
|
content=[
|
||||||
|
{"type": "text", "text": "line1"},
|
||||||
|
{"type": "text", "text": "line2"},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(result_msg)
|
||||||
|
assert isinstance(results[0], StreamToolOutputAvailable)
|
||||||
|
assert results[0].output == "line1line2"
|
||||||
|
assert isinstance(results[1], StreamFinishStep)
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_user_message_ignored():
|
||||||
|
"""A plain string UserMessage (not tool results) produces no output."""
|
||||||
|
adapter = _adapter()
|
||||||
|
results = adapter.convert_message(UserMessage(content="hello"))
|
||||||
|
assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
# -- ResultMessage -----------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_result_success_emits_finish_step_and_finish():
|
||||||
|
adapter = _adapter()
|
||||||
|
# Start some text first (opens step)
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(content=[TextBlock(text="done")], model="test")
|
||||||
|
)
|
||||||
|
msg = ResultMessage(
|
||||||
|
subtype="success",
|
||||||
|
duration_ms=100,
|
||||||
|
duration_api_ms=50,
|
||||||
|
is_error=False,
|
||||||
|
num_turns=1,
|
||||||
|
session_id="s1",
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(msg)
|
||||||
|
# TextEnd + FinishStep + StreamFinish
|
||||||
|
assert len(results) == 3
|
||||||
|
assert isinstance(results[0], StreamTextEnd)
|
||||||
|
assert isinstance(results[1], StreamFinishStep)
|
||||||
|
assert isinstance(results[2], StreamFinish)
|
||||||
|
|
||||||
|
|
||||||
|
def test_result_error_emits_error_and_finish():
|
||||||
|
adapter = _adapter()
|
||||||
|
msg = ResultMessage(
|
||||||
|
subtype="error",
|
||||||
|
duration_ms=100,
|
||||||
|
duration_api_ms=50,
|
||||||
|
is_error=True,
|
||||||
|
num_turns=0,
|
||||||
|
session_id="s1",
|
||||||
|
result="API rate limited",
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(msg)
|
||||||
|
# No step was open, so no FinishStep — just Error + Finish
|
||||||
|
assert len(results) == 2
|
||||||
|
assert isinstance(results[0], StreamError)
|
||||||
|
assert "API rate limited" in results[0].errorText
|
||||||
|
assert isinstance(results[1], StreamFinish)
|
||||||
|
|
||||||
|
|
||||||
|
# -- Text after tools (new block ID) ----------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_after_tool_gets_new_block_id():
|
||||||
|
adapter = _adapter()
|
||||||
|
# Text -> Tool -> ToolResult -> Text should get a new text block ID and step
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(content=[TextBlock(text="before")], model="test")
|
||||||
|
)
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(
|
||||||
|
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# Send tool result (closes step)
|
||||||
|
adapter.convert_message(
|
||||||
|
UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="ok")])
|
||||||
|
)
|
||||||
|
results = adapter.convert_message(
|
||||||
|
AssistantMessage(content=[TextBlock(text="after")], model="test")
|
||||||
|
)
|
||||||
|
# Should get StreamStartStep (new step) + StreamTextStart (new block) + StreamTextDelta
|
||||||
|
assert len(results) == 3
|
||||||
|
assert isinstance(results[0], StreamStartStep)
|
||||||
|
assert isinstance(results[1], StreamTextStart)
|
||||||
|
assert isinstance(results[2], StreamTextDelta)
|
||||||
|
assert results[2].delta == "after"
|
||||||
|
|
||||||
|
|
||||||
|
# -- Full conversation flow --------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_full_conversation_flow():
|
||||||
|
"""Simulate a complete conversation: init -> text -> tool -> result -> text -> finish."""
|
||||||
|
adapter = _adapter()
|
||||||
|
all_responses: list[StreamBaseResponse] = []
|
||||||
|
|
||||||
|
# 1. Init
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||||
|
)
|
||||||
|
# 2. Assistant text
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(content=[TextBlock(text="Let me search")], model="test")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# 3. Tool use
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(
|
||||||
|
content=[
|
||||||
|
ToolUseBlock(
|
||||||
|
id="t1",
|
||||||
|
name=f"{MCP_TOOL_PREFIX}find_agent",
|
||||||
|
input={"query": "email"},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
model="test",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# 4. Tool result
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(
|
||||||
|
UserMessage(
|
||||||
|
content=[ToolResultBlock(tool_use_id="t1", content="Found 2 agents")]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# 5. More text
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(
|
||||||
|
AssistantMessage(content=[TextBlock(text="I found 2")], model="test")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# 6. Result
|
||||||
|
all_responses.extend(
|
||||||
|
adapter.convert_message(
|
||||||
|
ResultMessage(
|
||||||
|
subtype="success",
|
||||||
|
duration_ms=500,
|
||||||
|
duration_api_ms=400,
|
||||||
|
is_error=False,
|
||||||
|
num_turns=2,
|
||||||
|
session_id="s1",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
types = [type(r).__name__ for r in all_responses]
|
||||||
|
assert types == [
|
||||||
|
"StreamStart",
|
||||||
|
"StreamStartStep", # step 1: text + tool call
|
||||||
|
"StreamTextStart",
|
||||||
|
"StreamTextDelta", # "Let me search"
|
||||||
|
"StreamTextEnd", # closed before tool
|
||||||
|
"StreamToolInputStart",
|
||||||
|
"StreamToolInputAvailable",
|
||||||
|
"StreamToolOutputAvailable", # tool result
|
||||||
|
"StreamFinishStep", # step 1 closed after tool result
|
||||||
|
"StreamStartStep", # step 2: continuation text
|
||||||
|
"StreamTextStart", # new block after tool
|
||||||
|
"StreamTextDelta", # "I found 2"
|
||||||
|
"StreamTextEnd", # closed by result
|
||||||
|
"StreamFinishStep", # step 2 closed
|
||||||
|
"StreamFinish",
|
||||||
|
]
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
"""Security hooks for Claude Agent SDK integration.
|
||||||
|
|
||||||
|
This module provides security hooks that validate tool calls before execution,
|
||||||
|
ensuring multi-user isolation and preventing unauthorized operations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Any, cast
|
||||||
|
|
||||||
|
from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Tools that are blocked entirely (CLI/system access).
|
||||||
|
# "Bash" (capital) is the SDK built-in — it's NOT in allowed_tools but blocked
|
||||||
|
# here as defence-in-depth. The agent uses mcp__copilot__bash_exec instead,
|
||||||
|
# which has kernel-level network isolation (unshare --net).
|
||||||
|
BLOCKED_TOOLS = {
|
||||||
|
"Bash",
|
||||||
|
"bash",
|
||||||
|
"shell",
|
||||||
|
"exec",
|
||||||
|
"terminal",
|
||||||
|
"command",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tools allowed only when their path argument stays within the SDK workspace.
|
||||||
|
# The SDK uses these to handle oversized tool results (writes to tool-results/
|
||||||
|
# files, then reads them back) and for workspace file operations.
|
||||||
|
WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
|
||||||
|
|
||||||
|
# Dangerous patterns in tool inputs
|
||||||
|
DANGEROUS_PATTERNS = [
|
||||||
|
r"sudo",
|
||||||
|
r"rm\s+-rf",
|
||||||
|
r"dd\s+if=",
|
||||||
|
r"/etc/passwd",
|
||||||
|
r"/etc/shadow",
|
||||||
|
r"chmod\s+777",
|
||||||
|
r"curl\s+.*\|.*sh",
|
||||||
|
r"wget\s+.*\|.*sh",
|
||||||
|
r"eval\s*\(",
|
||||||
|
r"exec\s*\(",
|
||||||
|
r"__import__",
|
||||||
|
r"os\.system",
|
||||||
|
r"subprocess",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _deny(reason: str) -> dict[str, Any]:
|
||||||
|
"""Return a hook denial response."""
|
||||||
|
return {
|
||||||
|
"hookSpecificOutput": {
|
||||||
|
"hookEventName": "PreToolUse",
|
||||||
|
"permissionDecision": "deny",
|
||||||
|
"permissionDecisionReason": reason,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_workspace_path(
|
||||||
|
tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Validate that a workspace-scoped tool only accesses allowed paths.
|
||||||
|
|
||||||
|
Allowed directories:
|
||||||
|
- The SDK working directory (``/tmp/copilot-<session>/``)
|
||||||
|
- The SDK tool-results directory (``~/.claude/projects/…/tool-results/``)
|
||||||
|
"""
|
||||||
|
path = tool_input.get("file_path") or tool_input.get("path") or ""
|
||||||
|
if not path:
|
||||||
|
# Glob/Grep without a path default to cwd which is already sandboxed
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Resolve relative paths against sdk_cwd (the SDK sets cwd so the LLM
|
||||||
|
# naturally uses relative paths like "test.txt" instead of absolute ones).
|
||||||
|
# Tilde paths (~/) are home-dir references, not relative — expand first.
|
||||||
|
if path.startswith("~"):
|
||||||
|
resolved = os.path.normpath(os.path.expanduser(path))
|
||||||
|
elif not os.path.isabs(path) and sdk_cwd:
|
||||||
|
resolved = os.path.normpath(os.path.join(sdk_cwd, path))
|
||||||
|
else:
|
||||||
|
resolved = os.path.normpath(path)
|
||||||
|
|
||||||
|
# Allow access within the SDK working directory
|
||||||
|
if sdk_cwd:
|
||||||
|
norm_cwd = os.path.normpath(sdk_cwd)
|
||||||
|
if resolved.startswith(norm_cwd + os.sep) or resolved == norm_cwd:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# Allow access to ~/.claude/projects/*/tool-results/ (big tool results)
|
||||||
|
claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects"))
|
||||||
|
if resolved.startswith(claude_dir + os.sep) and "tool-results" in resolved:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})"
|
||||||
|
)
|
||||||
|
workspace_hint = f" Allowed workspace: {sdk_cwd}" if sdk_cwd else ""
|
||||||
|
return _deny(
|
||||||
|
f"[SECURITY] Tool '{tool_name}' can only access files within the workspace "
|
||||||
|
f"directory.{workspace_hint} "
|
||||||
|
"This is enforced by the platform and cannot be bypassed."
|
||||||
|
)
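
# Worked example of how the checks above compose (paths are hypothetical; only
# standard os.path semantics are relied on):
#
#   sdk_cwd = "/tmp/copilot-abc123"
#   os.path.normpath(os.path.join(sdk_cwd, "notes.txt"))
#       -> "/tmp/copilot-abc123/notes.txt"   # inside the workspace: allowed
#   os.path.normpath(os.path.join(sdk_cwd, "../../etc/passwd"))
#       -> "/etc/passwd"                     # ".." collapsed by normpath: denied
#
# Because the prefix check runs on the normalised path, traversal attempts are
# caught before the startswith(norm_cwd + os.sep) comparison can be fooled.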
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_tool_access(
|
||||||
|
tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Validate that a tool call is allowed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Empty dict to allow, or dict with hookSpecificOutput to deny
|
||||||
|
"""
|
||||||
|
# Block forbidden tools
|
||||||
|
if tool_name in BLOCKED_TOOLS:
|
||||||
|
logger.warning(f"Blocked tool access attempt: {tool_name}")
|
||||||
|
return _deny(
|
||||||
|
f"[SECURITY] Tool '{tool_name}' is blocked for security. "
|
||||||
|
"This is enforced by the platform and cannot be bypassed. "
|
||||||
|
"Use the CoPilot-specific MCP tools instead."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Workspace-scoped tools: allowed only within the SDK workspace directory
|
||||||
|
if tool_name in WORKSPACE_SCOPED_TOOLS:
|
||||||
|
return _validate_workspace_path(tool_name, tool_input, sdk_cwd)
|
||||||
|
|
||||||
|
# Check for dangerous patterns in tool input
|
||||||
|
# Use json.dumps for predictable format (str() produces Python repr)
|
||||||
|
input_str = json.dumps(tool_input) if tool_input else ""
|
||||||
|
|
||||||
|
for pattern in DANGEROUS_PATTERNS:
|
||||||
|
if re.search(pattern, input_str, re.IGNORECASE):
|
||||||
|
logger.warning(
|
||||||
|
f"Blocked dangerous pattern in tool input: {pattern} in {tool_name}"
|
||||||
|
)
|
||||||
|
return _deny(
|
||||||
|
"[SECURITY] Input contains a blocked pattern. "
|
||||||
|
"This is enforced by the platform and cannot be bypassed."
|
||||||
|
)
|
||||||
|
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_user_isolation(
|
||||||
|
tool_name: str, tool_input: dict[str, Any], user_id: str | None
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Validate that tool calls respect user isolation."""
|
||||||
|
# For workspace file tools, ensure path doesn't escape
|
||||||
|
if "workspace" in tool_name.lower():
|
||||||
|
path = tool_input.get("path", "") or tool_input.get("file_path", "")
|
||||||
|
if path:
|
||||||
|
# Check for path traversal
|
||||||
|
if ".." in path or path.startswith("/"):
|
||||||
|
logger.warning(
|
||||||
|
f"Blocked path traversal attempt: {path} by user {user_id}"
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"hookSpecificOutput": {
|
||||||
|
"hookEventName": "PreToolUse",
|
||||||
|
"permissionDecision": "deny",
|
||||||
|
"permissionDecisionReason": "Path traversal not allowed",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def create_security_hooks(
|
||||||
|
user_id: str | None,
|
||||||
|
sdk_cwd: str | None = None,
|
||||||
|
max_subtasks: int = 3,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Create the security hooks configuration for Claude Agent SDK.
|
||||||
|
|
||||||
|
Includes security validation and observability hooks:
|
||||||
|
- PreToolUse: Security validation before tool execution
|
||||||
|
- PostToolUse: Log successful tool executions
|
||||||
|
- PostToolUseFailure: Log and handle failed tool executions
|
||||||
|
- PreCompact: Log context compaction events (SDK handles compaction automatically)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: Current user ID for isolation validation
|
||||||
|
sdk_cwd: SDK working directory for workspace-scoped tool validation
|
||||||
|
max_subtasks: Maximum Task (sub-agent) spawns allowed per session
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Hooks configuration dict for ClaudeAgentOptions
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from claude_agent_sdk import HookMatcher
|
||||||
|
from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
|
||||||
|
|
||||||
|
# Per-session counter for Task sub-agent spawns
|
||||||
|
task_spawn_count = 0
|
||||||
|
|
||||||
|
async def pre_tool_use_hook(
|
||||||
|
input_data: HookInput,
|
||||||
|
tool_use_id: str | None,
|
||||||
|
context: HookContext,
|
||||||
|
) -> SyncHookJSONOutput:
|
||||||
|
"""Combined pre-tool-use validation hook."""
|
||||||
|
nonlocal task_spawn_count
|
||||||
|
_ = context # unused but required by signature
|
||||||
|
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||||
|
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
|
||||||
|
|
||||||
|
# Rate-limit Task (sub-agent) spawns per session
|
||||||
|
if tool_name == "Task":
|
||||||
|
task_spawn_count += 1
|
||||||
|
if task_spawn_count > max_subtasks:
|
||||||
|
logger.warning(
|
||||||
|
f"[SDK] Task limit reached ({max_subtasks}), user={user_id}"
|
||||||
|
)
|
||||||
|
return cast(
|
||||||
|
SyncHookJSONOutput,
|
||||||
|
_deny(
|
||||||
|
f"Maximum {max_subtasks} sub-tasks per session. "
|
||||||
|
"Please continue in the main conversation."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Strip MCP prefix for consistent validation
|
||||||
|
is_copilot_tool = tool_name.startswith(MCP_TOOL_PREFIX)
|
||||||
|
clean_name = tool_name.removeprefix(MCP_TOOL_PREFIX)
|
||||||
|
|
||||||
|
# Only block non-CoPilot tools; our MCP-registered tools
|
||||||
|
# (including Read for oversized results) are already sandboxed.
|
||||||
|
if not is_copilot_tool:
|
||||||
|
result = _validate_tool_access(clean_name, tool_input, sdk_cwd)
|
||||||
|
if result:
|
||||||
|
return cast(SyncHookJSONOutput, result)
|
||||||
|
|
||||||
|
# Validate user isolation
|
||||||
|
result = _validate_user_isolation(clean_name, tool_input, user_id)
|
||||||
|
if result:
|
||||||
|
return cast(SyncHookJSONOutput, result)
|
||||||
|
|
||||||
|
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
|
||||||
|
return cast(SyncHookJSONOutput, {})
|
||||||
|
|
||||||
|
async def post_tool_use_hook(
|
||||||
|
input_data: HookInput,
|
||||||
|
tool_use_id: str | None,
|
||||||
|
context: HookContext,
|
||||||
|
) -> SyncHookJSONOutput:
|
||||||
|
"""Log successful tool executions for observability."""
|
||||||
|
_ = context
|
||||||
|
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||||
|
logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
|
||||||
|
return cast(SyncHookJSONOutput, {})
|
||||||
|
|
||||||
|
async def post_tool_failure_hook(
|
||||||
|
input_data: HookInput,
|
||||||
|
tool_use_id: str | None,
|
||||||
|
context: HookContext,
|
||||||
|
) -> SyncHookJSONOutput:
|
||||||
|
"""Log failed tool executions for debugging."""
|
||||||
|
_ = context
|
||||||
|
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||||
|
error = input_data.get("error", "Unknown error")
|
||||||
|
logger.warning(
|
||||||
|
f"[SDK] Tool failed: {tool_name}, error={error}, "
|
||||||
|
f"user={user_id}, tool_use_id={tool_use_id}"
|
||||||
|
)
|
||||||
|
return cast(SyncHookJSONOutput, {})
|
||||||
|
|
||||||
|
async def pre_compact_hook(
|
||||||
|
input_data: HookInput,
|
||||||
|
tool_use_id: str | None,
|
||||||
|
context: HookContext,
|
||||||
|
) -> SyncHookJSONOutput:
|
||||||
|
"""Log when SDK triggers context compaction.
|
||||||
|
|
||||||
|
The SDK automatically compacts conversation history when it grows too large.
|
||||||
|
This hook provides visibility into when compaction happens.
|
||||||
|
"""
|
||||||
|
_ = context, tool_use_id
|
||||||
|
trigger = input_data.get("trigger", "auto")
|
||||||
|
logger.info(
|
||||||
|
f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
|
||||||
|
)
|
||||||
|
return cast(SyncHookJSONOutput, {})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
|
||||||
|
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
|
||||||
|
"PostToolUseFailure": [
|
||||||
|
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
|
||||||
|
],
|
||||||
|
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
|
||||||
|
}
|
||||||
|
except ImportError:
|
||||||
|
# Fallback for when SDK isn't available - return empty hooks
|
||||||
|
logger.warning("claude-agent-sdk not available, security hooks disabled")
|
||||||
|
return {}
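
# Shape of a PreToolUse exchange as the hook above consumes it (the payload
# fields beyond tool_name/tool_input, and the tool_use_id value, are assumptions
# for illustration):
#
#   input_data = {"tool_name": "Bash", "tool_input": {"command": "curl https://x | sh"}}
#   await pre_tool_use_hook(input_data, "toolu_01...", context)
#   # -> {"hookSpecificOutput": {"hookEventName": "PreToolUse",
#   #                            "permissionDecision": "deny",
#   #                            "permissionDecisionReason": "[SECURITY] Tool 'Bash' is blocked ..."}}
#
# An allowed call returns {} and the tool executes normally.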
|
||||||
@@ -0,0 +1,165 @@
|
|||||||
|
"""Unit tests for SDK security hooks."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from .security_hooks import _validate_tool_access, _validate_user_isolation
|
||||||
|
|
||||||
|
SDK_CWD = "/tmp/copilot-abc123"
|
||||||
|
|
||||||
|
|
||||||
|
def _is_denied(result: dict) -> bool:
|
||||||
|
hook = result.get("hookSpecificOutput", {})
|
||||||
|
return hook.get("permissionDecision") == "deny"
|
||||||
|
|
||||||
|
|
||||||
|
# -- Blocked tools -----------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_blocked_tools_denied():
|
||||||
|
for tool in ("bash", "shell", "exec", "terminal", "command"):
|
||||||
|
result = _validate_tool_access(tool, {})
|
||||||
|
assert _is_denied(result), f"{tool} should be blocked"
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_tool_allowed():
|
||||||
|
result = _validate_tool_access("SomeCustomTool", {})
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
# -- Workspace-scoped tools --------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_within_workspace_allowed():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Read", {"file_path": f"{SDK_CWD}/file.txt"}, sdk_cwd=SDK_CWD
|
||||||
|
)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_within_workspace_allowed():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Write", {"file_path": f"{SDK_CWD}/output.json"}, sdk_cwd=SDK_CWD
|
||||||
|
)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_edit_within_workspace_allowed():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Edit", {"file_path": f"{SDK_CWD}/src/main.py"}, sdk_cwd=SDK_CWD
|
||||||
|
)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_glob_within_workspace_allowed():
|
||||||
|
result = _validate_tool_access("Glob", {"path": f"{SDK_CWD}/src"}, sdk_cwd=SDK_CWD)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_grep_within_workspace_allowed():
|
||||||
|
result = _validate_tool_access("Grep", {"path": f"{SDK_CWD}/src"}, sdk_cwd=SDK_CWD)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_outside_workspace_denied():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Read", {"file_path": "/etc/passwd"}, sdk_cwd=SDK_CWD
|
||||||
|
)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_outside_workspace_denied():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Write", {"file_path": "/home/user/secrets.txt"}, sdk_cwd=SDK_CWD
|
||||||
|
)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_traversal_attack_denied():
|
||||||
|
result = _validate_tool_access(
|
||||||
|
"Read",
|
||||||
|
{"file_path": f"{SDK_CWD}/../../etc/passwd"},
|
||||||
|
sdk_cwd=SDK_CWD,
|
||||||
|
)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_path_allowed():
|
||||||
|
"""Glob/Grep without a path argument defaults to cwd — should pass."""
|
||||||
|
result = _validate_tool_access("Glob", {}, sdk_cwd=SDK_CWD)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_no_cwd_denies_absolute():
|
||||||
|
"""If no sdk_cwd is set, absolute paths are denied."""
|
||||||
|
result = _validate_tool_access("Read", {"file_path": "/tmp/anything"})
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
# -- Tool-results directory --------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_tool_results_allowed():
|
||||||
|
home = os.path.expanduser("~")
|
||||||
|
path = f"{home}/.claude/projects/-tmp-copilot-abc123/tool-results/12345.txt"
|
||||||
|
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_claude_projects_without_tool_results_denied():
|
||||||
|
home = os.path.expanduser("~")
|
||||||
|
path = f"{home}/.claude/projects/-tmp-copilot-abc123/settings.json"
|
||||||
|
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
# -- Built-in Bash is blocked (use bash_exec MCP tool instead) ---------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_bash_builtin_always_blocked():
|
||||||
|
"""SDK built-in Bash is blocked — bash_exec MCP tool with bubblewrap is used instead."""
|
||||||
|
result = _validate_tool_access("Bash", {"command": "echo hello"}, sdk_cwd=SDK_CWD)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
# -- Dangerous patterns ------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_dangerous_pattern_blocked():
|
||||||
|
result = _validate_tool_access("SomeTool", {"cmd": "sudo rm -rf /"})
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_subprocess_pattern_blocked():
|
||||||
|
result = _validate_tool_access("SomeTool", {"code": "subprocess.run(...)"})
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
# -- User isolation ----------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_path_traversal_blocked():
|
||||||
|
result = _validate_user_isolation(
|
||||||
|
"workspace_read", {"path": "../../../etc/shadow"}, user_id="user-1"
|
||||||
|
)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_absolute_path_blocked():
|
||||||
|
result = _validate_user_isolation(
|
||||||
|
"workspace_read", {"path": "/etc/passwd"}, user_id="user-1"
|
||||||
|
)
|
||||||
|
assert _is_denied(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_normal_path_allowed():
|
||||||
|
result = _validate_user_isolation(
|
||||||
|
"workspace_read", {"path": "src/main.py"}, user_id="user-1"
|
||||||
|
)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_workspace_tool_passes_isolation():
|
||||||
|
result = _validate_user_isolation(
|
||||||
|
"find_agent", {"query": "email"}, user_id="user-1"
|
||||||
|
)
|
||||||
|
assert result == {}
|
||||||
@@ -0,0 +1,668 @@
|
|||||||
|
"""Claude Agent SDK service layer for CoPilot chat completions."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from collections.abc import AsyncGenerator
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from backend.util.exceptions import NotFoundError
|
||||||
|
|
||||||
|
from .. import stream_registry
|
||||||
|
from ..config import ChatConfig
|
||||||
|
from ..model import (
|
||||||
|
ChatMessage,
|
||||||
|
ChatSession,
|
||||||
|
get_chat_session,
|
||||||
|
update_session_title,
|
||||||
|
upsert_chat_session,
|
||||||
|
)
|
||||||
|
from ..response_model import (
|
||||||
|
StreamBaseResponse,
|
||||||
|
StreamError,
|
||||||
|
StreamFinish,
|
||||||
|
StreamStart,
|
||||||
|
StreamTextDelta,
|
||||||
|
StreamToolInputAvailable,
|
||||||
|
StreamToolOutputAvailable,
|
||||||
|
)
|
||||||
|
from ..service import (
|
||||||
|
_build_system_prompt,
|
||||||
|
_execute_long_running_tool_with_streaming,
|
||||||
|
_generate_session_title,
|
||||||
|
)
|
||||||
|
from ..tools.models import OperationPendingResponse, OperationStartedResponse
|
||||||
|
from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path
|
||||||
|
from ..tracking import track_user_message
|
||||||
|
from .response_adapter import SDKResponseAdapter
|
||||||
|
from .security_hooks import create_security_hooks
|
||||||
|
from .tool_adapter import (
|
||||||
|
COPILOT_TOOL_NAMES,
|
||||||
|
LongRunningCallback,
|
||||||
|
create_copilot_mcp_server,
|
||||||
|
set_execution_context,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
config = ChatConfig()
|
||||||
|
|
||||||
|
# Set to hold background tasks to prevent garbage collection
|
||||||
|
_background_tasks: set[asyncio.Task[Any]] = set()
|
||||||
|
|
||||||
|
|
||||||
|
_SDK_CWD_PREFIX = WORKSPACE_PREFIX
|
||||||
|
|
||||||
|
# Appended to the system prompt to inform the agent about available tools.
|
||||||
|
# The SDK built-in Bash is NOT available — use mcp__copilot__bash_exec instead,
|
||||||
|
# which has kernel-level network isolation (unshare --net).
|
||||||
|
_SDK_TOOL_SUPPLEMENT = """
|
||||||
|
|
||||||
|
## Tool notes
|
||||||
|
|
||||||
|
- The SDK built-in Bash tool is NOT available. Use the `bash_exec` MCP tool
|
||||||
|
for shell commands — it runs in a network-isolated sandbox.
|
||||||
|
- **Shared workspace**: The SDK Read/Write tools and `bash_exec` share the
|
||||||
|
same working directory. Files created by one are readable by the other.
|
||||||
|
These files are **ephemeral** — they exist only for the current session.
|
||||||
|
- **Persistent storage**: Use `write_workspace_file` / `read_workspace_file`
|
||||||
|
for files that should persist across sessions (stored in cloud storage).
|
||||||
|
- Long-running tools (create_agent, edit_agent, etc.) are handled
|
||||||
|
asynchronously. You will receive an immediate response; the actual result
|
||||||
|
is delivered to the user via a background stream.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _build_long_running_callback(user_id: str | None) -> LongRunningCallback:
|
||||||
|
"""Build a callback that delegates long-running tools to the non-SDK infrastructure.
|
||||||
|
|
||||||
|
Long-running tools (create_agent, edit_agent, etc.) are delegated to the
|
||||||
|
existing background infrastructure: stream_registry (Redis Streams),
|
||||||
|
database persistence, and SSE reconnection. This means results survive
|
||||||
|
page refreshes / pod restarts, and the frontend shows the proper loading
|
||||||
|
widget with progress updates.
|
||||||
|
|
||||||
|
The returned callback matches the ``LongRunningCallback`` signature:
|
||||||
|
``(tool_name, args, session) -> MCP response dict``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def _callback(
|
||||||
|
tool_name: str, args: dict[str, Any], session: ChatSession
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
operation_id = str(uuid.uuid4())
|
||||||
|
task_id = str(uuid.uuid4())
|
||||||
|
tool_call_id = f"sdk-{uuid.uuid4().hex[:12]}"
|
||||||
|
session_id = session.session_id
|
||||||
|
|
||||||
|
# --- Build user-friendly messages (matches non-SDK service) ---
|
||||||
|
if tool_name == "create_agent":
|
||||||
|
desc = args.get("description", "")
|
||||||
|
desc_preview = (desc[:100] + "...") if len(desc) > 100 else desc
|
||||||
|
pending_msg = (
|
||||||
|
f"Creating your agent: {desc_preview}"
|
||||||
|
if desc_preview
|
||||||
|
else "Creating agent... This may take a few minutes."
|
||||||
|
)
|
||||||
|
started_msg = (
|
||||||
|
"Agent creation started. You can close this tab - "
|
||||||
|
"check your library in a few minutes."
|
||||||
|
)
|
||||||
|
elif tool_name == "edit_agent":
|
||||||
|
changes = args.get("changes", "")
|
||||||
|
changes_preview = (changes[:100] + "...") if len(changes) > 100 else changes
|
||||||
|
pending_msg = (
|
||||||
|
f"Editing agent: {changes_preview}"
|
||||||
|
if changes_preview
|
||||||
|
else "Editing agent... This may take a few minutes."
|
||||||
|
)
|
||||||
|
started_msg = (
|
||||||
|
"Agent edit started. You can close this tab - "
|
||||||
|
"check your library in a few minutes."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
pending_msg = f"Running {tool_name}... This may take a few minutes."
|
||||||
|
started_msg = (
|
||||||
|
f"{tool_name} started. You can close this tab - "
|
||||||
|
"check back in a few minutes."
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Register task in Redis for SSE reconnection ---
|
||||||
|
await stream_registry.create_task(
|
||||||
|
task_id=task_id,
|
||||||
|
session_id=session_id,
|
||||||
|
user_id=user_id,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
tool_name=tool_name,
|
||||||
|
operation_id=operation_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Save OperationPendingResponse to chat history ---
|
||||||
|
pending_message = ChatMessage(
|
||||||
|
role="tool",
|
||||||
|
content=OperationPendingResponse(
|
||||||
|
message=pending_msg,
|
||||||
|
operation_id=operation_id,
|
||||||
|
tool_name=tool_name,
|
||||||
|
).model_dump_json(),
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
)
|
||||||
|
session.messages.append(pending_message)
|
||||||
|
await upsert_chat_session(session)
|
||||||
|
|
||||||
|
# --- Spawn background task (reuses non-SDK infrastructure) ---
|
||||||
|
bg_task = asyncio.create_task(
|
||||||
|
_execute_long_running_tool_with_streaming(
|
||||||
|
tool_name=tool_name,
|
||||||
|
parameters=args,
|
||||||
|
tool_call_id=tool_call_id,
|
||||||
|
operation_id=operation_id,
|
||||||
|
task_id=task_id,
|
||||||
|
session_id=session_id,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
_background_tasks.add(bg_task)
|
||||||
|
bg_task.add_done_callback(_background_tasks.discard)
|
||||||
|
await stream_registry.set_task_asyncio_task(task_id, bg_task)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"[SDK] Long-running tool {tool_name} delegated to background "
|
||||||
|
f"(operation_id={operation_id}, task_id={task_id})"
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Return OperationStartedResponse as MCP tool result ---
|
||||||
|
# This flows through SDK → response adapter → frontend, triggering
|
||||||
|
# the loading widget with SSE reconnection support.
|
||||||
|
started_json = OperationStartedResponse(
|
||||||
|
message=started_msg,
|
||||||
|
operation_id=operation_id,
|
||||||
|
tool_name=tool_name,
|
||||||
|
task_id=task_id,
|
||||||
|
).model_dump_json()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"content": [{"type": "text", "text": started_json}],
|
||||||
|
"isError": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
return _callback
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_sdk_model() -> str | None:
|
||||||
|
"""Resolve the model name for the Claude Agent SDK CLI.
|
||||||
|
|
||||||
|
Uses ``config.claude_agent_model`` if set, otherwise derives from
|
||||||
|
``config.model`` by stripping the OpenRouter provider prefix (e.g.,
|
||||||
|
``"anthropic/claude-opus-4.6"`` → ``"claude-opus-4.6"``).
|
||||||
|
"""
|
||||||
|
if config.claude_agent_model:
|
||||||
|
return config.claude_agent_model
|
||||||
|
model = config.model
|
||||||
|
if "/" in model:
|
||||||
|
return model.split("/", 1)[1]
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
def _build_sdk_env() -> dict[str, str]:
|
||||||
|
"""Build env vars for the SDK CLI process.
|
||||||
|
|
||||||
|
Routes API calls through OpenRouter (or a custom base_url) using
|
||||||
|
the same ``config.api_key`` / ``config.base_url`` as the non-SDK path.
|
||||||
|
This gives per-call token and cost tracking on the OpenRouter dashboard.
|
||||||
|
|
||||||
|
Only overrides ``ANTHROPIC_API_KEY`` when a valid proxy URL and auth
|
||||||
|
token are both present — otherwise returns an empty dict so the SDK
|
||||||
|
falls back to its default credentials.
|
||||||
|
"""
|
||||||
|
env: dict[str, str] = {}
|
||||||
|
if config.api_key and config.base_url:
|
||||||
|
# Strip /v1 suffix — SDK expects the base URL without a version path
|
||||||
|
base = config.base_url.rstrip("/")
|
||||||
|
if base.endswith("/v1"):
|
||||||
|
base = base[:-3]
|
||||||
|
if not base or not base.startswith("http"):
|
||||||
|
# Invalid base_url — don't override SDK defaults
|
||||||
|
return env
|
||||||
|
env["ANTHROPIC_BASE_URL"] = base
|
||||||
|
env["ANTHROPIC_AUTH_TOKEN"] = config.api_key
|
||||||
|
# Must be explicitly empty so the CLI uses AUTH_TOKEN instead
|
||||||
|
env["ANTHROPIC_API_KEY"] = ""
|
||||||
|
return env
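
# Worked example (placeholder values, not real credentials): with
# config.base_url = "https://openrouter.ai/api/v1" and config.api_key = "sk-or-...",
# the function above returns roughly:
#
#   {
#       "ANTHROPIC_BASE_URL": "https://openrouter.ai/api",  # "/v1" suffix stripped
#       "ANTHROPIC_AUTH_TOKEN": "sk-or-...",
#       "ANTHROPIC_API_KEY": "",                            # forces AUTH_TOKEN usage
#   }
#
# With no base_url/api_key configured it returns {}, so the SDK falls back to
# whatever ANTHROPIC_API_KEY is already set in the environment.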
|
||||||
|
|
||||||
|
|
||||||
|
def _make_sdk_cwd(session_id: str) -> str:
|
||||||
|
"""Create a safe, session-specific working directory path.
|
||||||
|
|
||||||
|
Delegates to :func:`~backend.api.features.chat.tools.sandbox.make_session_path`
|
||||||
|
(single source of truth for path sanitization) and adds a defence-in-depth
|
||||||
|
assertion.
|
||||||
|
"""
|
||||||
|
cwd = make_session_path(session_id)
|
||||||
|
# Defence-in-depth: normpath + startswith is a CodeQL-recognised sanitizer
|
||||||
|
cwd = os.path.normpath(cwd)
|
||||||
|
if not cwd.startswith(_SDK_CWD_PREFIX):
|
||||||
|
raise ValueError(f"SDK cwd escaped prefix: {cwd}")
|
||||||
|
return cwd
|
||||||
|
|
||||||
|
|
||||||
|
def _cleanup_sdk_tool_results(cwd: str) -> None:
|
||||||
|
"""Remove SDK tool-result files for a specific session working directory.
|
||||||
|
|
||||||
|
The SDK creates tool-result files under ~/.claude/projects/<encoded-cwd>/tool-results/.
|
||||||
|
We clean only the specific cwd's results to avoid race conditions between
|
||||||
|
concurrent sessions.
|
||||||
|
|
||||||
|
Security: cwd MUST be created by _make_sdk_cwd() which sanitizes session_id.
|
||||||
|
"""
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# Security check 1: Validate cwd is under the expected prefix
|
||||||
|
normalized = os.path.normpath(cwd)
|
||||||
|
if not normalized.startswith(_SDK_CWD_PREFIX):
|
||||||
|
logger.warning(f"[SDK] Rejecting cleanup for invalid path: {cwd}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Security check 2: Ensure no path traversal in the normalized path
|
||||||
|
if ".." in normalized:
|
||||||
|
logger.warning(f"[SDK] Rejecting cleanup for traversal attempt: {cwd}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# SDK encodes the cwd path by replacing '/' with '-'
|
||||||
|
encoded_cwd = normalized.replace("/", "-")
|
||||||
|
|
||||||
|
# Construct the project directory path (known-safe home expansion)
|
||||||
|
claude_projects = os.path.expanduser("~/.claude/projects")
|
||||||
|
project_dir = os.path.join(claude_projects, encoded_cwd)
|
||||||
|
|
||||||
|
# Security check 3: Validate project_dir is under ~/.claude/projects
|
||||||
|
project_dir = os.path.normpath(project_dir)
|
||||||
|
if not project_dir.startswith(claude_projects):
|
||||||
|
logger.warning(
|
||||||
|
f"[SDK] Rejecting cleanup for escaped project path: {project_dir}"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
results_dir = os.path.join(project_dir, "tool-results")
|
||||||
|
if os.path.isdir(results_dir):
|
||||||
|
for filename in os.listdir(results_dir):
|
||||||
|
file_path = os.path.join(results_dir, filename)
|
||||||
|
try:
|
||||||
|
if os.path.isfile(file_path):
|
||||||
|
os.remove(file_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Also clean up the temp cwd directory itself
|
||||||
|
try:
|
||||||
|
shutil.rmtree(normalized, ignore_errors=True)
|
||||||
|
except OSError:
|
||||||
|
pass
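
# Example of the encoding used above (hypothetical session ID): for
# cwd = "/tmp/copilot-abc123" the SDK project directory becomes
# ~/.claude/projects/-tmp-copilot-abc123/, so only that session's
# tool-results/ files (plus the temp cwd itself) are removed.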
|
||||||
|
|
||||||
|
|
||||||
|
async def _compress_conversation_history(
|
||||||
|
session: ChatSession,
|
||||||
|
) -> list[ChatMessage]:
|
||||||
|
"""Compress prior conversation messages if they exceed the token threshold.
|
||||||
|
|
||||||
|
Uses the shared compress_context() from prompt.py which supports:
|
||||||
|
- LLM summarization of old messages (keeps recent ones intact)
|
||||||
|
- Progressive content truncation as fallback
|
||||||
|
- Middle-out deletion as last resort
|
||||||
|
|
||||||
|
Returns the compressed prior messages (everything except the current message).
|
||||||
|
"""
|
||||||
|
prior = session.messages[:-1]
|
||||||
|
if len(prior) < 2:
|
||||||
|
return prior
|
||||||
|
|
||||||
|
from backend.util.prompt import compress_context
|
||||||
|
|
||||||
|
# Convert ChatMessages to dicts for compress_context
|
||||||
|
messages_dict = []
|
||||||
|
for msg in prior:
|
||||||
|
msg_dict: dict[str, Any] = {"role": msg.role}
|
||||||
|
if msg.content:
|
||||||
|
msg_dict["content"] = msg.content
|
||||||
|
if msg.tool_calls:
|
||||||
|
msg_dict["tool_calls"] = msg.tool_calls
|
||||||
|
if msg.tool_call_id:
|
||||||
|
msg_dict["tool_call_id"] = msg.tool_call_id
|
||||||
|
messages_dict.append(msg_dict)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import openai
|
||||||
|
|
||||||
|
async with openai.AsyncOpenAI(
|
||||||
|
api_key=config.api_key, base_url=config.base_url, timeout=30.0
|
||||||
|
) as client:
|
||||||
|
result = await compress_context(
|
||||||
|
messages=messages_dict,
|
||||||
|
model=config.model,
|
||||||
|
client=client,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[SDK] Context compression with LLM failed: {e}")
|
||||||
|
# Fall back to truncation-only (no LLM summarization)
|
||||||
|
result = await compress_context(
|
||||||
|
messages=messages_dict,
|
||||||
|
model=config.model,
|
||||||
|
client=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.was_compacted:
|
||||||
|
logger.info(
|
||||||
|
f"[SDK] Context compacted: {result.original_token_count} -> "
|
||||||
|
f"{result.token_count} tokens "
|
||||||
|
f"({result.messages_summarized} summarized, "
|
||||||
|
f"{result.messages_dropped} dropped)"
|
||||||
|
)
|
||||||
|
# Convert compressed dicts back to ChatMessages
|
||||||
|
return [
|
||||||
|
ChatMessage(
|
||||||
|
role=m["role"],
|
||||||
|
content=m.get("content"),
|
||||||
|
tool_calls=m.get("tool_calls"),
|
||||||
|
tool_call_id=m.get("tool_call_id"),
|
||||||
|
)
|
||||||
|
for m in result.messages
|
||||||
|
]
|
||||||
|
|
||||||
|
return prior
|
||||||
|
|
||||||
|
|
||||||
|
def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
|
||||||
|
"""Format conversation messages into a context prefix for the user message.
|
||||||
|
|
||||||
|
Returns a string like:
|
||||||
|
<conversation_history>
|
||||||
|
User: hello
|
||||||
|
You responded: Hi! How can I help?
|
||||||
|
</conversation_history>
|
||||||
|
|
||||||
|
Returns None if there are no messages to format.
|
||||||
|
"""
|
||||||
|
if not messages:
|
||||||
|
return None
|
||||||
|
|
||||||
|
lines: list[str] = []
|
||||||
|
for msg in messages:
|
||||||
|
if not msg.content:
|
||||||
|
continue
|
||||||
|
if msg.role == "user":
|
||||||
|
lines.append(f"User: {msg.content}")
|
||||||
|
elif msg.role == "assistant":
|
||||||
|
lines.append(f"You responded: {msg.content}")
|
||||||
|
# Skip tool messages — they're internal details
|
||||||
|
|
||||||
|
if not lines:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return "<conversation_history>\n" + "\n".join(lines) + "\n</conversation_history>"
|
||||||
|
|
||||||
|
|
||||||
|
async def stream_chat_completion_sdk(
|
||||||
|
session_id: str,
|
||||||
|
message: str | None = None,
|
||||||
|
tool_call_response: str | None = None, # noqa: ARG001
|
||||||
|
is_user_message: bool = True,
|
||||||
|
user_id: str | None = None,
|
||||||
|
retry_count: int = 0, # noqa: ARG001
|
||||||
|
session: ChatSession | None = None,
|
||||||
|
context: dict[str, str] | None = None, # noqa: ARG001
|
||||||
|
) -> AsyncGenerator[StreamBaseResponse, None]:
|
||||||
|
"""Stream chat completion using Claude Agent SDK.
|
||||||
|
|
||||||
|
Drop-in replacement for stream_chat_completion with improved reliability.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if session is None:
|
||||||
|
session = await get_chat_session(session_id, user_id)
|
||||||
|
|
||||||
|
if not session:
|
||||||
|
raise NotFoundError(
|
||||||
|
f"Session {session_id} not found. Please create a new session first."
|
||||||
|
)
|
||||||
|
|
||||||
|
if message:
|
||||||
|
session.messages.append(
|
||||||
|
ChatMessage(
|
||||||
|
role="user" if is_user_message else "assistant", content=message
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if is_user_message:
|
||||||
|
track_user_message(
|
||||||
|
user_id=user_id, session_id=session_id, message_length=len(message)
|
||||||
|
)
|
||||||
|
|
||||||
|
session = await upsert_chat_session(session)
|
||||||
|
|
||||||
|
# Generate title for new sessions (first user message)
|
||||||
|
if is_user_message and not session.title:
|
||||||
|
user_messages = [m for m in session.messages if m.role == "user"]
|
||||||
|
if len(user_messages) == 1:
|
||||||
|
first_message = user_messages[0].content or message or ""
|
||||||
|
if first_message:
|
||||||
|
task = asyncio.create_task(
|
||||||
|
_update_title_async(session_id, first_message, user_id)
|
||||||
|
)
|
||||||
|
_background_tasks.add(task)
|
||||||
|
task.add_done_callback(_background_tasks.discard)
|
||||||
|
|
||||||
|
# Build system prompt (reuses non-SDK path with Langfuse support)
|
||||||
|
has_history = len(session.messages) > 1
|
||||||
|
system_prompt, _ = await _build_system_prompt(
|
||||||
|
user_id, has_conversation_history=has_history
|
||||||
|
)
|
||||||
|
system_prompt += _SDK_TOOL_SUPPLEMENT
|
||||||
|
message_id = str(uuid.uuid4())
|
||||||
|
task_id = str(uuid.uuid4())
|
||||||
|
|
||||||
|
yield StreamStart(messageId=message_id, taskId=task_id)
|
||||||
|
|
||||||
|
stream_completed = False
|
||||||
|
# Initialise sdk_cwd before the try so the finally can reference it
|
||||||
|
# even if _make_sdk_cwd raises (in that case it stays as "").
|
||||||
|
sdk_cwd = ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use a session-specific temp dir to avoid cleanup race conditions
|
||||||
|
# between concurrent sessions.
|
||||||
|
sdk_cwd = _make_sdk_cwd(session_id)
|
||||||
|
os.makedirs(sdk_cwd, exist_ok=True)
|
||||||
|
|
||||||
|
set_execution_context(
|
||||||
|
user_id,
|
||||||
|
session,
|
||||||
|
long_running_callback=_build_long_running_callback(user_id),
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||||
|
|
||||||
|
# Fail fast when no API credentials are available at all
|
||||||
|
sdk_env = _build_sdk_env()
|
||||||
|
if not sdk_env and not os.environ.get("ANTHROPIC_API_KEY"):
|
||||||
|
raise RuntimeError(
|
||||||
|
"No API key configured. Set OPEN_ROUTER_API_KEY "
|
||||||
|
"(or CHAT_API_KEY) for OpenRouter routing, "
|
||||||
|
"or ANTHROPIC_API_KEY for direct Anthropic access."
|
||||||
|
)
|
||||||
|
|
||||||
|
mcp_server = create_copilot_mcp_server()
|
||||||
|
|
||||||
|
sdk_model = _resolve_sdk_model()
|
||||||
|
|
||||||
|
security_hooks = create_security_hooks(
|
||||||
|
user_id,
|
||||||
|
sdk_cwd=sdk_cwd,
|
||||||
|
max_subtasks=config.claude_agent_max_subtasks,
|
||||||
|
)
|
||||||
|
|
||||||
|
options = ClaudeAgentOptions(
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
mcp_servers={"copilot": mcp_server}, # type: ignore[arg-type]
|
||||||
|
allowed_tools=COPILOT_TOOL_NAMES,
|
||||||
|
hooks=security_hooks, # type: ignore[arg-type]
|
||||||
|
cwd=sdk_cwd,
|
||||||
|
max_buffer_size=config.claude_agent_max_buffer_size,
|
||||||
|
# Only pass model/env when OpenRouter is configured
|
||||||
|
**({"model": sdk_model, "env": sdk_env} if sdk_env else {}),
|
||||||
|
)
|
||||||
|
|
||||||
|
adapter = SDKResponseAdapter(message_id=message_id)
|
||||||
|
adapter.set_task_id(task_id)
|
||||||
|
|
||||||
|
async with ClaudeSDKClient(options=options) as client:
|
||||||
|
current_message = message or ""
|
||||||
|
if not current_message and session.messages:
|
||||||
|
last_user = [m for m in session.messages if m.role == "user"]
|
||||||
|
if last_user:
|
||||||
|
current_message = last_user[-1].content or ""
|
||||||
|
|
||||||
|
if not current_message.strip():
|
||||||
|
yield StreamError(
|
||||||
|
errorText="Message cannot be empty.",
|
||||||
|
code="empty_prompt",
|
||||||
|
)
|
||||||
|
yield StreamFinish()
|
||||||
|
return
|
||||||
|
|
||||||
|
# Build query with conversation history context.
|
||||||
|
# Compress history first to handle long conversations.
|
||||||
|
query_message = current_message
|
||||||
|
if len(session.messages) > 1:
|
||||||
|
compressed = await _compress_conversation_history(session)
|
||||||
|
history_context = _format_conversation_context(compressed)
|
||||||
|
if history_context:
|
||||||
|
query_message = (
|
||||||
|
f"{history_context}\n\n"
|
||||||
|
f"Now, the user says:\n{current_message}"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"[SDK] Sending query: {current_message[:80]!r}"
|
||||||
|
f" ({len(session.messages)} msgs in session)"
|
||||||
|
)
|
||||||
|
await client.query(query_message, session_id=session_id)
|
||||||
|
|
||||||
|
assistant_response = ChatMessage(role="assistant", content="")
|
||||||
|
accumulated_tool_calls: list[dict[str, Any]] = []
|
||||||
|
has_appended_assistant = False
|
||||||
|
has_tool_results = False
|
||||||
|
|
||||||
|
async for sdk_msg in client.receive_messages():
|
||||||
|
logger.debug(
|
||||||
|
f"[SDK] Received: {type(sdk_msg).__name__} "
|
||||||
|
f"{getattr(sdk_msg, 'subtype', '')}"
|
||||||
|
)
|
||||||
|
for response in adapter.convert_message(sdk_msg):
|
||||||
|
if isinstance(response, StreamStart):
|
||||||
|
continue
|
||||||
|
|
||||||
|
yield response
|
||||||
|
|
||||||
|
if isinstance(response, StreamTextDelta):
|
||||||
|
delta = response.delta or ""
|
||||||
|
# After tool results, start a new assistant
|
||||||
|
# message for the post-tool text.
|
||||||
|
if has_tool_results and has_appended_assistant:
|
||||||
|
assistant_response = ChatMessage(
|
||||||
|
role="assistant", content=delta
|
||||||
|
)
|
||||||
|
accumulated_tool_calls = []
|
||||||
|
has_appended_assistant = False
|
||||||
|
has_tool_results = False
|
||||||
|
session.messages.append(assistant_response)
|
||||||
|
has_appended_assistant = True
|
||||||
|
else:
|
||||||
|
assistant_response.content = (
|
||||||
|
assistant_response.content or ""
|
||||||
|
) + delta
|
||||||
|
if not has_appended_assistant:
|
||||||
|
session.messages.append(assistant_response)
|
||||||
|
has_appended_assistant = True
|
||||||
|
|
||||||
|
elif isinstance(response, StreamToolInputAvailable):
|
||||||
|
accumulated_tool_calls.append(
|
||||||
|
{
|
||||||
|
"id": response.toolCallId,
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": response.toolName,
|
||||||
|
"arguments": json.dumps(response.input or {}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assistant_response.tool_calls = accumulated_tool_calls
|
||||||
|
if not has_appended_assistant:
|
||||||
|
session.messages.append(assistant_response)
|
||||||
|
has_appended_assistant = True
|
||||||
|
|
||||||
|
elif isinstance(response, StreamToolOutputAvailable):
|
||||||
|
session.messages.append(
|
||||||
|
ChatMessage(
|
||||||
|
role="tool",
|
||||||
|
content=(
|
||||||
|
response.output
|
||||||
|
if isinstance(response.output, str)
|
||||||
|
else str(response.output)
|
||||||
|
),
|
||||||
|
tool_call_id=response.toolCallId,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
has_tool_results = True
|
||||||
|
|
||||||
|
elif isinstance(response, StreamFinish):
|
||||||
|
stream_completed = True
|
||||||
|
|
||||||
|
if stream_completed:
|
||||||
|
break
|
||||||
|
|
||||||
|
if (
|
||||||
|
assistant_response.content or assistant_response.tool_calls
|
||||||
|
) and not has_appended_assistant:
|
||||||
|
session.messages.append(assistant_response)
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
raise RuntimeError(
|
||||||
|
"claude-agent-sdk is not installed. "
|
||||||
|
"Disable SDK mode (CHAT_USE_CLAUDE_AGENT_SDK=false) "
|
||||||
|
"to use the OpenAI-compatible fallback."
|
||||||
|
)
|
||||||
|
|
||||||
|
await upsert_chat_session(session)
|
||||||
|
logger.debug(
|
||||||
|
f"[SDK] Session {session_id} saved with {len(session.messages)} messages"
|
||||||
|
)
|
||||||
|
if not stream_completed:
|
||||||
|
yield StreamFinish()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[SDK] Error: {e}", exc_info=True)
|
||||||
|
try:
|
||||||
|
await upsert_chat_session(session)
|
||||||
|
except Exception as save_err:
|
||||||
|
logger.error(f"[SDK] Failed to save session on error: {save_err}")
|
||||||
|
yield StreamError(
|
||||||
|
errorText="An error occurred. Please try again.",
|
||||||
|
code="sdk_error",
|
||||||
|
)
|
||||||
|
yield StreamFinish()
|
||||||
|
finally:
|
||||||
|
if sdk_cwd:
|
||||||
|
_cleanup_sdk_tool_results(sdk_cwd)
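
# Rough trace of the message bookkeeping above for a single
# "text -> tool call -> tool result -> text" turn (values are hypothetical):
#
#   session.messages ends up as
#     ChatMessage(role="assistant", content="Let me search",
#                 tool_calls=[{"id": "...", "type": "function",
#                              "function": {"name": "find_agent", "arguments": "{...}"}}]),
#     ChatMessage(role="tool", content="<tool output>", tool_call_id="..."),
#     ChatMessage(role="assistant", content="I found 2 agents"),
#
# i.e. text that arrives after a tool result starts a fresh assistant message
# (has_tool_results flips the state), mirroring the tool_calls/tool_call_id
# message shape the non-SDK path persists.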
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_title_async(
|
||||||
|
session_id: str, message: str, user_id: str | None = None
|
||||||
|
) -> None:
|
||||||
|
"""Background task to update session title."""
|
||||||
|
try:
|
||||||
|
title = await _generate_session_title(
|
||||||
|
message, user_id=user_id, session_id=session_id
|
||||||
|
)
|
||||||
|
if title:
|
||||||
|
await update_session_title(session_id, title)
|
||||||
|
logger.debug(f"[SDK] Generated title for {session_id}: {title}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[SDK] Failed to update session title: {e}")
|
||||||
@@ -0,0 +1,320 @@
|
|||||||
|
"""Tool adapter for wrapping existing CoPilot tools as Claude Agent SDK MCP tools.
|
||||||
|
|
||||||
|
This module provides the adapter layer that converts existing BaseTool implementations
|
||||||
|
into in-process MCP tools that can be used with the Claude Agent SDK.
|
||||||
|
|
||||||
|
Long-running tools (``is_long_running=True``) are delegated to the non-SDK
|
||||||
|
background infrastructure (stream_registry, Redis persistence, SSE reconnection)
|
||||||
|
via a callback provided by the service layer. This avoids wasteful SDK polling
|
||||||
|
and makes results survive page refreshes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from contextvars import ContextVar
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from backend.api.features.chat.model import ChatSession
|
||||||
|
from backend.api.features.chat.tools import TOOL_REGISTRY
|
||||||
|
from backend.api.features.chat.tools.base import BaseTool
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Allowed base directory for the Read tool (SDK saves oversized tool results here).
|
||||||
|
# Restricted to ~/.claude/projects/ and further validated to require "tool-results"
|
||||||
|
# in the path — prevents reading settings, credentials, or other sensitive files.
|
||||||
|
_SDK_PROJECTS_DIR = os.path.expanduser("~/.claude/projects/")
|
||||||
|
|
||||||
|
# MCP server naming - the SDK prefixes tool names as "mcp__{server_name}__{tool}"
|
||||||
|
MCP_SERVER_NAME = "copilot"
|
||||||
|
MCP_TOOL_PREFIX = f"mcp__{MCP_SERVER_NAME}__"
|
||||||
|
|
||||||
|
# Context variables to pass user/session info to tool execution
|
||||||
|
_current_user_id: ContextVar[str | None] = ContextVar("current_user_id", default=None)
|
||||||
|
_current_session: ContextVar[ChatSession | None] = ContextVar(
|
||||||
|
"current_session", default=None
|
||||||
|
)
|
||||||
|
# Stash for MCP tool outputs before the SDK potentially truncates them.
|
||||||
|
# Keyed by tool_name → full output string. Consumed (popped) by the
|
||||||
|
# response adapter when it builds StreamToolOutputAvailable.
|
||||||
|
_pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar(
|
||||||
|
"pending_tool_outputs", default=None # type: ignore[arg-type]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Callback type for delegating long-running tools to the non-SDK infrastructure.
|
||||||
|
# Args: (tool_name, arguments, session) → MCP-formatted response dict.
|
||||||
|
LongRunningCallback = Callable[
|
||||||
|
[str, dict[str, Any], ChatSession], Awaitable[dict[str, Any]]
|
||||||
|
]
|
||||||
|
|
||||||
|
# ContextVar so the service layer can inject the callback per-request.
|
||||||
|
_long_running_callback: ContextVar[LongRunningCallback | None] = ContextVar(
|
||||||
|
"long_running_callback", default=None
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_execution_context(
|
||||||
|
user_id: str | None,
|
||||||
|
session: ChatSession,
|
||||||
|
long_running_callback: LongRunningCallback | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Set the execution context for tool calls.
|
||||||
|
|
||||||
|
This must be called before streaming begins to ensure tools have access
|
||||||
|
to user_id and session information.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: Current user's ID.
|
||||||
|
session: Current chat session.
|
||||||
|
long_running_callback: Optional callback to delegate long-running tools
|
||||||
|
to the non-SDK background infrastructure (stream_registry + Redis).
|
||||||
|
"""
|
||||||
|
_current_user_id.set(user_id)
|
||||||
|
_current_session.set(session)
|
||||||
|
_pending_tool_outputs.set({})
|
||||||
|
_long_running_callback.set(long_running_callback)
|
||||||
|
|
||||||
|
|
||||||
|
def get_execution_context() -> tuple[str | None, ChatSession | None]:
|
||||||
|
"""Get the current execution context."""
|
||||||
|
return (
|
||||||
|
_current_user_id.get(),
|
||||||
|
_current_session.get(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def pop_pending_tool_output(tool_name: str) -> str | None:
|
||||||
|
"""Pop and return the stashed full output for *tool_name*.
|
||||||
|
|
||||||
|
The SDK CLI may truncate large tool results (writing them to disk and
|
||||||
|
replacing the content with a file reference). This stash keeps the
|
||||||
|
original MCP output so the response adapter can forward it to the
|
||||||
|
frontend for proper widget rendering.
|
||||||
|
|
||||||
|
Returns ``None`` if nothing was stashed for *tool_name*.
|
||||||
|
"""
|
||||||
|
pending = _pending_tool_outputs.get(None)
|
||||||
|
if pending is None:
|
||||||
|
return None
|
||||||
|
return pending.pop(tool_name, None)
|
||||||
|
|
||||||
|
|
||||||
|
async def _execute_tool_sync(
|
||||||
|
base_tool: BaseTool,
|
||||||
|
user_id: str | None,
|
||||||
|
session: ChatSession,
|
||||||
|
args: dict[str, Any],
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Execute a tool synchronously and return MCP-formatted response."""
|
||||||
|
effective_id = f"sdk-{uuid.uuid4().hex[:12]}"
|
||||||
|
result = await base_tool.execute(
|
||||||
|
user_id=user_id,
|
||||||
|
session=session,
|
||||||
|
tool_call_id=effective_id,
|
||||||
|
**args,
|
||||||
|
)
|
||||||
|
|
||||||
|
text = (
|
||||||
|
result.output if isinstance(result.output, str) else json.dumps(result.output)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stash the full output before the SDK potentially truncates it.
|
||||||
|
pending = _pending_tool_outputs.get(None)
|
||||||
|
if pending is not None:
|
||||||
|
pending[base_tool.name] = text
|
||||||
|
|
||||||
|
return {
|
||||||
|
"content": [{"type": "text", "text": text}],
|
||||||
|
"isError": not result.success,
|
||||||
|
}
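
# The stash round-trip, in outline (how the response adapter consumes it is an
# assumption based on pop_pending_tool_output above):
#
#   1. _execute_tool_sync() runs the tool and stores the full output string in
#      the _pending_tool_outputs ContextVar, keyed by tool name.
#   2. The SDK CLI may truncate that output (writing it to a tool-results file)
#      before it reaches the stream.
#   3. The response adapter calls pop_pending_tool_output(tool_name) and, when a
#      stashed value exists, forwards it instead of the truncated text.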
|
||||||
|
|
||||||
|
|
||||||
|
def _mcp_error(message: str) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": json.dumps({"error": message, "type": "error"})}
|
||||||
|
],
|
||||||
|
"isError": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def create_tool_handler(base_tool: BaseTool):
|
||||||
|
"""Create an async handler function for a BaseTool.
|
||||||
|
|
||||||
|
This wraps the existing BaseTool._execute method to be compatible
|
||||||
|
with the Claude Agent SDK MCP tool format.
|
||||||
|
|
||||||
|
Long-running tools (``is_long_running=True``) are delegated to the
|
||||||
|
non-SDK background infrastructure via a callback set in the execution
|
||||||
|
context. The callback persists the operation in Redis (stream_registry)
|
||||||
|
so results survive page refreshes and pod restarts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def tool_handler(args: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Execute the wrapped tool and return MCP-formatted response."""
|
||||||
|
user_id, session = get_execution_context()
|
||||||
|
|
||||||
|
if session is None:
|
||||||
|
return _mcp_error("No session context available")
|
||||||
|
|
||||||
|
# --- Long-running: delegate to non-SDK background infrastructure ---
|
||||||
|
if base_tool.is_long_running:
|
||||||
|
callback = _long_running_callback.get(None)
|
||||||
|
if callback:
|
||||||
|
try:
|
||||||
|
return await callback(base_tool.name, args, session)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"Long-running callback failed for {base_tool.name}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
return _mcp_error(f"Failed to start {base_tool.name}: {e}")
|
||||||
|
# No callback — fall through to synchronous execution
|
||||||
|
logger.warning(
|
||||||
|
f"[SDK] No long-running callback for {base_tool.name}, "
|
||||||
|
f"executing synchronously (may block)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Normal (fast) tool: execute synchronously ---
|
||||||
|
try:
|
||||||
|
return await _execute_tool_sync(base_tool, user_id, session, args)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error executing tool {base_tool.name}: {e}", exc_info=True)
|
||||||
|
return _mcp_error(f"Failed to execute {base_tool.name}: {e}")
|
||||||
|
|
||||||
|
return tool_handler
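
# Minimal usage sketch (tool name and arguments are hypothetical):
#
#   base_tool = TOOL_REGISTRY["find_agent"]
#   handler = create_tool_handler(base_tool)
#   result = await handler({"query": "email automation"})
#   # -> {"content": [{"type": "text", "text": "..."}], "isError": False}
#
# set_execution_context() must have been called first; otherwise the handler
# returns the "No session context available" MCP error.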
|
||||||
|
|
||||||
|
|
||||||
|
def _build_input_schema(base_tool: BaseTool) -> dict[str, Any]:
|
||||||
|
"""Build a JSON Schema input schema for a tool."""
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"properties": base_tool.parameters.get("properties", {}),
|
||||||
|
"required": base_tool.parameters.get("required", []),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Read a file with optional offset/limit. Restricted to SDK working directory.
|
||||||
|
|
||||||
|
After reading, the file is deleted to prevent accumulation in long-running pods.
|
||||||
|
"""
|
||||||
|
file_path = args.get("file_path", "")
|
||||||
|
offset = args.get("offset", 0)
|
||||||
|
limit = args.get("limit", 2000)
|
||||||
|
|
||||||
|
# Security: only allow reads under ~/.claude/projects/**/tool-results/
|
||||||
|
real_path = os.path.realpath(file_path)
|
||||||
|
if not real_path.startswith(_SDK_PROJECTS_DIR) or "tool-results" not in real_path:
|
||||||
|
return {
|
||||||
|
"content": [{"type": "text", "text": f"Access denied: {file_path}"}],
|
||||||
|
"isError": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(real_path) as f:
|
||||||
|
lines = f.readlines()
|
||||||
|
selected = lines[offset : offset + limit]
|
||||||
|
content = "".join(selected)
|
||||||
|
return {"content": [{"type": "text", "text": content}], "isError": False}
|
||||||
|
except FileNotFoundError:
|
||||||
|
return {
|
||||||
|
"content": [{"type": "text", "text": f"File not found: {file_path}"}],
|
||||||
|
"isError": True,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"content": [{"type": "text", "text": f"Error reading file: {e}"}],
|
||||||
|
"isError": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_READ_TOOL_NAME = "Read"
|
||||||
|
_READ_TOOL_DESCRIPTION = (
|
||||||
|
"Read a file from the local filesystem. "
|
||||||
|
"Use offset and limit to read specific line ranges for large files."
|
||||||
|
)
|
||||||
|
_READ_TOOL_SCHEMA = {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"file_path": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The absolute path to the file to read",
|
||||||
|
},
|
||||||
|
"offset": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Line number to start reading from (0-indexed). Default: 0",
|
||||||
|
},
|
||||||
|
"limit": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Number of lines to read. Default: 2000",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["file_path"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Create the MCP server configuration
|
||||||
|
def create_copilot_mcp_server():
|
||||||
|
"""Create an in-process MCP server configuration for CoPilot tools.
|
||||||
|
|
||||||
|
This can be passed to ClaudeAgentOptions.mcp_servers.
|
||||||
|
|
||||||
|
Note: The actual SDK MCP server creation depends on the claude-agent-sdk
|
||||||
|
package being available. This function returns the configuration that
|
||||||
|
can be used with the SDK.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from claude_agent_sdk import create_sdk_mcp_server, tool
|
||||||
|
|
||||||
|
# Create decorated tool functions
|
||||||
|
sdk_tools = []
|
||||||
|
|
||||||
|
for tool_name, base_tool in TOOL_REGISTRY.items():
|
||||||
|
handler = create_tool_handler(base_tool)
|
||||||
|
decorated = tool(
|
||||||
|
tool_name,
|
||||||
|
base_tool.description,
|
||||||
|
_build_input_schema(base_tool),
|
||||||
|
)(handler)
|
||||||
|
sdk_tools.append(decorated)
|
||||||
|
|
||||||
|
# Add the Read tool so the SDK can read back oversized tool results
|
||||||
|
read_tool = tool(
|
||||||
|
_READ_TOOL_NAME,
|
||||||
|
_READ_TOOL_DESCRIPTION,
|
||||||
|
_READ_TOOL_SCHEMA,
|
||||||
|
)(_read_file_handler)
|
||||||
|
sdk_tools.append(read_tool)
|
||||||
|
|
||||||
|
server = create_sdk_mcp_server(
|
||||||
|
name=MCP_SERVER_NAME,
|
||||||
|
version="1.0.0",
|
||||||
|
tools=sdk_tools,
|
||||||
|
)
|
||||||
|
|
||||||
|
return server
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
# Let ImportError propagate so service.py handles the fallback
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
# SDK built-in tools allowed within the workspace directory.
|
||||||
|
# Security hooks validate that file paths stay within sdk_cwd.
|
||||||
|
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
|
||||||
|
# which provides kernel-level network isolation via unshare --net.
|
||||||
|
# Task allows spawning sub-agents (rate-limited by security hooks).
|
||||||
|
_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task"]
|
||||||
|
|
||||||
|
# List of tool names for allowed_tools configuration
|
||||||
|
# Include MCP tools, the MCP Read tool for oversized results,
|
||||||
|
# and SDK built-in file tools for workspace operations.
|
||||||
|
COPILOT_TOOL_NAMES = [
|
||||||
|
*[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()],
|
||||||
|
f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}",
|
||||||
|
*_SDK_BUILTIN_TOOLS,
|
||||||
|
]
|
||||||
File diff suppressed because it is too large
@@ -0,0 +1,989 @@
"""Stream registry for managing reconnectable SSE streams.

This module provides a registry for tracking active streaming tasks and their
messages. It uses Redis for all state management (no in-memory state), making
pods stateless and horizontally scalable.

Architecture:
- Redis Stream: Persists all messages for replay and real-time delivery
- Redis Hash: Task metadata (status, session_id, etc.)

Subscribers:
1. Replay missed messages from Redis Stream (XREAD)
2. Listen for live updates via blocking XREAD
3. No in-memory state required on the subscribing pod
"""

import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Literal

import orjson

from backend.data.redis_client import get_redis_async

from .config import ChatConfig
from .response_model import StreamBaseResponse, StreamError, StreamFinish

logger = logging.getLogger(__name__)
config = ChatConfig()

# Track background tasks for this pod (just the asyncio.Task reference, not subscribers)
_local_tasks: dict[str, asyncio.Task] = {}

# Track listener tasks per subscriber queue for cleanup
# Maps queue id() to (task_id, asyncio.Task) for proper cleanup on unsubscribe
_listener_tasks: dict[int, tuple[str, asyncio.Task]] = {}

# Timeout for putting chunks into subscriber queues (seconds)
# If the queue is full and doesn't drain within this time, send an overflow error
QUEUE_PUT_TIMEOUT = 5.0

# Lua script for atomic compare-and-swap status update (idempotent completion)
# Returns 1 if status was updated, 0 if already completed/failed
COMPLETE_TASK_SCRIPT = """
local current = redis.call("HGET", KEYS[1], "status")
if current == "running" then
    redis.call("HSET", KEYS[1], "status", ARGV[1])
    return 1
end
return 0
"""


@dataclass
class ActiveTask:
    """Represents an active streaming task (metadata only, no in-memory queues)."""

    task_id: str
    session_id: str
    user_id: str | None
    tool_call_id: str
    tool_name: str
    operation_id: str
    status: Literal["running", "completed", "failed"] = "running"
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    asyncio_task: asyncio.Task | None = None


def _get_task_meta_key(task_id: str) -> str:
    """Get Redis key for task metadata."""
    return f"{config.task_meta_prefix}{task_id}"


def _get_task_stream_key(task_id: str) -> str:
    """Get Redis key for task message stream."""
    return f"{config.task_stream_prefix}{task_id}"


def _get_operation_mapping_key(operation_id: str) -> str:
    """Get Redis key for operation_id to task_id mapping."""
    return f"{config.task_op_prefix}{operation_id}"


async def create_task(
    task_id: str,
    session_id: str,
    user_id: str | None,
    tool_call_id: str,
    tool_name: str,
    operation_id: str,
) -> ActiveTask:
    """Create a new streaming task in Redis.

    Args:
        task_id: Unique identifier for the task
        session_id: Chat session ID
        user_id: User ID (may be None for anonymous)
        tool_call_id: Tool call ID from the LLM
        tool_name: Name of the tool being executed
        operation_id: Operation ID for webhook callbacks

    Returns:
        The created ActiveTask instance (metadata only)
    """
    import time

    start_time = time.perf_counter()

    # Build log metadata for structured logging
    log_meta = {
        "component": "StreamRegistry",
        "task_id": task_id,
        "session_id": session_id,
    }
    if user_id:
        log_meta["user_id"] = user_id

    logger.info(
        f"[TIMING] create_task STARTED, task={task_id}, session={session_id}, user={user_id}",
        extra={"json_fields": log_meta},
    )

    task = ActiveTask(
        task_id=task_id,
        session_id=session_id,
        user_id=user_id,
        tool_call_id=tool_call_id,
        tool_name=tool_name,
        operation_id=operation_id,
    )

    # Store metadata in Redis
    redis_start = time.perf_counter()
    redis = await get_redis_async()
    redis_time = (time.perf_counter() - redis_start) * 1000
    logger.info(
        f"[TIMING] get_redis_async took {redis_time:.1f}ms",
        extra={"json_fields": {**log_meta, "duration_ms": redis_time}},
    )

    meta_key = _get_task_meta_key(task_id)
    op_key = _get_operation_mapping_key(operation_id)

    hset_start = time.perf_counter()
    await redis.hset(  # type: ignore[misc]
        meta_key,
        mapping={
            "task_id": task_id,
            "session_id": session_id,
            "user_id": user_id or "",
            "tool_call_id": tool_call_id,
            "tool_name": tool_name,
            "operation_id": operation_id,
            "status": task.status,
            "created_at": task.created_at.isoformat(),
        },
    )
    hset_time = (time.perf_counter() - hset_start) * 1000
    logger.info(
        f"[TIMING] redis.hset took {hset_time:.1f}ms",
        extra={"json_fields": {**log_meta, "duration_ms": hset_time}},
    )

    await redis.expire(meta_key, config.stream_ttl)

    # Create operation_id -> task_id mapping for webhook lookups
    await redis.set(op_key, task_id, ex=config.stream_ttl)

    total_time = (time.perf_counter() - start_time) * 1000
    logger.info(
        f"[TIMING] create_task COMPLETED in {total_time:.1f}ms; task={task_id}, session={session_id}",
        extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
    )

    return task

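Taken together with publish_chunk and mark_task_completed (both defined later in this module), the producer side of a task is expected to look roughly like the sketch below; the IDs are illustrative placeholders, not real values.

# Producer-side sketch (illustrative IDs). mark_task_completed publishes the
# StreamFinish marker itself, so the producer only emits progress chunks.
async def run_streaming_tool() -> None:
    task = await create_task(
        task_id="task-123",
        session_id="session-abc",
        user_id="user-1",
        tool_call_id="call-1",
        tool_name="run_agent",
        operation_id="op-1",
    )
    try:
        # ... do the work, emitting progress with
        # await publish_chunk(task.task_id, <StreamBaseResponse chunk>) ...
        await mark_task_completed(task.task_id, "completed")
    except Exception:
        await mark_task_completed(task.task_id, "failed")
        raise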
async def publish_chunk(
    task_id: str,
    chunk: StreamBaseResponse,
) -> str:
    """Publish a chunk to Redis Stream.

    All delivery is via Redis Streams - no in-memory state.

    Args:
        task_id: Task ID to publish to
        chunk: The stream response chunk to publish

    Returns:
        The Redis Stream message ID
    """
    import time

    start_time = time.perf_counter()
    chunk_type = type(chunk).__name__
    chunk_json = chunk.model_dump_json()
    message_id = "0-0"

    # Build log metadata
    log_meta = {
        "component": "StreamRegistry",
        "task_id": task_id,
        "chunk_type": chunk_type,
    }

    try:
        redis = await get_redis_async()
        stream_key = _get_task_stream_key(task_id)

        # Write to Redis Stream for persistence and real-time delivery
        xadd_start = time.perf_counter()
        raw_id = await redis.xadd(
            stream_key,
            {"data": chunk_json},
            maxlen=config.stream_max_length,
        )
        xadd_time = (time.perf_counter() - xadd_start) * 1000
        message_id = raw_id if isinstance(raw_id, str) else raw_id.decode()

        # Set TTL on stream to match task metadata TTL
        await redis.expire(stream_key, config.stream_ttl)

        total_time = (time.perf_counter() - start_time) * 1000
        # Only log timing for significant chunks or slow operations
        if (
            chunk_type
            in ("StreamStart", "StreamFinish", "StreamTextStart", "StreamTextEnd")
            or total_time > 50
        ):
            logger.info(
                f"[TIMING] publish_chunk {chunk_type} in {total_time:.1f}ms (xadd={xadd_time:.1f}ms)",
                extra={
                    "json_fields": {
                        **log_meta,
                        "total_time_ms": total_time,
                        "xadd_time_ms": xadd_time,
                        "message_id": message_id,
                    }
                },
            )
    except Exception as e:
        elapsed = (time.perf_counter() - start_time) * 1000
        logger.error(
            f"[TIMING] Failed to publish chunk {chunk_type} after {elapsed:.1f}ms: {e}",
            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
            exc_info=True,
        )

    return message_id

async def subscribe_to_task(
    task_id: str,
    user_id: str | None,
    last_message_id: str = "0-0",
) -> asyncio.Queue[StreamBaseResponse] | None:
    """Subscribe to a task's stream with replay of missed messages.

    This is fully stateless - uses Redis Stream for replay and blocking XREAD for live updates.

    Args:
        task_id: Task ID to subscribe to
        user_id: User ID for ownership validation
        last_message_id: Last Redis Stream message ID received ("0-0" for full replay)

    Returns:
        An asyncio Queue that will receive stream chunks, or None if task not found
        or user doesn't have access
    """
    import time

    start_time = time.perf_counter()

    # Build log metadata
    log_meta = {"component": "StreamRegistry", "task_id": task_id}
    if user_id:
        log_meta["user_id"] = user_id

    logger.info(
        f"[TIMING] subscribe_to_task STARTED, task={task_id}, user={user_id}, last_msg={last_message_id}",
        extra={"json_fields": {**log_meta, "last_message_id": last_message_id}},
    )

    redis_start = time.perf_counter()
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)
    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
    hgetall_time = (time.perf_counter() - redis_start) * 1000
    logger.info(
        f"[TIMING] Redis hgetall took {hgetall_time:.1f}ms",
        extra={"json_fields": {**log_meta, "duration_ms": hgetall_time}},
    )

    if not meta:
        elapsed = (time.perf_counter() - start_time) * 1000
        logger.info(
            f"[TIMING] Task not found in Redis after {elapsed:.1f}ms",
            extra={
                "json_fields": {
                    **log_meta,
                    "elapsed_ms": elapsed,
                    "reason": "task_not_found",
                }
            },
        )
        return None

    # Note: Redis client uses decode_responses=True, so keys are strings
    task_status = meta.get("status", "")
    task_user_id = meta.get("user_id", "") or None
    log_meta["session_id"] = meta.get("session_id", "")

    # Validate ownership - if task has an owner, requester must match
    if task_user_id:
        if user_id != task_user_id:
            logger.warning(
                f"[TIMING] Access denied: user {user_id} tried to access task owned by {task_user_id}",
                extra={
                    "json_fields": {
                        **log_meta,
                        "task_owner": task_user_id,
                        "reason": "access_denied",
                    }
                },
            )
            return None

    subscriber_queue: asyncio.Queue[StreamBaseResponse] = asyncio.Queue()
    stream_key = _get_task_stream_key(task_id)

    # Step 1: Replay messages from Redis Stream
    xread_start = time.perf_counter()
    messages = await redis.xread({stream_key: last_message_id}, block=0, count=1000)
    xread_time = (time.perf_counter() - xread_start) * 1000
    logger.info(
        f"[TIMING] Redis xread (replay) took {xread_time:.1f}ms, status={task_status}",
        extra={
            "json_fields": {
                **log_meta,
                "duration_ms": xread_time,
                "task_status": task_status,
            }
        },
    )

    replayed_count = 0
    replay_last_id = last_message_id
    if messages:
        for _stream_name, stream_messages in messages:
            for msg_id, msg_data in stream_messages:
                replay_last_id = msg_id if isinstance(msg_id, str) else msg_id.decode()
                # Note: Redis client uses decode_responses=True, so keys are strings
                if "data" in msg_data:
                    try:
                        chunk_data = orjson.loads(msg_data["data"])
                        chunk = _reconstruct_chunk(chunk_data)
                        if chunk:
                            await subscriber_queue.put(chunk)
                            replayed_count += 1
                    except Exception as e:
                        logger.warning(f"Failed to replay message: {e}")

    logger.info(
        f"[TIMING] Replayed {replayed_count} messages, last_id={replay_last_id}",
        extra={
            "json_fields": {
                **log_meta,
                "n_messages_replayed": replayed_count,
                "replay_last_id": replay_last_id,
            }
        },
    )

    # Step 2: If task is still running, start stream listener for live updates
    if task_status == "running":
        logger.info(
            "[TIMING] Task still running, starting _stream_listener",
            extra={"json_fields": {**log_meta, "task_status": task_status}},
        )
        listener_task = asyncio.create_task(
            _stream_listener(task_id, subscriber_queue, replay_last_id, log_meta)
        )
        # Track listener task for cleanup on unsubscribe
        _listener_tasks[id(subscriber_queue)] = (task_id, listener_task)
    else:
        # Task is completed/failed - add finish marker
        logger.info(
            f"[TIMING] Task already {task_status}, adding StreamFinish",
            extra={"json_fields": {**log_meta, "task_status": task_status}},
        )
        await subscriber_queue.put(StreamFinish())

    total_time = (time.perf_counter() - start_time) * 1000
    logger.info(
        f"[TIMING] subscribe_to_task COMPLETED in {total_time:.1f}ms; task={task_id}, "
        f"n_messages_replayed={replayed_count}",
        extra={
            "json_fields": {
                **log_meta,
                "total_time_ms": total_time,
                "n_messages_replayed": replayed_count,
            }
        },
    )
    return subscriber_queue

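On the consuming side, a subscriber is expected to drain the returned queue until the finish marker and then release the listener; a sketch follows (the SSE forwarding is assumed to live in the calling endpoint, which is outside this module).

# Consumer-side sketch: replay + live tail until StreamFinish, then clean up
# the XREAD listener so it does not leak.
async def consume_task_stream(task_id: str, user_id: str | None) -> None:
    queue = await subscribe_to_task(task_id, user_id, last_message_id="0-0")
    if queue is None:
        return  # not found, expired, or access denied
    try:
        while True:
            chunk = await queue.get()
            # ... forward chunk (e.g. chunk.model_dump_json()) to the SSE response ...
            if isinstance(chunk, StreamFinish):
                break
    finally:
        await unsubscribe_from_task(task_id, queue)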
async def _stream_listener(
    task_id: str,
    subscriber_queue: asyncio.Queue[StreamBaseResponse],
    last_replayed_id: str,
    log_meta: dict | None = None,
) -> None:
    """Listen to Redis Stream for new messages using blocking XREAD.

    This approach avoids the duplicate message issue that can occur with pub/sub
    when messages are published during the gap between replay and subscription.

    Args:
        task_id: Task ID to listen for
        subscriber_queue: Queue to deliver messages to
        last_replayed_id: Last message ID from replay (continue from here)
        log_meta: Structured logging metadata
    """
    import time

    start_time = time.perf_counter()

    # Use provided log_meta or build minimal one
    if log_meta is None:
        log_meta = {"component": "StreamRegistry", "task_id": task_id}

    logger.info(
        f"[TIMING] _stream_listener STARTED, task={task_id}, last_id={last_replayed_id}",
        extra={"json_fields": {**log_meta, "last_replayed_id": last_replayed_id}},
    )

    queue_id = id(subscriber_queue)
    # Track the last successfully delivered message ID for recovery hints
    last_delivered_id = last_replayed_id
    messages_delivered = 0
    first_message_time = None
    xread_count = 0

    try:
        redis = await get_redis_async()
        stream_key = _get_task_stream_key(task_id)
        current_id = last_replayed_id

        while True:
            # Block for up to 30 seconds waiting for new messages
            # This allows periodic checking if task is still running
            xread_start = time.perf_counter()
            xread_count += 1
            messages = await redis.xread(
                {stream_key: current_id}, block=30000, count=100
            )
            xread_time = (time.perf_counter() - xread_start) * 1000

            if messages:
                msg_count = sum(len(msgs) for _, msgs in messages)
                logger.info(
                    f"[TIMING] xread #{xread_count} returned {msg_count} messages in {xread_time:.1f}ms",
                    extra={
                        "json_fields": {
                            **log_meta,
                            "xread_count": xread_count,
                            "n_messages": msg_count,
                            "duration_ms": xread_time,
                        }
                    },
                )
            elif xread_time > 1000:
                # Only log timeouts (30s blocking)
                logger.info(
                    f"[TIMING] xread #{xread_count} timeout after {xread_time:.1f}ms",
                    extra={
                        "json_fields": {
                            **log_meta,
                            "xread_count": xread_count,
                            "duration_ms": xread_time,
                            "reason": "timeout",
                        }
                    },
                )

            if not messages:
                # Timeout - check if task is still running
                meta_key = _get_task_meta_key(task_id)
                status = await redis.hget(meta_key, "status")  # type: ignore[misc]
                if status and status != "running":
                    try:
                        await asyncio.wait_for(
                            subscriber_queue.put(StreamFinish()),
                            timeout=QUEUE_PUT_TIMEOUT,
                        )
                    except asyncio.TimeoutError:
                        logger.warning(
                            f"Timeout delivering finish event for task {task_id}"
                        )
                    break
                continue

            for _stream_name, stream_messages in messages:
                for msg_id, msg_data in stream_messages:
                    current_id = msg_id if isinstance(msg_id, str) else msg_id.decode()

                    if "data" not in msg_data:
                        continue

                    try:
                        chunk_data = orjson.loads(msg_data["data"])
                        chunk = _reconstruct_chunk(chunk_data)
                        if chunk:
                            try:
                                await asyncio.wait_for(
                                    subscriber_queue.put(chunk),
                                    timeout=QUEUE_PUT_TIMEOUT,
                                )
                                # Update last delivered ID on successful delivery
                                last_delivered_id = current_id
                                messages_delivered += 1
                                if first_message_time is None:
                                    first_message_time = time.perf_counter()
                                    elapsed = (first_message_time - start_time) * 1000
                                    logger.info(
                                        f"[TIMING] FIRST live message at {elapsed:.1f}ms, type={type(chunk).__name__}",
                                        extra={
                                            "json_fields": {
                                                **log_meta,
                                                "elapsed_ms": elapsed,
                                                "chunk_type": type(chunk).__name__,
                                            }
                                        },
                                    )
                            except asyncio.TimeoutError:
                                logger.warning(
                                    f"[TIMING] Subscriber queue full, delivery timed out after {QUEUE_PUT_TIMEOUT}s",
                                    extra={
                                        "json_fields": {
                                            **log_meta,
                                            "timeout_s": QUEUE_PUT_TIMEOUT,
                                            "reason": "queue_full",
                                        }
                                    },
                                )
                                # Send overflow error with recovery info
                                try:
                                    overflow_error = StreamError(
                                        errorText="Message delivery timeout - some messages may have been missed",
                                        code="QUEUE_OVERFLOW",
                                        details={
                                            "last_delivered_id": last_delivered_id,
                                            "recovery_hint": f"Reconnect with last_message_id={last_delivered_id}",
                                        },
                                    )
                                    subscriber_queue.put_nowait(overflow_error)
                                except asyncio.QueueFull:
                                    # Queue is completely stuck, nothing more we can do
                                    logger.error(
                                        f"Cannot deliver overflow error for task {task_id}, "
                                        "queue completely blocked"
                                    )

                            # Stop listening on finish
                            if isinstance(chunk, StreamFinish):
                                total_time = (time.perf_counter() - start_time) * 1000
                                logger.info(
                                    f"[TIMING] StreamFinish received in {total_time/1000:.1f}s; delivered={messages_delivered}",
                                    extra={
                                        "json_fields": {
                                            **log_meta,
                                            "total_time_ms": total_time,
                                            "messages_delivered": messages_delivered,
                                        }
                                    },
                                )
                                return
                    except Exception as e:
                        logger.warning(
                            f"Error processing stream message: {e}",
                            extra={"json_fields": {**log_meta, "error": str(e)}},
                        )

    except asyncio.CancelledError:
        elapsed = (time.perf_counter() - start_time) * 1000
        logger.info(
            f"[TIMING] _stream_listener CANCELLED after {elapsed:.1f}ms, delivered={messages_delivered}",
            extra={
                "json_fields": {
                    **log_meta,
                    "elapsed_ms": elapsed,
                    "messages_delivered": messages_delivered,
                    "reason": "cancelled",
                }
            },
        )
        raise  # Re-raise to propagate cancellation
    except Exception as e:
        elapsed = (time.perf_counter() - start_time) * 1000
        logger.error(
            f"[TIMING] _stream_listener ERROR after {elapsed:.1f}ms: {e}",
            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
        )
        # On error, send finish to unblock subscriber
        try:
            await asyncio.wait_for(
                subscriber_queue.put(StreamFinish()),
                timeout=QUEUE_PUT_TIMEOUT,
            )
        except (asyncio.TimeoutError, asyncio.QueueFull):
            logger.warning(
                "Could not deliver finish event after error",
                extra={"json_fields": log_meta},
            )
    finally:
        # Clean up listener task mapping on exit
        total_time = (time.perf_counter() - start_time) * 1000
        logger.info(
            f"[TIMING] _stream_listener FINISHED in {total_time/1000:.1f}s; task={task_id}, "
            f"delivered={messages_delivered}, xread_count={xread_count}",
            extra={
                "json_fields": {
                    **log_meta,
                    "total_time_ms": total_time,
                    "messages_delivered": messages_delivered,
                    "xread_count": xread_count,
                }
            },
        )
        _listener_tasks.pop(queue_id, None)

async def mark_task_completed(
    task_id: str,
    status: Literal["completed", "failed"] = "completed",
) -> bool:
    """Mark a task as completed and publish finish event.

    This is idempotent - calling multiple times with the same task_id is safe.
    Uses atomic compare-and-swap via Lua script to prevent race conditions.
    Status is updated first (source of truth), then finish event is published (best-effort).

    Args:
        task_id: Task ID to mark as completed
        status: Final status ("completed" or "failed")

    Returns:
        True if task was newly marked completed, False if already completed/failed
    """
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)

    # Atomic compare-and-swap: only update if status is "running"
    # This prevents race conditions when multiple callers try to complete simultaneously
    result = await redis.eval(COMPLETE_TASK_SCRIPT, 1, meta_key, status)  # type: ignore[misc]

    if result == 0:
        logger.debug(f"Task {task_id} already completed/failed, skipping")
        return False

    # THEN publish finish event (best-effort - listeners can detect via status polling)
    try:
        await publish_chunk(task_id, StreamFinish())
    except Exception as e:
        logger.error(
            f"Failed to publish finish event for task {task_id}: {e}. "
            "Listeners will detect completion via status polling."
        )

    # Clean up local task reference if exists
    _local_tasks.pop(task_id, None)
    return True

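Because the status update goes through the Lua compare-and-swap, completing a task twice is harmless; a sketch of the expected return values when two callers race (only the first transition away from "running" wins):

# Inside an async context (sketch, illustrative task ID):
first = await mark_task_completed("task-123")             # True: running -> completed, finish published
second = await mark_task_completed("task-123", "failed")  # False: CAS sees status != "running", no-op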
async def find_task_by_operation_id(operation_id: str) -> ActiveTask | None:
    """Find a task by its operation ID.

    Used by webhook callbacks to locate the task to update.

    Args:
        operation_id: Operation ID to search for

    Returns:
        ActiveTask if found, None otherwise
    """
    redis = await get_redis_async()
    op_key = _get_operation_mapping_key(operation_id)
    task_id = await redis.get(op_key)

    if not task_id:
        return None

    task_id_str = task_id.decode() if isinstance(task_id, bytes) else task_id
    return await get_task(task_id_str)

async def get_task(task_id: str) -> ActiveTask | None:
    """Get a task by its ID from Redis.

    Args:
        task_id: Task ID to look up

    Returns:
        ActiveTask if found, None otherwise
    """
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)
    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]

    if not meta:
        return None

    # Note: Redis client uses decode_responses=True, so keys/values are strings
    return ActiveTask(
        task_id=meta.get("task_id", ""),
        session_id=meta.get("session_id", ""),
        user_id=meta.get("user_id", "") or None,
        tool_call_id=meta.get("tool_call_id", ""),
        tool_name=meta.get("tool_name", ""),
        operation_id=meta.get("operation_id", ""),
        status=meta.get("status", "running"),  # type: ignore[arg-type]
    )

async def get_task_with_expiry_info(
    task_id: str,
) -> tuple[ActiveTask | None, str | None]:
    """Get a task by its ID with expiration detection.

    Returns (task, error_code) where error_code is:
    - None if task found
    - "TASK_EXPIRED" if stream exists but metadata is gone (TTL expired)
    - "TASK_NOT_FOUND" if neither exists

    Args:
        task_id: Task ID to look up

    Returns:
        Tuple of (ActiveTask or None, error_code or None)
    """
    redis = await get_redis_async()
    meta_key = _get_task_meta_key(task_id)
    stream_key = _get_task_stream_key(task_id)

    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]

    if not meta:
        # Check if stream still has data (metadata expired but stream hasn't)
        stream_len = await redis.xlen(stream_key)
        if stream_len > 0:
            return None, "TASK_EXPIRED"
        return None, "TASK_NOT_FOUND"

    # Note: Redis client uses decode_responses=True, so keys/values are strings
    return (
        ActiveTask(
            task_id=meta.get("task_id", ""),
            session_id=meta.get("session_id", ""),
            user_id=meta.get("user_id", "") or None,
            tool_call_id=meta.get("tool_call_id", ""),
            tool_name=meta.get("tool_name", ""),
            operation_id=meta.get("operation_id", ""),
            status=meta.get("status", "running"),  # type: ignore[arg-type]
        ),
        None,
    )

async def get_active_task_for_session(
    session_id: str,
    user_id: str | None = None,
) -> tuple[ActiveTask | None, str]:
    """Get the active (running) task for a session, if any.

    Scans Redis for tasks matching the session_id with status="running".

    Args:
        session_id: Session ID to look up
        user_id: User ID for ownership validation (optional)

    Returns:
        Tuple of (ActiveTask if found and running, last_message_id from Redis Stream)
    """

    redis = await get_redis_async()

    # Scan Redis for task metadata keys
    cursor = 0
    tasks_checked = 0

    while True:
        cursor, keys = await redis.scan(
            cursor, match=f"{config.task_meta_prefix}*", count=100
        )

        for key in keys:
            tasks_checked += 1
            meta: dict[Any, Any] = await redis.hgetall(key)  # type: ignore[misc]
            if not meta:
                continue

            # Note: Redis client uses decode_responses=True, so keys/values are strings
            task_session_id = meta.get("session_id", "")
            task_status = meta.get("status", "")
            task_user_id = meta.get("user_id", "") or None
            task_id = meta.get("task_id", "")

            if task_session_id == session_id and task_status == "running":
                # Validate ownership - if task has an owner, requester must match
                if task_user_id and user_id != task_user_id:
                    continue

                # Auto-expire stale tasks that exceeded stream_timeout
                created_at_str = meta.get("created_at", "")
                if created_at_str:
                    try:
                        created_at = datetime.fromisoformat(created_at_str)
                        age_seconds = (
                            datetime.now(timezone.utc) - created_at
                        ).total_seconds()
                        if age_seconds > config.stream_timeout:
                            logger.warning(
                                f"[TASK_LOOKUP] Auto-expiring stale task {task_id[:8]}... "
                                f"(age={age_seconds:.0f}s > timeout={config.stream_timeout}s)"
                            )
                            await mark_task_completed(task_id, "failed")
                            continue
                    except (ValueError, TypeError):
                        pass

                logger.info(
                    f"[TASK_LOOKUP] Found running task {task_id[:8]}... for session {session_id[:8]}..."
                )

                # Get the last message ID from Redis Stream
                stream_key = _get_task_stream_key(task_id)
                last_id = "0-0"
                try:
                    messages = await redis.xrevrange(stream_key, count=1)
                    if messages:
                        msg_id = messages[0][0]
                        last_id = msg_id if isinstance(msg_id, str) else msg_id.decode()
                except Exception as e:
                    logger.warning(f"Failed to get last message ID: {e}")

                return (
                    ActiveTask(
                        task_id=task_id,
                        session_id=task_session_id,
                        user_id=task_user_id,
                        tool_call_id=meta.get("tool_call_id", ""),
                        tool_name=meta.get("tool_name", ""),
                        operation_id=meta.get("operation_id", ""),
                        status="running",
                    ),
                    last_id,
                )

        if cursor == 0:
            break

    return None, "0-0"

def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
    """Reconstruct a StreamBaseResponse from JSON data.

    Args:
        chunk_data: Parsed JSON data from Redis

    Returns:
        Reconstructed response object, or None if unknown type
    """
    from .response_model import (
        ResponseType,
        StreamError,
        StreamFinish,
        StreamFinishStep,
        StreamHeartbeat,
        StreamStart,
        StreamStartStep,
        StreamTextDelta,
        StreamTextEnd,
        StreamTextStart,
        StreamToolInputAvailable,
        StreamToolInputStart,
        StreamToolOutputAvailable,
        StreamUsage,
    )

    # Map response types to their corresponding classes
    type_to_class: dict[str, type[StreamBaseResponse]] = {
        ResponseType.START.value: StreamStart,
        ResponseType.FINISH.value: StreamFinish,
        ResponseType.START_STEP.value: StreamStartStep,
        ResponseType.FINISH_STEP.value: StreamFinishStep,
        ResponseType.TEXT_START.value: StreamTextStart,
        ResponseType.TEXT_DELTA.value: StreamTextDelta,
        ResponseType.TEXT_END.value: StreamTextEnd,
        ResponseType.TOOL_INPUT_START.value: StreamToolInputStart,
        ResponseType.TOOL_INPUT_AVAILABLE.value: StreamToolInputAvailable,
        ResponseType.TOOL_OUTPUT_AVAILABLE.value: StreamToolOutputAvailable,
        ResponseType.ERROR.value: StreamError,
        ResponseType.USAGE.value: StreamUsage,
        ResponseType.HEARTBEAT.value: StreamHeartbeat,
    }

    chunk_type = chunk_data.get("type")
    chunk_class = type_to_class.get(chunk_type)  # type: ignore[arg-type]

    if chunk_class is None:
        logger.warning(f"Unknown chunk type: {chunk_type}")
        return None

    try:
        return chunk_class(**chunk_data)
    except Exception as e:
        logger.warning(f"Failed to reconstruct chunk of type {chunk_type}: {e}")
        return None

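To make the round trip explicit, this sketch shows how a chunk written by publish_chunk comes back through the replay/listen path; it assumes StreamFinish serializes a `type` field that matches ResponseType.FINISH, as the mapping above implies.

# Serialization round-trip sketch:
# write path (publish_chunk): pydantic model -> JSON string stored in the stream entry
chunk_json = StreamFinish().model_dump_json()

# read path (replay / _stream_listener): JSON -> dict -> concrete response class
chunk_data = orjson.loads(chunk_json)      # {"type": "...", ...}
restored = _reconstruct_chunk(chunk_data)  # dispatches on chunk_data["type"]
assert isinstance(restored, StreamFinish)  # assuming the model's type matches ResponseType.FINISH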
async def set_task_asyncio_task(task_id: str, asyncio_task: asyncio.Task) -> None:
    """Track the asyncio.Task for a task (local reference only).

    This is just for cleanup purposes - the task state is in Redis.

    Args:
        task_id: Task ID
        asyncio_task: The asyncio Task to track
    """
    _local_tasks[task_id] = asyncio_task

async def unsubscribe_from_task(
    task_id: str,
    subscriber_queue: asyncio.Queue[StreamBaseResponse],
) -> None:
    """Clean up when a subscriber disconnects.

    Cancels the XREAD-based listener task associated with this subscriber queue
    to prevent resource leaks.

    Args:
        task_id: Task ID
        subscriber_queue: The subscriber's queue used to look up the listener task
    """
    queue_id = id(subscriber_queue)
    listener_entry = _listener_tasks.pop(queue_id, None)

    if listener_entry is None:
        logger.debug(
            f"No listener task found for task {task_id} queue {queue_id} "
            "(may have already completed)"
        )
        return

    stored_task_id, listener_task = listener_entry

    if stored_task_id != task_id:
        logger.warning(
            f"Task ID mismatch in unsubscribe: expected {task_id}, "
            f"found {stored_task_id}"
        )

    if listener_task.done():
        logger.debug(f"Listener task for task {task_id} already completed")
        return

    # Cancel the listener task
    listener_task.cancel()

    try:
        # Wait for the task to be cancelled with a timeout
        await asyncio.wait_for(listener_task, timeout=5.0)
    except asyncio.CancelledError:
        # Expected - the task was successfully cancelled
        pass
    except asyncio.TimeoutError:
        logger.warning(
            f"Timeout waiting for listener task cancellation for task {task_id}"
        )
    except Exception as e:
        logger.error(f"Error during listener task cancellation for task {task_id}: {e}")

    logger.debug(f"Successfully unsubscribed from task {task_id}")

@@ -9,7 +9,10 @@ from backend.api.features.chat.tracking import track_tool_called
 from .add_understanding import AddUnderstandingTool
 from .agent_output import AgentOutputTool
 from .base import BaseTool
+from .bash_exec import BashExecTool
+from .check_operation_status import CheckOperationStatusTool
 from .create_agent import CreateAgentTool
+from .customize_agent import CustomizeAgentTool
 from .edit_agent import EditAgentTool
 from .find_agent import FindAgentTool
 from .find_block import FindBlockTool
@@ -18,6 +21,7 @@ from .get_doc_page import GetDocPageTool
 from .run_agent import RunAgentTool
 from .run_block import RunBlockTool
 from .search_docs import SearchDocsTool
+from .web_fetch import WebFetchTool
 from .workspace_files import (
     DeleteWorkspaceFileTool,
     ListWorkspaceFilesTool,
@@ -34,6 +38,7 @@ logger = logging.getLogger(__name__)
 TOOL_REGISTRY: dict[str, BaseTool] = {
     "add_understanding": AddUnderstandingTool(),
     "create_agent": CreateAgentTool(),
+    "customize_agent": CustomizeAgentTool(),
     "edit_agent": EditAgentTool(),
     "find_agent": FindAgentTool(),
     "find_block": FindBlockTool(),
@@ -41,9 +46,14 @@ TOOL_REGISTRY: dict[str, BaseTool] = {
     "run_agent": RunAgentTool(),
     "run_block": RunBlockTool(),
     "view_agent_output": AgentOutputTool(),
+    "check_operation_status": CheckOperationStatusTool(),
     "search_docs": SearchDocsTool(),
     "get_doc_page": GetDocPageTool(),
-    # Workspace tools for CoPilot file operations
+    # Web fetch for safe URL retrieval
+    "web_fetch": WebFetchTool(),
+    # Sandboxed code execution (bubblewrap)
+    "bash_exec": BashExecTool(),
+    # Persistent workspace tools (cloud storage, survives across sessions)
     "list_workspace_files": ListWorkspaceFilesTool(),
     "read_workspace_file": ReadWorkspaceFileTool(),
     "write_workspace_file": WriteWorkspaceFileTool(),
@@ -2,30 +2,58 @@
 
 from .core import (
     AgentGeneratorNotConfiguredError,
+    AgentJsonValidationError,
+    AgentSummary,
+    DecompositionResult,
+    DecompositionStep,
+    LibraryAgentSummary,
+    MarketplaceAgentSummary,
+    customize_template,
     decompose_goal,
+    enrich_library_agents_from_steps,
+    extract_search_terms_from_steps,
+    extract_uuids_from_text,
     generate_agent,
     generate_agent_patch,
     get_agent_as_json,
+    get_all_relevant_agents_for_generation,
+    get_library_agent_by_graph_id,
+    get_library_agent_by_id,
+    get_library_agents_for_generation,
+    graph_to_json,
     json_to_graph,
     save_agent_to_library,
+    search_marketplace_agents_for_generation,
 )
 from .errors import get_user_message_for_error
 from .service import health_check as check_external_service_health
 from .service import is_external_service_configured
 
 __all__ = [
-    # Core functions
+    "AgentGeneratorNotConfiguredError",
+    "AgentJsonValidationError",
+    "AgentSummary",
+    "DecompositionResult",
+    "DecompositionStep",
+    "LibraryAgentSummary",
+    "MarketplaceAgentSummary",
+    "check_external_service_health",
+    "customize_template",
     "decompose_goal",
+    "enrich_library_agents_from_steps",
+    "extract_search_terms_from_steps",
+    "extract_uuids_from_text",
    "generate_agent",
     "generate_agent_patch",
-    "save_agent_to_library",
     "get_agent_as_json",
-    "json_to_graph",
-    # Exceptions
-    "AgentGeneratorNotConfiguredError",
-    # Service
-    "is_external_service_configured",
-    "check_external_service_health",
-    # Error handling
+    "get_all_relevant_agents_for_generation",
+    "get_library_agent_by_graph_id",
+    "get_library_agent_by_id",
+    "get_library_agents_for_generation",
     "get_user_message_for_error",
+    "graph_to_json",
+    "is_external_service_configured",
+    "json_to_graph",
+    "save_agent_to_library",
+    "search_marketplace_agents_for_generation",
 ]
@@ -1,13 +1,17 @@
 """Core agent generation functions."""
 
 import logging
+import re
 import uuid
-from typing import Any
+from typing import Any, NotRequired, TypedDict
 
 from backend.api.features.library import db as library_db
-from backend.data.graph import Graph, Link, Node, create_graph
+from backend.api.features.store import db as store_db
+from backend.data.graph import Graph, Link, Node, get_graph, get_store_listed_graphs
+from backend.util.exceptions import DatabaseError, NotFoundError
 
 from .service import (
+    customize_template_external,
     decompose_goal_external,
     generate_agent_external,
     generate_agent_patch_external,
@@ -17,6 +21,72 @@ from .service import (
 logger = logging.getLogger(__name__)
 
 
+class ExecutionSummary(TypedDict):
+    """Summary of a single execution for quality assessment."""
+
+    status: str
+    correctness_score: NotRequired[float]
+    activity_summary: NotRequired[str]
+
+
+class LibraryAgentSummary(TypedDict):
+    """Summary of a library agent for sub-agent composition.
+
+    Includes recent executions to help the LLM decide whether to use this agent.
+    Each execution shows status, correctness_score (0-1), and activity_summary.
+    """
+
+    graph_id: str
+    graph_version: int
+    name: str
+    description: str
+    input_schema: dict[str, Any]
+    output_schema: dict[str, Any]
+    recent_executions: NotRequired[list[ExecutionSummary]]
+
+
+class MarketplaceAgentSummary(TypedDict):
+    """Summary of a marketplace agent for sub-agent composition."""
+
+    name: str
+    description: str
+    sub_heading: str
+    creator: str
+    is_marketplace_agent: bool
+
+
+class DecompositionStep(TypedDict, total=False):
+    """A single step in decomposed instructions."""
+
+    description: str
+    action: str
+    block_name: str
+    tool: str
+    name: str
+
+
+class DecompositionResult(TypedDict, total=False):
+    """Result from decompose_goal - can be instructions, questions, or error."""
+
+    type: str
+    steps: list[DecompositionStep]
+    questions: list[dict[str, Any]]
+    error: str
+    error_type: str
+
+
+AgentSummary = LibraryAgentSummary | MarketplaceAgentSummary | dict[str, Any]
+
+
+def _to_dict_list(
+    agents: list[AgentSummary] | list[dict[str, Any]] | None,
+) -> list[dict[str, Any]] | None:
+    """Convert typed agent summaries to plain dicts for external service calls."""
+    if agents is None:
+        return None
+    return [dict(a) for a in agents]
+
+
 class AgentGeneratorNotConfiguredError(Exception):
     """Raised when the external Agent Generator service is not configured."""
 
@@ -36,15 +106,422 @@ def _check_service_configured() -> None:
     )
 
 
-async def decompose_goal(description: str, context: str = "") -> dict[str, Any] | None:
+_UUID_PATTERN = re.compile(
+    r"[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}",
+    re.IGNORECASE,
+)
+
+
+def extract_uuids_from_text(text: str) -> list[str]:
+    """Extract all UUID v4 strings from text.
+
+    Args:
+        text: Text that may contain UUIDs (e.g., user's goal description)
+
+    Returns:
+        List of unique UUIDs found in the text (lowercase)
+    """
+    matches = _UUID_PATTERN.findall(text)
+    return list({m.lower() for m in matches})
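A quick illustration of the helper above (the UUID is an arbitrary, made-up value):

goal = "Use agent 123e4567-e89b-42d3-a456-426614174000 to summarize my inbox"
extract_uuids_from_text(goal)
# -> ["123e4567-e89b-42d3-a456-426614174000"]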
async def get_library_agent_by_id(
|
||||||
|
user_id: str, agent_id: str
|
||||||
|
) -> LibraryAgentSummary | None:
|
||||||
|
"""Fetch a specific library agent by its ID (library agent ID or graph_id).
|
||||||
|
|
||||||
|
This function tries multiple lookup strategies:
|
||||||
|
1. First tries to find by graph_id (AgentGraph primary key)
|
||||||
|
2. If not found, tries to find by library agent ID (LibraryAgent primary key)
|
||||||
|
|
||||||
|
This handles both cases:
|
||||||
|
- User provides graph_id (e.g., from AgentExecutorBlock)
|
||||||
|
- User provides library agent ID (e.g., from library URL)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: The user ID
|
||||||
|
agent_id: The ID to look up (can be graph_id or library agent ID)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LibraryAgentSummary if found, None otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id)
|
||||||
|
if agent:
|
||||||
|
logger.debug(f"Found library agent by graph_id: {agent.name}")
|
||||||
|
return LibraryAgentSummary(
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
graph_version=agent.graph_version,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description,
|
||||||
|
input_schema=agent.input_schema,
|
||||||
|
output_schema=agent.output_schema,
|
||||||
|
)
|
||||||
|
except DatabaseError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Could not fetch library agent by graph_id {agent_id}: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
agent = await library_db.get_library_agent(agent_id, user_id)
|
||||||
|
if agent:
|
||||||
|
logger.debug(f"Found library agent by library_id: {agent.name}")
|
||||||
|
return LibraryAgentSummary(
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
graph_version=agent.graph_version,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description,
|
||||||
|
input_schema=agent.input_schema,
|
||||||
|
output_schema=agent.output_schema,
|
||||||
|
)
|
||||||
|
except NotFoundError:
|
||||||
|
logger.debug(f"Library agent not found by library_id: {agent_id}")
|
||||||
|
except DatabaseError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not fetch library agent by library_id {agent_id}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
get_library_agent_by_graph_id = get_library_agent_by_id
|
||||||
|
|
||||||
|
|
||||||
|
async def get_library_agents_for_generation(
|
||||||
|
user_id: str,
|
||||||
|
search_query: str | None = None,
|
||||||
|
exclude_graph_id: str | None = None,
|
||||||
|
max_results: int = 15,
|
||||||
|
) -> list[LibraryAgentSummary]:
|
||||||
|
"""Fetch user's library agents formatted for Agent Generator.
|
||||||
|
|
||||||
|
Uses search-based fetching to return relevant agents instead of all agents.
|
||||||
|
This is more scalable for users with large libraries.
|
||||||
|
|
||||||
|
Includes recent_executions list to help the LLM assess agent quality:
|
||||||
|
- Each execution has status, correctness_score (0-1), and activity_summary
|
||||||
|
- This gives the LLM concrete examples of recent performance
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: The user ID
|
||||||
|
search_query: Optional search term to find relevant agents (user's goal/description)
|
||||||
|
exclude_graph_id: Optional graph ID to exclude (prevents circular references)
|
||||||
|
max_results: Maximum number of agents to return (default 15)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of LibraryAgentSummary with schemas and recent executions for sub-agent composition
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = await library_db.list_library_agents(
|
||||||
|
user_id=user_id,
|
||||||
|
search_term=search_query,
|
||||||
|
page=1,
|
||||||
|
page_size=max_results,
|
||||||
|
include_executions=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
results: list[LibraryAgentSummary] = []
|
||||||
|
for agent in response.agents:
|
||||||
|
if exclude_graph_id is not None and agent.graph_id == exclude_graph_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
summary = LibraryAgentSummary(
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
graph_version=agent.graph_version,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description,
|
||||||
|
input_schema=agent.input_schema,
|
||||||
|
output_schema=agent.output_schema,
|
||||||
|
)
|
||||||
|
if agent.recent_executions:
|
||||||
|
exec_summaries: list[ExecutionSummary] = []
|
||||||
|
for ex in agent.recent_executions:
|
||||||
|
exec_sum = ExecutionSummary(status=ex.status)
|
||||||
|
if ex.correctness_score is not None:
|
||||||
|
exec_sum["correctness_score"] = ex.correctness_score
|
||||||
|
if ex.activity_summary:
|
||||||
|
exec_sum["activity_summary"] = ex.activity_summary
|
||||||
|
exec_summaries.append(exec_sum)
|
||||||
|
summary["recent_executions"] = exec_summaries
|
||||||
|
results.append(summary)
|
||||||
|
return results
|
||||||
|
except DatabaseError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to fetch library agents: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
async def search_marketplace_agents_for_generation(
|
||||||
|
search_query: str,
|
||||||
|
max_results: int = 10,
|
||||||
|
) -> list[LibraryAgentSummary]:
|
||||||
|
"""Search marketplace agents formatted for Agent Generator.
|
||||||
|
|
||||||
|
Fetches marketplace agents and their full schemas so they can be used
|
||||||
|
as sub-agents in generated workflows.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
search_query: Search term to find relevant public agents
|
||||||
|
max_results: Maximum number of agents to return (default 10)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of LibraryAgentSummary with full input/output schemas
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = await store_db.get_store_agents(
|
||||||
|
search_query=search_query,
|
||||||
|
page=1,
|
||||||
|
page_size=max_results,
|
||||||
|
)
|
||||||
|
|
||||||
|
agents_with_graphs = [
|
||||||
|
agent for agent in response.agents if agent.agent_graph_id
|
||||||
|
]
|
||||||
|
|
||||||
|
if not agents_with_graphs:
|
||||||
|
return []
|
||||||
|
|
||||||
|
graph_ids = [agent.agent_graph_id for agent in agents_with_graphs]
|
||||||
|
graphs = await get_store_listed_graphs(*graph_ids)
|
||||||
|
|
||||||
|
results: list[LibraryAgentSummary] = []
|
||||||
|
for agent in agents_with_graphs:
|
||||||
|
graph_id = agent.agent_graph_id
|
||||||
|
if graph_id and graph_id in graphs:
|
||||||
|
graph = graphs[graph_id]
|
||||||
|
results.append(
|
||||||
|
LibraryAgentSummary(
|
||||||
|
graph_id=graph.id,
|
||||||
|
graph_version=graph.version,
|
||||||
|
name=agent.agent_name,
|
||||||
|
description=agent.description,
|
||||||
|
input_schema=graph.input_schema,
|
||||||
|
output_schema=graph.output_schema,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to search marketplace agents: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
async def get_all_relevant_agents_for_generation(
    user_id: str,
    search_query: str | None = None,
    exclude_graph_id: str | None = None,
    include_library: bool = True,
    include_marketplace: bool = True,
    max_library_results: int = 15,
    max_marketplace_results: int = 10,
) -> list[AgentSummary]:
    """Fetch relevant agents from library and/or marketplace.

    Searches both user's library and marketplace by default.
    Explicitly mentioned UUIDs in the search query are always looked up.

    Args:
        user_id: The user ID
        search_query: Search term to find relevant agents (user's goal/description)
        exclude_graph_id: Optional graph ID to exclude (prevents circular references)
        include_library: Whether to search user's library (default True)
        include_marketplace: Whether to also search marketplace (default True)
        max_library_results: Max library agents to return (default 15)
        max_marketplace_results: Max marketplace agents to return (default 10)

    Returns:
        List of AgentSummary with full schemas (both library and marketplace agents)
    """
    agents: list[AgentSummary] = []
    seen_graph_ids: set[str] = set()

    if search_query:
        mentioned_uuids = extract_uuids_from_text(search_query)
        for graph_id in mentioned_uuids:
            if graph_id == exclude_graph_id:
                continue
            agent = await get_library_agent_by_graph_id(user_id, graph_id)
            agent_graph_id = agent.get("graph_id") if agent else None
            if agent and agent_graph_id and agent_graph_id not in seen_graph_ids:
                agents.append(agent)
                seen_graph_ids.add(agent_graph_id)
                logger.debug(
                    f"Found explicitly mentioned agent: {agent.get('name') or 'Unknown'}"
                )

    if include_library:
        library_agents = await get_library_agents_for_generation(
            user_id=user_id,
            search_query=search_query,
            exclude_graph_id=exclude_graph_id,
            max_results=max_library_results,
        )
        for agent in library_agents:
            graph_id = agent.get("graph_id")
            if graph_id and graph_id not in seen_graph_ids:
                agents.append(agent)
                seen_graph_ids.add(graph_id)

    if include_marketplace and search_query:
        marketplace_agents = await search_marketplace_agents_for_generation(
            search_query=search_query,
            max_results=max_marketplace_results,
        )
        for agent in marketplace_agents:
            graph_id = agent.get("graph_id")
            if graph_id and graph_id not in seen_graph_ids:
                agents.append(agent)
                seen_graph_ids.add(graph_id)

    return agents

def extract_search_terms_from_steps(
    decomposition_result: DecompositionResult | dict[str, Any],
) -> list[str]:
    """Extract search terms from decomposed instruction steps.

    Analyzes the decomposition result to extract relevant keywords
    for additional library agent searches.

    Args:
        decomposition_result: Result from decompose_goal containing steps

    Returns:
        List of unique search terms extracted from steps
    """
    search_terms: list[str] = []

    if decomposition_result.get("type") != "instructions":
        return search_terms

    steps = decomposition_result.get("steps", [])
    if not steps:
        return search_terms

    step_keys: list[str] = ["description", "action", "block_name", "tool", "name"]

    for step in steps:
        for key in step_keys:
            value = step.get(key)  # type: ignore[union-attr]
            if isinstance(value, str) and len(value) > 3:
                search_terms.append(value)

    seen: set[str] = set()
    unique_terms: list[str] = []
    for term in search_terms:
        term_lower = term.lower()
        if term_lower not in seen:
            seen.add(term_lower)
            unique_terms.append(term)

    return unique_terms

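For illustration, a minimal sketch of the kind of decomposition payload this helper walks; the step field names here are assumptions chosen to match the step_keys probed above, not a documented schema.

# Hypothetical decomposition payload; field names mirror step_keys above.
decomposition = {
    "type": "instructions",
    "steps": [
        {"description": "Fetch new RSS items", "block_name": "RSSReaderBlock"},
        {"description": "Summarize each item", "tool": "llm_call"},
    ],
}
terms = extract_search_terms_from_steps(decomposition)
# -> ["Fetch new RSS items", "RSSReaderBlock", "Summarize each item", "llm_call"]
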
async def enrich_library_agents_from_steps(
    user_id: str,
    decomposition_result: DecompositionResult | dict[str, Any],
    existing_agents: list[AgentSummary] | list[dict[str, Any]],
    exclude_graph_id: str | None = None,
    include_marketplace: bool = True,
    max_additional_results: int = 10,
) -> list[AgentSummary] | list[dict[str, Any]]:
    """Enrich library agents list with additional searches based on decomposed steps.

    This implements two-phase search: after decomposition, we search for additional
    relevant agents based on the specific steps identified.

    Args:
        user_id: The user ID
        decomposition_result: Result from decompose_goal containing steps
        existing_agents: Already fetched library agents from initial search
        exclude_graph_id: Optional graph ID to exclude
        include_marketplace: Whether to also search marketplace
        max_additional_results: Max additional agents per search term (default 10)

    Returns:
        Combined list of library agents (existing + newly discovered)
    """
    search_terms = extract_search_terms_from_steps(decomposition_result)

    if not search_terms:
        return existing_agents

    existing_ids: set[str] = set()
    existing_names: set[str] = set()

    for agent in existing_agents:
        agent_name = agent.get("name")
        if agent_name and isinstance(agent_name, str):
            existing_names.add(agent_name.lower())
        graph_id = agent.get("graph_id")  # type: ignore[call-overload]
        if graph_id and isinstance(graph_id, str):
            existing_ids.add(graph_id)

    all_agents: list[AgentSummary] | list[dict[str, Any]] = list(existing_agents)

    for term in search_terms[:3]:
        try:
            additional_agents = await get_all_relevant_agents_for_generation(
                user_id=user_id,
                search_query=term,
                exclude_graph_id=exclude_graph_id,
                include_marketplace=include_marketplace,
                max_library_results=max_additional_results,
                max_marketplace_results=5,
            )

            for agent in additional_agents:
                agent_name = agent.get("name")
                if not agent_name or not isinstance(agent_name, str):
                    continue
                agent_name_lower = agent_name.lower()

                if agent_name_lower in existing_names:
                    continue

                graph_id = agent.get("graph_id")  # type: ignore[call-overload]
                if graph_id and graph_id in existing_ids:
                    continue

                all_agents.append(agent)
                existing_names.add(agent_name_lower)
                if graph_id and isinstance(graph_id, str):
                    existing_ids.add(graph_id)

        except DatabaseError:
            logger.error(f"Database error searching for agents with term '{term}'")
            raise
        except Exception as e:
            logger.warning(
                f"Failed to search for additional agents with term '{term}': {e}"
            )

    logger.debug(
        f"Enriched library agents: {len(existing_agents)} initial + "
        f"{len(all_agents) - len(existing_agents)} additional = {len(all_agents)} total"
    )

    return all_agents

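A rough sketch of the two-phase flow described above, assuming an async caller; the user ID and goal text are placeholders, and decompose_goal is the wrapper defined just below.

# Hypothetical wiring of the two-phase agent search; values are placeholders.
async def _example_two_phase_search() -> None:
    goal = "Send me a daily summary of my unread emails"
    initial = await get_all_relevant_agents_for_generation(
        user_id="user-123", search_query=goal
    )
    decomposition = await decompose_goal(goal, library_agents=initial)
    if decomposition and decomposition.get("type") == "instructions":
        await enrich_library_agents_from_steps(
            user_id="user-123",
            decomposition_result=decomposition,
            existing_agents=initial,
        )
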
async def decompose_goal(
    description: str,
    context: str = "",
    library_agents: list[AgentSummary] | None = None,
) -> DecompositionResult | None:
    """Break down a goal into steps or return clarifying questions.

    Args:
        description: Natural language goal description
        context: Additional context (e.g., answers to previous questions)
        library_agents: User's library agents available for sub-agent composition

    Returns:
        DecompositionResult with either:
        - {"type": "clarifying_questions", "questions": [...]}
        - {"type": "instructions", "steps": [...]}
        Or None on error
@@ -54,29 +531,47 @@ async def decompose_goal(description: str, context: str = "") -> dict[str, Any]
    """
    _check_service_configured()
    logger.info("Calling external Agent Generator service for decompose_goal")
    result = await decompose_goal_external(
        description, context, _to_dict_list(library_agents)
    )
    return result  # type: ignore[return-value]

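A minimal sketch of how a caller might branch on the decomposition result, based on the Returns description above; the goal text is a placeholder and only the two documented result types are handled.

# Placeholder goal; result keys follow the Returns section of decompose_goal.
async def _example_decompose() -> None:
    result = await decompose_goal("Summarize my unread emails every morning")
    if result is None:
        return  # service error; nothing to do
    if result.get("type") == "clarifying_questions":
        for question in result.get("questions", []):
            print(question)
    elif result.get("type") == "instructions":
        print(f"{len(result.get('steps', []))} steps planned")
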
async def generate_agent(
    instructions: DecompositionResult | dict[str, Any],
    library_agents: list[AgentSummary] | list[dict[str, Any]] | None = None,
    operation_id: str | None = None,
    task_id: str | None = None,
) -> dict[str, Any] | None:
    """Generate agent JSON from instructions.

    Args:
        instructions: Structured instructions from decompose_goal
        library_agents: User's library agents available for sub-agent composition
        operation_id: Operation ID for async processing (enables Redis Streams
            completion notification)
        task_id: Task ID for async processing (enables Redis Streams persistence
            and SSE delivery)

    Returns:
        Agent JSON dict, {"status": "accepted"} for async, error dict
        {"type": "error", ...}, or None on error

    Raises:
        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
    _check_service_configured()
    logger.info("Calling external Agent Generator service for generate_agent")
    result = await generate_agent_external(
        dict(instructions), _to_dict_list(library_agents), operation_id, task_id
    )

    # Don't modify async response
    if result and result.get("status") == "accepted":
        return result

    if result:
        if isinstance(result, dict) and result.get("type") == "error":
            return result
        if "id" not in result:
            result["id"] = str(uuid.uuid4())
        if "version" not in result:
@@ -86,6 +581,12 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:
    return result


class AgentJsonValidationError(Exception):
    """Raised when agent JSON is invalid or missing required fields."""

    pass

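A rough sketch of handling the result shapes listed in the docstring above; the IDs are hypothetical, and the async "accepted" branch assumes the Redis Streams flow the parameters describe.

# Hypothetical caller; handles sync result, async acceptance, and error dicts.
async def _example_generate(decomposition, relevant_agents) -> None:
    result = await generate_agent(
        instructions=decomposition,
        library_agents=relevant_agents,
        operation_id="op-abc",  # placeholder IDs for the async path
        task_id="task-xyz",
    )
    if result is None:
        print("generation failed unexpectedly")
    elif result.get("status") == "accepted":
        print("still running; check_operation_status can poll for completion")
    elif result.get("type") == "error":
        print(result.get("error", "unknown error"))
    else:
        # json_to_graph is defined below; may raise AgentJsonValidationError
        graph = json_to_graph(result)
        print(f"generated graph {graph.id} v{graph.version}")
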
def json_to_graph(agent_json: dict[str, Any]) -> Graph:
|
def json_to_graph(agent_json: dict[str, Any]) -> Graph:
|
||||||
"""Convert agent JSON dict to Graph model.
|
"""Convert agent JSON dict to Graph model.
|
||||||
|
|
||||||
@@ -94,25 +595,55 @@ def json_to_graph(agent_json: dict[str, Any]) -> Graph:
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Graph ready for saving
|
Graph ready for saving
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AgentJsonValidationError: If required fields are missing from nodes or links
|
||||||
"""
|
"""
|
||||||
nodes = []
|
nodes = []
|
||||||
for n in agent_json.get("nodes", []):
|
for idx, n in enumerate(agent_json.get("nodes", [])):
|
||||||
|
block_id = n.get("block_id")
|
||||||
|
if not block_id:
|
||||||
|
node_id = n.get("id", f"index_{idx}")
|
||||||
|
raise AgentJsonValidationError(
|
||||||
|
f"Node '{node_id}' is missing required field 'block_id'"
|
||||||
|
)
|
||||||
node = Node(
|
node = Node(
|
||||||
id=n.get("id", str(uuid.uuid4())),
|
id=n.get("id", str(uuid.uuid4())),
|
||||||
block_id=n["block_id"],
|
block_id=block_id,
|
||||||
input_default=n.get("input_default", {}),
|
input_default=n.get("input_default", {}),
|
||||||
metadata=n.get("metadata", {}),
|
metadata=n.get("metadata", {}),
|
||||||
)
|
)
|
||||||
nodes.append(node)
|
nodes.append(node)
|
||||||
|
|
||||||
links = []
|
links = []
|
||||||
for link_data in agent_json.get("links", []):
|
for idx, link_data in enumerate(agent_json.get("links", [])):
|
||||||
|
source_id = link_data.get("source_id")
|
||||||
|
sink_id = link_data.get("sink_id")
|
||||||
|
source_name = link_data.get("source_name")
|
||||||
|
sink_name = link_data.get("sink_name")
|
||||||
|
|
||||||
|
missing_fields = []
|
||||||
|
if not source_id:
|
||||||
|
missing_fields.append("source_id")
|
||||||
|
if not sink_id:
|
||||||
|
missing_fields.append("sink_id")
|
||||||
|
if not source_name:
|
||||||
|
missing_fields.append("source_name")
|
||||||
|
if not sink_name:
|
||||||
|
missing_fields.append("sink_name")
|
||||||
|
|
||||||
|
if missing_fields:
|
||||||
|
link_id = link_data.get("id", f"index_{idx}")
|
||||||
|
raise AgentJsonValidationError(
|
||||||
|
f"Link '{link_id}' is missing required fields: {', '.join(missing_fields)}"
|
||||||
|
)
|
||||||
|
|
||||||
link = Link(
|
link = Link(
|
||||||
id=link_data.get("id", str(uuid.uuid4())),
|
id=link_data.get("id", str(uuid.uuid4())),
|
||||||
source_id=link_data["source_id"],
|
source_id=source_id,
|
||||||
sink_id=link_data["sink_id"],
|
sink_id=sink_id,
|
||||||
source_name=link_data["source_name"],
|
source_name=source_name,
|
||||||
sink_name=link_data["sink_name"],
|
sink_name=sink_name,
|
||||||
is_static=link_data.get("is_static", False),
|
is_static=link_data.get("is_static", False),
|
||||||
)
|
)
|
||||||
links.append(link)
|
links.append(link)
|
||||||
@@ -128,27 +659,6 @@ def json_to_graph(agent_json: dict[str, Any]) -> Graph:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _reassign_node_ids(graph: Graph) -> None:
|
|
||||||
"""Reassign all node and link IDs to new UUIDs.
|
|
||||||
|
|
||||||
This is needed when creating a new version to avoid unique constraint violations.
|
|
||||||
"""
|
|
||||||
# Create mapping from old node IDs to new UUIDs
|
|
||||||
id_map = {node.id: str(uuid.uuid4()) for node in graph.nodes}
|
|
||||||
|
|
||||||
# Reassign node IDs
|
|
||||||
for node in graph.nodes:
|
|
||||||
node.id = id_map[node.id]
|
|
||||||
|
|
||||||
# Update link references to use new node IDs
|
|
||||||
for link in graph.links:
|
|
||||||
link.id = str(uuid.uuid4()) # Also give links new IDs
|
|
||||||
if link.source_id in id_map:
|
|
||||||
link.source_id = id_map[link.source_id]
|
|
||||||
if link.sink_id in id_map:
|
|
||||||
link.sink_id = id_map[link.sink_id]
|
|
||||||
|
|
||||||
|
|
||||||
async def save_agent_to_library(
|
async def save_agent_to_library(
|
||||||
agent_json: dict[str, Any], user_id: str, is_update: bool = False
|
agent_json: dict[str, Any], user_id: str, is_update: bool = False
|
||||||
) -> tuple[Graph, Any]:
|
) -> tuple[Graph, Any]:
|
||||||
@@ -162,63 +672,21 @@ async def save_agent_to_library(
|
|||||||
Returns:
|
Returns:
|
||||||
Tuple of (created Graph, LibraryAgent)
|
Tuple of (created Graph, LibraryAgent)
|
||||||
"""
|
"""
|
||||||
from backend.data.graph import get_graph_all_versions
|
|
||||||
|
|
||||||
graph = json_to_graph(agent_json)
|
graph = json_to_graph(agent_json)
|
||||||
|
|
||||||
if is_update:
|
if is_update:
|
||||||
# For updates, keep the same graph ID but increment version
|
return await library_db.update_graph_in_library(graph, user_id)
|
||||||
# and reassign node/link IDs to avoid conflicts
|
return await library_db.create_graph_in_library(graph, user_id)
|
||||||
if graph.id:
|
|
||||||
existing_versions = await get_graph_all_versions(graph.id, user_id)
|
|
||||||
if existing_versions:
|
|
||||||
latest_version = max(v.version for v in existing_versions)
|
|
||||||
graph.version = latest_version + 1
|
|
||||||
# Reassign node IDs (but keep graph ID the same)
|
|
||||||
_reassign_node_ids(graph)
|
|
||||||
logger.info(f"Updating agent {graph.id} to version {graph.version}")
|
|
||||||
else:
|
|
||||||
# For new agents, always generate a fresh UUID to avoid collisions
|
|
||||||
graph.id = str(uuid.uuid4())
|
|
||||||
graph.version = 1
|
|
||||||
# Reassign all node IDs as well
|
|
||||||
_reassign_node_ids(graph)
|
|
||||||
logger.info(f"Creating new agent with ID {graph.id}")
|
|
||||||
|
|
||||||
# Save to database
|
|
||||||
created_graph = await create_graph(graph, user_id)
|
|
||||||
|
|
||||||
# Add to user's library (or update existing library agent)
|
|
||||||
library_agents = await library_db.create_library_agent(
|
|
||||||
graph=created_graph,
|
|
||||||
user_id=user_id,
|
|
||||||
sensitive_action_safe_mode=True,
|
|
||||||
create_library_agents_for_sub_graphs=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
return created_graph, library_agents[0]
|
|
||||||
|
|
||||||
|
|
||||||
async def get_agent_as_json(
|
def graph_to_json(graph: Graph) -> dict[str, Any]:
|
||||||
graph_id: str, user_id: str | None
|
"""Convert a Graph object to JSON format for the agent generator.
|
||||||
) -> dict[str, Any] | None:
|
|
||||||
"""Fetch an agent and convert to JSON format for editing.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
graph_id: Graph ID or library agent ID
|
graph: Graph object to convert
|
||||||
user_id: User ID
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Agent as JSON dict or None if not found
|
Agent as JSON dict
|
||||||
"""
|
"""
|
||||||
from backend.data.graph import get_graph
|
|
||||||
|
|
||||||
# Try to get the graph (version=None gets the active version)
|
|
||||||
graph = await get_graph(graph_id, version=None, user_id=user_id)
|
|
||||||
if not graph:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Convert to JSON format
|
|
||||||
nodes = []
|
nodes = []
|
||||||
for node in graph.nodes:
|
for node in graph.nodes:
|
||||||
nodes.append(
|
nodes.append(
|
||||||
@@ -255,8 +723,41 @@ async def get_agent_as_json(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def get_agent_as_json(
|
||||||
|
agent_id: str, user_id: str | None
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
"""Fetch an agent and convert to JSON format for editing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
agent_id: Graph ID or library agent ID
|
||||||
|
user_id: User ID
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Agent as JSON dict or None if not found
|
||||||
|
"""
|
||||||
|
graph = await get_graph(agent_id, version=None, user_id=user_id)
|
||||||
|
|
||||||
|
if not graph and user_id:
|
||||||
|
try:
|
||||||
|
library_agent = await library_db.get_library_agent(agent_id, user_id)
|
||||||
|
graph = await get_graph(
|
||||||
|
library_agent.graph_id, version=None, user_id=user_id
|
||||||
|
)
|
||||||
|
except NotFoundError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not graph:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return graph_to_json(graph)
|
||||||
|
|
||||||
|
|
||||||
async def generate_agent_patch(
|
async def generate_agent_patch(
|
||||||
update_request: str, current_agent: dict[str, Any]
|
update_request: str,
|
||||||
|
current_agent: dict[str, Any],
|
||||||
|
library_agents: list[AgentSummary] | None = None,
|
||||||
|
operation_id: str | None = None,
|
||||||
|
task_id: str | None = None,
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
"""Update an existing agent using natural language.
|
"""Update an existing agent using natural language.
|
||||||
|
|
||||||
@@ -268,14 +769,57 @@ async def generate_agent_patch(
|
|||||||
Args:
|
Args:
|
||||||
update_request: Natural language description of changes
|
update_request: Natural language description of changes
|
||||||
current_agent: Current agent JSON
|
current_agent: Current agent JSON
|
||||||
|
library_agents: User's library agents available for sub-agent composition
|
||||||
|
operation_id: Operation ID for async processing (enables Redis Streams callback)
|
||||||
|
task_id: Task ID for async processing (enables Redis Streams callback)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Updated agent JSON, clarifying questions dict {"type": "clarifying_questions", ...},
|
Updated agent JSON, clarifying questions dict {"type": "clarifying_questions", ...},
|
||||||
error dict {"type": "error", ...}, or None on unexpected error
|
{"status": "accepted"} for async, error dict {"type": "error", ...}, or None on error
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
AgentGeneratorNotConfiguredError: If the external service is not configured.
|
AgentGeneratorNotConfiguredError: If the external service is not configured.
|
||||||
"""
|
"""
|
||||||
_check_service_configured()
|
_check_service_configured()
|
||||||
logger.info("Calling external Agent Generator service for generate_agent_patch")
|
logger.info("Calling external Agent Generator service for generate_agent_patch")
|
||||||
return await generate_agent_patch_external(update_request, current_agent)
|
return await generate_agent_patch_external(
|
||||||
|
update_request,
|
||||||
|
current_agent,
|
||||||
|
_to_dict_list(library_agents),
|
||||||
|
operation_id,
|
||||||
|
task_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def customize_template(
|
||||||
|
template_agent: dict[str, Any],
|
||||||
|
modification_request: str,
|
||||||
|
context: str = "",
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
"""Customize a template/marketplace agent using natural language.
|
||||||
|
|
||||||
|
This is used when users want to modify a template or marketplace agent
|
||||||
|
to fit their specific needs before adding it to their library.
|
||||||
|
|
||||||
|
The external Agent Generator service handles:
|
||||||
|
- Understanding the modification request
|
||||||
|
- Applying changes to the template
|
||||||
|
- Fixing and validating the result
|
||||||
|
|
||||||
|
Args:
|
||||||
|
template_agent: The template agent JSON to customize
|
||||||
|
modification_request: Natural language description of customizations
|
||||||
|
context: Additional context (e.g., answers to previous questions)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Customized agent JSON, clarifying questions dict {"type": "clarifying_questions", ...},
|
||||||
|
error dict {"type": "error", ...}, or None on unexpected error
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AgentGeneratorNotConfiguredError: If the external service is not configured.
|
||||||
|
"""
|
||||||
|
_check_service_configured()
|
||||||
|
logger.info("Calling external Agent Generator service for customize_template")
|
||||||
|
return await customize_template_external(
|
||||||
|
template_agent, modification_request, context
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,11 +1,43 @@
"""Error handling utilities for agent generator."""

import re


def _sanitize_error_details(details: str) -> str:
    """Sanitize error details to remove sensitive information.

    Strips common patterns that could expose internal system info:
    - File paths (Unix and Windows)
    - Database connection strings
    - URLs with credentials
    - Stack trace internals

    Args:
        details: Raw error details string

    Returns:
        Sanitized error details safe for user display
    """
    sanitized = re.sub(
        r"/[a-zA-Z0-9_./\-]+\.(py|js|ts|json|yaml|yml)", "[path]", details
    )
    sanitized = re.sub(r"[A-Z]:\\[a-zA-Z0-9_\\.\\-]+", "[path]", sanitized)
    sanitized = re.sub(
        r"(postgres|mysql|mongodb|redis)://[^\s]+", "[database_url]", sanitized
    )
    sanitized = re.sub(r"https?://[^:]+:[^@]+@[^\s]+", "[url]", sanitized)
    sanitized = re.sub(r", line \d+", "", sanitized)
    sanitized = re.sub(r'File "[^"]+",?', "", sanitized)

    return sanitized.strip()


def get_user_message_for_error(
    error_type: str,
    operation: str = "process the request",
    llm_parse_message: str | None = None,
    validation_message: str | None = None,
    error_details: str | None = None,
) -> str:
    """Get a user-friendly error message based on error type.

@@ -19,25 +51,45 @@ def get_user_message_for_error(
            message (e.g., "analyze the goal", "generate the agent")
        llm_parse_message: Custom message for llm_parse_error type
        validation_message: Custom message for validation_error type
        error_details: Optional additional details about the error

    Returns:
        User-friendly error message suitable for display to the user
    """
    base_message = ""

    if error_type == "llm_parse_error":
        base_message = (
            llm_parse_message
            or "The AI had trouble processing this request. Please try again."
        )
    elif error_type == "validation_error":
        base_message = (
            validation_message
            or "The generated agent failed validation. "
            "This usually happens when the agent structure doesn't match "
            "what the platform expects. Please try simplifying your goal "
            "or breaking it into smaller parts."
        )
    elif error_type == "patch_error":
        base_message = (
            "Failed to apply the changes. The modification couldn't be "
            "validated. Please try a different approach or simplify the change."
        )
    elif error_type in ("timeout", "llm_timeout"):
        base_message = (
            "The request took too long to process. This can happen with "
            "complex agents. Please try again or simplify your goal."
        )
    elif error_type in ("rate_limit", "llm_rate_limit"):
        base_message = "The service is currently busy. Please try again in a moment."
    else:
        base_message = f"Failed to {operation}. Please try again."

    if error_details:
        details = _sanitize_error_details(error_details)
        if len(details) > 200:
            details = details[:200] + "..."
        base_message += f"\n\nTechnical details: {details}"

    return base_message

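For illustration, a hedged example of what the fallback branch plus detail sanitization produce for an invented error string; the expected output in the comment follows the regex substitutions above.

# Invented raw error text; the DB URL is redacted by the patterns above.
raw = "could not connect to postgres://user:pass@db:5432/agents"
msg = get_user_message_for_error("unknown", "generate the agent", error_details=raw)
# msg == (
#     "Failed to generate the agent. Please try again.\n\n"
#     "Technical details: could not connect to [database_url]"
# )
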
|||||||
@@ -117,13 +117,16 @@ def _get_client() -> httpx.AsyncClient:
|
|||||||
|
|
||||||
|
|
||||||
async def decompose_goal_external(
|
async def decompose_goal_external(
|
||||||
description: str, context: str = ""
|
description: str,
|
||||||
|
context: str = "",
|
||||||
|
library_agents: list[dict[str, Any]] | None = None,
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
"""Call the external service to decompose a goal.
|
"""Call the external service to decompose a goal.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
description: Natural language goal description
|
description: Natural language goal description
|
||||||
context: Additional context (e.g., answers to previous questions)
|
context: Additional context (e.g., answers to previous questions)
|
||||||
|
library_agents: User's library agents available for sub-agent composition
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with either:
|
Dict with either:
|
||||||
@@ -136,11 +139,12 @@ async def decompose_goal_external(
|
|||||||
"""
|
"""
|
||||||
client = _get_client()
|
client = _get_client()
|
||||||
|
|
||||||
# Build the request payload
|
|
||||||
payload: dict[str, Any] = {"description": description}
|
|
||||||
if context:
|
if context:
|
||||||
# The external service uses user_instruction for additional context
|
description = f"{description}\n\nAdditional context from user:\n{context}"
|
||||||
payload["user_instruction"] = context
|
|
||||||
|
payload: dict[str, Any] = {"description": description}
|
||||||
|
if library_agents:
|
||||||
|
payload["library_agents"] = library_agents
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.post("/api/decompose-description", json=payload)
|
response = await client.post("/api/decompose-description", json=payload)
|
||||||
@@ -207,21 +211,46 @@ async def decompose_goal_external(
|
|||||||
|
|
||||||
async def generate_agent_external(
|
async def generate_agent_external(
|
||||||
instructions: dict[str, Any],
|
instructions: dict[str, Any],
|
||||||
|
library_agents: list[dict[str, Any]] | None = None,
|
||||||
|
operation_id: str | None = None,
|
||||||
|
task_id: str | None = None,
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
"""Call the external service to generate an agent from instructions.
|
"""Call the external service to generate an agent from instructions.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
instructions: Structured instructions from decompose_goal
|
instructions: Structured instructions from decompose_goal
|
||||||
|
library_agents: User's library agents available for sub-agent composition
|
||||||
|
operation_id: Operation ID for async processing (enables Redis Streams callback)
|
||||||
|
task_id: Task ID for async processing (enables Redis Streams callback)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Agent JSON dict on success, or error dict {"type": "error", ...} on error
|
Agent JSON dict, {"status": "accepted"} for async, or error dict {"type": "error", ...} on error
|
||||||
"""
|
"""
|
||||||
client = _get_client()
|
client = _get_client()
|
||||||
|
|
||||||
|
# Build request payload
|
||||||
|
payload: dict[str, Any] = {"instructions": instructions}
|
||||||
|
if library_agents:
|
||||||
|
payload["library_agents"] = library_agents
|
||||||
|
if operation_id and task_id:
|
||||||
|
payload["operation_id"] = operation_id
|
||||||
|
payload["task_id"] = task_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.post(
|
response = await client.post("/api/generate-agent", json=payload)
|
||||||
"/api/generate-agent", json={"instructions": instructions}
|
|
||||||
)
|
# Handle 202 Accepted for async processing
|
||||||
|
if response.status_code == 202:
|
||||||
|
logger.info(
|
||||||
|
f"Agent Generator accepted async request "
|
||||||
|
f"(operation_id={operation_id}, task_id={task_id})"
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "accepted",
|
||||||
|
"operation_id": operation_id,
|
||||||
|
"task_id": task_id,
|
||||||
|
}
|
||||||
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
@@ -229,8 +258,7 @@ async def generate_agent_external(
|
|||||||
error_msg = data.get("error", "Unknown error from Agent Generator")
|
error_msg = data.get("error", "Unknown error from Agent Generator")
|
||||||
error_type = data.get("error_type", "unknown")
|
error_type = data.get("error_type", "unknown")
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Agent Generator generation failed: {error_msg} "
|
f"Agent Generator generation failed: {error_msg} (type: {error_type})"
|
||||||
f"(type: {error_type})"
|
|
||||||
)
|
)
|
||||||
return _create_error_response(error_msg, error_type)
|
return _create_error_response(error_msg, error_type)
|
||||||
|
|
||||||
@@ -251,27 +279,52 @@ async def generate_agent_external(
|
|||||||
|
|
||||||
|
|
||||||
async def generate_agent_patch_external(
|
async def generate_agent_patch_external(
|
||||||
update_request: str, current_agent: dict[str, Any]
|
update_request: str,
|
||||||
|
current_agent: dict[str, Any],
|
||||||
|
library_agents: list[dict[str, Any]] | None = None,
|
||||||
|
operation_id: str | None = None,
|
||||||
|
task_id: str | None = None,
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
"""Call the external service to generate a patch for an existing agent.
|
"""Call the external service to generate a patch for an existing agent.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
update_request: Natural language description of changes
|
update_request: Natural language description of changes
|
||||||
current_agent: Current agent JSON
|
current_agent: Current agent JSON
|
||||||
|
library_agents: User's library agents available for sub-agent composition
|
||||||
|
operation_id: Operation ID for async processing (enables Redis Streams callback)
|
||||||
|
task_id: Task ID for async processing (enables Redis Streams callback)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Updated agent JSON, clarifying questions dict, or error dict on error
|
Updated agent JSON, clarifying questions dict, {"status": "accepted"} for async, or error dict on error
|
||||||
"""
|
"""
|
||||||
client = _get_client()
|
client = _get_client()
|
||||||
|
|
||||||
|
# Build request payload
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"update_request": update_request,
|
||||||
|
"current_agent_json": current_agent,
|
||||||
|
}
|
||||||
|
if library_agents:
|
||||||
|
payload["library_agents"] = library_agents
|
||||||
|
if operation_id and task_id:
|
||||||
|
payload["operation_id"] = operation_id
|
||||||
|
payload["task_id"] = task_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await client.post(
|
response = await client.post("/api/update-agent", json=payload)
|
||||||
"/api/update-agent",
|
|
||||||
json={
|
# Handle 202 Accepted for async processing
|
||||||
"update_request": update_request,
|
if response.status_code == 202:
|
||||||
"current_agent_json": current_agent,
|
logger.info(
|
||||||
},
|
f"Agent Generator accepted async update request "
|
||||||
)
|
f"(operation_id={operation_id}, task_id={task_id})"
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "accepted",
|
||||||
|
"operation_id": operation_id,
|
||||||
|
"task_id": task_id,
|
||||||
|
}
|
||||||
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
@@ -315,6 +368,77 @@ async def generate_agent_patch_external(
|
|||||||
return _create_error_response(error_msg, "unexpected_error")
|
return _create_error_response(error_msg, "unexpected_error")
|
||||||
|
|
||||||
|
|
||||||
|
async def customize_template_external(
|
||||||
|
template_agent: dict[str, Any],
|
||||||
|
modification_request: str,
|
||||||
|
context: str = "",
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
"""Call the external service to customize a template/marketplace agent.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
template_agent: The template agent JSON to customize
|
||||||
|
modification_request: Natural language description of customizations
|
||||||
|
context: Additional context (e.g., answers to previous questions)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Customized agent JSON, clarifying questions dict, or error dict on error
|
||||||
|
"""
|
||||||
|
client = _get_client()
|
||||||
|
|
||||||
|
request = modification_request
|
||||||
|
if context:
|
||||||
|
request = f"{modification_request}\n\nAdditional context from user:\n{context}"
|
||||||
|
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"template_agent_json": template_agent,
|
||||||
|
"modification_request": request,
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await client.post("/api/template-modification", json=payload)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
if not data.get("success"):
|
||||||
|
error_msg = data.get("error", "Unknown error from Agent Generator")
|
||||||
|
error_type = data.get("error_type", "unknown")
|
||||||
|
logger.error(
|
||||||
|
f"Agent Generator template customization failed: {error_msg} "
|
||||||
|
f"(type: {error_type})"
|
||||||
|
)
|
||||||
|
return _create_error_response(error_msg, error_type)
|
||||||
|
|
||||||
|
# Check if it's clarifying questions
|
||||||
|
if data.get("type") == "clarifying_questions":
|
||||||
|
return {
|
||||||
|
"type": "clarifying_questions",
|
||||||
|
"questions": data.get("questions", []),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if it's an error passed through
|
||||||
|
if data.get("type") == "error":
|
||||||
|
return _create_error_response(
|
||||||
|
data.get("error", "Unknown error"),
|
||||||
|
data.get("error_type", "unknown"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Otherwise return the customized agent JSON
|
||||||
|
return data.get("agent_json")
|
||||||
|
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
error_type, error_msg = _classify_http_error(e)
|
||||||
|
logger.error(error_msg)
|
||||||
|
return _create_error_response(error_msg, error_type)
|
||||||
|
except httpx.RequestError as e:
|
||||||
|
error_type, error_msg = _classify_request_error(e)
|
||||||
|
logger.error(error_msg)
|
||||||
|
return _create_error_response(error_msg, error_type)
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Unexpected error calling Agent Generator: {e}"
|
||||||
|
logger.error(error_msg)
|
||||||
|
return _create_error_response(error_msg, "unexpected_error")
|
||||||
|
|
||||||
|
|
||||||
async def get_blocks_external() -> list[dict[str, Any]] | None:
|
async def get_blocks_external() -> list[dict[str, Any]] | None:
|
||||||
"""Get available blocks from the external service.
|
"""Get available blocks from the external service.
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Shared agent search functionality for find_agent and find_library_agent tools."""
|
"""Shared agent search functionality for find_agent and find_library_agent tools."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from backend.api.features.library import db as library_db
|
from backend.api.features.library import db as library_db
|
||||||
@@ -19,6 +20,85 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
SearchSource = Literal["marketplace", "library"]
|
SearchSource = Literal["marketplace", "library"]
|
||||||
|
|
||||||
|
_UUID_PATTERN = re.compile(
|
||||||
|
r"^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_uuid(text: str) -> bool:
|
||||||
|
"""Check if text is a valid UUID v4."""
|
||||||
|
return bool(_UUID_PATTERN.match(text.strip()))
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_library_agent_by_id(user_id: str, agent_id: str) -> AgentInfo | None:
|
||||||
|
"""Fetch a library agent by ID (library agent ID or graph_id).
|
||||||
|
|
||||||
|
Tries multiple lookup strategies:
|
||||||
|
1. First by graph_id (AgentGraph primary key)
|
||||||
|
2. Then by library agent ID (LibraryAgent primary key)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: The user ID
|
||||||
|
agent_id: The ID to look up (can be graph_id or library agent ID)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
AgentInfo if found, None otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id)
|
||||||
|
if agent:
|
||||||
|
logger.debug(f"Found library agent by graph_id: {agent.name}")
|
||||||
|
return AgentInfo(
|
||||||
|
id=agent.id,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description or "",
|
||||||
|
source="library",
|
||||||
|
in_library=True,
|
||||||
|
creator=agent.creator_name,
|
||||||
|
status=agent.status.value,
|
||||||
|
can_access_graph=agent.can_access_graph,
|
||||||
|
has_external_trigger=agent.has_external_trigger,
|
||||||
|
new_output=agent.new_output,
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
)
|
||||||
|
except DatabaseError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not fetch library agent by graph_id {agent_id}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
agent = await library_db.get_library_agent(agent_id, user_id)
|
||||||
|
if agent:
|
||||||
|
logger.debug(f"Found library agent by library_id: {agent.name}")
|
||||||
|
return AgentInfo(
|
||||||
|
id=agent.id,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description or "",
|
||||||
|
source="library",
|
||||||
|
in_library=True,
|
||||||
|
creator=agent.creator_name,
|
||||||
|
status=agent.status.value,
|
||||||
|
can_access_graph=agent.can_access_graph,
|
||||||
|
has_external_trigger=agent.has_external_trigger,
|
||||||
|
new_output=agent.new_output,
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
)
|
||||||
|
except NotFoundError:
|
||||||
|
logger.debug(f"Library agent not found by library_id: {agent_id}")
|
||||||
|
except DatabaseError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not fetch library agent by library_id {agent_id}: {e}",
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def search_agents(
|
async def search_agents(
|
||||||
query: str,
|
query: str,
|
||||||
@@ -69,29 +149,37 @@ async def search_agents(
|
|||||||
is_featured=False,
|
is_featured=False,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else: # library
|
else:
|
||||||
logger.info(f"Searching user library for: {query}")
|
if _is_uuid(query):
|
||||||
results = await library_db.list_library_agents(
|
logger.info(f"Query looks like UUID, trying direct lookup: {query}")
|
||||||
user_id=user_id, # type: ignore[arg-type]
|
agent = await _get_library_agent_by_id(user_id, query) # type: ignore[arg-type]
|
||||||
search_term=query,
|
if agent:
|
||||||
page_size=10,
|
agents.append(agent)
|
||||||
)
|
logger.info(f"Found agent by direct ID lookup: {agent.name}")
|
||||||
for agent in results.agents:
|
|
||||||
agents.append(
|
if not agents:
|
||||||
AgentInfo(
|
logger.info(f"Searching user library for: {query}")
|
||||||
id=agent.id,
|
results = await library_db.list_library_agents(
|
||||||
name=agent.name,
|
user_id=user_id, # type: ignore[arg-type]
|
||||||
description=agent.description or "",
|
search_term=query,
|
||||||
source="library",
|
page_size=10,
|
||||||
in_library=True,
|
|
||||||
creator=agent.creator_name,
|
|
||||||
status=agent.status.value,
|
|
||||||
can_access_graph=agent.can_access_graph,
|
|
||||||
has_external_trigger=agent.has_external_trigger,
|
|
||||||
new_output=agent.new_output,
|
|
||||||
graph_id=agent.graph_id,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
for agent in results.agents:
|
||||||
|
agents.append(
|
||||||
|
AgentInfo(
|
||||||
|
id=agent.id,
|
||||||
|
name=agent.name,
|
||||||
|
description=agent.description or "",
|
||||||
|
source="library",
|
||||||
|
in_library=True,
|
||||||
|
creator=agent.creator_name,
|
||||||
|
status=agent.status.value,
|
||||||
|
can_access_graph=agent.can_access_graph,
|
||||||
|
has_external_trigger=agent.has_external_trigger,
|
||||||
|
new_output=agent.new_output,
|
||||||
|
graph_id=agent.graph_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
logger.info(f"Found {len(agents)} agents in {source}")
|
logger.info(f"Found {len(agents)} agents in {source}")
|
||||||
except NotFoundError:
|
except NotFoundError:
|
||||||
pass
|
pass
|
||||||
@@ -118,9 +206,9 @@ async def search_agents(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
no_results_msg = (
|
no_results_msg = (
|
||||||
f"No agents found matching '{query}'. Try different keywords or browse the marketplace."
|
f"No agents found matching '{query}'. Let the user know they can try different keywords or browse the marketplace. Also let them know you can create a custom agent for them based on their needs."
|
||||||
if source == "marketplace"
|
if source == "marketplace"
|
||||||
else f"No agents matching '{query}' found in your library."
|
else f"No agents matching '{query}' found in your library. Let the user know you can create a custom agent for them based on their needs."
|
||||||
)
|
)
|
||||||
return NoResultsResponse(
|
return NoResultsResponse(
|
||||||
message=no_results_msg, session_id=session_id, suggestions=suggestions
|
message=no_results_msg, session_id=session_id, suggestions=suggestions
|
||||||
@@ -136,10 +224,10 @@ async def search_agents(
|
|||||||
message = (
|
message = (
|
||||||
"Now you have found some options for the user to choose from. "
|
"Now you have found some options for the user to choose from. "
|
||||||
"You can add a link to a recommended agent at: /marketplace/agent/agent_id "
|
"You can add a link to a recommended agent at: /marketplace/agent/agent_id "
|
||||||
"Please ask the user if they would like to use any of these agents."
|
"Please ask the user if they would like to use any of these agents. Let the user know we can create a custom agent for them based on their needs."
|
||||||
if source == "marketplace"
|
if source == "marketplace"
|
||||||
else "Found agents in the user's library. You can provide a link to view an agent at: "
|
else "Found agents in the user's library. You can provide a link to view an agent at: "
|
||||||
"/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute."
|
"/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute. Let the user know we can create a custom agent for them based on their needs."
|
||||||
)
|
)
|
||||||
|
|
||||||
return AgentsFoundResponse(
|
return AgentsFoundResponse(
|
||||||
|
|||||||
@@ -0,0 +1,131 @@
|
|||||||
|
"""Bash execution tool — run shell commands in a bubblewrap sandbox.
|
||||||
|
|
||||||
|
Full Bash scripting is allowed (loops, conditionals, pipes, functions, etc.).
|
||||||
|
Safety comes from OS-level isolation (bubblewrap): only system dirs visible
|
||||||
|
read-only, writable workspace only, clean env, no network.
|
||||||
|
|
||||||
|
Requires bubblewrap (``bwrap``) — the tool is disabled when bwrap is not
|
||||||
|
available (e.g. macOS development).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from backend.api.features.chat.model import ChatSession
|
||||||
|
from backend.api.features.chat.tools.base import BaseTool
|
||||||
|
from backend.api.features.chat.tools.models import (
|
||||||
|
BashExecResponse,
|
||||||
|
ErrorResponse,
|
||||||
|
ToolResponseBase,
|
||||||
|
)
|
||||||
|
from backend.api.features.chat.tools.sandbox import (
|
||||||
|
get_workspace_dir,
|
||||||
|
has_full_sandbox,
|
||||||
|
run_sandboxed,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class BashExecTool(BaseTool):
|
||||||
|
"""Execute Bash commands in a bubblewrap sandbox."""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "bash_exec"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def description(self) -> str:
|
||||||
|
if not has_full_sandbox():
|
||||||
|
return (
|
||||||
|
"Bash execution is DISABLED — bubblewrap sandbox is not "
|
||||||
|
"available on this platform. Do not call this tool."
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
"Execute a Bash command or script in a bubblewrap sandbox. "
|
||||||
|
"Full Bash scripting is supported (loops, conditionals, pipes, "
|
||||||
|
"functions, etc.). "
|
||||||
|
"The sandbox shares the same working directory as the SDK Read/Write "
|
||||||
|
"tools — files created by either are accessible to both. "
|
||||||
|
"SECURITY: Only system directories (/usr, /bin, /lib, /etc) are "
|
||||||
|
"visible read-only, the per-session workspace is the only writable "
|
||||||
|
"path, environment variables are wiped (no secrets), all network "
|
||||||
|
"access is blocked at the kernel level, and resource limits are "
|
||||||
|
"enforced (max 64 processes, 512MB memory, 50MB file size). "
|
||||||
|
"Application code, configs, and other directories are NOT accessible. "
|
||||||
|
"To fetch web content, use the web_fetch tool instead. "
|
||||||
|
"Execution is killed after the timeout (default 30s, max 120s). "
|
||||||
|
"Returns stdout and stderr. "
|
||||||
|
"Useful for file manipulation, data processing with Unix tools "
|
||||||
|
"(grep, awk, sed, jq, etc.), and running shell scripts."
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parameters(self) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"command": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Bash command or script to execute.",
|
||||||
|
},
|
||||||
|
"timeout": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": (
|
||||||
|
"Max execution time in seconds (default 30, max 120)."
|
||||||
|
),
|
||||||
|
"default": 30,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["command"],
|
||||||
|
}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_auth(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _execute(
|
||||||
|
self,
|
||||||
|
user_id: str | None,
|
||||||
|
session: ChatSession,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> ToolResponseBase:
|
||||||
|
session_id = session.session_id if session else None
|
||||||
|
|
||||||
|
if not has_full_sandbox():
|
||||||
|
return ErrorResponse(
|
||||||
|
message="bash_exec requires bubblewrap sandbox (Linux only).",
|
||||||
|
error="sandbox_unavailable",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
command: str = (kwargs.get("command") or "").strip()
|
||||||
|
timeout: int = kwargs.get("timeout", 30)
|
||||||
|
|
||||||
|
if not command:
|
||||||
|
return ErrorResponse(
|
||||||
|
message="No command provided.",
|
||||||
|
error="empty_command",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
workspace = get_workspace_dir(session_id or "default")
|
||||||
|
|
||||||
|
stdout, stderr, exit_code, timed_out = await run_sandboxed(
|
||||||
|
command=["bash", "-c", command],
|
||||||
|
cwd=workspace,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
return BashExecResponse(
|
||||||
|
message=(
|
||||||
|
"Execution timed out"
|
||||||
|
if timed_out
|
||||||
|
else f"Command executed (exit {exit_code})"
|
||||||
|
),
|
||||||
|
stdout=stdout,
|
||||||
|
stderr=stderr,
|
||||||
|
exit_code=exit_code,
|
||||||
|
timed_out=timed_out,
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
@@ -0,0 +1,127 @@
|
|||||||
|
"""CheckOperationStatusTool — query the status of a long-running operation."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from backend.api.features.chat.model import ChatSession
|
||||||
|
from backend.api.features.chat.tools.base import BaseTool
|
||||||
|
from backend.api.features.chat.tools.models import (
|
||||||
|
ErrorResponse,
|
||||||
|
ResponseType,
|
||||||
|
ToolResponseBase,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class OperationStatusResponse(ToolResponseBase):
|
||||||
|
"""Response for check_operation_status tool."""
|
||||||
|
|
||||||
|
type: ResponseType = ResponseType.OPERATION_STATUS
|
||||||
|
task_id: str
|
||||||
|
operation_id: str
|
||||||
|
status: str # "running", "completed", "failed"
|
||||||
|
tool_name: str | None = None
|
||||||
|
message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class CheckOperationStatusTool(BaseTool):
|
||||||
|
"""Check the status of a long-running operation (create_agent, edit_agent, etc.).
|
||||||
|
|
||||||
|
The CoPilot uses this tool to report back to the user whether an
|
||||||
|
operation that was started earlier has completed, failed, or is still
|
||||||
|
running.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "check_operation_status"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def description(self) -> str:
|
||||||
|
return (
|
||||||
|
"Check the current status of a long-running operation such as "
|
||||||
|
"create_agent or edit_agent. Accepts either an operation_id or "
|
||||||
|
"task_id from a previous operation_started response. "
|
||||||
|
"Returns the current status: running, completed, or failed."
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parameters(self) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"operation_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"The operation_id from an operation_started response."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"task_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"The task_id from an operation_started response. "
|
||||||
|
"Used as fallback if operation_id is not provided."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_auth(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _execute(
|
||||||
|
self,
|
||||||
|
user_id: str | None,
|
||||||
|
session: ChatSession,
|
||||||
|
**kwargs,
|
||||||
|
) -> ToolResponseBase:
|
||||||
|
from backend.api.features.chat import stream_registry
|
||||||
|
|
||||||
|
operation_id: str = kwargs.get("operation_id", "").strip()
|
||||||
|
task_id: str = kwargs.get("task_id", "").strip()
|
||||||
|
|
||||||
|
if not operation_id and not task_id:
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Please provide an operation_id or task_id.",
|
||||||
|
error="missing_parameter",
|
||||||
|
)
|
||||||
|
|
||||||
|
task = None
|
||||||
|
if operation_id:
|
||||||
|
task = await stream_registry.find_task_by_operation_id(operation_id)
|
||||||
|
if task is None and task_id:
|
||||||
|
task = await stream_registry.get_task(task_id)
|
||||||
|
|
||||||
|
if task is None:
|
||||||
|
# Task not in Redis — it may have already expired (TTL).
|
||||||
|
# Check conversation history for the result instead.
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
"Operation not found — it may have already completed and "
|
||||||
|
"expired from the status tracker. Check the conversation "
|
||||||
|
"history for the result."
|
||||||
|
),
|
||||||
|
error="not_found",
|
||||||
|
)
|
||||||
|
|
||||||
|
status_messages = {
|
||||||
|
"running": (
|
||||||
|
f"The {task.tool_name or 'operation'} is still running. "
|
||||||
|
"Please wait for it to complete."
|
||||||
|
),
|
||||||
|
"completed": (
|
||||||
|
f"The {task.tool_name or 'operation'} has completed successfully."
|
||||||
|
),
|
||||||
|
"failed": f"The {task.tool_name or 'operation'} has failed.",
|
||||||
|
}
|
||||||
|
|
||||||
|
return OperationStatusResponse(
|
||||||
|
task_id=task.task_id,
|
||||||
|
operation_id=task.operation_id,
|
||||||
|
status=task.status,
|
||||||
|
tool_name=task.tool_name,
|
||||||
|
message=status_messages.get(task.status, f"Status: {task.status}"),
|
||||||
|
)
|
||||||
@@ -8,7 +8,9 @@ from backend.api.features.chat.model import ChatSession
 from .agent_generator import (
     AgentGeneratorNotConfiguredError,
     decompose_goal,
+    enrich_library_agents_from_steps,
     generate_agent,
+    get_all_relevant_agents_for_generation,
     get_user_message_for_error,
     save_agent_to_library,
 )
@@ -16,6 +18,7 @@ from .base import BaseTool
 from .models import (
     AgentPreviewResponse,
     AgentSavedResponse,
+    AsyncProcessingResponse,
     ClarificationNeededResponse,
     ClarifyingQuestion,
     ErrorResponse,
@@ -96,6 +99,10 @@ class CreateAgentTool(BaseTool):
         save = kwargs.get("save", True)
         session_id = session.session_id if session else None
 
+        # Extract async processing params (passed by long-running tool handler)
+        operation_id = kwargs.get("_operation_id")
+        task_id = kwargs.get("_task_id")
+
         if not description:
             return ErrorResponse(
                 message="Please provide a description of what the agent should do.",
@@ -103,9 +110,24 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Step 1: Decompose goal into steps
+        library_agents = None
+        if user_id:
+            try:
+                library_agents = await get_all_relevant_agents_for_generation(
+                    user_id=user_id,
+                    search_query=description,
+                    include_marketplace=True,
+                )
+                logger.debug(
+                    f"Found {len(library_agents)} relevant agents for sub-agent composition"
+                )
+            except Exception as e:
+                logger.warning(f"Failed to fetch library agents: {e}")
+
         try:
-            decomposition_result = await decompose_goal(description, context)
+            decomposition_result = await decompose_goal(
+                description, context, library_agents
+            )
         except AgentGeneratorNotConfiguredError:
             return ErrorResponse(
                 message=(
@@ -124,7 +146,6 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Check if the result is an error from the external service
         if decomposition_result.get("type") == "error":
             error_msg = decomposition_result.get("error", "Unknown error")
             error_type = decomposition_result.get("error_type", "unknown")
@@ -144,7 +165,6 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Check if LLM returned clarifying questions
        if decomposition_result.get("type") == "clarifying_questions":
             questions = decomposition_result.get("questions", [])
             return ClarificationNeededResponse(
@@ -163,7 +183,6 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Check for unachievable/vague goals
         if decomposition_result.get("type") == "unachievable_goal":
             suggested = decomposition_result.get("suggested_goal", "")
             reason = decomposition_result.get("reason", "")
@@ -190,9 +209,27 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Step 2: Generate agent JSON (external service handles fixing and validation)
+        if user_id and library_agents is not None:
+            try:
+                library_agents = await enrich_library_agents_from_steps(
+                    user_id=user_id,
+                    decomposition_result=decomposition_result,
+                    existing_agents=library_agents,
+                    include_marketplace=True,
+                )
+                logger.debug(
+                    f"After enrichment: {len(library_agents)} total agents for sub-agent composition"
+                )
+            except Exception as e:
+                logger.warning(f"Failed to enrich library agents from steps: {e}")
+
         try:
-            agent_json = await generate_agent(decomposition_result)
+            agent_json = await generate_agent(
+                decomposition_result,
+                library_agents,
+                operation_id=operation_id,
+                task_id=task_id,
+            )
         except AgentGeneratorNotConfiguredError:
             return ErrorResponse(
                 message=(
@@ -211,7 +248,6 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Check if the result is an error from the external service
         if isinstance(agent_json, dict) and agent_json.get("type") == "error":
             error_msg = agent_json.get("error", "Unknown error")
             error_type = agent_json.get("error_type", "unknown")
@@ -219,7 +255,12 @@ class CreateAgentTool(BaseTool):
                 error_type,
                 operation="generate the agent",
                 llm_parse_message="The AI had trouble generating the agent. Please try again or simplify your goal.",
-                validation_message="The generated agent failed validation. Please try rephrasing your goal.",
+                validation_message=(
+                    "I wasn't able to create a valid agent for this request. "
+                    "The generated workflow had some structural issues. "
+                    "Please try simplifying your goal or breaking it into smaller steps."
+                ),
+                error_details=error_msg,
             )
             return ErrorResponse(
                 message=user_message,
@@ -232,12 +273,24 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
+        # Check if Agent Generator accepted for async processing
+        if agent_json.get("status") == "accepted":
+            logger.info(
+                f"Agent generation delegated to async processing "
+                f"(operation_id={operation_id}, task_id={task_id})"
+            )
+            return AsyncProcessingResponse(
+                message="Agent generation started. You'll be notified when it's complete.",
+                operation_id=operation_id,
+                task_id=task_id,
+                session_id=session_id,
+            )
+
         agent_name = agent_json.get("name", "Generated Agent")
         agent_description = agent_json.get("description", "")
         node_count = len(agent_json.get("nodes", []))
         link_count = len(agent_json.get("links", []))
 
-        # Step 3: Preview or save
         if not save:
             return AgentPreviewResponse(
                 message=(
@@ -252,7 +305,6 @@ class CreateAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-        # Save to library
         if not user_id:
             return ErrorResponse(
                 message="You must be logged in to save agents.",
@@ -270,7 +322,7 @@ class CreateAgentTool(BaseTool):
             agent_id=created_graph.id,
             agent_name=created_graph.name,
             library_agent_id=library_agent.id,
-            library_agent_link=f"/library/{library_agent.id}",
+            library_agent_link=f"/library/agents/{library_agent.id}",
             agent_page_link=f"/build?flowID={created_graph.id}",
             session_id=session_id,
         )
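Reviewer note: the `_operation_id`/`_task_id` kwargs plus the `status == "accepted"` check form the handoff to the Redis Streams completion consumer. A minimal sketch of the handler-side detection, assuming a `continue_with_llm` callback that is not part of this diff:

async def handle_long_running_result(result, continue_with_llm):
    # AsyncProcessingResponse always carries status="accepted" (see models.py changes),
    # meaning the Agent Generator answered HTTP 202 and now owns completion.
    if getattr(result, "status", None) == "accepted":
        return result  # skip LLM continuation; the completion consumer delivers the result
    return await continue_with_llm(result)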
|
|||||||
@@ -0,0 +1,337 @@
|
|||||||
|
"""CustomizeAgentTool - Customizes marketplace/template agents using natural language."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from backend.api.features.chat.model import ChatSession
|
||||||
|
from backend.api.features.store import db as store_db
|
||||||
|
from backend.api.features.store.exceptions import AgentNotFoundError
|
||||||
|
|
||||||
|
from .agent_generator import (
|
||||||
|
AgentGeneratorNotConfiguredError,
|
||||||
|
customize_template,
|
||||||
|
get_user_message_for_error,
|
||||||
|
graph_to_json,
|
||||||
|
save_agent_to_library,
|
||||||
|
)
|
||||||
|
from .base import BaseTool
|
||||||
|
from .models import (
|
||||||
|
AgentPreviewResponse,
|
||||||
|
AgentSavedResponse,
|
||||||
|
ClarificationNeededResponse,
|
||||||
|
ClarifyingQuestion,
|
||||||
|
ErrorResponse,
|
||||||
|
ToolResponseBase,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CustomizeAgentTool(BaseTool):
|
||||||
|
"""Tool for customizing marketplace/template agents using natural language."""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "customize_agent"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def description(self) -> str:
|
||||||
|
return (
|
||||||
|
"Customize a marketplace or template agent using natural language. "
|
||||||
|
"Takes an existing agent from the marketplace and modifies it based on "
|
||||||
|
"the user's requirements before adding to their library."
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def requires_auth(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_long_running(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parameters(self) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"agent_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"The marketplace agent ID in format 'creator/slug' "
|
||||||
|
"(e.g., 'autogpt/newsletter-writer'). "
|
||||||
|
"Get this from find_agent results."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"modifications": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"Natural language description of how to customize the agent. "
|
||||||
|
"Be specific about what changes you want to make."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"context": {
|
||||||
|
"type": "string",
|
||||||
|
"description": (
|
||||||
|
"Additional context or answers to previous clarifying questions."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"save": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": (
|
||||||
|
"Whether to save the customized agent to the user's library. "
|
||||||
|
"Default is true. Set to false for preview only."
|
||||||
|
),
|
||||||
|
"default": True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["agent_id", "modifications"],
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _execute(
|
||||||
|
self,
|
||||||
|
user_id: str | None,
|
||||||
|
session: ChatSession,
|
||||||
|
**kwargs,
|
||||||
|
) -> ToolResponseBase:
|
||||||
|
"""Execute the customize_agent tool.
|
||||||
|
|
||||||
|
Flow:
|
||||||
|
1. Parse the agent ID to get creator/slug
|
||||||
|
2. Fetch the template agent from the marketplace
|
||||||
|
3. Call customize_template with the modification request
|
||||||
|
4. Preview or save based on the save parameter
|
||||||
|
"""
|
||||||
|
agent_id = kwargs.get("agent_id", "").strip()
|
||||||
|
modifications = kwargs.get("modifications", "").strip()
|
||||||
|
context = kwargs.get("context", "")
|
||||||
|
save = kwargs.get("save", True)
|
||||||
|
session_id = session.session_id if session else None
|
||||||
|
|
||||||
|
if not agent_id:
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Please provide the marketplace agent ID (e.g., 'creator/agent-name').",
|
||||||
|
error="missing_agent_id",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not modifications:
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Please describe how you want to customize this agent.",
|
||||||
|
error="missing_modifications",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse agent_id in format "creator/slug"
|
||||||
|
parts = [p.strip() for p in agent_id.split("/")]
|
||||||
|
if len(parts) != 2 or not parts[0] or not parts[1]:
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
f"Invalid agent ID format: '{agent_id}'. "
|
||||||
|
"Expected format is 'creator/agent-name' "
|
||||||
|
"(e.g., 'autogpt/newsletter-writer')."
|
||||||
|
),
|
||||||
|
error="invalid_agent_id_format",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
creator_username, agent_slug = parts
|
||||||
|
|
||||||
|
# Fetch the marketplace agent details
|
||||||
|
try:
|
||||||
|
agent_details = await store_db.get_store_agent_details(
|
||||||
|
username=creator_username, agent_name=agent_slug
|
||||||
|
)
|
||||||
|
except AgentNotFoundError:
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
f"Could not find marketplace agent '{agent_id}'. "
|
||||||
|
"Please check the agent ID and try again."
|
||||||
|
),
|
||||||
|
error="agent_not_found",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching marketplace agent {agent_id}: {e}")
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Failed to fetch the marketplace agent. Please try again.",
|
||||||
|
error="fetch_error",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not agent_details.store_listing_version_id:
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
f"The agent '{agent_id}' does not have an available version. "
|
||||||
|
"Please try a different agent."
|
||||||
|
),
|
||||||
|
error="no_version_available",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the full agent graph
|
||||||
|
try:
|
||||||
|
graph = await store_db.get_agent(agent_details.store_listing_version_id)
|
||||||
|
template_agent = graph_to_json(graph)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching agent graph for {agent_id}: {e}")
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Failed to fetch the agent configuration. Please try again.",
|
||||||
|
error="graph_fetch_error",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call customize_template
|
||||||
|
try:
|
||||||
|
result = await customize_template(
|
||||||
|
template_agent=template_agent,
|
||||||
|
modification_request=modifications,
|
||||||
|
context=context,
|
||||||
|
)
|
||||||
|
except AgentGeneratorNotConfiguredError:
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
"Agent customization is not available. "
|
||||||
|
"The Agent Generator service is not configured."
|
||||||
|
),
|
||||||
|
error="service_not_configured",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error calling customize_template for {agent_id}: {e}")
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
"Failed to customize the agent due to a service error. "
|
||||||
|
"Please try again."
|
||||||
|
),
|
||||||
|
error="customization_service_error",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
if result is None:
|
||||||
|
return ErrorResponse(
|
||||||
|
message=(
|
||||||
|
"Failed to customize the agent. "
|
||||||
|
"The agent generation service may be unavailable or timed out. "
|
||||||
|
"Please try again."
|
||||||
|
),
|
||||||
|
error="customization_failed",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle error response
|
||||||
|
if isinstance(result, dict) and result.get("type") == "error":
|
||||||
|
error_msg = result.get("error", "Unknown error")
|
||||||
|
error_type = result.get("error_type", "unknown")
|
||||||
|
user_message = get_user_message_for_error(
|
||||||
|
error_type,
|
||||||
|
operation="customize the agent",
|
||||||
|
llm_parse_message=(
|
||||||
|
"The AI had trouble customizing the agent. "
|
||||||
|
"Please try again or simplify your request."
|
||||||
|
),
|
||||||
|
validation_message=(
|
||||||
|
"The customized agent failed validation. "
|
||||||
|
"Please try rephrasing your request."
|
||||||
|
),
|
||||||
|
error_details=error_msg,
|
||||||
|
)
|
||||||
|
return ErrorResponse(
|
||||||
|
message=user_message,
|
||||||
|
error=f"customization_failed:{error_type}",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle clarifying questions
|
||||||
|
if isinstance(result, dict) and result.get("type") == "clarifying_questions":
|
||||||
|
questions = result.get("questions") or []
|
||||||
|
if not isinstance(questions, list):
|
||||||
|
logger.error(
|
||||||
|
f"Unexpected clarifying questions format: {type(questions)}"
|
||||||
|
)
|
||||||
|
questions = []
|
||||||
|
return ClarificationNeededResponse(
|
||||||
|
message=(
|
||||||
|
"I need some more information to customize this agent. "
|
||||||
|
"Please answer the following questions:"
|
||||||
|
),
|
||||||
|
questions=[
|
||||||
|
ClarifyingQuestion(
|
||||||
|
question=q.get("question", ""),
|
||||||
|
keyword=q.get("keyword", ""),
|
||||||
|
example=q.get("example"),
|
||||||
|
)
|
||||||
|
for q in questions
|
||||||
|
if isinstance(q, dict)
|
||||||
|
],
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Result should be the customized agent JSON
|
||||||
|
if not isinstance(result, dict):
|
||||||
|
logger.error(f"Unexpected customize_template response type: {type(result)}")
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Failed to customize the agent due to an unexpected response.",
|
||||||
|
error="unexpected_response_type",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
customized_agent = result
|
||||||
|
|
||||||
|
agent_name = customized_agent.get(
|
||||||
|
"name", f"Customized {agent_details.agent_name}"
|
||||||
|
)
|
||||||
|
agent_description = customized_agent.get("description", "")
|
||||||
|
nodes = customized_agent.get("nodes")
|
||||||
|
links = customized_agent.get("links")
|
||||||
|
node_count = len(nodes) if isinstance(nodes, list) else 0
|
||||||
|
link_count = len(links) if isinstance(links, list) else 0
|
||||||
|
|
||||||
|
if not save:
|
||||||
|
return AgentPreviewResponse(
|
||||||
|
message=(
|
||||||
|
f"I've customized the agent '{agent_details.agent_name}'. "
|
||||||
|
f"The customized agent has {node_count} blocks. "
|
||||||
|
f"Review it and call customize_agent with save=true to save it."
|
||||||
|
),
|
||||||
|
agent_json=customized_agent,
|
||||||
|
agent_name=agent_name,
|
||||||
|
description=agent_description,
|
||||||
|
node_count=node_count,
|
||||||
|
link_count=link_count,
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not user_id:
|
||||||
|
return ErrorResponse(
|
||||||
|
message="You must be logged in to save agents.",
|
||||||
|
error="auth_required",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Save to user's library
|
||||||
|
try:
|
||||||
|
created_graph, library_agent = await save_agent_to_library(
|
||||||
|
customized_agent, user_id, is_update=False
|
||||||
|
)
|
||||||
|
|
||||||
|
return AgentSavedResponse(
|
||||||
|
message=(
|
||||||
|
f"Customized agent '{created_graph.name}' "
|
||||||
|
f"(based on '{agent_details.agent_name}') "
|
||||||
|
f"has been saved to your library!"
|
||||||
|
),
|
||||||
|
agent_id=created_graph.id,
|
||||||
|
agent_name=created_graph.name,
|
||||||
|
library_agent_id=library_agent.id,
|
||||||
|
library_agent_link=f"/library/agents/{library_agent.id}",
|
||||||
|
agent_page_link=f"/build?flowID={created_graph.id}",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error saving customized agent: {e}")
|
||||||
|
return ErrorResponse(
|
||||||
|
message="Failed to save the customized agent. Please try again.",
|
||||||
|
error="save_failed",
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
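Reviewer note: a quick illustration of the 'creator/slug' contract that CustomizeAgentTool enforces. The argument names come from the tool's parameters schema above; the literal values are made up.

example_call = {
    "agent_id": "autogpt/newsletter-writer",   # must be "creator/agent-name"
    "modifications": "Send the newsletter to Slack instead of email",
    "save": False,                             # preview first, save on a second call
}
parts = [p.strip() for p in example_call["agent_id"].split("/")]
assert len(parts) == 2 and all(parts)          # mirrors the validation in _execute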
||||||
@@ -9,6 +9,7 @@ from .agent_generator import (
|
|||||||
AgentGeneratorNotConfiguredError,
|
AgentGeneratorNotConfiguredError,
|
||||||
generate_agent_patch,
|
generate_agent_patch,
|
||||||
get_agent_as_json,
|
get_agent_as_json,
|
||||||
|
get_all_relevant_agents_for_generation,
|
||||||
get_user_message_for_error,
|
get_user_message_for_error,
|
||||||
save_agent_to_library,
|
save_agent_to_library,
|
||||||
)
|
)
|
||||||
@@ -16,6 +17,7 @@ from .base import BaseTool
|
|||||||
from .models import (
|
from .models import (
|
||||||
AgentPreviewResponse,
|
AgentPreviewResponse,
|
||||||
AgentSavedResponse,
|
AgentSavedResponse,
|
||||||
|
AsyncProcessingResponse,
|
||||||
ClarificationNeededResponse,
|
ClarificationNeededResponse,
|
||||||
ClarifyingQuestion,
|
ClarifyingQuestion,
|
||||||
ErrorResponse,
|
ErrorResponse,
|
||||||
@@ -103,6 +105,10 @@ class EditAgentTool(BaseTool):
|
|||||||
save = kwargs.get("save", True)
|
save = kwargs.get("save", True)
|
||||||
session_id = session.session_id if session else None
|
session_id = session.session_id if session else None
|
||||||
|
|
||||||
|
# Extract async processing params (passed by long-running tool handler)
|
||||||
|
operation_id = kwargs.get("_operation_id")
|
||||||
|
task_id = kwargs.get("_task_id")
|
||||||
|
|
||||||
if not agent_id:
|
if not agent_id:
|
||||||
return ErrorResponse(
|
return ErrorResponse(
|
||||||
message="Please provide the agent ID to edit.",
|
message="Please provide the agent ID to edit.",
|
||||||
@@ -117,7 +123,6 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 1: Fetch current agent
|
|
||||||
current_agent = await get_agent_as_json(agent_id, user_id)
|
current_agent = await get_agent_as_json(agent_id, user_id)
|
||||||
|
|
||||||
if current_agent is None:
|
if current_agent is None:
|
||||||
@@ -127,14 +132,34 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build the update request with context
|
library_agents = None
|
||||||
|
if user_id:
|
||||||
|
try:
|
||||||
|
graph_id = current_agent.get("id")
|
||||||
|
library_agents = await get_all_relevant_agents_for_generation(
|
||||||
|
user_id=user_id,
|
||||||
|
search_query=changes,
|
||||||
|
exclude_graph_id=graph_id,
|
||||||
|
include_marketplace=True,
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(library_agents)} relevant agents for sub-agent composition"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to fetch library agents: {e}")
|
||||||
|
|
||||||
update_request = changes
|
update_request = changes
|
||||||
if context:
|
if context:
|
||||||
update_request = f"{changes}\n\nAdditional context:\n{context}"
|
update_request = f"{changes}\n\nAdditional context:\n{context}"
|
||||||
|
|
||||||
# Step 2: Generate updated agent (external service handles fixing and validation)
|
|
||||||
try:
|
try:
|
||||||
result = await generate_agent_patch(update_request, current_agent)
|
result = await generate_agent_patch(
|
||||||
|
update_request,
|
||||||
|
current_agent,
|
||||||
|
library_agents,
|
||||||
|
operation_id=operation_id,
|
||||||
|
task_id=task_id,
|
||||||
|
)
|
||||||
except AgentGeneratorNotConfiguredError:
|
except AgentGeneratorNotConfiguredError:
|
||||||
return ErrorResponse(
|
return ErrorResponse(
|
||||||
message=(
|
message=(
|
||||||
@@ -153,6 +178,19 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check if Agent Generator accepted for async processing
|
||||||
|
if result.get("status") == "accepted":
|
||||||
|
logger.info(
|
||||||
|
f"Agent edit delegated to async processing "
|
||||||
|
f"(operation_id={operation_id}, task_id={task_id})"
|
||||||
|
)
|
||||||
|
return AsyncProcessingResponse(
|
||||||
|
message="Agent edit started. You'll be notified when it's complete.",
|
||||||
|
operation_id=operation_id,
|
||||||
|
task_id=task_id,
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
# Check if the result is an error from the external service
|
# Check if the result is an error from the external service
|
||||||
if isinstance(result, dict) and result.get("type") == "error":
|
if isinstance(result, dict) and result.get("type") == "error":
|
||||||
error_msg = result.get("error", "Unknown error")
|
error_msg = result.get("error", "Unknown error")
|
||||||
@@ -162,6 +200,7 @@ class EditAgentTool(BaseTool):
|
|||||||
operation="generate the changes",
|
operation="generate the changes",
|
||||||
llm_parse_message="The AI had trouble generating the changes. Please try again or simplify your request.",
|
llm_parse_message="The AI had trouble generating the changes. Please try again or simplify your request.",
|
||||||
validation_message="The generated changes failed validation. Please try rephrasing your request.",
|
validation_message="The generated changes failed validation. Please try rephrasing your request.",
|
||||||
|
error_details=error_msg,
|
||||||
)
|
)
|
||||||
return ErrorResponse(
|
return ErrorResponse(
|
||||||
message=user_message,
|
message=user_message,
|
||||||
@@ -175,7 +214,6 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if LLM returned clarifying questions
|
|
||||||
if result.get("type") == "clarifying_questions":
|
if result.get("type") == "clarifying_questions":
|
||||||
questions = result.get("questions", [])
|
questions = result.get("questions", [])
|
||||||
return ClarificationNeededResponse(
|
return ClarificationNeededResponse(
|
||||||
@@ -194,7 +232,6 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Result is the updated agent JSON
|
|
||||||
updated_agent = result
|
updated_agent = result
|
||||||
|
|
||||||
agent_name = updated_agent.get("name", "Updated Agent")
|
agent_name = updated_agent.get("name", "Updated Agent")
|
||||||
@@ -202,7 +239,6 @@ class EditAgentTool(BaseTool):
|
|||||||
node_count = len(updated_agent.get("nodes", []))
|
node_count = len(updated_agent.get("nodes", []))
|
||||||
link_count = len(updated_agent.get("links", []))
|
link_count = len(updated_agent.get("links", []))
|
||||||
|
|
||||||
# Step 3: Preview or save
|
|
||||||
if not save:
|
if not save:
|
||||||
return AgentPreviewResponse(
|
return AgentPreviewResponse(
|
||||||
message=(
|
message=(
|
||||||
@@ -218,7 +254,6 @@ class EditAgentTool(BaseTool):
|
|||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save to library (creates a new version)
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
return ErrorResponse(
|
return ErrorResponse(
|
||||||
message="You must be logged in to save agents.",
|
message="You must be logged in to save agents.",
|
||||||
@@ -236,7 +271,7 @@ class EditAgentTool(BaseTool):
|
|||||||
agent_id=created_graph.id,
|
agent_id=created_graph.id,
|
||||||
agent_name=created_graph.name,
|
agent_name=created_graph.name,
|
||||||
library_agent_id=library_agent.id,
|
library_agent_id=library_agent.id,
|
||||||
library_agent_link=f"/library/{library_agent.id}",
|
library_agent_link=f"/library/agents/{library_agent.id}",
|
||||||
agent_page_link=f"/build?flowID={created_graph.id}",
|
agent_page_link=f"/build?flowID={created_graph.id}",
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
)
|
)
|
||||||
|
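Reviewer note: edit_agent now mirrors create_agent's sub-agent lookup, but excludes the graph being edited so it cannot be suggested as its own sub-agent. A sketch of that call shape, using get_all_relevant_agents_for_generation from .agent_generator as imported in this diff; the argument values are placeholders.

async def _lookup_subagents_for_edit(user_id: str, changes: str, graph_id: str):
    # Best-effort: failures are logged and ignored in the diff so editing still
    # works without sub-agent suggestions.
    try:
        return await get_all_relevant_agents_for_generation(
            user_id=user_id,
            search_query=changes,
            exclude_graph_id=graph_id,   # never offer the edited agent as its own sub-agent
            include_marketplace=True,
        )
    except Exception:
        return None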
|||||||
@@ -13,10 +13,33 @@ from backend.api.features.chat.tools.models import (
|
|||||||
NoResultsResponse,
|
NoResultsResponse,
|
||||||
)
|
)
|
||||||
from backend.api.features.store.hybrid_search import unified_hybrid_search
|
from backend.api.features.store.hybrid_search import unified_hybrid_search
|
||||||
from backend.data.block import get_block
|
from backend.blocks import get_block
|
||||||
|
from backend.blocks._base import BlockType
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_TARGET_RESULTS = 10
|
||||||
|
# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
|
||||||
|
# 40 leaves roughly 2x headroom over the number of blocks currently filtered out;
# the latency difference between fetching 10 and 40 rows is negligible.
|
||||||
|
_OVERFETCH_PAGE_SIZE = 40
|
||||||
|
|
||||||
|
# Block types that only work within graphs and cannot run standalone in CoPilot.
|
||||||
|
COPILOT_EXCLUDED_BLOCK_TYPES = {
|
||||||
|
BlockType.INPUT, # Graph interface definition - data enters via chat, not graph inputs
|
||||||
|
BlockType.OUTPUT, # Graph interface definition - data exits via chat, not graph outputs
|
||||||
|
BlockType.WEBHOOK, # Wait for external events - would hang forever in CoPilot
|
||||||
|
BlockType.WEBHOOK_MANUAL, # Same as WEBHOOK
|
||||||
|
BlockType.NOTE, # Visual annotation only - no runtime behavior
|
||||||
|
BlockType.HUMAN_IN_THE_LOOP, # Pauses for human approval - CoPilot IS human-in-the-loop
|
||||||
|
BlockType.AGENT, # AgentExecutorBlock requires execution_context - use run_agent tool
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
|
||||||
|
COPILOT_EXCLUDED_BLOCK_IDS = {
|
||||||
|
# SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
|
||||||
|
"3b191d9f-356f-482d-8238-ba04b6d18381",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class FindBlockTool(BaseTool):
|
class FindBlockTool(BaseTool):
|
||||||
"""Tool for searching available blocks."""
|
"""Tool for searching available blocks."""
|
||||||
@@ -88,7 +111,7 @@ class FindBlockTool(BaseTool):
|
|||||||
query=query,
|
query=query,
|
||||||
content_types=[ContentType.BLOCK],
|
content_types=[ContentType.BLOCK],
|
||||||
page=1,
|
page=1,
|
||||||
page_size=10,
|
page_size=_OVERFETCH_PAGE_SIZE,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
@@ -108,60 +131,90 @@ class FindBlockTool(BaseTool):
|
|||||||
block = get_block(block_id)
|
block = get_block(block_id)
|
||||||
|
|
||||||
# Skip disabled blocks
|
# Skip disabled blocks
|
||||||
if block and not block.disabled:
|
if not block or block.disabled:
|
||||||
# Get input/output schemas
|
continue
|
||||||
input_schema = {}
|
|
||||||
output_schema = {}
|
|
||||||
try:
|
|
||||||
input_schema = block.input_schema.jsonschema()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
output_schema = block.output_schema.jsonschema()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Get categories from block instance
|
# Skip blocks excluded from CoPilot (graph-only blocks)
|
||||||
categories = []
|
if (
|
||||||
if hasattr(block, "categories") and block.categories:
|
block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
categories = [cat.value for cat in block.categories]
|
or block.id in COPILOT_EXCLUDED_BLOCK_IDS
|
||||||
|
):
|
||||||
|
continue
|
||||||
|
|
||||||
# Extract required inputs for easier use
|
# Get input/output schemas
|
||||||
required_inputs: list[BlockInputFieldInfo] = []
|
input_schema = {}
|
||||||
if input_schema:
|
output_schema = {}
|
||||||
properties = input_schema.get("properties", {})
|
try:
|
||||||
required_fields = set(input_schema.get("required", []))
|
input_schema = block.input_schema.jsonschema()
|
||||||
# Get credential field names to exclude from required inputs
|
except Exception as e:
|
||||||
credentials_fields = set(
|
logger.debug(
|
||||||
block.input_schema.get_credentials_fields().keys()
|
"Failed to generate input schema for block %s: %s",
|
||||||
)
|
block_id,
|
||||||
|
e,
|
||||||
for field_name, field_schema in properties.items():
|
|
||||||
# Skip credential fields - they're handled separately
|
|
||||||
if field_name in credentials_fields:
|
|
||||||
continue
|
|
||||||
|
|
||||||
required_inputs.append(
|
|
||||||
BlockInputFieldInfo(
|
|
||||||
name=field_name,
|
|
||||||
type=field_schema.get("type", "string"),
|
|
||||||
description=field_schema.get("description", ""),
|
|
||||||
required=field_name in required_fields,
|
|
||||||
default=field_schema.get("default"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
blocks.append(
|
|
||||||
BlockInfoSummary(
|
|
||||||
id=block_id,
|
|
||||||
name=block.name,
|
|
||||||
description=block.description or "",
|
|
||||||
categories=categories,
|
|
||||||
input_schema=input_schema,
|
|
||||||
output_schema=output_schema,
|
|
||||||
required_inputs=required_inputs,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
output_schema = block.output_schema.jsonschema()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
"Failed to generate output schema for block %s: %s",
|
||||||
|
block_id,
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get categories from block instance
|
||||||
|
categories = []
|
||||||
|
if hasattr(block, "categories") and block.categories:
|
||||||
|
categories = [cat.value for cat in block.categories]
|
||||||
|
|
||||||
|
# Extract required inputs for easier use
|
||||||
|
required_inputs: list[BlockInputFieldInfo] = []
|
||||||
|
if input_schema:
|
||||||
|
properties = input_schema.get("properties", {})
|
||||||
|
required_fields = set(input_schema.get("required", []))
|
||||||
|
# Get credential field names to exclude from required inputs
|
||||||
|
credentials_fields = set(
|
||||||
|
block.input_schema.get_credentials_fields().keys()
|
||||||
|
)
|
||||||
|
|
||||||
|
for field_name, field_schema in properties.items():
|
||||||
|
# Skip credential fields - they're handled separately
|
||||||
|
if field_name in credentials_fields:
|
||||||
|
continue
|
||||||
|
|
||||||
|
required_inputs.append(
|
||||||
|
BlockInputFieldInfo(
|
||||||
|
name=field_name,
|
||||||
|
type=field_schema.get("type", "string"),
|
||||||
|
description=field_schema.get("description", ""),
|
||||||
|
required=field_name in required_fields,
|
||||||
|
default=field_schema.get("default"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
blocks.append(
|
||||||
|
BlockInfoSummary(
|
||||||
|
id=block_id,
|
||||||
|
name=block.name,
|
||||||
|
description=block.description or "",
|
||||||
|
categories=categories,
|
||||||
|
input_schema=input_schema,
|
||||||
|
output_schema=output_schema,
|
||||||
|
required_inputs=required_inputs,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(blocks) >= _TARGET_RESULTS:
|
||||||
|
break
|
||||||
|
|
||||||
|
if blocks and len(blocks) < _TARGET_RESULTS:
|
||||||
|
logger.debug(
|
||||||
|
"find_block returned %d/%d results for query '%s' "
|
||||||
|
"(filtered %d excluded/disabled blocks)",
|
||||||
|
len(blocks),
|
||||||
|
_TARGET_RESULTS,
|
||||||
|
query,
|
||||||
|
len(results) - len(blocks),
|
||||||
|
)
|
||||||
|
|
||||||
if not blocks:
|
if not blocks:
|
||||||
return NoResultsResponse(
|
return NoResultsResponse(
|
||||||
|
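Reviewer note: a condensed sketch of the new filtering rule, importing the constants added in this diff; the helper name is illustrative.

from backend.api.features.chat.tools.find_block import (
    COPILOT_EXCLUDED_BLOCK_IDS,
    COPILOT_EXCLUDED_BLOCK_TYPES,
)

def is_copilot_runnable(block) -> bool:
    # Graph-only blocks (INPUT/OUTPUT/WEBHOOK/NOTE/HITL/AGENT) and SmartDecisionMaker
    # are skipped, which is why the search over-fetches 40 rows to still return ~10.
    return (
        block is not None
        and not block.disabled
        and block.block_type not in COPILOT_EXCLUDED_BLOCK_TYPES
        and block.id not in COPILOT_EXCLUDED_BLOCK_IDS
    )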
|||||||
@@ -0,0 +1,139 @@
|
|||||||
|
"""Tests for block filtering in FindBlockTool."""
|
||||||
|
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.api.features.chat.tools.find_block import (
|
||||||
|
COPILOT_EXCLUDED_BLOCK_IDS,
|
||||||
|
COPILOT_EXCLUDED_BLOCK_TYPES,
|
||||||
|
FindBlockTool,
|
||||||
|
)
|
||||||
|
from backend.api.features.chat.tools.models import BlockListResponse
|
||||||
|
from backend.blocks._base import BlockType
|
||||||
|
|
||||||
|
from ._test_data import make_session
|
||||||
|
|
||||||
|
_TEST_USER_ID = "test-user-find-block"
|
||||||
|
|
||||||
|
|
||||||
|
def make_mock_block(
|
||||||
|
block_id: str, name: str, block_type: BlockType, disabled: bool = False
|
||||||
|
):
|
||||||
|
"""Create a mock block for testing."""
|
||||||
|
mock = MagicMock()
|
||||||
|
mock.id = block_id
|
||||||
|
mock.name = name
|
||||||
|
mock.description = f"{name} description"
|
||||||
|
mock.block_type = block_type
|
||||||
|
mock.disabled = disabled
|
||||||
|
mock.input_schema = MagicMock()
|
||||||
|
mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
|
||||||
|
mock.input_schema.get_credentials_fields.return_value = {}
|
||||||
|
mock.output_schema = MagicMock()
|
||||||
|
mock.output_schema.jsonschema.return_value = {}
|
||||||
|
mock.categories = []
|
||||||
|
return mock
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindBlockFiltering:
|
||||||
|
"""Tests for block filtering in FindBlockTool."""
|
||||||
|
|
||||||
|
def test_excluded_block_types_contains_expected_types(self):
|
||||||
|
"""Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
|
||||||
|
assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
|
||||||
|
|
||||||
|
def test_excluded_block_ids_contains_smart_decision_maker(self):
|
||||||
|
"""Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
|
||||||
|
assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
|
||||||
|
|
||||||
|
@pytest.mark.asyncio(loop_scope="session")
|
||||||
|
async def test_excluded_block_type_filtered_from_results(self):
|
||||||
|
"""Verify blocks with excluded BlockTypes are filtered from search results."""
|
||||||
|
session = make_session(user_id=_TEST_USER_ID)
|
||||||
|
|
||||||
|
# Mock search returns an INPUT block (excluded) and a STANDARD block (included)
|
||||||
|
search_results = [
|
||||||
|
{"content_id": "input-block-id", "score": 0.9},
|
||||||
|
{"content_id": "standard-block-id", "score": 0.8},
|
||||||
|
]
|
||||||
|
|
||||||
|
input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
|
||||||
|
standard_block = make_mock_block(
|
||||||
|
"standard-block-id", "HTTP Request", BlockType.STANDARD
|
||||||
|
)
|
||||||
|
|
||||||
|
def mock_get_block(block_id):
|
||||||
|
return {
|
||||||
|
"input-block-id": input_block,
|
||||||
|
"standard-block-id": standard_block,
|
||||||
|
}.get(block_id)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"backend.api.features.chat.tools.find_block.unified_hybrid_search",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=(search_results, 2),
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"backend.api.features.chat.tools.find_block.get_block",
|
||||||
|
side_effect=mock_get_block,
|
||||||
|
):
|
||||||
|
tool = FindBlockTool()
|
||||||
|
response = await tool._execute(
|
||||||
|
user_id=_TEST_USER_ID, session=session, query="test"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should only return the standard block, not the INPUT block
|
||||||
|
assert isinstance(response, BlockListResponse)
|
||||||
|
assert len(response.blocks) == 1
|
||||||
|
assert response.blocks[0].id == "standard-block-id"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio(loop_scope="session")
|
||||||
|
async def test_excluded_block_id_filtered_from_results(self):
|
||||||
|
"""Verify SmartDecisionMakerBlock is filtered from search results."""
|
||||||
|
session = make_session(user_id=_TEST_USER_ID)
|
||||||
|
|
||||||
|
smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
|
||||||
|
search_results = [
|
||||||
|
{"content_id": smart_decision_id, "score": 0.9},
|
||||||
|
{"content_id": "normal-block-id", "score": 0.8},
|
||||||
|
]
|
||||||
|
|
||||||
|
# SmartDecisionMakerBlock has STANDARD type but is excluded by ID
|
||||||
|
smart_block = make_mock_block(
|
||||||
|
smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
|
||||||
|
)
|
||||||
|
normal_block = make_mock_block(
|
||||||
|
"normal-block-id", "Normal Block", BlockType.STANDARD
|
||||||
|
)
|
||||||
|
|
||||||
|
def mock_get_block(block_id):
|
||||||
|
return {
|
||||||
|
smart_decision_id: smart_block,
|
||||||
|
"normal-block-id": normal_block,
|
||||||
|
}.get(block_id)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"backend.api.features.chat.tools.find_block.unified_hybrid_search",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=(search_results, 2),
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"backend.api.features.chat.tools.find_block.get_block",
|
||||||
|
side_effect=mock_get_block,
|
||||||
|
):
|
||||||
|
tool = FindBlockTool()
|
||||||
|
response = await tool._execute(
|
||||||
|
user_id=_TEST_USER_ID, session=session, query="decision"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should only return normal block, not SmartDecisionMakerBlock
|
||||||
|
assert isinstance(response, BlockListResponse)
|
||||||
|
assert len(response.blocks) == 1
|
||||||
|
assert response.blocks[0].id == "normal-block-id"
|
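Reviewer note: the same fixtures cover the remaining excluded types; an illustrative (untested) variant for WEBHOOK blocks, using only the helpers defined in this test module.

@pytest.mark.asyncio(loop_scope="session")
async def test_webhook_block_filtered():
    session = make_session(user_id=_TEST_USER_ID)
    webhook_block = make_mock_block("webhook-id", "My Webhook", BlockType.WEBHOOK)
    with patch(
        "backend.api.features.chat.tools.find_block.unified_hybrid_search",
        new_callable=AsyncMock,
        return_value=([{"content_id": "webhook-id", "score": 0.9}], 1),
    ), patch(
        "backend.api.features.chat.tools.find_block.get_block",
        side_effect=lambda block_id: {"webhook-id": webhook_block}.get(block_id),
    ):
        response = await FindBlockTool()._execute(
            user_id=_TEST_USER_ID, session=session, query="webhook"
        )
    # With every hit filtered out, the tool should fall back to NoResultsResponse.
    assert not isinstance(response, BlockListResponse) or not response.blocks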
||||||
@@ -0,0 +1,29 @@
"""Shared helpers for chat tools."""

from typing import Any


def get_inputs_from_schema(
    input_schema: dict[str, Any],
    exclude_fields: set[str] | None = None,
) -> list[dict[str, Any]]:
    """Extract input field info from JSON schema."""
    if not isinstance(input_schema, dict):
        return []

    exclude = exclude_fields or set()
    properties = input_schema.get("properties", {})
    required = set(input_schema.get("required", []))

    return [
        {
            "name": name,
            "title": schema.get("title", name),
            "type": schema.get("type", "string"),
            "description": schema.get("description", ""),
            "required": name in required,
            "default": schema.get("default"),
        }
        for name, schema in properties.items()
        if name not in exclude
    ]
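Reviewer note: a small usage example for the new helper; the schema literal is made up.

schema = {
    "properties": {
        "topic": {"type": "string", "description": "Newsletter topic"},
        "credentials": {"type": "object"},
    },
    "required": ["topic"],
}
fields = get_inputs_from_schema(schema, exclude_fields={"credentials"})
# -> [{"name": "topic", "title": "topic", "type": "string",
#      "description": "Newsletter topic", "required": True, "default": None}]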
@@ -38,6 +38,14 @@ class ResponseType(str, Enum):
     OPERATION_STARTED = "operation_started"
     OPERATION_PENDING = "operation_pending"
     OPERATION_IN_PROGRESS = "operation_in_progress"
+    # Input validation
+    INPUT_VALIDATION_ERROR = "input_validation_error"
+    # Web fetch
+    WEB_FETCH = "web_fetch"
+    # Code execution
+    BASH_EXEC = "bash_exec"
+    # Operation status check
+    OPERATION_STATUS = "operation_status"
 
 
 # Base response model
@@ -68,6 +76,10 @@ class AgentInfo(BaseModel):
     has_external_trigger: bool | None = None
     new_output: bool | None = None
     graph_id: str | None = None
+    inputs: dict[str, Any] | None = Field(
+        default=None,
+        description="Input schema for the agent, including field names, types, and defaults",
+    )
 
 
 class AgentsFoundResponse(ToolResponseBase):
@@ -194,6 +206,20 @@ class ErrorResponse(ToolResponseBase):
     details: dict[str, Any] | None = None
 
 
+class InputValidationErrorResponse(ToolResponseBase):
+    """Response when run_agent receives unknown input fields."""
+
+    type: ResponseType = ResponseType.INPUT_VALIDATION_ERROR
+    unrecognized_fields: list[str] = Field(
+        description="List of input field names that were not recognized"
+    )
+    inputs: dict[str, Any] = Field(
+        description="The agent's valid input schema for reference"
+    )
+    graph_id: str | None = None
+    graph_version: int | None = None
+
+
 # Agent output models
 class ExecutionOutputInfo(BaseModel):
     """Summary of a single execution's outputs."""
@@ -315,11 +341,17 @@ class BlockInfoSummary(BaseModel):
     name: str
     description: str
     categories: list[str]
-    input_schema: dict[str, Any]
-    output_schema: dict[str, Any]
+    input_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block inputs",
+    )
+    output_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block outputs",
+    )
     required_inputs: list[BlockInputFieldInfo] = Field(
         default_factory=list,
-        description="List of required input fields for this block",
+        description="List of input fields for this block",
     )
 
 
@@ -332,7 +364,7 @@ class BlockListResponse(ToolResponseBase):
     query: str
     usage_hint: str = Field(
        default="To execute a block, call run_block with block_id set to the block's "
-        "'id' field and input_data containing the required fields from input_schema."
+        "'id' field and input_data containing the fields listed in required_inputs."
     )
 
 
@@ -352,11 +384,15 @@ class OperationStartedResponse(ToolResponseBase):
 
     This is returned immediately to the client while the operation continues
     to execute. The user can close the tab and check back later.
+
+    The task_id can be used to reconnect to the SSE stream via
+    GET /chat/tasks/{task_id}/stream?last_idx=0
     """
 
     type: ResponseType = ResponseType.OPERATION_STARTED
     operation_id: str
     tool_name: str
+    task_id: str | None = None  # For SSE reconnection
 
 
 class OperationPendingResponse(ToolResponseBase):
@@ -380,3 +416,41 @@ class OperationInProgressResponse(ToolResponseBase):
 
     type: ResponseType = ResponseType.OPERATION_IN_PROGRESS
     tool_call_id: str
+
+
+class AsyncProcessingResponse(ToolResponseBase):
+    """Response when an operation has been delegated to async processing.
+
+    This is returned by tools when the external service accepts the request
+    for async processing (HTTP 202 Accepted). The Redis Streams completion
+    consumer will handle the result when the external service completes.
+
+    The status field is specifically "accepted" to allow the long-running tool
+    handler to detect this response and skip LLM continuation.
+    """
+
+    type: ResponseType = ResponseType.OPERATION_STARTED
+    status: str = "accepted"  # Must be "accepted" for detection
+    operation_id: str | None = None
+    task_id: str | None = None
+
+
+class WebFetchResponse(ToolResponseBase):
+    """Response for web_fetch tool."""
+
+    type: ResponseType = ResponseType.WEB_FETCH
+    url: str
+    status_code: int
+    content_type: str
+    content: str
+    truncated: bool = False
+
+
+class BashExecResponse(ToolResponseBase):
+    """Response for bash_exec tool."""
+
+    type: ResponseType = ResponseType.BASH_EXEC
+    stdout: str
+    stderr: str
+    exit_code: int
+    timed_out: bool = False
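Reviewer note: a short illustration of the internal delegation marker; the IDs are made up and the field set is abbreviated (ToolResponseBase may carry more fields).

resp = AsyncProcessingResponse(
    message="Agent generation started. You'll be notified when it's complete.",
    operation_id="op-123",   # made-up
    task_id="task-456",      # made-up; also usable for the documented SSE reconnect:
                             # GET /chat/tasks/task-456/stream?last_idx=0
)
assert resp.status == "accepted"                      # the handler relies on this exact value
assert resp.type == ResponseType.OPERATION_STARTED    # reuses the existing SSE event type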
|
|||||||
@@ -24,12 +24,14 @@ from backend.util.timezone_utils import (
 )
 
 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
     AgentDetails,
     AgentDetailsResponse,
     ErrorResponse,
     ExecutionOptions,
     ExecutionStartedResponse,
+    InputValidationErrorResponse,
     SetupInfo,
     SetupRequirementsResponse,
     ToolResponseBase,
@@ -260,7 +262,7 @@ class RunAgentTool(BaseTool):
                 ),
                 requirements={
                     "credentials": requirements_creds_list,
-                    "inputs": self._get_inputs_list(graph.input_schema),
+                    "inputs": get_inputs_from_schema(graph.input_schema),
                     "execution_modes": self._get_execution_modes(graph),
                 },
             ),
@@ -273,6 +275,22 @@ class RunAgentTool(BaseTool):
         input_properties = graph.input_schema.get("properties", {})
         required_fields = set(graph.input_schema.get("required", []))
         provided_inputs = set(params.inputs.keys())
+        valid_fields = set(input_properties.keys())
+
+        # Check for unknown input fields
+        unrecognized_fields = provided_inputs - valid_fields
+        if unrecognized_fields:
+            return InputValidationErrorResponse(
+                message=(
+                    f"Unknown input field(s) provided: {', '.join(sorted(unrecognized_fields))}. "
+                    f"Agent was not executed. Please use the correct field names from the schema."
+                ),
+                session_id=session_id,
+                unrecognized_fields=sorted(unrecognized_fields),
+                inputs=graph.input_schema,
+                graph_id=graph.id,
+                graph_version=graph.version,
+            )
 
         # If agent has inputs but none were provided AND use_defaults is not set,
         # always show what's available first so user can decide
@@ -352,22 +370,6 @@ class RunAgentTool(BaseTool):
                 session_id=session_id,
             )
 
-    def _get_inputs_list(self, input_schema: dict[str, Any]) -> list[dict[str, Any]]:
-        """Extract inputs list from schema."""
-        inputs_list = []
-        if isinstance(input_schema, dict) and "properties" in input_schema:
-            for field_name, field_schema in input_schema["properties"].items():
-                inputs_list.append(
-                    {
-                        "name": field_name,
-                        "title": field_schema.get("title", field_name),
-                        "type": field_schema.get("type", "string"),
-                        "description": field_schema.get("description", ""),
-                        "required": field_name in input_schema.get("required", []),
-                    }
-                )
-        return inputs_list
-
     def _get_execution_modes(self, graph: GraphModel) -> list[str]:
         """Get available execution modes for the graph."""
         trigger_info = graph.trigger_setup_info
@@ -381,7 +383,7 @@ class RunAgentTool(BaseTool):
         suffix: str,
     ) -> str:
         """Build a message describing available inputs for an agent."""
-        inputs_list = self._get_inputs_list(graph.input_schema)
+        inputs_list = get_inputs_from_schema(graph.input_schema)
         required_names = [i["name"] for i in inputs_list if i["required"]]
         optional_names = [i["name"] for i in inputs_list if not i["required"]]
 
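Reviewer note: the new pre-flight check boils down to a set difference; the schema and inputs here are made up.

valid_fields = {"topic", "tone"}                     # from graph.input_schema["properties"]
provided = {"topic": "AI news", "tonee": "casual"}   # note the typo
unrecognized = set(provided) - valid_fields
assert unrecognized == {"tonee"}
# -> InputValidationErrorResponse(unrecognized_fields=["tonee"], inputs=<full schema>, ...)
#    and the agent is NOT executed.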
|||||||
@@ -402,3 +402,42 @@
     # Should return error about missing schedule_name
     assert result_data.get("type") == "error"
     assert "schedule_name" in result_data["message"].lower()
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_run_agent_rejects_unknown_input_fields(setup_test_data):
+    """Test that run_agent returns input_validation_error for unknown input fields."""
+    user = setup_test_data["user"]
+    store_submission = setup_test_data["store_submission"]
+
+    tool = RunAgentTool()
+    agent_marketplace_id = f"{user.email.split('@')[0]}/{store_submission.slug}"
+    session = make_session(user_id=user.id)
+
+    # Execute with unknown input field names
+    response = await tool.execute(
+        user_id=user.id,
+        session_id=str(uuid.uuid4()),
+        tool_call_id=str(uuid.uuid4()),
+        username_agent_slug=agent_marketplace_id,
+        inputs={
+            "unknown_field": "some value",
+            "another_unknown": "another value",
+        },
+        session=session,
+    )
+
+    assert response is not None
+    assert hasattr(response, "output")
+    assert isinstance(response.output, str)
+    result_data = orjson.loads(response.output)
+
+    # Should return input_validation_error type with unrecognized fields
+    assert result_data.get("type") == "input_validation_error"
+    assert "unrecognized_fields" in result_data
+    assert set(result_data["unrecognized_fields"]) == {
+        "another_unknown",
+        "unknown_field",
+    }
+    assert "inputs" in result_data  # Contains the valid schema
+    assert "Agent was not executed" in result_data["message"]
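Reviewer note: for quick reference, the payload this test asserts against looks roughly like the dict below (values abridged; in practice "inputs" is the agent's real input schema).

expected_shape = {
    "type": "input_validation_error",
    "message": "Unknown input field(s) provided: another_unknown, unknown_field. "
               "Agent was not executed. Please use the correct field names from the schema.",
    "unrecognized_fields": ["another_unknown", "unknown_field"],
    "inputs": {"properties": {}, "required": []},
    "graph_id": "...",
    "graph_version": 1,
}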
||||||
|
|||||||
@@ -5,15 +5,23 @@ import uuid
 from collections import defaultdict
 from typing import Any

+from pydantic_core import PydanticUndefined
+
 from backend.api.features.chat.model import ChatSession
-from backend.data.block import get_block
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+)
+from backend.blocks import get_block
+from backend.blocks._base import AnyBlockSchema
 from backend.data.execution import ExecutionContext
-from backend.data.model import CredentialsMetaInput
+from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
 from backend.data.workspace import get_or_create_workspace
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import BlockError

 from .base import BaseTool
+from .helpers import get_inputs_from_schema
 from .models import (
     BlockOutputResponse,
     ErrorResponse,
@@ -22,7 +30,10 @@ from .models import (
     ToolResponseBase,
     UserReadiness,
 )
-from .utils import build_missing_credentials_from_field_info
+from .utils import (
+    build_missing_credentials_from_field_info,
+    match_credentials_to_requirements,
+)

 logger = logging.getLogger(__name__)

@@ -71,65 +82,6 @@ class RunBlockTool(BaseTool):
     def requires_auth(self) -> bool:
         return True

-    async def _check_block_credentials(
-        self,
-        user_id: str,
-        block: Any,
-    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
-        """
-        Check if user has required credentials for a block.
-
-        Returns:
-            tuple[matched_credentials, missing_credentials]
-        """
-        matched_credentials: dict[str, CredentialsMetaInput] = {}
-        missing_credentials: list[CredentialsMetaInput] = []
-
-        # Get credential field info from block's input schema
-        credentials_fields_info = block.input_schema.get_credentials_fields_info()
-
-        if not credentials_fields_info:
-            return matched_credentials, missing_credentials
-
-        # Get user's available credentials
-        creds_manager = IntegrationCredentialsManager()
-        available_creds = await creds_manager.store.get_all_creds(user_id)
-
-        for field_name, field_info in credentials_fields_info.items():
-            # field_info.provider is a frozenset of acceptable providers
-            # field_info.supported_types is a frozenset of acceptable types
-            matching_cred = next(
-                (
-                    cred
-                    for cred in available_creds
-                    if cred.provider in field_info.provider
-                    and cred.type in field_info.supported_types
-                ),
-                None,
-            )
-
-            if matching_cred:
-                matched_credentials[field_name] = CredentialsMetaInput(
-                    id=matching_cred.id,
-                    provider=matching_cred.provider,  # type: ignore
-                    type=matching_cred.type,
-                    title=matching_cred.title,
-                )
-            else:
-                # Create a placeholder for the missing credential
-                provider = next(iter(field_info.provider), "unknown")
-                cred_type = next(iter(field_info.supported_types), "api_key")
-                missing_credentials.append(
-                    CredentialsMetaInput(
-                        id=field_name,
-                        provider=provider,  # type: ignore
-                        type=cred_type,  # type: ignore
-                        title=field_name.replace("_", " ").title(),
-                    )
-                )
-
-        return matched_credentials, missing_credentials
-
     async def _execute(
         self,
         user_id: str | None,
@@ -184,12 +136,24 @@ class RunBlockTool(BaseTool):
             session_id=session_id,
         )

+        # Check if block is excluded from CoPilot (graph-only blocks)
+        if (
+            block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+            or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+        ):
+            return ErrorResponse(
+                message=(
+                    f"Block '{block.name}' cannot be run directly in CoPilot. "
+                    "This block is designed for use within graphs only."
+                ),
+                session_id=session_id,
+            )
+
         logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")

-        # Check credentials
         creds_manager = IntegrationCredentialsManager()
-        matched_credentials, missing_credentials = await self._check_block_credentials(
-            user_id, block
+        matched_credentials, missing_credentials = (
+            await self._resolve_block_credentials(user_id, block, input_data)
         )

         if missing_credentials:
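The guard added above is a plain allow/deny check performed before any execution work. A minimal, self-contained sketch of the same pattern, using stand-in types rather than the project's Block and ErrorResponse classes (the excluded type names are illustrative assumptions; the block id is the SmartDecisionMaker id used in the tests below):

from dataclasses import dataclass

EXCLUDED_TYPES = {"INPUT", "OUTPUT", "WEBHOOK"}  # stand-in for COPILOT_EXCLUDED_BLOCK_TYPES
EXCLUDED_IDS = {"3b191d9f-356f-482d-8238-ba04b6d18381"}  # stand-in for COPILOT_EXCLUDED_BLOCK_IDS


@dataclass
class FakeBlock:
    id: str
    name: str
    block_type: str


def guard(block: FakeBlock) -> str | None:
    """Return an error message if the block may not run directly in chat, else None."""
    if block.block_type in EXCLUDED_TYPES or block.id in EXCLUDED_IDS:
        return (
            f"Block '{block.name}' cannot be run directly in CoPilot. "
            "This block is designed for use within graphs only."
        )
    return None


print(guard(FakeBlock("x", "Input Block", "INPUT")))      # error message
print(guard(FakeBlock("y", "HTTP Request", "STANDARD")))  # None, execution proceeds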
@@ -318,29 +282,75 @@ class RunBlockTool(BaseTool):
             session_id=session_id,
         )

-    def _get_inputs_list(self, block: Any) -> list[dict[str, Any]]:
+    async def _resolve_block_credentials(
+        self,
+        user_id: str,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any] | None = None,
+    ) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+        """
+        Resolve credentials for a block by matching user's available credentials.
+
+        Args:
+            user_id: User ID
+            block: Block to resolve credentials for
+            input_data: Input data for the block (used to determine provider via discriminator)
+
+        Returns:
+            tuple of (matched_credentials, missing_credentials) - matched credentials
+            are used for block execution, missing ones indicate setup requirements.
+        """
+        input_data = input_data or {}
+        requirements = self._resolve_discriminated_credentials(block, input_data)
+
+        if not requirements:
+            return {}, []
+
+        return await match_credentials_to_requirements(user_id, requirements)
+
+    def _get_inputs_list(self, block: AnyBlockSchema) -> list[dict[str, Any]]:
         """Extract non-credential inputs from block schema."""
-        inputs_list = []
         schema = block.input_schema.jsonschema()
-        properties = schema.get("properties", {})
-        required_fields = set(schema.get("required", []))
-
-        # Get credential field names to exclude
         credentials_fields = set(block.input_schema.get_credentials_fields().keys())
+        return get_inputs_from_schema(schema, exclude_fields=credentials_fields)

-        for field_name, field_schema in properties.items():
-            # Skip credential fields
-            if field_name in credentials_fields:
-                continue
-
-            inputs_list.append(
-                {
-                    "name": field_name,
-                    "title": field_schema.get("title", field_name),
-                    "type": field_schema.get("type", "string"),
-                    "description": field_schema.get("description", ""),
-                    "required": field_name in required_fields,
-                }
-            )
-
-        return inputs_list
+    def _resolve_discriminated_credentials(
+        self,
+        block: AnyBlockSchema,
+        input_data: dict[str, Any],
+    ) -> dict[str, CredentialsFieldInfo]:
+        """Resolve credential requirements, applying discriminator logic where needed."""
+        credentials_fields_info = block.input_schema.get_credentials_fields_info()
+        if not credentials_fields_info:
+            return {}
+
+        resolved: dict[str, CredentialsFieldInfo] = {}
+
+        for field_name, field_info in credentials_fields_info.items():
+            effective_field_info = field_info
+
+            if field_info.discriminator and field_info.discriminator_mapping:
+                discriminator_value = input_data.get(field_info.discriminator)
+                if discriminator_value is None:
+                    field = block.input_schema.model_fields.get(
+                        field_info.discriminator
+                    )
+                    if field and field.default is not PydanticUndefined:
+                        discriminator_value = field.default
+
+                if (
+                    discriminator_value
+                    and discriminator_value in field_info.discriminator_mapping
+                ):
+                    effective_field_info = field_info.discriminate(discriminator_value)
+                    # For host-scoped credentials, add the discriminator value
+                    # (e.g., URL) so _credential_is_for_host can match it
+                    effective_field_info.discriminator_values.add(discriminator_value)
+                    logger.debug(
+                        f"Discriminated provider for {field_name}: "
+                        f"{discriminator_value} -> {effective_field_info.provider}"
+                    )
+
+            resolved[field_name] = effective_field_info
+
+        return resolved
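A minimal, self-contained illustration of the discriminator idea used above: the value of one designated input field (for example a model name or a URL) selects which provider's credentials are actually required. The class here is a simplified stand-in, not the project's CredentialsFieldInfo, and the mapping values are assumptions for illustration only:

from dataclasses import dataclass, field


@dataclass
class FieldInfo:  # simplified stand-in for CredentialsFieldInfo
    provider: frozenset[str]
    discriminator: str | None = None
    discriminator_mapping: dict[str, str] = field(default_factory=dict)

    def discriminate(self, value: str) -> "FieldInfo":
        # Narrow the acceptable providers to the one mapped from the input value
        return FieldInfo(provider=frozenset({self.discriminator_mapping[value]}))


info = FieldInfo(
    provider=frozenset({"openai", "anthropic"}),
    discriminator="model",
    discriminator_mapping={"gpt-4o": "openai", "claude-sonnet": "anthropic"},
)

input_data = {"model": "claude-sonnet"}
value = input_data.get(info.discriminator)
effective = info.discriminate(value) if value in info.discriminator_mapping else info
print(effective.provider)  # frozenset({'anthropic'})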
@@ -0,0 +1,106 @@
"""Tests for block execution guards in RunBlockTool."""

from unittest.mock import MagicMock, patch

import pytest

from backend.api.features.chat.tools.models import ErrorResponse
from backend.api.features.chat.tools.run_block import RunBlockTool
from backend.blocks._base import BlockType

from ._test_data import make_session

_TEST_USER_ID = "test-user-run-block"


def make_mock_block(
    block_id: str, name: str, block_type: BlockType, disabled: bool = False
):
    """Create a mock block for testing."""
    mock = MagicMock()
    mock.id = block_id
    mock.name = name
    mock.block_type = block_type
    mock.disabled = disabled
    mock.input_schema = MagicMock()
    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
    mock.input_schema.get_credentials_fields_info.return_value = []
    return mock


class TestRunBlockFiltering:
    """Tests for block execution guards in RunBlockTool."""

    @pytest.mark.asyncio(loop_scope="session")
    async def test_excluded_block_type_returns_error(self):
        """Attempting to execute a block with excluded BlockType returns error."""
        session = make_session(user_id=_TEST_USER_ID)

        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)

        with patch(
            "backend.api.features.chat.tools.run_block.get_block",
            return_value=input_block,
        ):
            tool = RunBlockTool()
            response = await tool._execute(
                user_id=_TEST_USER_ID,
                session=session,
                block_id="input-block-id",
                input_data={},
            )

        assert isinstance(response, ErrorResponse)
        assert "cannot be run directly in CoPilot" in response.message
        assert "designed for use within graphs only" in response.message

    @pytest.mark.asyncio(loop_scope="session")
    async def test_excluded_block_id_returns_error(self):
        """Attempting to execute SmartDecisionMakerBlock returns error."""
        session = make_session(user_id=_TEST_USER_ID)

        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
        smart_block = make_mock_block(
            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
        )

        with patch(
            "backend.api.features.chat.tools.run_block.get_block",
            return_value=smart_block,
        ):
            tool = RunBlockTool()
            response = await tool._execute(
                user_id=_TEST_USER_ID,
                session=session,
                block_id=smart_decision_id,
                input_data={},
            )

        assert isinstance(response, ErrorResponse)
        assert "cannot be run directly in CoPilot" in response.message

    @pytest.mark.asyncio(loop_scope="session")
    async def test_non_excluded_block_passes_guard(self):
        """Non-excluded blocks pass the filtering guard (may fail later for other reasons)."""
        session = make_session(user_id=_TEST_USER_ID)

        standard_block = make_mock_block(
            "standard-id", "HTTP Request", BlockType.STANDARD
        )

        with patch(
            "backend.api.features.chat.tools.run_block.get_block",
            return_value=standard_block,
        ):
            tool = RunBlockTool()
            response = await tool._execute(
                user_id=_TEST_USER_ID,
                session=session,
                block_id="standard-id",
                input_data={},
            )

        # Should NOT be an ErrorResponse about CoPilot exclusion
        # (may be other errors like missing credentials, but not the exclusion guard)
        if isinstance(response, ErrorResponse):
            assert "cannot be run directly in CoPilot" not in response.message
@@ -0,0 +1,267 @@
"""Sandbox execution utilities for code execution tools.

Provides filesystem + network isolated command execution using **bubblewrap**
(``bwrap``): whitelist-only filesystem (only system dirs visible read-only),
writable workspace only, clean environment, network blocked.

Tools that call :func:`run_sandboxed` must first check :func:`has_full_sandbox`
and refuse to run if bubblewrap is not available.
"""

import asyncio
import logging
import os
import platform
import shutil

logger = logging.getLogger(__name__)

# Output limits — prevent blowing up LLM context
_MAX_OUTPUT_CHARS = 50_000
_DEFAULT_TIMEOUT = 30
_MAX_TIMEOUT = 120


# ---------------------------------------------------------------------------
# Sandbox capability detection (cached at first call)
# ---------------------------------------------------------------------------

_BWRAP_AVAILABLE: bool | None = None


def has_full_sandbox() -> bool:
    """Return True if bubblewrap is available (filesystem + network isolation).

    On non-Linux platforms (macOS), always returns False.
    """
    global _BWRAP_AVAILABLE
    if _BWRAP_AVAILABLE is None:
        _BWRAP_AVAILABLE = (
            platform.system() == "Linux" and shutil.which("bwrap") is not None
        )
    return _BWRAP_AVAILABLE


WORKSPACE_PREFIX = "/tmp/copilot-"


def make_session_path(session_id: str) -> str:
    """Build a sanitized, session-specific path under :data:`WORKSPACE_PREFIX`.

    Shared by both the SDK working-directory setup and the sandbox tools so
    they always resolve to the same directory for a given session.

    Steps:
    1. Strip all characters except ``[A-Za-z0-9-]``.
    2. Construct ``/tmp/copilot-<safe_id>``.
    3. Validate via ``os.path.normpath`` + ``startswith`` (CodeQL-recognised
       sanitizer) to prevent path traversal.

    Raises:
        ValueError: If the resulting path escapes the prefix.
    """
    import re

    safe_id = re.sub(r"[^A-Za-z0-9-]", "", session_id)
    if not safe_id:
        safe_id = "default"
    path = os.path.normpath(f"{WORKSPACE_PREFIX}{safe_id}")
    if not path.startswith(WORKSPACE_PREFIX):
        raise ValueError(f"Session path escaped prefix: {path}")
    return path


def get_workspace_dir(session_id: str) -> str:
    """Get or create the workspace directory for a session.

    Uses :func:`make_session_path` — the same path the SDK uses — so that
    bash_exec shares the workspace with the SDK file tools.
    """
    workspace = make_session_path(session_id)
    os.makedirs(workspace, exist_ok=True)
    return workspace


# ---------------------------------------------------------------------------
# Bubblewrap command builder
# ---------------------------------------------------------------------------

# System directories mounted read-only inside the sandbox.
# ONLY these are visible — /app, /root, /home, /opt, /var etc. are NOT accessible.
_SYSTEM_RO_BINDS = [
    "/usr",  # binaries, libraries, Python interpreter
    "/etc",  # system config: ld.so, locale, passwd, alternatives
]

# Compat paths: symlinks to /usr/* on modern Debian, real dirs on older systems.
# On Debian 13 these are symlinks (e.g. /bin -> usr/bin). bwrap --ro-bind
# can't create a symlink target, so we detect and use --symlink instead.
# /lib64 is critical: the ELF dynamic linker lives at /lib64/ld-linux-x86-64.so.2.
_COMPAT_PATHS = [
    ("/bin", "usr/bin"),  # -> /usr/bin on Debian 13
    ("/sbin", "usr/sbin"),  # -> /usr/sbin on Debian 13
    ("/lib", "usr/lib"),  # -> /usr/lib on Debian 13
    ("/lib64", "usr/lib64"),  # 64-bit libraries / ELF interpreter
]

# Resource limits to prevent fork bombs, memory exhaustion, and disk abuse.
# Applied via ulimit inside the sandbox before exec'ing the user command.
_RESOURCE_LIMITS = (
    "ulimit -u 64"  # max 64 processes (prevents fork bombs)
    " -v 524288"  # 512 MB virtual memory
    " -f 51200"  # 50 MB max file size (1024-byte blocks)
    " -n 256"  # 256 open file descriptors
    " 2>/dev/null"
)


def _build_bwrap_command(
    command: list[str], cwd: str, env: dict[str, str]
) -> list[str]:
    """Build a bubblewrap command with strict filesystem + network isolation.

    Security model:
    - **Whitelist-only filesystem**: only system directories (``/usr``, ``/etc``,
      ``/bin``, ``/lib``) are mounted read-only. Application code (``/app``),
      home directories, ``/var``, ``/opt``, etc. are NOT accessible at all.
    - **Writable workspace only**: the per-session workspace is the sole
      writable path.
    - **Clean environment**: ``--clearenv`` wipes all inherited env vars.
      Only the explicitly-passed safe env vars are set inside the sandbox.
    - **Network isolation**: ``--unshare-net`` blocks all network access.
    - **Resource limits**: ulimit caps on processes (64), memory (512MB),
      file size (50MB), and open FDs (256) to prevent fork bombs and abuse.
    - **New session**: prevents terminal control escape.
    - **Die with parent**: prevents orphaned sandbox processes.
    """
    cmd = [
        "bwrap",
        # Create a new user namespace so bwrap can set up sandboxing
        # inside unprivileged Docker containers (no CAP_SYS_ADMIN needed).
        "--unshare-user",
        # Wipe all inherited environment variables (API keys, secrets, etc.)
        "--clearenv",
    ]

    # Set only the safe env vars inside the sandbox
    for key, value in env.items():
        cmd.extend(["--setenv", key, value])

    # System directories: read-only
    for path in _SYSTEM_RO_BINDS:
        cmd.extend(["--ro-bind", path, path])

    # Compat paths: use --symlink when host path is a symlink (Debian 13),
    # --ro-bind when it's a real directory (older distros).
    for path, symlink_target in _COMPAT_PATHS:
        if os.path.islink(path):
            cmd.extend(["--symlink", symlink_target, path])
        elif os.path.exists(path):
            cmd.extend(["--ro-bind", path, path])

    # Wrap the user command with resource limits:
    # sh -c 'ulimit ...; exec "$@"' -- <original command>
    # `exec "$@"` replaces the shell so there's no extra process overhead,
    # and properly handles arguments with spaces.
    limited_command = [
        "sh",
        "-c",
        f'{_RESOURCE_LIMITS}; exec "$@"',
        "--",
        *command,
    ]

    cmd.extend(
        [
            # Fresh virtual filesystems
            "--dev",
            "/dev",
            "--proc",
            "/proc",
            "--tmpfs",
            "/tmp",
            # Workspace bind AFTER --tmpfs /tmp so it's visible through the tmpfs.
            # (workspace lives under /tmp/copilot-<session>)
            "--bind",
            cwd,
            cwd,
            # Isolation
            "--unshare-net",
            "--die-with-parent",
            "--new-session",
            "--chdir",
            cwd,
            "--",
            *limited_command,
        ]
    )

    return cmd


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


async def run_sandboxed(
    command: list[str],
    cwd: str,
    timeout: int = _DEFAULT_TIMEOUT,
    env: dict[str, str] | None = None,
) -> tuple[str, str, int, bool]:
    """Run a command inside a bubblewrap sandbox.

    Callers **must** check :func:`has_full_sandbox` before calling this
    function. If bubblewrap is not available, this function raises
    :class:`RuntimeError` rather than running unsandboxed.

    Returns:
        (stdout, stderr, exit_code, timed_out)
    """
    if not has_full_sandbox():
        raise RuntimeError(
            "run_sandboxed() requires bubblewrap but bwrap is not available. "
            "Callers must check has_full_sandbox() before calling this function."
        )

    timeout = min(max(timeout, 1), _MAX_TIMEOUT)

    safe_env = {
        "PATH": "/usr/local/bin:/usr/bin:/bin",
        "HOME": cwd,
        "TMPDIR": cwd,
        "LANG": "en_US.UTF-8",
        "PYTHONDONTWRITEBYTECODE": "1",
        "PYTHONIOENCODING": "utf-8",
    }
    if env:
        safe_env.update(env)

    full_command = _build_bwrap_command(command, cwd, safe_env)

    try:
        proc = await asyncio.create_subprocess_exec(
            *full_command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=cwd,
            env=safe_env,
        )

        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                proc.communicate(), timeout=timeout
            )
            stdout = stdout_bytes.decode("utf-8", errors="replace")[:_MAX_OUTPUT_CHARS]
            stderr = stderr_bytes.decode("utf-8", errors="replace")[:_MAX_OUTPUT_CHARS]
            return stdout, stderr, proc.returncode or 0, False
        except asyncio.TimeoutError:
            proc.kill()
            await proc.communicate()
            return "", f"Execution timed out after {timeout}s", -1, True

    except RuntimeError:
        raise
    except Exception as e:
        return "", f"Sandbox error: {e}", -1, False
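A minimal usage sketch of the helpers above, assuming they are importable from this module (the module name `sandbox` in the import is a placeholder, not the real path):

import asyncio

from sandbox import get_workspace_dir, has_full_sandbox, run_sandboxed  # placeholder import path


async def main() -> None:
    if not has_full_sandbox():
        # Tools must refuse to run rather than fall back to unsandboxed execution.
        print("bubblewrap (bwrap) not available; refusing to run")
        return

    cwd = get_workspace_dir("demo-session-123")
    stdout, stderr, exit_code, timed_out = await run_sandboxed(
        ["python3", "-c", "print('hello from the sandbox')"],
        cwd=cwd,
        timeout=10,
    )
    print(exit_code, timed_out, stdout.strip())


asyncio.run(main())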
@@ -6,9 +6,14 @@ from typing import Any
 from backend.api.features.library import db as library_db
 from backend.api.features.library import model as library_model
 from backend.api.features.store import db as store_db
-from backend.data import graph as graph_db
 from backend.data.graph import GraphModel
-from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
+from backend.data.model import (
+    Credentials,
+    CredentialsFieldInfo,
+    CredentialsMetaInput,
+    HostScopedCredentials,
+    OAuth2Credentials,
+)
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import NotFoundError

@@ -39,14 +44,8 @@ async def fetch_graph_from_store_slug(
         return None, None

     # Get the graph from store listing version
-    graph_meta = await store_db.get_available_graph(
-        store_agent.store_listing_version_id
-    )
-    graph = await graph_db.get_graph(
-        graph_id=graph_meta.id,
-        version=graph_meta.version,
-        user_id=None,  # Public access
-        include_subgraphs=True,
+    graph = await store_db.get_available_graph(
+        store_agent.store_listing_version_id, hide_nodes=False
     )
     return graph, store_agent

@@ -123,7 +122,7 @@ def build_missing_credentials_from_graph(

     return {
         field_key: _serialize_missing_credential(field_key, field_info)
-        for field_key, (field_info, _node_fields) in aggregated_fields.items()
+        for field_key, (field_info, _, _) in aggregated_fields.items()
         if field_key not in matched_keys
     }

@@ -225,6 +224,99 @@ async def get_or_create_library_agent(
     return library_agents[0]


+async def match_credentials_to_requirements(
+    user_id: str,
+    requirements: dict[str, CredentialsFieldInfo],
+) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
+    """
+    Match user's credentials against a dictionary of credential requirements.
+
+    This is the core matching logic shared by both graph and block credential matching.
+    """
+    matched: dict[str, CredentialsMetaInput] = {}
+    missing: list[CredentialsMetaInput] = []
+
+    if not requirements:
+        return matched, missing
+
+    available_creds = await get_user_credentials(user_id)
+
+    for field_name, field_info in requirements.items():
+        matching_cred = find_matching_credential(available_creds, field_info)
+
+        if matching_cred:
+            try:
+                matched[field_name] = create_credential_meta_from_match(matching_cred)
+            except Exception as e:
+                logger.error(
+                    f"Failed to create CredentialsMetaInput for field '{field_name}': "
+                    f"provider={matching_cred.provider}, type={matching_cred.type}, "
+                    f"credential_id={matching_cred.id}",
+                    exc_info=True,
+                )
+                provider = next(iter(field_info.provider), "unknown")
+                cred_type = next(iter(field_info.supported_types), "api_key")
+                missing.append(
+                    CredentialsMetaInput(
+                        id=field_name,
+                        provider=provider,  # type: ignore
+                        type=cred_type,  # type: ignore
+                        title=f"{field_name} (validation failed: {e})",
+                    )
+                )
+        else:
+            provider = next(iter(field_info.provider), "unknown")
+            cred_type = next(iter(field_info.supported_types), "api_key")
+            missing.append(
+                CredentialsMetaInput(
+                    id=field_name,
+                    provider=provider,  # type: ignore
+                    type=cred_type,  # type: ignore
+                    title=field_name.replace("_", " ").title(),
+                )
+            )

+    return matched, missing
+
+
+async def get_user_credentials(user_id: str) -> list[Credentials]:
+    """Get all available credentials for a user."""
+    creds_manager = IntegrationCredentialsManager()
+    return await creds_manager.store.get_all_creds(user_id)
+
+
+def find_matching_credential(
+    available_creds: list[Credentials],
+    field_info: CredentialsFieldInfo,
+) -> Credentials | None:
+    """Find a credential that matches the required provider, type, scopes, and host."""
+    for cred in available_creds:
+        if cred.provider not in field_info.provider:
+            continue
+        if cred.type not in field_info.supported_types:
+            continue
+        if cred.type == "oauth2" and not _credential_has_required_scopes(
+            cred, field_info
+        ):
+            continue
+        if cred.type == "host_scoped" and not _credential_is_for_host(cred, field_info):
+            continue
+        return cred
+    return None
+
+
+def create_credential_meta_from_match(
+    matching_cred: Credentials,
+) -> CredentialsMetaInput:
+    """Create a CredentialsMetaInput from a matched credential."""
+    return CredentialsMetaInput(
+        id=matching_cred.id,
+        provider=matching_cred.provider,  # type: ignore
+        type=matching_cred.type,
+        title=matching_cred.title,
+    )
+
+
 async def match_user_credentials_to_graph(
     user_id: str,
     graph: GraphModel,
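The matching rule introduced above is: a stored credential satisfies a requirement when its provider and type are acceptable, OAuth2 credentials additionally need a superset of the required scopes, and host-scoped credentials must match the target host. A toy re-statement of that filter, with plain dicts standing in for the real Credentials and CredentialsFieldInfo models:

def find_match(available: list[dict], requirement: dict) -> dict | None:
    """Toy version of find_matching_credential(); dicts replace the real models."""
    for cred in available:
        if cred["provider"] not in requirement["providers"]:
            continue
        if cred["type"] not in requirement["types"]:
            continue
        if cred["type"] == "oauth2" and not set(cred["scopes"]) >= set(
            requirement.get("required_scopes", set())
        ):
            continue
        return cred
    return None


creds = [
    {"provider": "github", "type": "oauth2", "scopes": ["repo"]},
    {"provider": "github", "type": "oauth2", "scopes": ["repo", "workflow"]},
]
need = {
    "providers": {"github"},
    "types": {"oauth2"},
    "required_scopes": {"repo", "workflow"},
}
print(find_match(creds, need))  # matches the second credential only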
@@ -264,15 +356,24 @@ async def match_user_credentials_to_graph(
     # provider is in the set of acceptable providers.
     for credential_field_name, (
         credential_requirements,
-        _node_fields,
+        _,
+        _,
     ) in aggregated_creds.items():
-        # Find first matching credential by provider and type
+        # Find first matching credential by provider, type, and scopes
         matching_cred = next(
             (
                 cred
                 for cred in available_creds
                 if cred.provider in credential_requirements.provider
                 and cred.type in credential_requirements.supported_types
+                and (
+                    cred.type != "oauth2"
+                    or _credential_has_required_scopes(cred, credential_requirements)
+                )
+                and (
+                    cred.type != "host_scoped"
+                    or _credential_is_for_host(cred, credential_requirements)
+                )
             ),
             None,
         )
@@ -296,10 +397,17 @@ async def match_user_credentials_to_graph(
                     f"{credential_field_name} (validation failed: {e})"
                 )
         else:
+            # Build a helpful error message including scope requirements
+            error_parts = [
+                f"provider in {list(credential_requirements.provider)}",
+                f"type in {list(credential_requirements.supported_types)}",
+            ]
+            if credential_requirements.required_scopes:
+                error_parts.append(
+                    f"scopes including {list(credential_requirements.required_scopes)}"
+                )
             missing_creds.append(
-                f"{credential_field_name} "
-                f"(requires provider in {list(credential_requirements.provider)}, "
-                f"type in {list(credential_requirements.supported_types)})"
+                f"{credential_field_name} (requires {', '.join(error_parts)})"
             )

     logger.info(
@@ -309,6 +417,33 @@ async def match_user_credentials_to_graph(
     return graph_credentials_inputs, missing_creds


+def _credential_has_required_scopes(
+    credential: OAuth2Credentials,
+    requirements: CredentialsFieldInfo,
+) -> bool:
+    """Check if an OAuth2 credential has all the scopes required by the input."""
+    # If no scopes are required, any credential matches
+    if not requirements.required_scopes:
+        return True
+    return set(credential.scopes).issuperset(requirements.required_scopes)
+
+
+def _credential_is_for_host(
+    credential: HostScopedCredentials,
+    requirements: CredentialsFieldInfo,
+) -> bool:
+    """Check if a host-scoped credential matches the host required by the input."""
+    # We need to know the host to match host-scoped credentials to.
+    # Graph.aggregate_credentials_inputs() adds the node's set URL value (if any)
+    # to discriminator_values. No discriminator_values -> no host to match against.
+    if not requirements.discriminator_values:
+        return True
+
+    # Check that credential host matches required host.
+    # Host-scoped credential inputs are grouped by host, so any item from the set works.
+    return credential.matches_url(list(requirements.discriminator_values)[0])
+
+
 async def check_user_has_required_credentials(
     user_id: str,
     required_credentials: list[CredentialsMetaInput],
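The scope check above reduces to a plain set-superset test; a quick worked example:

granted = {"repo", "workflow", "read:org"}

print(granted >= {"repo"})               # True: required scopes are covered
print(granted >= {"repo", "admin:org"})  # False: "admin:org" was never granted
print(granted >= set())                  # True: no required scopes matches any credential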
@@ -0,0 +1,156 @@
"""Web fetch tool — safely retrieve public web page content."""

import logging
from typing import Any

import aiohttp
import html2text

from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.base import BaseTool
from backend.api.features.chat.tools.models import (
    ErrorResponse,
    ToolResponseBase,
    WebFetchResponse,
)
from backend.util.request import Requests

logger = logging.getLogger(__name__)

# Limits
_MAX_CONTENT_BYTES = 102_400  # 100 KB download cap
_MAX_OUTPUT_CHARS = 50_000  # 50K char truncation for LLM context
_REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=15)

# Content types we'll read as text
_TEXT_CONTENT_TYPES = {
    "text/html",
    "text/plain",
    "text/xml",
    "text/csv",
    "text/markdown",
    "application/json",
    "application/xml",
    "application/xhtml+xml",
    "application/rss+xml",
    "application/atom+xml",
}


def _is_text_content(content_type: str) -> bool:
    base = content_type.split(";")[0].strip().lower()
    return base in _TEXT_CONTENT_TYPES or base.startswith("text/")


def _html_to_text(html: str) -> str:
    h = html2text.HTML2Text()
    h.ignore_links = False
    h.ignore_images = True
    h.body_width = 0
    return h.handle(html)


class WebFetchTool(BaseTool):
    """Safely fetch content from a public URL using SSRF-protected HTTP."""

    @property
    def name(self) -> str:
        return "web_fetch"

    @property
    def description(self) -> str:
        return (
            "Fetch the content of a public web page by URL. "
            "Returns readable text extracted from HTML by default. "
            "Useful for reading documentation, articles, and API responses. "
            "Only supports HTTP/HTTPS GET requests to public URLs "
            "(private/internal network addresses are blocked)."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The public HTTP/HTTPS URL to fetch.",
                },
                "extract_text": {
                    "type": "boolean",
                    "description": (
                        "If true (default), extract readable text from HTML. "
                        "If false, return raw content."
                    ),
                    "default": True,
                },
            },
            "required": ["url"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        url: str = (kwargs.get("url") or "").strip()
        extract_text: bool = kwargs.get("extract_text", True)
        session_id = session.session_id if session else None

        if not url:
            return ErrorResponse(
                message="Please provide a URL to fetch.",
                error="missing_url",
                session_id=session_id,
            )

        try:
            client = Requests(raise_for_status=False, retry_max_attempts=1)
            response = await client.get(url, timeout=_REQUEST_TIMEOUT)
        except ValueError as e:
            # validate_url raises ValueError for SSRF / blocked IPs
            return ErrorResponse(
                message=f"URL blocked: {e}",
                error="url_blocked",
                session_id=session_id,
            )
        except Exception as e:
            logger.warning(f"[web_fetch] Request failed for {url}: {e}")
            return ErrorResponse(
                message=f"Failed to fetch URL: {e}",
                error="fetch_failed",
                session_id=session_id,
            )

        content_type = response.headers.get("content-type", "")
        if not _is_text_content(content_type):
            return ErrorResponse(
                message=f"Non-text content type: {content_type.split(';')[0]}",
                error="unsupported_content_type",
                session_id=session_id,
            )

        raw = response.content[:_MAX_CONTENT_BYTES]
        text = raw.decode("utf-8", errors="replace")

        if extract_text and "html" in content_type.lower():
            text = _html_to_text(text)

        truncated = len(text) > _MAX_OUTPUT_CHARS
        if truncated:
            text = text[:_MAX_OUTPUT_CHARS]

        return WebFetchResponse(
            message=f"Fetched {url}" + (" (truncated)" if truncated else ""),
            url=response.url,
            status_code=response.status,
            content_type=content_type.split(";")[0].strip(),
            content=text,
            truncated=truncated,
            session_id=session_id,
        )
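The HTML-to-text step above relies on the html2text package; a standalone sketch of the same conversion settings (requires html2text to be installed; the sample HTML and the commented output are illustrative):

import html2text

h = html2text.HTML2Text()
h.ignore_links = False  # keep link targets in the output
h.ignore_images = True  # drop images entirely
h.body_width = 0        # no hard wrapping

html = "<h1>Docs</h1><p>See <a href='https://example.com'>the guide</a>.</p>"
print(h.handle(html))
# Output (approximately):
# # Docs
#
# See [the guide](https://example.com).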
@@ -88,7 +88,9 @@ class ListWorkspaceFilesTool(BaseTool):
     @property
     def description(self) -> str:
         return (
-            "List files in the user's workspace. "
+            "List files in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read/Glob tools instead. "
             "Returns file names, paths, sizes, and metadata. "
             "Optionally filter by path prefix."
         )
@@ -204,7 +206,9 @@ class ReadWorkspaceFileTool(BaseTool):
     @property
     def description(self) -> str:
         return (
-            "Read a file from the user's workspace. "
+            "Read a file from the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read tool instead. "
             "Specify either file_id or path to identify the file. "
             "For small text files, returns content directly. "
             "For large or binary files, returns metadata and a download URL. "
@@ -378,7 +382,9 @@ class WriteWorkspaceFileTool(BaseTool):
     @property
     def description(self) -> str:
         return (
-            "Write or create a file in the user's workspace. "
+            "Write or create a file in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Write tool instead. "
             "Provide the content as a base64-encoded string. "
             f"Maximum file size is {Config().max_file_size_mb}MB. "
             "Files are saved to the current session's folder by default. "
@@ -523,7 +529,7 @@ class DeleteWorkspaceFileTool(BaseTool):
     @property
     def description(self) -> str:
         return (
-            "Delete a file from the user's workspace. "
+            "Delete a file from the user's persistent workspace (cloud storage). "
             "Specify either file_id or path to identify the file. "
             "Paths are scoped to the current session by default. "
             "Use /sessions/<session_id>/... for cross-session access."
@@ -12,14 +12,16 @@ import backend.api.features.store.image_gen as store_image_gen
 import backend.api.features.store.media as store_media
 import backend.data.graph as graph_db
 import backend.data.integrations as integrations_db
-from backend.data.block import BlockInput
 from backend.data.db import transaction
 from backend.data.execution import get_graph_execution
 from backend.data.graph import GraphSettings
 from backend.data.includes import AGENT_PRESET_INCLUDE, library_agent_include
-from backend.data.model import CredentialsMetaInput
+from backend.data.model import CredentialsMetaInput, GraphInput
 from backend.integrations.creds_manager import IntegrationCredentialsManager
-from backend.integrations.webhooks.graph_lifecycle_hooks import on_graph_activate
+from backend.integrations.webhooks.graph_lifecycle_hooks import (
+    on_graph_activate,
+    on_graph_deactivate,
+)
 from backend.util.clients import get_scheduler_client
 from backend.util.exceptions import DatabaseError, InvalidInputError, NotFoundError
 from backend.util.json import SafeJson
@@ -39,6 +41,7 @@ async def list_library_agents(
     sort_by: library_model.LibraryAgentSort = library_model.LibraryAgentSort.UPDATED_AT,
     page: int = 1,
     page_size: int = 50,
+    include_executions: bool = False,
 ) -> library_model.LibraryAgentResponse:
     """
     Retrieves a paginated list of LibraryAgent records for a given user.
@@ -49,6 +52,9 @@ async def list_library_agents(
         sort_by: Sorting field (createdAt, updatedAt, isFavorite, isCreatedByUser).
         page: Current page (1-indexed).
         page_size: Number of items per page.
+        include_executions: Whether to include execution data for status calculation.
+            Defaults to False for performance (UI fetches status separately).
+            Set to True when accurate status/metrics are needed (e.g., agent generator).

     Returns:
         A LibraryAgentResponse containing the list of agents and pagination details.
@@ -76,7 +82,6 @@ async def list_library_agents(
         "isArchived": False,
     }

-    # Build search filter if applicable
     if search_term:
         where_clause["OR"] = [
             {
@@ -93,7 +98,6 @@ async def list_library_agents(
             },
         ]

-    # Determine sorting
     order_by: prisma.types.LibraryAgentOrderByInput | None = None

     if sort_by == library_model.LibraryAgentSort.CREATED_AT:
@@ -105,7 +109,7 @@ async def list_library_agents(
     library_agents = await prisma.models.LibraryAgent.prisma().find_many(
         where=where_clause,
         include=library_agent_include(
-            user_id, include_nodes=False, include_executions=False
+            user_id, include_nodes=False, include_executions=include_executions
         ),
         order=order_by,
         skip=(page - 1) * page_size,
@@ -369,7 +373,7 @@ async def get_library_agent_by_graph_id(


 async def add_generated_agent_image(
-    graph: graph_db.BaseGraph,
+    graph: graph_db.GraphBaseMeta,
     user_id: str,
     library_agent_id: str,
 ) -> Optional[prisma.models.LibraryAgent]:
@@ -535,6 +539,92 @@ async def update_agent_version_in_library(
     return library_model.LibraryAgent.from_db(lib)


+async def create_graph_in_library(
+    graph: graph_db.Graph,
+    user_id: str,
+) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]:
+    """Create a new graph and add it to the user's library."""
+    graph.version = 1
+    graph_model = graph_db.make_graph_model(graph, user_id)
+    graph_model.reassign_ids(user_id=user_id, reassign_graph_id=True)
+
+    created_graph = await graph_db.create_graph(graph_model, user_id)
+
+    library_agents = await create_library_agent(
+        graph=created_graph,
+        user_id=user_id,
+        sensitive_action_safe_mode=True,
+        create_library_agents_for_sub_graphs=False,
+    )
+
+    if created_graph.is_active:
+        created_graph = await on_graph_activate(created_graph, user_id=user_id)
+
+    return created_graph, library_agents[0]
+
+
+async def update_graph_in_library(
+    graph: graph_db.Graph,
+    user_id: str,
+) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]:
+    """Create a new version of an existing graph and update the library entry."""
+    existing_versions = await graph_db.get_graph_all_versions(graph.id, user_id)
+    current_active_version = (
+        next((v for v in existing_versions if v.is_active), None)
+        if existing_versions
+        else None
+    )
+    graph.version = (
+        max(v.version for v in existing_versions) + 1 if existing_versions else 1
+    )
+
+    graph_model = graph_db.make_graph_model(graph, user_id)
+    graph_model.reassign_ids(user_id=user_id, reassign_graph_id=False)
+
+    created_graph = await graph_db.create_graph(graph_model, user_id)
+
+    library_agent = await get_library_agent_by_graph_id(user_id, created_graph.id)
+    if not library_agent:
+        raise NotFoundError(f"Library agent not found for graph {created_graph.id}")
+
+    library_agent = await update_library_agent_version_and_settings(
+        user_id, created_graph
+    )
+
+    if created_graph.is_active:
+        created_graph = await on_graph_activate(created_graph, user_id=user_id)
+        await graph_db.set_graph_active_version(
+            graph_id=created_graph.id,
+            version=created_graph.version,
+            user_id=user_id,
+        )
+        if current_active_version:
+            await on_graph_deactivate(current_active_version, user_id=user_id)
+
+    return created_graph, library_agent
+
+
+async def update_library_agent_version_and_settings(
+    user_id: str, agent_graph: graph_db.GraphModel
+) -> library_model.LibraryAgent:
+    """Update library agent to point to new graph version and sync settings."""
+    library = await update_agent_version_in_library(
+        user_id, agent_graph.id, agent_graph.version
+    )
+    updated_settings = GraphSettings.from_graph(
+        graph=agent_graph,
+        hitl_safe_mode=library.settings.human_in_the_loop_safe_mode,
+        sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode,
+    )
+    if updated_settings != library.settings:
+        library = await update_library_agent(
+            library_agent_id=library.id,
+            user_id=user_id,
+            settings=updated_settings,
+        )
+    return library
+
+
 async def update_library_agent(
     library_agent_id: str,
     user_id: str,
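The new-version calculation in update_graph_in_library above is just max + 1 with a fallback for the first version; for example:

existing_versions = [1, 2, 3]  # versions already stored for the graph
print(max(existing_versions) + 1 if existing_versions else 1)  # 4

existing_versions = []
print(max(existing_versions) + 1 if existing_versions else 1)  # 1, brand-new graph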
@@ -1039,7 +1129,7 @@ async def create_preset_from_graph_execution(
 async def update_preset(
     user_id: str,
     preset_id: str,
-    inputs: Optional[BlockInput] = None,
+    inputs: Optional[GraphInput] = None,
     credentials: Optional[dict[str, CredentialsMetaInput]] = None,
     name: Optional[str] = None,
     description: Optional[str] = None,
@@ -6,9 +6,13 @@ import prisma.enums
 import prisma.models
 import pydantic

-from backend.data.block import BlockInput
 from backend.data.graph import GraphModel, GraphSettings, GraphTriggerInfo
-from backend.data.model import CredentialsMetaInput, is_credentials_field_name
+from backend.data.model import (
+    CredentialsMetaInput,
+    GraphInput,
+    is_credentials_field_name,
+)
+from backend.util.json import loads as json_loads
 from backend.util.models import Pagination

 if TYPE_CHECKING:
@@ -16,10 +20,10 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
|
|
||||||
class LibraryAgentStatus(str, Enum):
|
class LibraryAgentStatus(str, Enum):
|
||||||
COMPLETED = "COMPLETED" # All runs completed
|
COMPLETED = "COMPLETED"
|
||||||
HEALTHY = "HEALTHY" # Agent is running (not all runs have completed)
|
HEALTHY = "HEALTHY"
|
||||||
WAITING = "WAITING" # Agent is queued or waiting to start
|
WAITING = "WAITING"
|
||||||
ERROR = "ERROR" # Agent is in an error state
|
ERROR = "ERROR"
|
||||||
|
|
||||||
|
|
||||||
class MarketplaceListingCreator(pydantic.BaseModel):
|
class MarketplaceListingCreator(pydantic.BaseModel):
|
||||||
@@ -39,6 +43,30 @@ class MarketplaceListing(pydantic.BaseModel):
|
|||||||
creator: MarketplaceListingCreator
|
creator: MarketplaceListingCreator
|
||||||
|
|
||||||
|
|
||||||
|
class RecentExecution(pydantic.BaseModel):
|
||||||
|
"""Summary of a recent execution for quality assessment.
|
||||||
|
|
||||||
|
Used by the LLM to understand the agent's recent performance with specific examples
|
||||||
|
rather than just aggregate statistics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
status: str
|
||||||
|
correctness_score: float | None = None
|
||||||
|
activity_summary: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_settings(settings: dict | str | None) -> GraphSettings:
|
||||||
|
"""Parse settings from database, handling both dict and string formats."""
|
||||||
|
if settings is None:
|
||||||
|
return GraphSettings()
|
||||||
|
try:
|
||||||
|
if isinstance(settings, str):
|
||||||
|
settings = json_loads(settings)
|
||||||
|
return GraphSettings.model_validate(settings)
|
||||||
|
except Exception:
|
||||||
|
return GraphSettings()
|
||||||
|
|
||||||
|
|
||||||
class LibraryAgent(pydantic.BaseModel):
    """
    Represents an agent in the library, including metadata for display and

@@ -48,7 +76,7 @@ class LibraryAgent(pydantic.BaseModel):
    id: str
    graph_id: str
    graph_version: int
-    owner_user_id: str  # ID of user who owns/created this agent graph
+    owner_user_id: str

    image_url: str | None

@@ -64,7 +92,7 @@ class LibraryAgent(pydantic.BaseModel):
    description: str
    instructions: str | None = None

-    input_schema: dict[str, Any]  # Should be BlockIOObjectSubSchema in frontend
+    input_schema: dict[str, Any]
    output_schema: dict[str, Any]
    credentials_input_schema: dict[str, Any] | None = pydantic.Field(
        description="Input schema for credentials required by the agent",

@@ -81,25 +109,19 @@ class LibraryAgent(pydantic.BaseModel):
    )
    trigger_setup_info: Optional[GraphTriggerInfo] = None

-    # Indicates whether there's a new output (based on recent runs)
    new_output: bool
+    execution_count: int = 0
-    # Whether the user can access the underlying graph
+    success_rate: float | None = None
+    avg_correctness_score: float | None = None
+    recent_executions: list[RecentExecution] = pydantic.Field(
+        default_factory=list,
+        description="List of recent executions with status, score, and summary",
+    )
    can_access_graph: bool

-    # Indicates if this agent is the latest version
    is_latest_version: bool

-    # Whether the agent is marked as favorite by the user
    is_favorite: bool

-    # Recommended schedule cron (from marketplace agents)
    recommended_schedule_cron: str | None = None

-    # User-specific settings for this library agent
    settings: GraphSettings = pydantic.Field(default_factory=GraphSettings)

-    # Marketplace listing information if the agent has been published
    marketplace_listing: Optional["MarketplaceListing"] = None

    @staticmethod

@@ -123,7 +145,6 @@ class LibraryAgent(pydantic.BaseModel):
        agent_updated_at = agent.AgentGraph.updatedAt
        lib_agent_updated_at = agent.updatedAt

-        # Compute updated_at as the latest between library agent and graph
        updated_at = (
            max(agent_updated_at, lib_agent_updated_at)
            if agent_updated_at

@@ -136,7 +157,6 @@ class LibraryAgent(pydantic.BaseModel):
        creator_name = agent.Creator.name or "Unknown"
        creator_image_url = agent.Creator.avatarUrl or ""

-        # Logic to calculate status and new_output
        week_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
            days=7
        )

@@ -145,13 +165,55 @@ class LibraryAgent(pydantic.BaseModel):
        status = status_result.status
        new_output = status_result.new_output

-        # Check if user can access the graph
-        can_access_graph = agent.AgentGraph.userId == agent.userId
+        execution_count = len(executions)
+        success_rate: float | None = None
+        avg_correctness_score: float | None = None
+        if execution_count > 0:
+            success_count = sum(
+                1
+                for e in executions
+                if e.executionStatus == prisma.enums.AgentExecutionStatus.COMPLETED
+            )
+            success_rate = (success_count / execution_count) * 100

-        # Hard-coded to True until a method to check is implemented
+            correctness_scores = []
+            for e in executions:
+                if e.stats and isinstance(e.stats, dict):
+                    score = e.stats.get("correctness_score")
+                    if score is not None and isinstance(score, (int, float)):
+                        correctness_scores.append(float(score))
+            if correctness_scores:
+                avg_correctness_score = sum(correctness_scores) / len(
+                    correctness_scores
+                )

+        recent_executions: list[RecentExecution] = []
+        for e in executions:
+            exec_score: float | None = None
+            exec_summary: str | None = None
+            if e.stats and isinstance(e.stats, dict):
+                score = e.stats.get("correctness_score")
+                if score is not None and isinstance(score, (int, float)):
+                    exec_score = float(score)
+                summary = e.stats.get("activity_status")
+                if summary is not None and isinstance(summary, str):
+                    exec_summary = summary
+            exec_status = (
+                e.executionStatus.value
+                if hasattr(e.executionStatus, "value")
+                else str(e.executionStatus)
+            )
+            recent_executions.append(
+                RecentExecution(
+                    status=exec_status,
+                    correctness_score=exec_score,
+                    activity_summary=exec_summary,
+                )
+            )

+        can_access_graph = agent.AgentGraph.userId == agent.userId
        is_latest_version = True

-        # Build marketplace_listing if available
        marketplace_listing_data = None
        if store_listing and store_listing.ActiveVersion and profile:
            creator_data = MarketplaceListingCreator(

@@ -190,11 +252,15 @@ class LibraryAgent(pydantic.BaseModel):
            has_sensitive_action=graph.has_sensitive_action,
            trigger_setup_info=graph.trigger_setup_info,
            new_output=new_output,
+            execution_count=execution_count,
+            success_rate=success_rate,
+            avg_correctness_score=avg_correctness_score,
+            recent_executions=recent_executions,
            can_access_graph=can_access_graph,
            is_latest_version=is_latest_version,
            is_favorite=agent.isFavorite,
            recommended_schedule_cron=agent.AgentGraph.recommendedScheduleCron,
-            settings=GraphSettings.model_validate(agent.settings),
+            settings=_parse_settings(agent.settings),
            marketplace_listing=marketplace_listing_data,
        )

@@ -220,18 +286,15 @@ def _calculate_agent_status(
    if not executions:
        return AgentStatusResult(status=LibraryAgentStatus.COMPLETED, new_output=False)

-    # Track how many times each execution status appears
    status_counts = {status: 0 for status in prisma.enums.AgentExecutionStatus}
    new_output = False

    for execution in executions:
-        # Check if there's a completed run more recent than `recent_threshold`
        if execution.createdAt >= recent_threshold:
            if execution.executionStatus == prisma.enums.AgentExecutionStatus.COMPLETED:
                new_output = True
        status_counts[execution.executionStatus] += 1

-    # Determine the final status based on counts
    if status_counts[prisma.enums.AgentExecutionStatus.FAILED] > 0:
        return AgentStatusResult(status=LibraryAgentStatus.ERROR, new_output=new_output)
    elif status_counts[prisma.enums.AgentExecutionStatus.QUEUED] > 0:

@@ -263,7 +326,7 @@ class LibraryAgentPresetCreatable(pydantic.BaseModel):
    graph_id: str
    graph_version: int

-    inputs: BlockInput
+    inputs: GraphInput
    credentials: dict[str, CredentialsMetaInput]

    name: str

@@ -292,7 +355,7 @@ class LibraryAgentPresetUpdatable(pydantic.BaseModel):
    Request model used when updating a preset for a library agent.
    """

-    inputs: Optional[BlockInput] = None
+    inputs: Optional[GraphInput] = None
    credentials: Optional[dict[str, CredentialsMetaInput]] = None

    name: Optional[str] = None

@@ -335,7 +398,7 @@ class LibraryAgentPreset(LibraryAgentPresetCreatable):
                "Webhook must be included in AgentPreset query when webhookId is set"
            )

-        input_data: BlockInput = {}
+        input_data: GraphInput = {}
        input_credentials: dict[str, CredentialsMetaInput] = {}

        for preset_input in preset.InputPresets:
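The `_calculate_agent_status` hunk above counts executions per status and then resolves a single `LibraryAgentStatus`, checking FAILED before QUEUED; the rest of the precedence order falls outside the visible hunk. A self-contained sketch of the same count-then-prioritise pattern, with the fallback branch assumed:

from collections import Counter

def derive_status(statuses: list[str]) -> str:
    # Counting first turns the precedence decision into simple lookups.
    counts = Counter(statuses)
    if counts["FAILED"]:
        return "ERROR"
    if counts["QUEUED"]:
        return "WAITING"
    return "COMPLETED"  # assumed fallback; the real order continues past the hunk

print(derive_status(["COMPLETED", "QUEUED"]))  # WAITING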
@@ -5,8 +5,8 @@ from typing import Optional
import aiohttp
from fastapi import HTTPException

+from backend.blocks import get_block
from backend.data import graph as graph_db
-from backend.data.block import get_block
from backend.util.settings import Settings

from .models import ApiResponse, ChatRequest, GraphData

@@ -152,7 +152,7 @@ class BlockHandler(ContentHandler):

    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """Fetch blocks without embeddings."""
-        from backend.data.block import get_blocks
+        from backend.blocks import get_blocks

        # Get all available blocks
        all_blocks = get_blocks()

@@ -249,7 +249,7 @@ class BlockHandler(ContentHandler):

    async def get_stats(self) -> dict[str, int]:
        """Get statistics about block embedding coverage."""
-        from backend.data.block import get_blocks
+        from backend.blocks import get_blocks

        all_blocks = get_blocks()


@@ -93,7 +93,7 @@ async def test_block_handler_get_missing_items(mocker):
    mock_existing = []

    with patch(
-        "backend.data.block.get_blocks",
+        "backend.blocks.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(

@@ -135,7 +135,7 @@ async def test_block_handler_get_stats(mocker):
    mock_embedded = [{"count": 2}]

    with patch(
-        "backend.data.block.get_blocks",
+        "backend.blocks.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(

@@ -327,7 +327,7 @@ async def test_block_handler_handles_missing_attributes():
    mock_blocks = {"block-minimal": mock_block_class}

    with patch(
-        "backend.data.block.get_blocks",
+        "backend.blocks.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(

@@ -360,7 +360,7 @@ async def test_block_handler_skips_failed_blocks():
    mock_blocks = {"good-block": good_block, "bad-block": bad_block}

    with patch(
-        "backend.data.block.get_blocks",
+        "backend.blocks.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(


@@ -1,7 +1,7 @@
import asyncio
import logging
from datetime import datetime, timezone
-from typing import Any, Literal
+from typing import Any, Literal, overload

import fastapi
import prisma.enums

@@ -11,8 +11,8 @@ import prisma.types

from backend.data.db import transaction
from backend.data.graph import (
-    GraphMeta,
    GraphModel,
+    GraphModelWithoutNodes,
    get_graph,
    get_graph_as_admin,
    get_sub_graphs,

@@ -112,6 +112,7 @@ async def get_store_agents(
                description=agent["description"],
                runs=agent["runs"],
                rating=agent["rating"],
+                agent_graph_id=agent.get("agentGraphId", ""),
            )
            store_agents.append(store_agent)
        except Exception as e:

@@ -170,6 +171,7 @@ async def get_store_agents(
                description=agent.description,
                runs=agent.runs,
                rating=agent.rating,
+                agent_graph_id=agent.agentGraphId,
            )
            # Add to the list only if creation was successful
            store_agents.append(store_agent)

@@ -332,7 +334,22 @@ async def get_store_agent_details(
        raise DatabaseError("Failed to fetch agent details") from e


-async def get_available_graph(store_listing_version_id: str) -> GraphMeta:
+@overload
+async def get_available_graph(
+    store_listing_version_id: str, hide_nodes: Literal[False]
+) -> GraphModel: ...
+
+
+@overload
+async def get_available_graph(
+    store_listing_version_id: str, hide_nodes: Literal[True] = True
+) -> GraphModelWithoutNodes: ...
+
+
+async def get_available_graph(
+    store_listing_version_id: str,
+    hide_nodes: bool = True,
+) -> GraphModelWithoutNodes | GraphModel:
    try:
        # Get avaialble, non-deleted store listing version
        store_listing_version = (

@@ -342,7 +359,7 @@ async def get_available_graph(store_listing_version_id: str) -> GraphMeta:
                    "isAvailable": True,
                    "isDeleted": False,
                },
-                include={"AgentGraph": {"include": {"Nodes": True}}},
+                include={"AgentGraph": {"include": AGENT_GRAPH_INCLUDE}},
            )
        )

@@ -352,7 +369,9 @@ async def get_available_graph(store_listing_version_id: str) -> GraphMeta:
                detail=f"Store listing version {store_listing_version_id} not found",
            )

-        return GraphModel.from_db(store_listing_version.AgentGraph).meta()
+        return (GraphModelWithoutNodes if hide_nodes else GraphModel).from_db(
+            store_listing_version.AgentGraph
+        )

    except Exception as e:
        logger.error(f"Error getting agent: {e}")
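The `@overload` pair above lets a type checker narrow the return type of `get_available_graph` from the `hide_nodes` literal at each call site, while the single runtime implementation returns the union. A reduced sketch of the same typing pattern with placeholder types:

from typing import Literal, overload

class Meta: ...
class Full(Meta): ...

@overload
def load(item_id: str, hide_nodes: Literal[False]) -> Full: ...
@overload
def load(item_id: str, hide_nodes: Literal[True] = True) -> Meta: ...

def load(item_id: str, hide_nodes: bool = True) -> Meta | Full:
    # Only this implementation exists at runtime; the overloads are type-only.
    return Meta() if hide_nodes else Full()

full_item = load("some-id", hide_nodes=False)  # checker sees Full
meta_only = load("some-id")                    # checker sees Meta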
@@ -662,7 +662,7 @@ async def cleanup_orphaned_embeddings() -> dict[str, Any]:
        )
        current_ids = {row["id"] for row in valid_agents}
    elif content_type == ContentType.BLOCK:
-        from backend.data.block import get_blocks
+        from backend.blocks import get_blocks

        current_ids = set(get_blocks().keys())
    elif content_type == ContentType.DOCUMENTATION:


@@ -454,6 +454,9 @@ async def test_unified_hybrid_search_pagination(
    cleanup_embeddings: list,
):
    """Test unified search pagination works correctly."""
+    # Use a unique search term to avoid matching other test data
+    unique_term = f"xyzpagtest{uuid.uuid4().hex[:8]}"
+
    # Create multiple items
    content_ids = []
    for i in range(5):

@@ -465,14 +468,14 @@ async def test_unified_hybrid_search_pagination(
            content_type=ContentType.BLOCK,
            content_id=content_id,
            embedding=mock_embedding,
-            searchable_text=f"pagination test item number {i}",
+            searchable_text=f"{unique_term} item number {i}",
            metadata={"index": i},
            user_id=None,
        )

    # Get first page
    page1_results, total1 = await unified_hybrid_search(
-        query="pagination test",
+        query=unique_term,
        content_types=[ContentType.BLOCK],
        page=1,
        page_size=2,

@@ -480,7 +483,7 @@ async def test_unified_hybrid_search_pagination(

    # Get second page
    page2_results, total2 = await unified_hybrid_search(
-        query="pagination test",
+        query=unique_term,
        content_types=[ContentType.BLOCK],
        page=2,
        page_size=2,


@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.

import logging
import re
+import time
from dataclasses import dataclass
from typing import Any, Literal

@@ -362,7 +363,11 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking

@@ -600,6 +605,7 @@ async def hybrid_search(
            sa.featured,
            sa.is_available,
            sa.updated_at,
+            sa."agentGraphId",
            -- Searchable text for BM25 reranking
            COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
            -- Semantic score

@@ -659,6 +665,7 @@ async def hybrid_search(
            featured,
            is_available,
            updated_at,
+            "agentGraphId",
            searchable_text,
            semantic_score,
            lexical_score,

@@ -684,7 +691,11 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0

@@ -716,6 +727,87 @@ async def hybrid_search_simple(
    return await hybrid_search(query=query, page=page, page_size=page_size)


+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: only log vector error diagnostics once per this interval
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+_last_vector_diag_time: float = 0
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log diagnostic info when 'type vector does not exist' error occurs.
+
+    Note: Diagnostic queries use query_raw_with_schema which may run on a different
+    pooled connection than the one that failed. Session-level search_path can differ,
+    so these diagnostics show cluster-wide state, not necessarily the failed session.
+
+    Includes rate limiting to avoid log spam - only logs once per minute.
+    Caller should re-raise the error after calling this function.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not (
+        "type" in error_str and "vector" in error_str and "does not exist" in error_str
+    ):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.time()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now
+
+    try:
+        diagnostics: dict[str, object] = {}
+
+        try:
+            search_path_result = await query_raw_with_schema("SHOW search_path")
+            diagnostics["search_path"] = search_path_result
+        except Exception as e:
+            diagnostics["search_path"] = f"Error: {e}"
+
+        try:
+            schema_result = await query_raw_with_schema("SELECT current_schema()")
+            diagnostics["current_schema"] = schema_result
+        except Exception as e:
+            diagnostics["current_schema"] = f"Error: {e}"
+
+        try:
+            user_result = await query_raw_with_schema(
+                "SELECT current_user, session_user, current_database()"
+            )
+            diagnostics["user_info"] = user_result
+        except Exception as e:
+            diagnostics["user_info"] = f"Error: {e}"
+
+        try:
+            # Check pgvector extension installation (cluster-wide, stable info)
+            ext_result = await query_raw_with_schema(
+                "SELECT extname, extversion, nspname as schema "
+                "FROM pg_extension e "
+                "JOIN pg_namespace n ON e.extnamespace = n.oid "
+                "WHERE extname = 'vector'"
+            )
+            diagnostics["pgvector_extension"] = ext_result
+        except Exception as e:
+            diagnostics["pgvector_extension"] = f"Error: {e}"
+
+        logger.error(
+            f"Vector type error diagnostics:\n"
+            f" Error: {error}\n"
+            f" search_path: {diagnostics.get('search_path')}\n"
+            f" current_schema: {diagnostics.get('current_schema')}\n"
+            f" user_info: {diagnostics.get('user_info')}\n"
+            f" pgvector_extension: {diagnostics.get('pgvector_extension')}"
+        )
+    except Exception as diag_error:
+        logger.error(f"Failed to collect vector error diagnostics: {diag_error}")


# Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
# for existing code that expects the popularity parameter
HybridSearchWeights = StoreAgentSearchWeights
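`_log_vector_error_diagnostics` guards its logging with a module-level timestamp so a burst of identical pgvector errors produces at most one diagnostic dump per interval. The same rate-limiting idea in isolation (names here are placeholders):

import time

_LOG_INTERVAL_SECONDS = 60
_last_log_time: float = 0.0

def log_once_per_interval(message: str) -> None:
    global _last_log_time
    now = time.time()
    if now - _last_log_time < _LOG_INTERVAL_SECONDS:
        return  # drop the message to avoid log spam
    _last_log_time = now
    print(message)  # stand-in for logger.error(...)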
@@ -7,16 +7,7 @@ from replicate.client import Client as ReplicateClient
from replicate.exceptions import ReplicateError
from replicate.helpers import FileOutput

-from backend.blocks.ideogram import (
-    AspectRatio,
-    ColorPalettePreset,
-    IdeogramModelBlock,
-    IdeogramModelName,
-    MagicPromptOption,
-    StyleType,
-    UpscaleOption,
-)
-from backend.data.graph import BaseGraph
+from backend.data.graph import GraphBaseMeta
from backend.data.model import CredentialsMetaInput, ProviderName
from backend.integrations.credentials_store import ideogram_credentials
from backend.util.request import Requests

@@ -34,14 +25,14 @@ class ImageStyle(str, Enum):
    DIGITAL_ART = "digital art"


-async def generate_agent_image(agent: BaseGraph | AgentGraph) -> io.BytesIO:
+async def generate_agent_image(agent: GraphBaseMeta | AgentGraph) -> io.BytesIO:
    if settings.config.use_agent_image_generation_v2:
        return await generate_agent_image_v2(graph=agent)
    else:
        return await generate_agent_image_v1(agent=agent)


-async def generate_agent_image_v2(graph: BaseGraph | AgentGraph) -> io.BytesIO:
+async def generate_agent_image_v2(graph: GraphBaseMeta | AgentGraph) -> io.BytesIO:
    """
    Generate an image for an agent using Ideogram model.
    Returns:

@@ -50,18 +41,31 @@ async def generate_agent_image_v2(graph: BaseGraph | AgentGraph) -> io.BytesIO:
    if not ideogram_credentials.api_key:
        raise ValueError("Missing Ideogram API key")

+    from backend.blocks.ideogram import (
+        AspectRatio,
+        ColorPalettePreset,
+        IdeogramModelBlock,
+        IdeogramModelName,
+        MagicPromptOption,
+        StyleType,
+        UpscaleOption,
+    )
+
    name = graph.name
    description = f"{name} ({graph.description})" if graph.description else name

    prompt = (
-        f"Create a visually striking retro-futuristic vector pop art illustration prominently featuring "
-        f'"{name}" in bold typography. The image clearly and literally depicts a {description}, '
-        f"along with recognizable objects directly associated with the primary function of a {name}. "
-        f"Ensure the imagery is concrete, intuitive, and immediately understandable, clearly conveying the "
-        f"purpose of a {name}. Maintain vibrant, limited-palette colors, sharp vector lines, geometric "
-        f"shapes, flat illustration techniques, and solid colors without gradients or shading. Preserve a "
-        f"retro-futuristic aesthetic influenced by mid-century futurism and 1960s psychedelia, "
-        f"prioritizing clear visual storytelling and thematic clarity above all else."
+        "Create a visually striking retro-futuristic vector pop art illustration "
+        f'prominently featuring "{name}" in bold typography. The image clearly and '
+        f"literally depicts a {description}, along with recognizable objects directly "
+        f"associated with the primary function of a {name}. "
+        f"Ensure the imagery is concrete, intuitive, and immediately understandable, "
+        f"clearly conveying the purpose of a {name}. "
+        "Maintain vibrant, limited-palette colors, sharp vector lines, "
+        "geometric shapes, flat illustration techniques, and solid colors "
+        "without gradients or shading. Preserve a retro-futuristic aesthetic "
+        "influenced by mid-century futurism and 1960s psychedelia, "
+        "prioritizing clear visual storytelling and thematic clarity above all else."
    )

    custom_colors = [

@@ -99,12 +103,12 @@ async def generate_agent_image_v2(graph: BaseGraph | AgentGraph) -> io.BytesIO:
        return io.BytesIO(response.content)


-async def generate_agent_image_v1(agent: BaseGraph | AgentGraph) -> io.BytesIO:
+async def generate_agent_image_v1(agent: GraphBaseMeta | AgentGraph) -> io.BytesIO:
    """
    Generate an image for an agent using Flux model via Replicate API.

    Args:
-        agent (Graph): The agent to generate an image for
+        agent (GraphBaseMeta | AgentGraph): The agent to generate an image for

    Returns:
        io.BytesIO: The generated image as bytes

@@ -114,7 +118,13 @@ async def generate_agent_image_v1(agent: BaseGraph | AgentGraph) -> io.BytesIO:
        raise ValueError("Missing Replicate API key in settings")

    # Construct prompt from agent details
-    prompt = f"Create a visually engaging app store thumbnail for the AI agent that highlights what it does in a clear and captivating way:\n- **Name**: {agent.name}\n- **Description**: {agent.description}\nFocus on showcasing its core functionality with an appealing design."
+    prompt = (
+        "Create a visually engaging app store thumbnail for the AI agent "
+        "that highlights what it does in a clear and captivating way:\n"
+        f"- **Name**: {agent.name}\n"
+        f"- **Description**: {agent.description}\n"
+        f"Focus on showcasing its core functionality with an appealing design."
+    )

    # Set up Replicate client
    client = ReplicateClient(api_token=settings.secrets.replicate_api_key)
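The Ideogram enums are now imported inside `generate_agent_image_v2` rather than at module level, deferring the dependency until that code path actually runs. A tiny sketch of the deferred-import pattern with a standard-library stand-in:

def build_report() -> str:
    # Importing inside the function delays loading until first call, which is a
    # common way to break import cycles or keep optional dependencies optional.
    import json
    return json.dumps({"ok": True})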
@@ -38,6 +38,7 @@ class StoreAgent(pydantic.BaseModel):
    description: str
    runs: int
    rating: float
+    agent_graph_id: str


class StoreAgentsResponse(pydantic.BaseModel):


@@ -26,11 +26,13 @@ def test_store_agent():
        description="Test description",
        runs=50,
        rating=4.5,
+        agent_graph_id="test-graph-id",
    )
    assert agent.slug == "test-agent"
    assert agent.agent_name == "Test Agent"
    assert agent.runs == 50
    assert agent.rating == 4.5
+    assert agent.agent_graph_id == "test-graph-id"


def test_store_agents_response():

@@ -46,6 +48,7 @@ def test_store_agents_response():
                description="Test description",
                runs=50,
                rating=4.5,
+                agent_graph_id="test-graph-id",
            )
        ],
        pagination=store_model.Pagination(


@@ -278,7 +278,7 @@ async def get_agent(
)
async def get_graph_meta_by_store_listing_version_id(
    store_listing_version_id: str,
-) -> backend.data.graph.GraphMeta:
+) -> backend.data.graph.GraphModelWithoutNodes:
    """
    Get Agent Graph from Store Listing Version ID.
    """


@@ -82,6 +82,7 @@ def test_get_agents_featured(
                description="Featured agent description",
                runs=100,
                rating=4.5,
+                agent_graph_id="test-graph-1",
            )
        ],
        pagination=store_model.Pagination(

@@ -127,6 +128,7 @@ def test_get_agents_by_creator(
                description="Creator agent description",
                runs=50,
                rating=4.0,
+                agent_graph_id="test-graph-2",
            )
        ],
        pagination=store_model.Pagination(

@@ -172,6 +174,7 @@ def test_get_agents_sorted(
                description="Top agent description",
                runs=1000,
                rating=5.0,
+                agent_graph_id="test-graph-3",
            )
        ],
        pagination=store_model.Pagination(

@@ -217,6 +220,7 @@ def test_get_agents_search(
                description="Specific search term description",
                runs=75,
                rating=4.2,
+                agent_graph_id="test-graph-search",
            )
        ],
        pagination=store_model.Pagination(

@@ -262,6 +266,7 @@ def test_get_agents_category(
                description="Category agent description",
                runs=60,
                rating=4.1,
+                agent_graph_id="test-graph-category",
            )
        ],
        pagination=store_model.Pagination(

@@ -306,6 +311,7 @@ def test_get_agents_pagination(
                description=f"Agent {i} description",
                runs=i * 10,
                rating=4.0,
+                agent_graph_id="test-graph-2",
            )
            for i in range(5)
        ],


@@ -33,6 +33,7 @@ class TestCacheDeletion:
                description="Test description",
                runs=100,
                rating=4.5,
+                agent_graph_id="test-graph-id",
            )
        ],
        pagination=Pagination(


@@ -40,10 +40,11 @@ from backend.api.model (
    UpdateTimezoneRequest,
    UploadFileResponse,
)
+from backend.blocks import get_block, get_blocks
from backend.data import execution as execution_db
from backend.data import graph as graph_db
from backend.data.auth import api_key as api_key_db
-from backend.data.block import BlockInput, CompletedBlockOutput, get_block, get_blocks
+from backend.data.block import BlockInput, CompletedBlockOutput
from backend.data.credit import (
    AutoTopUpConfig,
    RefundRequest,

@@ -101,7 +102,6 @@ from backend.util.timezone_utils import (
from backend.util.virus_scanner import scan_content_safe

from .library import db as library_db
-from .library import model as library_model
from .store.model import StoreAgentDetails


@@ -823,18 +823,16 @@ async def update_graph(
    graph: graph_db.Graph,
    user_id: Annotated[str, Security(get_user_id)],
) -> graph_db.GraphModel:
-    # Sanity check
    if graph.id and graph.id != graph_id:
        raise HTTPException(400, detail="Graph ID does not match ID in URI")

-    # Determine new version
    existing_versions = await graph_db.get_graph_all_versions(graph_id, user_id=user_id)
    if not existing_versions:
        raise HTTPException(404, detail=f"Graph #{graph_id} not found")
-    latest_version_number = max(g.version for g in existing_versions)
-    graph.version = latest_version_number + 1

+    graph.version = max(g.version for g in existing_versions) + 1
    current_active_version = next((v for v in existing_versions if v.is_active), None)

    graph = graph_db.make_graph_model(graph, user_id)
    graph.reassign_ids(user_id=user_id, reassign_graph_id=False)
    graph.validate_graph(for_run=False)

@@ -842,27 +840,23 @@ async def update_graph(
    new_graph_version = await graph_db.create_graph(graph, user_id=user_id)

    if new_graph_version.is_active:
-        # Keep the library agent up to date with the new active version
-        await _update_library_agent_version_and_settings(user_id, new_graph_version)
+        await library_db.update_library_agent_version_and_settings(
+            user_id, new_graph_version
+        )
-        # Handle activation of the new graph first to ensure continuity
        new_graph_version = await on_graph_activate(new_graph_version, user_id=user_id)
-        # Ensure new version is the only active version
        await graph_db.set_graph_active_version(
            graph_id=graph_id, version=new_graph_version.version, user_id=user_id
        )
        if current_active_version:
-            # Handle deactivation of the previously active version
            await on_graph_deactivate(current_active_version, user_id=user_id)

-    # Fetch new graph version *with sub-graphs* (needed for credentials input schema)
    new_graph_version_with_subgraphs = await graph_db.get_graph(
        graph_id,
        new_graph_version.version,
        user_id=user_id,
        include_subgraphs=True,
    )
-    assert new_graph_version_with_subgraphs  # make type checker happy
+    assert new_graph_version_with_subgraphs
    return new_graph_version_with_subgraphs


@@ -900,33 +894,15 @@ async def set_graph_active_version(
    )

    # Keep the library agent up to date with the new active version
-    await _update_library_agent_version_and_settings(user_id, new_active_graph)
+    await library_db.update_library_agent_version_and_settings(
+        user_id, new_active_graph
+    )

    if current_active_graph and current_active_graph.version != new_active_version:
        # Handle deactivation of the previously active version
        await on_graph_deactivate(current_active_graph, user_id=user_id)


-async def _update_library_agent_version_and_settings(
-    user_id: str, agent_graph: graph_db.GraphModel
-) -> library_model.LibraryAgent:
-    library = await library_db.update_agent_version_in_library(
-        user_id, agent_graph.id, agent_graph.version
-    )
-    updated_settings = GraphSettings.from_graph(
-        graph=agent_graph,
-        hitl_safe_mode=library.settings.human_in_the_loop_safe_mode,
-        sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode,
-    )
-    if updated_settings != library.settings:
-        library = await library_db.update_library_agent(
-            library_agent_id=library.id,
-            user_id=user_id,
-            settings=updated_settings,
-        )
-    return library


@v1_router.patch(
    path="/graphs/{graph_id}/settings",
    summary="Update graph settings",
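In `update_graph` above, the new version number is now computed inline as `max(g.version for g in existing_versions) + 1`, and the activation order is preserved: activate the new version, make it the only active one, then deactivate the previous one. A runnable sketch of that ordering with placeholder helpers (the real calls are `on_graph_activate`, `graph_db.set_graph_active_version`, and `on_graph_deactivate`):

import asyncio

async def activate(version: int) -> int:
    return version  # placeholder for on_graph_activate(...)

async def deactivate(version: int) -> None:
    pass  # placeholder for on_graph_deactivate(...)

async def swap_active(new: int, old: int | None) -> int:
    new = await activate(new)
    # set_graph_active_version(...) would run here to pin the new version
    if old is not None and old != new:
        await deactivate(old)
    return new

print(asyncio.run(swap_active(max([1, 2, 3]) + 1, 3)))  # 4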
@@ -40,6 +40,10 @@ import backend.data.user
import backend.integrations.webhooks.utils
import backend.util.service
import backend.util.settings
+from backend.api.features.chat.completion_consumer import (
+    start_completion_consumer,
+    stop_completion_consumer,
+)
from backend.blocks.llm import DEFAULT_LLM_MODEL
from backend.data.model import Credentials
from backend.integrations.providers import ProviderName

@@ -118,9 +122,21 @@ async def lifespan_context(app: fastapi.FastAPI):
    await backend.data.graph.migrate_llm_models(DEFAULT_LLM_MODEL)
    await backend.integrations.webhooks.utils.migrate_legacy_triggered_graphs()

+    # Start chat completion consumer for Redis Streams notifications
+    try:
+        await start_completion_consumer()
+    except Exception as e:
+        logger.warning(f"Could not start chat completion consumer: {e}")
+
    with launch_darkly_context():
        yield

+    # Stop chat completion consumer
+    try:
+        await stop_completion_consumer()
+    except Exception as e:
+        logger.warning(f"Error stopping chat completion consumer: {e}")
+
    try:
        await shutdown_cloud_storage_handler()
    except Exception as e:
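The lifespan hunk above brackets the application's `yield` with best-effort start and stop of the chat completion consumer, logging a warning instead of failing startup or shutdown. A stripped-down sketch of the pattern with stand-in consumer functions:

import contextlib
import logging

import fastapi

logger = logging.getLogger(__name__)

async def start_consumer() -> None: ...
async def stop_consumer() -> None: ...

@contextlib.asynccontextmanager
async def lifespan(app: fastapi.FastAPI):
    try:
        await start_consumer()
    except Exception as e:
        logger.warning(f"Could not start consumer: {e}")
    yield  # application serves requests here
    try:
        await stop_consumer()
    except Exception as e:
        logger.warning(f"Error stopping consumer: {e}")

app = fastapi.FastAPI(lifespan=lifespan)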
@@ -66,18 +66,24 @@ async def event_broadcaster(manager: ConnectionManager):
    execution_bus = AsyncRedisExecutionEventBus()
    notification_bus = AsyncRedisNotificationEventBus()

-    async def execution_worker():
-        async for event in execution_bus.listen("*"):
-            await manager.send_execution_update(event)
-
-    async def notification_worker():
-        async for notification in notification_bus.listen("*"):
-            await manager.send_notification(
-                user_id=notification.user_id,
-                payload=notification.payload,
-            )
-
-    await asyncio.gather(execution_worker(), notification_worker())
+    try:
+
+        async def execution_worker():
+            async for event in execution_bus.listen("*"):
+                await manager.send_execution_update(event)
+
+        async def notification_worker():
+            async for notification in notification_bus.listen("*"):
+                await manager.send_notification(
+                    user_id=notification.user_id,
+                    payload=notification.payload,
+                )
+
+        await asyncio.gather(execution_worker(), notification_worker())
+    finally:
+        # Ensure PubSub connections are closed on any exit to prevent leaks
+        await execution_bus.close()
+        await notification_bus.close()


async def authenticate_websocket(websocket: WebSocket) -> str:
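Wrapping the worker `gather` in `try`/`finally` means both Redis buses are closed whether the workers exit normally, raise, or are cancelled. A generic sketch of the same guarantee with placeholder resources:

import asyncio

class Bus:
    async def close(self) -> None:
        print("closed")

async def broadcaster() -> None:
    bus_a, bus_b = Bus(), Bus()
    try:
        await asyncio.gather(asyncio.sleep(0), asyncio.sleep(0))
    finally:
        # Runs on normal exit, on exceptions, and on task cancellation.
        await bus_a.close()
        await bus_b.close()

asyncio.run(broadcaster())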
@@ -3,22 +3,19 @@ import logging
import os
import re
from pathlib import Path
-from typing import TYPE_CHECKING, TypeVar
+from typing import Sequence, Type, TypeVar

+from backend.blocks._base import AnyBlockSchema, BlockType
from backend.util.cache import cached

logger = logging.getLogger(__name__)


-if TYPE_CHECKING:
-    from backend.data.block import Block

T = TypeVar("T")


@cached(ttl_seconds=3600)
-def load_all_blocks() -> dict[str, type["Block"]]:
-    from backend.data.block import Block
+def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
+    from backend.blocks._base import Block
    from backend.util.settings import Config

    # Check if example blocks should be loaded from settings

@@ -50,8 +47,8 @@ def load_all_blocks() -> dict[str, type["Block"]]:
        importlib.import_module(f".{module}", package=__name__)

    # Load all Block instances from the available modules
-    available_blocks: dict[str, type["Block"]] = {}
-    for block_cls in all_subclasses(Block):
+    available_blocks: dict[str, type["AnyBlockSchema"]] = {}
+    for block_cls in _all_subclasses(Block):
        class_name = block_cls.__name__

        if class_name.endswith("Base"):

@@ -64,7 +61,7 @@ def load_all_blocks() -> dict[str, type["Block"]]:
                "please name the class with 'Base' at the end"
            )

-        block = block_cls.create()
+        block = block_cls()  # pyright: ignore[reportAbstractUsage]

        if not isinstance(block.id, str) or len(block.id) != 36:
            raise ValueError(

@@ -105,7 +102,7 @@ def load_all_blocks() -> dict[str, type["Block"]]:
        available_blocks[block.id] = block_cls

    # Filter out blocks with incomplete auth configs, e.g. missing OAuth server secrets
-    from backend.data.block import is_block_auth_configured
+    from ._utils import is_block_auth_configured

    filtered_blocks = {}
    for block_id, block_cls in available_blocks.items():

@@ -115,11 +112,48 @@ def load_all_blocks() -> dict[str, type["Block"]]:
    return filtered_blocks


-__all__ = ["load_all_blocks"]
-
-
-def all_subclasses(cls: type[T]) -> list[type[T]]:
+def _all_subclasses(cls: type[T]) -> list[type[T]]:
    subclasses = cls.__subclasses__()
    for subclass in subclasses:
-        subclasses += all_subclasses(subclass)
+        subclasses += _all_subclasses(subclass)
    return subclasses


+# ============== Block access helper functions ============== #
+
+
+def get_blocks() -> dict[str, Type["AnyBlockSchema"]]:
+    return load_all_blocks()
+
+
+# Note on the return type annotation: https://github.com/microsoft/pyright/issues/10281
+def get_block(block_id: str) -> "AnyBlockSchema | None":
+    cls = get_blocks().get(block_id)
+    return cls() if cls else None
+
+
+@cached(ttl_seconds=3600)
+def get_webhook_block_ids() -> Sequence[str]:
+    return [
+        id
+        for id, B in get_blocks().items()
+        if B().block_type in (BlockType.WEBHOOK, BlockType.WEBHOOK_MANUAL)
+    ]
+
+
+@cached(ttl_seconds=3600)
+def get_io_block_ids() -> Sequence[str]:
+    return [
+        id
+        for id, B in get_blocks().items()
+        if B().block_type in (BlockType.INPUT, BlockType.OUTPUT)
+    ]
+
+
+@cached(ttl_seconds=3600)
+def get_human_in_the_loop_block_ids() -> Sequence[str]:
+    return [
+        id
+        for id, B in get_blocks().items()
+        if B().block_type == BlockType.HUMAN_IN_THE_LOOP
+    ]
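With the helpers above, callers resolve blocks through `backend.blocks` instead of `backend.data.block`: `get_blocks()` returns the id-to-class registry and `get_block()` instantiates a single block by id. A usage sketch (the id below is a placeholder, not a real block):

from backend.blocks import get_block, get_blocks

registry = get_blocks()  # {block_id: block class}
block = get_block("00000000-0000-0000-0000-000000000000")
if block is None:
    print("unknown block id")
else:
    print(block.block_type)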
autogpt_platform/backend/backend/blocks/_base.py (new file, 739 lines)
@@ -0,0 +1,739 @@
+import inspect
+import logging
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ClassVar,
+    Generic,
+    Optional,
+    Type,
+    TypeAlias,
+    TypeVar,
+    cast,
+    get_origin,
+)
+
+import jsonref
+import jsonschema
+from pydantic import BaseModel
+
+from backend.data.block import BlockInput, BlockOutput, BlockOutputEntry
+from backend.data.model import (
+    Credentials,
+    CredentialsFieldInfo,
+    CredentialsMetaInput,
+    SchemaField,
+    is_credentials_field_name,
+)
+from backend.integrations.providers import ProviderName
+from backend.util import json
+from backend.util.exceptions import (
+    BlockError,
+    BlockExecutionError,
+    BlockInputError,
+    BlockOutputError,
+    BlockUnknownError,
+)
+from backend.util.settings import Config
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from backend.data.execution import ExecutionContext
+    from backend.data.model import ContributorDetails, NodeExecutionStats
+
+    from ..data.graph import Link
+
+app_config = Config()
+
+
+BlockTestOutput = BlockOutputEntry | tuple[str, Callable[[Any], bool]]
+
+
+class BlockType(Enum):
+    STANDARD = "Standard"
+    INPUT = "Input"
+    OUTPUT = "Output"
+    NOTE = "Note"
+    WEBHOOK = "Webhook"
+    WEBHOOK_MANUAL = "Webhook (manual)"
+    AGENT = "Agent"
+    AI = "AI"
+    AYRSHARE = "Ayrshare"
+    HUMAN_IN_THE_LOOP = "Human In The Loop"
+
+
+class BlockCategory(Enum):
+    AI = "Block that leverages AI to perform a task."
+    SOCIAL = "Block that interacts with social media platforms."
+    TEXT = "Block that processes text data."
+    SEARCH = "Block that searches or extracts information from the internet."
+    BASIC = "Block that performs basic operations."
+    INPUT = "Block that interacts with input of the graph."
+    OUTPUT = "Block that interacts with output of the graph."
+    LOGIC = "Programming logic to control the flow of your agent"
+    COMMUNICATION = "Block that interacts with communication platforms."
+    DEVELOPER_TOOLS = "Developer tools such as GitHub blocks."
+    DATA = "Block that interacts with structured data."
+    HARDWARE = "Block that interacts with hardware."
+    AGENT = "Block that interacts with other agents."
+    CRM = "Block that interacts with CRM services."
+    SAFETY = (
+        "Block that provides AI safety mechanisms such as detecting harmful content"
+    )
+    PRODUCTIVITY = "Block that helps with productivity"
+    ISSUE_TRACKING = "Block that helps with issue tracking"
+    MULTIMEDIA = "Block that interacts with multimedia content"
+    MARKETING = "Block that helps with marketing"
+
+    def dict(self) -> dict[str, str]:
+        return {"category": self.name, "description": self.value}
+
+
+class BlockCostType(str, Enum):
+    RUN = "run"  # cost X credits per run
+    BYTE = "byte"  # cost X credits per byte
+    SECOND = "second"  # cost X credits per second
+
+
+class BlockCost(BaseModel):
+    cost_amount: int
+    cost_filter: BlockInput
+    cost_type: BlockCostType
+
+    def __init__(
+        self,
+        cost_amount: int,
+        cost_type: BlockCostType = BlockCostType.RUN,
+        cost_filter: Optional[BlockInput] = None,
+        **data: Any,
+    ) -> None:
+        super().__init__(
+            cost_amount=cost_amount,
+            cost_filter=cost_filter or {},
+            cost_type=cost_type,
+            **data,
+        )
+
+
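`BlockCost.__init__` above defaults `cost_type` to per-run and `cost_filter` to an empty dict, so most call sites only need to pass an amount. An illustrative sketch (the filter contents are made up):

flat_cost = BlockCost(5)                                    # 5 credits per run
per_second = BlockCost(1, cost_type=BlockCostType.SECOND)   # metered per second
filtered = BlockCost(10, cost_filter={"model": "example"})  # only when inputs match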
+class BlockInfo(BaseModel):
+    id: str
+    name: str
+    inputSchema: dict[str, Any]
+    outputSchema: dict[str, Any]
+    costs: list[BlockCost]
+    description: str
+    categories: list[dict[str, str]]
+    contributors: list[dict[str, Any]]
+    staticOutput: bool
+    uiType: str
+
+
+class BlockSchema(BaseModel):
+    cached_jsonschema: ClassVar[dict[str, Any]]
+
+    @classmethod
+    def jsonschema(cls) -> dict[str, Any]:
+        if cls.cached_jsonschema:
+            return cls.cached_jsonschema
+
+        model = jsonref.replace_refs(cls.model_json_schema(), merge_props=True)
+
+        def ref_to_dict(obj):
+            if isinstance(obj, dict):
+                # OpenAPI <3.1 does not support sibling fields that has a $ref key
+                # So sometimes, the schema has an "allOf"/"anyOf"/"oneOf" with 1 item.
+                keys = {"allOf", "anyOf", "oneOf"}
+                one_key = next((k for k in keys if k in obj and len(obj[k]) == 1), None)
+                if one_key:
+                    obj.update(obj[one_key][0])
+
+                return {
+                    key: ref_to_dict(value)
+                    for key, value in obj.items()
+                    if not key.startswith("$") and key != one_key
+                }
+            elif isinstance(obj, list):
+                return [ref_to_dict(item) for item in obj]
+
+            return obj
+
+        cls.cached_jsonschema = cast(dict[str, Any], ref_to_dict(model))
+
+        return cls.cached_jsonschema
+
+    @classmethod
+    def validate_data(cls, data: BlockInput) -> str | None:
+        return json.validate_with_jsonschema(
+            schema=cls.jsonschema(),
+            data={k: v for k, v in data.items() if v is not None},
+        )
+
+    @classmethod
+    def get_mismatch_error(cls, data: BlockInput) -> str | None:
+        return cls.validate_data(data)
+
+    @classmethod
+    def get_field_schema(cls, field_name: str) -> dict[str, Any]:
+        model_schema = cls.jsonschema().get("properties", {})
+        if not model_schema:
+            raise ValueError(f"Invalid model schema {cls}")
+
+        property_schema = model_schema.get(field_name)
+        if not property_schema:
+            raise ValueError(f"Invalid property name {field_name}")
+
+        return property_schema
+
+    @classmethod
+    def validate_field(cls, field_name: str, data: BlockInput) -> str | None:
+        """
+        Validate the data against a specific property (one of the input/output name).
+        Returns the validation error message if the data does not match the schema.
+        """
+        try:
+            property_schema = cls.get_field_schema(field_name)
+            jsonschema.validate(json.to_dict(data), property_schema)
+            return None
+        except jsonschema.ValidationError as e:
+            return str(e)
+
+    @classmethod
+    def get_fields(cls) -> set[str]:
+        return set(cls.model_fields.keys())
+
+    @classmethod
+    def get_required_fields(cls) -> set[str]:
+        return {
+            field
+            for field, field_info in cls.model_fields.items()
+            if field_info.is_required()
+        }
+
+    @classmethod
+    def __pydantic_init_subclass__(cls, **kwargs):
+        """Validates the schema definition. Rules:
+        - Fields with annotation `CredentialsMetaInput` MUST be
+          named `credentials` or `*_credentials`
|
||||||
|
- Fields named `credentials` or `*_credentials` MUST be
|
||||||
|
of type `CredentialsMetaInput`
|
||||||
|
"""
|
||||||
|
super().__pydantic_init_subclass__(**kwargs)
|
||||||
|
|
||||||
|
# Reset cached JSON schema to prevent inheriting it from parent class
|
||||||
|
cls.cached_jsonschema = {}
|
||||||
|
|
||||||
|
credentials_fields = cls.get_credentials_fields()
|
||||||
|
|
||||||
|
for field_name in cls.get_fields():
|
||||||
|
if is_credentials_field_name(field_name):
|
||||||
|
if field_name not in credentials_fields:
|
||||||
|
raise TypeError(
|
||||||
|
f"Credentials field '{field_name}' on {cls.__qualname__} "
|
||||||
|
f"is not of type {CredentialsMetaInput.__name__}"
|
||||||
|
)
|
||||||
|
|
||||||
|
CredentialsMetaInput.validate_credentials_field_schema(
|
||||||
|
cls.get_field_schema(field_name), field_name
|
||||||
|
)
|
||||||
|
|
||||||
|
elif field_name in credentials_fields:
|
||||||
|
raise KeyError(
|
||||||
|
f"Credentials field '{field_name}' on {cls.__qualname__} "
|
||||||
|
"has invalid name: must be 'credentials' or *_credentials"
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_credentials_fields(cls) -> dict[str, type[CredentialsMetaInput]]:
|
||||||
|
return {
|
||||||
|
field_name: info.annotation
|
||||||
|
for field_name, info in cls.model_fields.items()
|
||||||
|
if (
|
||||||
|
inspect.isclass(info.annotation)
|
||||||
|
and issubclass(
|
||||||
|
get_origin(info.annotation) or info.annotation,
|
||||||
|
CredentialsMetaInput,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_auto_credentials_fields(cls) -> dict[str, dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Get fields that have auto_credentials metadata (e.g., GoogleDriveFileInput).
|
||||||
|
|
||||||
|
Returns a dict mapping kwarg_name -> {field_name, auto_credentials_config}
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If multiple fields have the same kwarg_name, as this would
|
||||||
|
cause silent overwriting and only the last field would be processed.
|
||||||
|
"""
|
||||||
|
result: dict[str, dict[str, Any]] = {}
|
||||||
|
schema = cls.jsonschema()
|
||||||
|
properties = schema.get("properties", {})
|
||||||
|
|
||||||
|
for field_name, field_schema in properties.items():
|
||||||
|
auto_creds = field_schema.get("auto_credentials")
|
||||||
|
if auto_creds:
|
||||||
|
kwarg_name = auto_creds.get("kwarg_name", "credentials")
|
||||||
|
if kwarg_name in result:
|
||||||
|
raise ValueError(
|
||||||
|
f"Duplicate auto_credentials kwarg_name '{kwarg_name}' "
|
||||||
|
f"in fields '{result[kwarg_name]['field_name']}' and "
|
||||||
|
f"'{field_name}' on {cls.__qualname__}"
|
||||||
|
)
|
||||||
|
result[kwarg_name] = {
|
||||||
|
"field_name": field_name,
|
||||||
|
"config": auto_creds,
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_credentials_fields_info(cls) -> dict[str, CredentialsFieldInfo]:
|
||||||
|
result = {}
|
||||||
|
|
||||||
|
# Regular credentials fields
|
||||||
|
for field_name in cls.get_credentials_fields().keys():
|
||||||
|
result[field_name] = CredentialsFieldInfo.model_validate(
|
||||||
|
cls.get_field_schema(field_name), by_alias=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Auto-generated credentials fields (from GoogleDriveFileInput etc.)
|
||||||
|
for kwarg_name, info in cls.get_auto_credentials_fields().items():
|
||||||
|
config = info["config"]
|
||||||
|
# Build a schema-like dict that CredentialsFieldInfo can parse
|
||||||
|
auto_schema = {
|
||||||
|
"credentials_provider": [config.get("provider", "google")],
|
||||||
|
"credentials_types": [config.get("type", "oauth2")],
|
||||||
|
"credentials_scopes": config.get("scopes"),
|
||||||
|
}
|
||||||
|
result[kwarg_name] = CredentialsFieldInfo.model_validate(
|
||||||
|
auto_schema, by_alias=True
|
||||||
|
)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_input_defaults(cls, data: BlockInput) -> BlockInput:
|
||||||
|
return data # Return as is, by default.
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_missing_links(cls, data: BlockInput, links: list["Link"]) -> set[str]:
|
||||||
|
input_fields_from_nodes = {link.sink_name for link in links}
|
||||||
|
return input_fields_from_nodes - set(data)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_missing_input(cls, data: BlockInput) -> set[str]:
|
||||||
|
return cls.get_required_fields() - set(data)
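
# Hedged sketch, not part of the diff above: expected behaviour of the BlockSchema
# helpers defined directly above, assuming validate_with_jsonschema returns an error
# string (or None) as its use here suggests. ExampleParams is a made-up schema.
class ExampleParams(BlockSchema):
    query: str
    limit: int = 10

assert ExampleParams.get_required_fields() == {"query"}
assert ExampleParams.get_missing_input({"limit": 5}) == {"query"}
assert ExampleParams.validate_data({"query": "cats"}) is None  # valid -> no error
assert ExampleParams.validate_data({"limit": 5}) is not None   # "query" missing -> error text

# Naming rule enforced by __pydantic_init_subclass__: a field called `*_credentials`
# that is not a CredentialsMetaInput fails at class-definition time, e.g.
#   class BadSchema(BlockSchema):
#       api_credentials: str   # TypeError: ... is not of type CredentialsMetaInput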


class BlockSchemaInput(BlockSchema):
    """
    Base schema class for block inputs.
    All block input schemas should extend this class for consistency.
    """

    pass


class BlockSchemaOutput(BlockSchema):
    """
    Base schema class for block outputs that includes a standard error field.
    All block output schemas should extend this class to ensure consistent error handling.
    """

    error: str = SchemaField(
        description="Error message if the operation failed", default=""
    )


BlockSchemaInputType = TypeVar("BlockSchemaInputType", bound=BlockSchemaInput)
BlockSchemaOutputType = TypeVar("BlockSchemaOutputType", bound=BlockSchemaOutput)


class EmptyInputSchema(BlockSchemaInput):
    pass


class EmptyOutputSchema(BlockSchemaOutput):
    pass


# For backward compatibility - will be deprecated
EmptySchema = EmptyOutputSchema
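
# Hedged sketch, not part of the diff above: every BlockSchemaOutput subclass inherits
# the `error` field declared above, so yielding ("error", "...") from a block is always
# schema-valid (and, as Block._execute further below shows, gets turned into a
# BlockExecutionError). ExampleOutput is a made-up schema; SchemaField comes from
# backend.data.model as elsewhere in this diff.
class ExampleOutput(BlockSchemaOutput):
    result: str = SchemaField(description="The result", default="")

# ExampleOutput.get_fields() == {"result", "error"}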


# --8<-- [start:BlockWebhookConfig]
class BlockManualWebhookConfig(BaseModel):
    """
    Configuration model for webhook-triggered blocks on which
    the user has to manually set up the webhook at the provider.
    """

    provider: ProviderName
    """The service provider that the webhook connects to"""

    webhook_type: str
    """
    Identifier for the webhook type. E.g. GitHub has repo and organization level hooks.

    Only for use in the corresponding `WebhooksManager`.
    """

    event_filter_input: str = ""
    """
    Name of the block's event filter input.
    Leave empty if the corresponding webhook doesn't have distinct event/payload types.
    """

    event_format: str = "{event}"
    """
    Template string for the event(s) that a block instance subscribes to.
    Applied individually to each event selected in the event filter input.

    Example: `"pull_request.{event}"` -> `"pull_request.opened"`
    """


class BlockWebhookConfig(BlockManualWebhookConfig):
    """
    Configuration model for webhook-triggered blocks for which
    the webhook can be automatically set up through the provider's API.
    """

    resource_format: str
    """
    Template string for the resource that a block instance subscribes to.
    Fields will be filled from the block's inputs (except `payload`).

    Example: `f"{repo}/pull_requests"` (note: not how it's actually implemented)

    Only for use in the corresponding `WebhooksManager`.
    """
# --8<-- [end:BlockWebhookConfig]
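
# Hedged sketch, not part of the diff above: one way the configs above might be filled in
# for a GitHub pull-request trigger. The provider member and the webhook_type /
# resource_format / event names are illustrative, not taken from the real GitHub blocks.
_example_webhook = BlockWebhookConfig(
    provider=ProviderName.GITHUB,
    webhook_type="repo",
    resource_format="{repo}",
    event_filter_input="events",
    event_format="pull_request.{event}",
)
# With event_filter_input naming a bool-only BaseModel input (e.g. opened/closed flags),
# an incoming "pull_request.opened" event matches when `opened` is True; see
# Block.is_triggered_by_event_type below.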


class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
    def __init__(
        self,
        id: str = "",
        description: str = "",
        contributors: list["ContributorDetails"] = [],
        categories: set[BlockCategory] | None = None,
        input_schema: Type[BlockSchemaInputType] = EmptyInputSchema,
        output_schema: Type[BlockSchemaOutputType] = EmptyOutputSchema,
        test_input: BlockInput | list[BlockInput] | None = None,
        test_output: BlockTestOutput | list[BlockTestOutput] | None = None,
        test_mock: dict[str, Any] | None = None,
        test_credentials: Optional[Credentials | dict[str, Credentials]] = None,
        disabled: bool = False,
        static_output: bool = False,
        block_type: BlockType = BlockType.STANDARD,
        webhook_config: Optional[BlockWebhookConfig | BlockManualWebhookConfig] = None,
        is_sensitive_action: bool = False,
    ):
        """
        Initialize the block with the given schema.

        Args:
            id: The unique identifier for the block; this value is persisted in the
                DB, so it should be unique and constant across the application run.
                Use the UUID format for the ID.
            description: The description of the block, explaining what the block does.
            contributors: The list of contributors who contributed to the block.
            input_schema: The schema, defined as a Pydantic model, for the input data.
            output_schema: The schema, defined as a Pydantic model, for the output data.
            test_input: The list or single sample input data for the block, for testing.
            test_output: The list or single expected output if the test_input is run.
            test_mock: Function names on the block implementation to mock on test run.
            disabled: If the block is disabled, it will not be available for execution.
            static_output: Whether the output links of the block are static by default.
        """
        from backend.data.model import NodeExecutionStats

        self.id = id
        self.input_schema = input_schema
        self.output_schema = output_schema
        self.test_input = test_input
        self.test_output = test_output
        self.test_mock = test_mock
        self.test_credentials = test_credentials
        self.description = description
        self.categories = categories or set()
        self.contributors = contributors or set()
        self.disabled = disabled
        self.static_output = static_output
        self.block_type = block_type
        self.webhook_config = webhook_config
        self.is_sensitive_action = is_sensitive_action
        self.execution_stats: "NodeExecutionStats" = NodeExecutionStats()

        if self.webhook_config:
            if isinstance(self.webhook_config, BlockWebhookConfig):
                # Enforce presence of credentials field on auto-setup webhook blocks
                if not (cred_fields := self.input_schema.get_credentials_fields()):
                    raise TypeError(
                        "credentials field is required on auto-setup webhook blocks"
                    )
                # Disallow multiple credentials inputs on webhook blocks
                elif len(cred_fields) > 1:
                    raise ValueError(
                        "Multiple credentials inputs not supported on webhook blocks"
                    )

                self.block_type = BlockType.WEBHOOK
            else:
                self.block_type = BlockType.WEBHOOK_MANUAL

            # Enforce shape of webhook event filter, if present
            if self.webhook_config.event_filter_input:
                event_filter_field = self.input_schema.model_fields[
                    self.webhook_config.event_filter_input
                ]
                if not (
                    isinstance(event_filter_field.annotation, type)
                    and issubclass(event_filter_field.annotation, BaseModel)
                    and all(
                        field.annotation is bool
                        for field in event_filter_field.annotation.model_fields.values()
                    )
                ):
                    raise NotImplementedError(
                        f"{self.name} has an invalid webhook event selector: "
                        "field must be a BaseModel and all its fields must be boolean"
                    )

            # Enforce presence of 'payload' input
            if "payload" not in self.input_schema.model_fields:
                raise TypeError(
                    f"{self.name} is webhook-triggered but has no 'payload' input"
                )

            # Disable webhook-triggered block if webhook functionality not available
            if not app_config.platform_base_url:
                self.disabled = True

    @abstractmethod
    async def run(self, input_data: BlockSchemaInputType, **kwargs) -> BlockOutput:
        """
        Run the block with the given input data.
        Args:
            input_data: The input data with the structure of input_schema.

        Kwargs: Currently (as of 14/02/2025) these include
            graph_id: The ID of the graph.
            node_id: The ID of the node.
            graph_exec_id: The ID of the graph execution.
            node_exec_id: The ID of the node execution.
            user_id: The ID of the user.

        Returns:
            A Generator that yields (output_name, output_data).
            output_name: One of the output names defined in the Block's output_schema.
            output_data: The data for the output_name, matching the defined schema.
        """
        # --- satisfy the type checker, never executed -------------
        if False:  # noqa: SIM115
            yield "name", "value"  # pyright: ignore[reportMissingYield]
        raise NotImplementedError(f"{self.name} does not implement the run method.")

    async def run_once(
        self, input_data: BlockSchemaInputType, output: str, **kwargs
    ) -> Any:
        async for item in self.run(input_data, **kwargs):
            name, data = item
            if name == output:
                return data
        raise ValueError(f"{self.name} did not produce any output for {output}")

    def merge_stats(self, stats: "NodeExecutionStats") -> "NodeExecutionStats":
        self.execution_stats += stats
        return self.execution_stats

    @property
    def name(self):
        return self.__class__.__name__

    def to_dict(self):
        return {
            "id": self.id,
            "name": self.name,
            "inputSchema": self.input_schema.jsonschema(),
            "outputSchema": self.output_schema.jsonschema(),
            "description": self.description,
            "categories": [category.dict() for category in self.categories],
            "contributors": [
                contributor.model_dump() for contributor in self.contributors
            ],
            "staticOutput": self.static_output,
            "uiType": self.block_type.value,
        }

    def get_info(self) -> BlockInfo:
        from backend.data.credit import get_block_cost

        return BlockInfo(
            id=self.id,
            name=self.name,
            inputSchema=self.input_schema.jsonschema(),
            outputSchema=self.output_schema.jsonschema(),
            costs=get_block_cost(self),
            description=self.description,
            categories=[category.dict() for category in self.categories],
            contributors=[
                contributor.model_dump() for contributor in self.contributors
            ],
            staticOutput=self.static_output,
            uiType=self.block_type.value,
        )

    async def execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
        try:
            async for output_name, output_data in self._execute(input_data, **kwargs):
                yield output_name, output_data
        except Exception as ex:
            if isinstance(ex, BlockError):
                raise ex
            else:
                raise (
                    BlockExecutionError
                    if isinstance(ex, ValueError)
                    else BlockUnknownError
                )(
                    message=str(ex),
                    block_name=self.name,
                    block_id=self.id,
                ) from ex

    async def is_block_exec_need_review(
        self,
        input_data: BlockInput,
        *,
        user_id: str,
        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
        graph_version: int,
        execution_context: "ExecutionContext",
        **kwargs,
    ) -> tuple[bool, BlockInput]:
        """
        Check if this block execution needs human review and handle the review process.

        Returns:
            Tuple of (should_pause, input_data_to_use)
            - should_pause: True if execution should be paused for review
            - input_data_to_use: The input data to use (may be modified by reviewer)
        """
        if not (
            self.is_sensitive_action and execution_context.sensitive_action_safe_mode
        ):
            return False, input_data

        from backend.blocks.helpers.review import HITLReviewHelper

        # Handle the review request and get decision
        decision = await HITLReviewHelper.handle_review_decision(
            input_data=input_data,
            user_id=user_id,
            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
            block_name=self.name,
            editable=True,
        )

        if decision is None:
            # We're awaiting review - pause execution
            return True, input_data

        if not decision.should_proceed:
            # Review was rejected, raise an error to stop execution
            raise BlockExecutionError(
                message=f"Block execution rejected by reviewer: {decision.message}",
                block_name=self.name,
                block_id=self.id,
            )

        # Review was approved - use the potentially modified data
        # ReviewResult.data must be a dict for block inputs
        reviewed_data = decision.review_result.data
        if not isinstance(reviewed_data, dict):
            raise BlockExecutionError(
                message=f"Review data must be a dict for block input, got {type(reviewed_data).__name__}",
                block_name=self.name,
                block_id=self.id,
            )
        return False, reviewed_data

    async def _execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
        # Check for review requirement only if running within a graph execution context
        # Direct block execution (e.g., from chat) skips the review process
        has_graph_context = all(
            key in kwargs
            for key in (
                "node_exec_id",
                "graph_exec_id",
                "graph_id",
                "execution_context",
            )
        )
        if has_graph_context:
            should_pause, input_data = await self.is_block_exec_need_review(
                input_data, **kwargs
            )
            if should_pause:
                return

        # Validate the input data (original or reviewer-modified) once
        if error := self.input_schema.validate_data(input_data):
            raise BlockInputError(
                message=f"Unable to execute block with invalid input data: {error}",
                block_name=self.name,
                block_id=self.id,
            )

        # Use the validated input data
        async for output_name, output_data in self.run(
            self.input_schema(**{k: v for k, v in input_data.items() if v is not None}),
            **kwargs,
        ):
            if output_name == "error":
                raise BlockExecutionError(
                    message=output_data, block_name=self.name, block_id=self.id
                )
            if self.block_type == BlockType.STANDARD and (
                error := self.output_schema.validate_field(output_name, output_data)
            ):
                raise BlockOutputError(
                    message=f"Block produced an invalid output data: {error}",
                    block_name=self.name,
                    block_id=self.id,
                )
            yield output_name, output_data

    def is_triggered_by_event_type(
        self, trigger_config: dict[str, Any], event_type: str
    ) -> bool:
        if not self.webhook_config:
            raise TypeError("This method can't be used on non-trigger blocks")
        if not self.webhook_config.event_filter_input:
            return True
        event_filter = trigger_config.get(self.webhook_config.event_filter_input)
        if not event_filter:
            raise ValueError("Event filter is not configured on trigger")
        return event_type in [
            self.webhook_config.event_format.format(event=k)
            for k in event_filter
            if event_filter[k] is True
        ]


# Type alias for any block with standard input/output schemas
AnyBlockSchema: TypeAlias = Block[BlockSchemaInput, BlockSchemaOutput]
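
# Hedged sketch, not part of the diff above: a minimal standard block wired up with the
# classes defined in this file. WordCount* names and the UUID are made up; SchemaField is
# imported from backend.data.model, as elsewhere in this diff.
from backend.data.model import SchemaField


class WordCountInput(BlockSchemaInput):
    text: str = SchemaField(description="Text to count words in")


class WordCountOutput(BlockSchemaOutput):
    count: int = SchemaField(description="Number of words", default=0)


class WordCountBlock(Block[WordCountInput, WordCountOutput]):
    def __init__(self):
        super().__init__(
            id="00000000-0000-4000-8000-000000000001",  # placeholder UUID
            description="Counts the words in a piece of text.",
            categories={BlockCategory.TEXT},
            input_schema=WordCountInput,
            output_schema=WordCountOutput,
            test_input={"text": "hello world"},
            test_output=("count", 2),
        )

    async def run(self, input_data: WordCountInput, **kwargs) -> BlockOutput:
        # Yield (output_name, output_data) pairs; each name must exist in the output
        # schema, and yielding "error" makes _execute raise a BlockExecutionError.
        yield "count", len(input_data.text.split())

# Callers go through execute(), which validates the input against WordCountInput,
# runs the block, and validates each yielded output against WordCountOutput:
#   async for name, value in WordCountBlock().execute({"text": "hello world"}):
#       ...  # ("count", 2)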

autogpt_platform/backend/backend/blocks/_utils.py (new file, 122 lines)
@@ -0,0 +1,122 @@
import logging
import os

from backend.integrations.providers import ProviderName

from ._base import AnyBlockSchema

logger = logging.getLogger(__name__)


def is_block_auth_configured(
    block_cls: type[AnyBlockSchema],
) -> bool:
    """
    Check if a block has a valid authentication method configured at runtime.

    For example, if a block is an OAuth-only block and the env vars are not set,
    do not show it in the UI.

    """
    from backend.sdk.registry import AutoRegistry

    # Create an instance to access input_schema
    try:
        block = block_cls()
    except Exception as e:
        # If we can't create a block instance, assume it's not OAuth-only
        logger.error(f"Error creating block instance for {block_cls.__name__}: {e}")
        return True
    logger.debug(
        f"Checking if block {block_cls.__name__} has a valid provider configured"
    )

    # Get all credential inputs from input schema
    credential_inputs = block.input_schema.get_credentials_fields_info()
    required_inputs = block.input_schema.get_required_fields()
    if not credential_inputs:
        logger.debug(
            f"Block {block_cls.__name__} has no credential inputs - Treating as valid"
        )
        return True

    # Check credential inputs
    if len(required_inputs.intersection(credential_inputs.keys())) == 0:
        logger.debug(
            f"Block {block_cls.__name__} has only optional credential inputs"
            " - will work without credentials configured"
        )

    # Check if the credential inputs for this block are correctly configured
    for field_name, field_info in credential_inputs.items():
        provider_names = field_info.provider
        if not provider_names:
            logger.warning(
                f"Block {block_cls.__name__} "
                f"has credential input '{field_name}' with no provider options"
                " - Disabling"
            )
            return False

        # If a field has multiple possible providers, each one needs to be usable to
        # prevent breaking the UX
        for _provider_name in provider_names:
            provider_name = _provider_name.value
            if provider_name in ProviderName.__members__.values():
                logger.debug(
                    f"Block {block_cls.__name__} credential input '{field_name}' "
                    f"provider '{provider_name}' is part of the legacy provider system"
                    " - Treating as valid"
                )
                break

            provider = AutoRegistry.get_provider(provider_name)
            if not provider:
                logger.warning(
                    f"Block {block_cls.__name__} credential input '{field_name}' "
                    f"refers to unknown provider '{provider_name}' - Disabling"
                )
                return False

            # Check the provider's supported auth types
            if field_info.supported_types != provider.supported_auth_types:
                logger.warning(
                    f"Block {block_cls.__name__} credential input '{field_name}' "
                    f"has mismatched supported auth types (field <> Provider): "
                    f"{field_info.supported_types} != {provider.supported_auth_types}"
                )

            if not (supported_auth_types := provider.supported_auth_types):
                # No auth methods have been configured for this provider
                logger.warning(
                    f"Block {block_cls.__name__} credential input '{field_name}' "
                    f"provider '{provider_name}' "
                    "has no authentication methods configured - Disabling"
                )
                return False

            # Check if provider supports OAuth
            if "oauth2" in supported_auth_types:
                # Check if OAuth environment variables are set
                if (oauth_config := provider.oauth_config) and bool(
                    os.getenv(oauth_config.client_id_env_var)
                    and os.getenv(oauth_config.client_secret_env_var)
                ):
                    logger.debug(
                        f"Block {block_cls.__name__} credential input '{field_name}' "
                        f"provider '{provider_name}' is configured for OAuth"
                    )
                else:
                    logger.error(
                        f"Block {block_cls.__name__} credential input '{field_name}' "
                        f"provider '{provider_name}' "
                        "is missing OAuth client ID or secret - Disabling"
                    )
                    return False

        logger.debug(
            f"Block {block_cls.__name__} credential input '{field_name}' is valid; "
            f"supported credential types: {', '.join(field_info.supported_types)}"
        )

    return True
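
# Hedged sketch, not part of the diff above: how this helper might be used to hide blocks
# whose provider has no usable auth configured. `all_block_classes` is a placeholder for
# however the caller collects block classes.
def usable_blocks(
    all_block_classes: list[type[AnyBlockSchema]],
) -> list[type[AnyBlockSchema]]:
    return [cls for cls in all_block_classes if is_block_auth_configured(cls)]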

(remaining hunks; file names not shown in this view)

@@ -1,7 +1,7 @@
 import logging
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockInput,

@@ -9,13 +9,15 @@ from backend.data.block import (
     BlockSchema,
     BlockSchemaInput,
     BlockType,
-    get_block,
 )
 from backend.data.execution import ExecutionContext, ExecutionStatus, NodesInputMasks
 from backend.data.model import NodeExecutionStats, SchemaField
 from backend.util.json import validate_with_jsonschema
 from backend.util.retry import func_retry
 
+if TYPE_CHECKING:
+    from backend.executor.utils import LogMetadata
+
 _logger = logging.getLogger(__name__)
 
 

@@ -124,9 +126,10 @@ class AgentExecutorBlock(Block):
         graph_version: int,
         graph_exec_id: str,
         user_id: str,
-        logger,
+        logger: "LogMetadata",
     ) -> BlockOutput:
 
+        from backend.blocks import get_block
         from backend.data.execution import ExecutionEventType
         from backend.executor import utils as execution_utils
 

@@ -198,7 +201,7 @@ class AgentExecutorBlock(Block):
         self,
         graph_exec_id: str,
         user_id: str,
-        logger,
+        logger: "LogMetadata",
     ) -> None:
         from backend.executor import utils as execution_utils
 

@@ -1,5 +1,11 @@
 from typing import Any
 
+from backend.blocks._base import (
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.blocks.llm import (
     DEFAULT_LLM_MODEL,
     TEST_CREDENTIALS,

@@ -11,12 +17,6 @@ from backend.blocks.llm import (
     LLMResponse,
     llm_call,
 )
-from backend.data.block import (
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.data.model import APIKeyCredentials, NodeExecutionStats, SchemaField
 
 

@@ -6,7 +6,7 @@ from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 from replicate.helpers import FileOutput
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockOutput,

@@ -5,7 +5,12 @@ from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 from replicate.helpers import FileOutput
 
-from backend.data.block import Block, BlockCategory, BlockSchemaInput, BlockSchemaOutput
+from backend.blocks._base import (
+    Block,
+    BlockCategory,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.execution import ExecutionContext
 from backend.data.model import (
     APIKeyCredentials,

@@ -6,7 +6,7 @@ from typing import Literal
 from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockOutput,

@@ -6,7 +6,7 @@ from typing import Literal
 
 from pydantic import SecretStr
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockOutput,

@@ -1,3 +1,10 @@
+from backend.blocks._base import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
     TEST_CREDENTIALS,

@@ -10,13 +17,6 @@ from backend.blocks.apollo.models import (
     PrimaryPhone,
     SearchOrganizationsRequest,
 )
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.data.model import CredentialsField, SchemaField
 
 

@@ -1,5 +1,12 @@
 import asyncio
 
+from backend.blocks._base import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
     TEST_CREDENTIALS,

@@ -14,13 +21,6 @@ from backend.blocks.apollo.models import (
     SearchPeopleRequest,
     SenorityLevels,
 )
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.data.model import CredentialsField, SchemaField
 
 

@@ -1,3 +1,10 @@
+from backend.blocks._base import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
     TEST_CREDENTIALS,

@@ -6,13 +13,6 @@ from backend.blocks.apollo._auth import (
     ApolloCredentialsInput,
 )
 from backend.blocks.apollo.models import Contact, EnrichPersonRequest
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.data.model import CredentialsField, SchemaField
 
 

@@ -3,7 +3,7 @@ from typing import Optional
 
 from pydantic import BaseModel, Field
 
-from backend.data.block import BlockSchemaInput
+from backend.blocks._base import BlockSchemaInput
 from backend.data.model import SchemaField, UserIntegrations
 from backend.integrations.ayrshare import AyrshareClient
 from backend.util.clients import get_database_manager_async_client

@@ -1,7 +1,7 @@
 import enum
 from typing import Any
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockOutput,

@@ -2,7 +2,7 @@ import os
 import re
 from typing import Type
 
-from backend.data.block import (
+from backend.blocks._base import (
     Block,
     BlockCategory,
     BlockOutput,

Some files were not shown because too many files have changed in this diff.