mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
Compare commits
18 Commits
fix/copilo
...
fix/classi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9cad616950 | ||
|
|
09e42041ce | ||
|
|
a50e95f210 | ||
|
|
92b395d82a | ||
|
|
86abfbd394 | ||
|
|
a7f4093424 | ||
|
|
e33b1e2105 | ||
|
|
fff101e037 | ||
|
|
f1ac05b2e0 | ||
|
|
f115607779 | ||
|
|
1aef8b7155 | ||
|
|
0da949ba42 | ||
|
|
6b031085bd | ||
|
|
11b846dd49 | ||
|
|
b9e29c96bd | ||
|
|
4ac0ba570a | ||
|
|
d61a2c6cd0 | ||
|
|
1c301b4b61 |
10
.claude/settings.json
Normal file
10
.claude/settings.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allowedTools": [
|
||||
"Read", "Grep", "Glob",
|
||||
"Bash(ls:*)", "Bash(cat:*)", "Bash(grep:*)", "Bash(find:*)",
|
||||
"Bash(git status:*)", "Bash(git diff:*)", "Bash(git log:*)", "Bash(git worktree:*)",
|
||||
"Bash(tmux:*)", "Bash(sleep:*)", "Bash(branchlet:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
78
.github/workflows/classic-autogpt-ci.yml
vendored
78
.github/workflows/classic-autogpt-ci.yml
vendored
@@ -6,11 +6,19 @@ on:
|
||||
paths:
|
||||
- '.github/workflows/classic-autogpt-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
pull_request:
|
||||
branches: [ master, dev, release-* ]
|
||||
paths:
|
||||
- '.github/workflows/classic-autogpt-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
|
||||
concurrency:
|
||||
group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
|
||||
@@ -19,47 +27,22 @@ concurrency:
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: classic/original_autogpt
|
||||
working-directory: classic
|
||||
|
||||
jobs:
|
||||
test:
|
||||
permissions:
|
||||
contents: read
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
# Quite slow on macOS (2~4 minutes to set up Docker)
|
||||
# - name: Set up Docker (macOS)
|
||||
# if: runner.os == 'macOS'
|
||||
# uses: crazy-max/ghaction-setup-docker@v3
|
||||
|
||||
- name: Start MinIO service (Linux)
|
||||
if: runner.os == 'Linux'
|
||||
- name: Start MinIO service
|
||||
working-directory: '.'
|
||||
run: |
|
||||
docker pull minio/minio:edge-cicd
|
||||
docker run -d -p 9000:9000 minio/minio:edge-cicd
|
||||
|
||||
- name: Start MinIO service (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
working-directory: ${{ runner.temp }}
|
||||
run: |
|
||||
brew install minio/stable/minio
|
||||
mkdir data
|
||||
minio server ./data &
|
||||
|
||||
# No MinIO on Windows:
|
||||
# - Windows doesn't support running Linux Docker containers
|
||||
# - It doesn't seem possible to start background processes on Windows. They are
|
||||
# killed after the step returns.
|
||||
# See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -71,41 +54,23 @@ jobs:
|
||||
git config --global user.name "Auto-GPT-Bot"
|
||||
git config --global user.email "github-bot@agpt.co"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
python-version: "3.12"
|
||||
|
||||
- id: get_date
|
||||
name: Get date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Python dependency cache
|
||||
# On Windows, unpacking cached dependencies takes longer than just installing them
|
||||
if: runner.os != 'Windows'
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}
|
||||
path: ~/.cache/pypoetry
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry (Unix)
|
||||
if: runner.os != 'Windows'
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
if [ "${{ runner.os }}" = "macOS" ]; then
|
||||
PATH="$HOME/.local/bin:$PATH"
|
||||
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
fi
|
||||
|
||||
- name: Install Poetry (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
shell: pwsh
|
||||
run: |
|
||||
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
||||
|
||||
$env:PATH += ";$env:APPDATA\Python\Scripts"
|
||||
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
|
||||
- name: Install Poetry
|
||||
run: curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: poetry install
|
||||
@@ -116,12 +81,13 @@ jobs:
|
||||
--cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
|
||||
--numprocesses=logical --durations=10 \
|
||||
--junitxml=junit.xml -o junit_family=legacy \
|
||||
tests/unit tests/integration
|
||||
original_autogpt/tests/unit original_autogpt/tests/integration
|
||||
env:
|
||||
CI: true
|
||||
PLAIN_OUTPUT: True
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
S3_ENDPOINT_URL: http://127.0.0.1:9000
|
||||
AWS_ACCESS_KEY_ID: minioadmin
|
||||
AWS_SECRET_ACCESS_KEY: minioadmin
|
||||
|
||||
@@ -135,11 +101,11 @@ jobs:
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
flags: autogpt-agent,${{ runner.os }}
|
||||
flags: autogpt-agent
|
||||
|
||||
- name: Upload logs to artifact
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-logs
|
||||
path: classic/original_autogpt/logs/
|
||||
path: classic/logs/
|
||||
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
--entrypoint poetry ${{ env.IMAGE_NAME }} run \
|
||||
pytest -v --cov=autogpt --cov-branch --cov-report term-missing \
|
||||
--numprocesses=4 --durations=10 \
|
||||
tests/unit tests/integration 2>&1 | tee test_output.txt
|
||||
original_autogpt/tests/unit original_autogpt/tests/integration 2>&1 | tee test_output.txt
|
||||
|
||||
test_failure=${PIPESTATUS[0]}
|
||||
|
||||
|
||||
44
.github/workflows/classic-autogpts-ci.yml
vendored
44
.github/workflows/classic-autogpts-ci.yml
vendored
@@ -10,10 +10,9 @@ on:
|
||||
- '.github/workflows/classic-autogpts-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/run'
|
||||
- 'classic/cli.py'
|
||||
- 'classic/setup.py'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
- '!**/*.md'
|
||||
pull_request:
|
||||
branches: [ master, dev, release-* ]
|
||||
@@ -21,10 +20,9 @@ on:
|
||||
- '.github/workflows/classic-autogpts-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/run'
|
||||
- 'classic/cli.py'
|
||||
- 'classic/setup.py'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
- '!**/*.md'
|
||||
|
||||
defaults:
|
||||
@@ -35,13 +33,9 @@ defaults:
|
||||
jobs:
|
||||
serve-agent-protocol:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
agent-name: [ original_autogpt ]
|
||||
fail-fast: false
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
min-python-version: '3.10'
|
||||
min-python-version: '3.12'
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
@@ -55,22 +49,22 @@ jobs:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- name: Install Poetry
|
||||
working-directory: ./classic/${{ matrix.agent-name }}/
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Run regression tests
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
|
||||
- name: Run smoke tests with direct-benchmark
|
||||
run: |
|
||||
./run agent start ${{ matrix.agent-name }}
|
||||
cd ${{ matrix.agent-name }}
|
||||
poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
|
||||
poetry run agbenchmark --test=WriteFile
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot \
|
||||
--models claude \
|
||||
--tests ReadFile,WriteFile \
|
||||
--json
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AGENT_NAME: ${{ matrix.agent-name }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
|
||||
HELICONE_CACHE_ENABLED: false
|
||||
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
|
||||
REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
|
||||
TELEMETRY_ENVIRONMENT: autogpt-ci
|
||||
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
|
||||
NONINTERACTIVE_MODE: "true"
|
||||
CI: true
|
||||
|
||||
256
.github/workflows/classic-benchmark-ci.yml
vendored
256
.github/workflows/classic-benchmark-ci.yml
vendored
@@ -1,18 +1,24 @@
|
||||
name: Classic - AGBenchmark CI
|
||||
name: Classic - Direct Benchmark CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master, dev, ci-test* ]
|
||||
paths:
|
||||
- 'classic/benchmark/**'
|
||||
- '!classic/benchmark/reports/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- .github/workflows/classic-benchmark-ci.yml
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
pull_request:
|
||||
branches: [ master, dev, release-* ]
|
||||
paths:
|
||||
- 'classic/benchmark/**'
|
||||
- '!classic/benchmark/reports/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- .github/workflows/classic-benchmark-ci.yml
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
|
||||
concurrency:
|
||||
group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
|
||||
@@ -23,95 +29,16 @@ defaults:
|
||||
shell: bash
|
||||
|
||||
env:
|
||||
min-python-version: '3.10'
|
||||
min-python-version: '3.12'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
permissions:
|
||||
contents: read
|
||||
benchmark-tests:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: classic/benchmark
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Set up Python dependency cache
|
||||
# On Windows, unpacking cached dependencies takes longer than just installing them
|
||||
if: runner.os != 'Windows'
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/benchmark/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry (Unix)
|
||||
if: runner.os != 'Windows'
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
if [ "${{ runner.os }}" = "macOS" ]; then
|
||||
PATH="$HOME/.local/bin:$PATH"
|
||||
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
fi
|
||||
|
||||
- name: Install Poetry (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
shell: pwsh
|
||||
run: |
|
||||
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
||||
|
||||
$env:PATH += ";$env:APPDATA\Python\Scripts"
|
||||
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: poetry install
|
||||
|
||||
- name: Run pytest with coverage
|
||||
run: |
|
||||
poetry run pytest -vv \
|
||||
--cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
|
||||
--durations=10 \
|
||||
--junitxml=junit.xml -o junit_family=legacy \
|
||||
tests
|
||||
env:
|
||||
CI: true
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
|
||||
- name: Upload test results to Codecov
|
||||
if: ${{ !cancelled() }} # Run even if tests fail
|
||||
uses: codecov/test-results-action@v1
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
- name: Upload coverage reports to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
flags: agbenchmark,${{ runner.os }}
|
||||
|
||||
self-test-with-agent:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
agent-name: [forge]
|
||||
fail-fast: false
|
||||
timeout-minutes: 20
|
||||
working-directory: classic
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
@@ -124,53 +51,120 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- name: Set up Python dependency cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
|
||||
- name: Run basic benchmark tests
|
||||
run: |
|
||||
echo "Testing ReadFile challenge with one_shot strategy..."
|
||||
poetry run direct-benchmark run \
|
||||
--fresh \
|
||||
--strategies one_shot \
|
||||
--models claude \
|
||||
--tests ReadFile \
|
||||
--json
|
||||
|
||||
echo "Testing WriteFile challenge..."
|
||||
poetry run direct-benchmark run \
|
||||
--fresh \
|
||||
--strategies one_shot \
|
||||
--models claude \
|
||||
--tests WriteFile \
|
||||
--json
|
||||
env:
|
||||
CI: true
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
NONINTERACTIVE_MODE: "true"
|
||||
|
||||
- name: Test category filtering
|
||||
run: |
|
||||
echo "Testing coding category..."
|
||||
poetry run direct-benchmark run \
|
||||
--fresh \
|
||||
--strategies one_shot \
|
||||
--models claude \
|
||||
--categories coding \
|
||||
--tests ReadFile,WriteFile \
|
||||
--json
|
||||
env:
|
||||
CI: true
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
NONINTERACTIVE_MODE: "true"
|
||||
|
||||
- name: Test multiple strategies
|
||||
run: |
|
||||
echo "Testing multiple strategies..."
|
||||
poetry run direct-benchmark run \
|
||||
--fresh \
|
||||
--strategies one_shot,plan_execute \
|
||||
--models claude \
|
||||
--tests ReadFile \
|
||||
--parallel 2 \
|
||||
--json
|
||||
env:
|
||||
CI: true
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
NONINTERACTIVE_MODE: "true"
|
||||
|
||||
# Run regression tests on maintain challenges
|
||||
regression-tests:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/dev'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: classic
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python ${{ env.min-python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ env.min-python-version }}
|
||||
|
||||
- name: Set up Python dependency cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
|
||||
- name: Run regression tests
|
||||
working-directory: classic
|
||||
run: |
|
||||
./run agent start ${{ matrix.agent-name }}
|
||||
cd ${{ matrix.agent-name }}
|
||||
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
echo "Running the following command: poetry run agbenchmark --maintain --mock"
|
||||
poetry run agbenchmark --maintain --mock
|
||||
EXIT_CODE=$?
|
||||
set -e # Stop ignoring non-zero exit codes
|
||||
# Check if the exit code was 5, and if so, exit with 0 instead
|
||||
if [ $EXIT_CODE -eq 5 ]; then
|
||||
echo "regression_tests.json is empty."
|
||||
fi
|
||||
|
||||
echo "Running the following command: poetry run agbenchmark --mock"
|
||||
poetry run agbenchmark --mock
|
||||
|
||||
echo "Running the following command: poetry run agbenchmark --mock --category=data"
|
||||
poetry run agbenchmark --mock --category=data
|
||||
|
||||
echo "Running the following command: poetry run agbenchmark --mock --category=coding"
|
||||
poetry run agbenchmark --mock --category=coding
|
||||
|
||||
# echo "Running the following command: poetry run agbenchmark --test=WriteFile"
|
||||
# poetry run agbenchmark --test=WriteFile
|
||||
cd ../benchmark
|
||||
poetry install
|
||||
echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
|
||||
export BUILD_SKILL_TREE=true
|
||||
|
||||
# poetry run agbenchmark --mock
|
||||
|
||||
# CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../classic/frontend/assets)') || echo "No diffs"
|
||||
# if [ ! -z "$CHANGED" ]; then
|
||||
# echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
|
||||
# echo "$CHANGED"
|
||||
# exit 1
|
||||
# else
|
||||
# echo "No unstaged changes."
|
||||
# fi
|
||||
echo "Running regression tests (previously beaten challenges)..."
|
||||
poetry run direct-benchmark run \
|
||||
--fresh \
|
||||
--strategies one_shot \
|
||||
--models claude \
|
||||
--maintain \
|
||||
--parallel 4 \
|
||||
--json
|
||||
env:
|
||||
CI: true
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
|
||||
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
|
||||
NONINTERACTIVE_MODE: "true"
|
||||
|
||||
189
.github/workflows/classic-forge-ci.yml
vendored
189
.github/workflows/classic-forge-ci.yml
vendored
@@ -6,13 +6,15 @@ on:
|
||||
paths:
|
||||
- '.github/workflows/classic-forge-ci.yml'
|
||||
- 'classic/forge/**'
|
||||
- '!classic/forge/tests/vcr_cassettes'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
pull_request:
|
||||
branches: [ master, dev, release-* ]
|
||||
paths:
|
||||
- '.github/workflows/classic-forge-ci.yml'
|
||||
- 'classic/forge/**'
|
||||
- '!classic/forge/tests/vcr_cassettes'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
|
||||
concurrency:
|
||||
group: ${{ format('forge-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
|
||||
@@ -21,131 +23,60 @@ concurrency:
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: classic/forge
|
||||
working-directory: classic
|
||||
|
||||
jobs:
|
||||
test:
|
||||
permissions:
|
||||
contents: read
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
# Quite slow on macOS (2~4 minutes to set up Docker)
|
||||
# - name: Set up Docker (macOS)
|
||||
# if: runner.os == 'macOS'
|
||||
# uses: crazy-max/ghaction-setup-docker@v3
|
||||
|
||||
- name: Start MinIO service (Linux)
|
||||
if: runner.os == 'Linux'
|
||||
- name: Start MinIO service
|
||||
working-directory: '.'
|
||||
run: |
|
||||
docker pull minio/minio:edge-cicd
|
||||
docker run -d -p 9000:9000 minio/minio:edge-cicd
|
||||
|
||||
- name: Start MinIO service (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
working-directory: ${{ runner.temp }}
|
||||
run: |
|
||||
brew install minio/stable/minio
|
||||
mkdir data
|
||||
minio server ./data &
|
||||
|
||||
# No MinIO on Windows:
|
||||
# - Windows doesn't support running Linux Docker containers
|
||||
# - It doesn't seem possible to start background processes on Windows. They are
|
||||
# killed after the step returns.
|
||||
# See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
submodules: true
|
||||
|
||||
- name: Checkout cassettes
|
||||
if: ${{ startsWith(github.event_name, 'pull_request') }}
|
||||
env:
|
||||
PR_BASE: ${{ github.event.pull_request.base.ref }}
|
||||
PR_BRANCH: ${{ github.event.pull_request.head.ref }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
run: |
|
||||
cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
|
||||
cassette_base_branch="${PR_BASE}"
|
||||
cd tests/vcr_cassettes
|
||||
|
||||
if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
|
||||
cassette_base_branch="master"
|
||||
fi
|
||||
|
||||
if git ls-remote --exit-code --heads origin $cassette_branch ; then
|
||||
git fetch origin $cassette_branch
|
||||
git fetch origin $cassette_base_branch
|
||||
|
||||
git checkout $cassette_branch
|
||||
|
||||
# Pick non-conflicting cassette updates from the base branch
|
||||
git merge --no-commit --strategy-option=ours origin/$cassette_base_branch
|
||||
echo "Using cassettes from mirror branch '$cassette_branch'," \
|
||||
"synced to upstream branch '$cassette_base_branch'."
|
||||
else
|
||||
git checkout -b $cassette_branch
|
||||
echo "Branch '$cassette_branch' does not exist in cassette submodule." \
|
||||
"Using cassettes from '$cassette_base_branch'."
|
||||
fi
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
python-version: "3.12"
|
||||
|
||||
- name: Set up Python dependency cache
|
||||
# On Windows, unpacking cached dependencies takes longer than just installing them
|
||||
if: runner.os != 'Windows'
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/forge/poetry.lock') }}
|
||||
path: ~/.cache/pypoetry
|
||||
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry (Unix)
|
||||
if: runner.os != 'Windows'
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
if [ "${{ runner.os }}" = "macOS" ]; then
|
||||
PATH="$HOME/.local/bin:$PATH"
|
||||
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
fi
|
||||
|
||||
- name: Install Poetry (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
shell: pwsh
|
||||
run: |
|
||||
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
||||
|
||||
$env:PATH += ";$env:APPDATA\Python\Scripts"
|
||||
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
|
||||
- name: Install Poetry
|
||||
run: curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: poetry install
|
||||
|
||||
- name: Install Playwright browsers
|
||||
run: poetry run playwright install chromium
|
||||
|
||||
- name: Run pytest with coverage
|
||||
run: |
|
||||
poetry run pytest -vv \
|
||||
--cov=forge --cov-branch --cov-report term-missing --cov-report xml \
|
||||
--durations=10 \
|
||||
--junitxml=junit.xml -o junit_family=legacy \
|
||||
forge
|
||||
forge/forge forge/tests
|
||||
env:
|
||||
CI: true
|
||||
PLAIN_OUTPUT: True
|
||||
# API keys - tests that need these will skip if not available
|
||||
# Secrets are not available to fork PRs (GitHub security feature)
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
S3_ENDPOINT_URL: http://127.0.0.1:9000
|
||||
AWS_ACCESS_KEY_ID: minioadmin
|
||||
AWS_SECRET_ACCESS_KEY: minioadmin
|
||||
|
||||
@@ -159,85 +90,11 @@ jobs:
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
flags: forge,${{ runner.os }}
|
||||
|
||||
- id: setup_git_auth
|
||||
name: Set up git token authentication
|
||||
# Cassettes may be pushed even when tests fail
|
||||
if: success() || failure()
|
||||
run: |
|
||||
config_key="http.${{ github.server_url }}/.extraheader"
|
||||
if [ "${{ runner.os }}" = 'macOS' ]; then
|
||||
base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64)
|
||||
else
|
||||
base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0)
|
||||
fi
|
||||
|
||||
git config "$config_key" \
|
||||
"Authorization: Basic $base64_pat"
|
||||
|
||||
cd tests/vcr_cassettes
|
||||
git config "$config_key" \
|
||||
"Authorization: Basic $base64_pat"
|
||||
|
||||
echo "config_key=$config_key" >> $GITHUB_OUTPUT
|
||||
|
||||
- id: push_cassettes
|
||||
name: Push updated cassettes
|
||||
# For pull requests, push updated cassettes even when tests fail
|
||||
if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure()))
|
||||
env:
|
||||
PR_BRANCH: ${{ github.event.pull_request.head.ref }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
run: |
|
||||
if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then
|
||||
is_pull_request=true
|
||||
cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
|
||||
else
|
||||
cassette_branch="${{ github.ref_name }}"
|
||||
fi
|
||||
|
||||
cd tests/vcr_cassettes
|
||||
# Commit & push changes to cassettes if any
|
||||
if ! git diff --quiet; then
|
||||
git add .
|
||||
git commit -m "Auto-update cassettes"
|
||||
git push origin HEAD:$cassette_branch
|
||||
if [ ! $is_pull_request ]; then
|
||||
cd ../..
|
||||
git add tests/vcr_cassettes
|
||||
git commit -m "Update cassette submodule"
|
||||
git push origin HEAD:$cassette_branch
|
||||
fi
|
||||
echo "updated=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "updated=false" >> $GITHUB_OUTPUT
|
||||
echo "No cassette changes to commit"
|
||||
fi
|
||||
|
||||
- name: Post Set up git token auth
|
||||
if: steps.setup_git_auth.outcome == 'success'
|
||||
run: |
|
||||
git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
|
||||
git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
|
||||
|
||||
- name: Apply "behaviour change" label and comment on PR
|
||||
if: ${{ startsWith(github.event_name, 'pull_request') }}
|
||||
run: |
|
||||
PR_NUMBER="${{ github.event.pull_request.number }}"
|
||||
TOKEN="${{ secrets.PAT_REVIEW }}"
|
||||
REPO="${{ github.repository }}"
|
||||
|
||||
if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then
|
||||
echo "Adding label and comment..."
|
||||
echo $TOKEN | gh auth login --with-token
|
||||
gh issue edit $PR_NUMBER --add-label "behaviour change"
|
||||
gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour on ${{ runner.os }}. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
|
||||
fi
|
||||
flags: forge
|
||||
|
||||
- name: Upload logs to artifact
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-logs
|
||||
path: classic/forge/logs/
|
||||
path: classic/logs/
|
||||
|
||||
60
.github/workflows/classic-frontend-ci.yml
vendored
60
.github/workflows/classic-frontend-ci.yml
vendored
@@ -1,60 +0,0 @@
|
||||
name: Classic - Frontend CI/CD
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
- 'ci-test*' # This will match any branch that starts with "ci-test"
|
||||
paths:
|
||||
- 'classic/frontend/**'
|
||||
- '.github/workflows/classic-frontend-ci.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'classic/frontend/**'
|
||||
- '.github/workflows/classic-frontend-ci.yml'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}
|
||||
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Flutter
|
||||
uses: subosito/flutter-action@v2
|
||||
with:
|
||||
flutter-version: '3.13.2'
|
||||
|
||||
- name: Build Flutter to Web
|
||||
run: |
|
||||
cd classic/frontend
|
||||
flutter build web --base-href /app/
|
||||
|
||||
# - name: Commit and Push to ${{ env.BUILD_BRANCH }}
|
||||
# if: github.event_name == 'push'
|
||||
# run: |
|
||||
# git config --local user.email "action@github.com"
|
||||
# git config --local user.name "GitHub Action"
|
||||
# git add classic/frontend/build/web
|
||||
# git checkout -B ${{ env.BUILD_BRANCH }}
|
||||
# git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
|
||||
# git push -f origin ${{ env.BUILD_BRANCH }}
|
||||
|
||||
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
|
||||
if: github.event_name == 'push'
|
||||
uses: peter-evans/create-pull-request@v8
|
||||
with:
|
||||
add-paths: classic/frontend/build/web
|
||||
base: ${{ github.ref_name }}
|
||||
branch: ${{ env.BUILD_BRANCH }}
|
||||
delete-branch: true
|
||||
title: "Update frontend build in `${{ github.ref_name }}`"
|
||||
body: "This PR updates the frontend build based on commit ${{ github.sha }}."
|
||||
commit-message: "Update frontend build based on commit ${{ github.sha }}"
|
||||
67
.github/workflows/classic-python-checks.yml
vendored
67
.github/workflows/classic-python-checks.yml
vendored
@@ -7,7 +7,9 @@ on:
|
||||
- '.github/workflows/classic-python-checks-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
- '**.py'
|
||||
- '!classic/forge/tests/vcr_cassettes'
|
||||
pull_request:
|
||||
@@ -16,7 +18,9 @@ on:
|
||||
- '.github/workflows/classic-python-checks-ci.yml'
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/direct_benchmark/**'
|
||||
- 'classic/pyproject.toml'
|
||||
- 'classic/poetry.lock'
|
||||
- '**.py'
|
||||
- '!classic/forge/tests/vcr_cassettes'
|
||||
|
||||
@@ -27,44 +31,13 @@ concurrency:
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: classic
|
||||
|
||||
jobs:
|
||||
get-changed-parts:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- id: changes-in
|
||||
name: Determine affected subprojects
|
||||
uses: dorny/paths-filter@v3
|
||||
with:
|
||||
filters: |
|
||||
original_autogpt:
|
||||
- classic/original_autogpt/autogpt/**
|
||||
- classic/original_autogpt/tests/**
|
||||
- classic/original_autogpt/poetry.lock
|
||||
forge:
|
||||
- classic/forge/forge/**
|
||||
- classic/forge/tests/**
|
||||
- classic/forge/poetry.lock
|
||||
benchmark:
|
||||
- classic/benchmark/agbenchmark/**
|
||||
- classic/benchmark/tests/**
|
||||
- classic/benchmark/poetry.lock
|
||||
outputs:
|
||||
changed-parts: ${{ steps.changes-in.outputs.changes }}
|
||||
|
||||
lint:
|
||||
needs: get-changed-parts
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: "3.10"
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
|
||||
fail-fast: false
|
||||
min-python-version: "3.12"
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -81,42 +54,31 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry
|
||||
run: curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
# Install dependencies
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: poetry -C classic/${{ matrix.sub-package }} install
|
||||
run: poetry install
|
||||
|
||||
# Lint
|
||||
|
||||
- name: Lint (isort)
|
||||
run: poetry run isort --check .
|
||||
working-directory: classic/${{ matrix.sub-package }}
|
||||
|
||||
- name: Lint (Black)
|
||||
if: success() || failure()
|
||||
run: poetry run black --check .
|
||||
working-directory: classic/${{ matrix.sub-package }}
|
||||
|
||||
- name: Lint (Flake8)
|
||||
if: success() || failure()
|
||||
run: poetry run flake8 .
|
||||
working-directory: classic/${{ matrix.sub-package }}
|
||||
|
||||
types:
|
||||
needs: get-changed-parts
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: "3.10"
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
|
||||
fail-fast: false
|
||||
min-python-version: "3.12"
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -133,19 +95,16 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
|
||||
|
||||
- name: Install Poetry
|
||||
run: curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
# Install dependencies
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: poetry -C classic/${{ matrix.sub-package }} install
|
||||
run: poetry install
|
||||
|
||||
# Typecheck
|
||||
|
||||
- name: Typecheck
|
||||
if: success() || failure()
|
||||
run: poetry run pyright
|
||||
working-directory: classic/${{ matrix.sub-package }}
|
||||
|
||||
20
.github/workflows/platform-backend-ci.yml
vendored
20
.github/workflows/platform-backend-ci.yml
vendored
@@ -269,12 +269,14 @@ jobs:
|
||||
DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
|
||||
DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
|
||||
|
||||
- name: Run pytest
|
||||
- name: Run pytest with coverage
|
||||
run: |
|
||||
if [[ "${{ runner.debug }}" == "1" ]]; then
|
||||
poetry run pytest -s -vv -o log_cli=true -o log_cli_level=DEBUG
|
||||
poetry run pytest -s -vv -o log_cli=true -o log_cli_level=DEBUG \
|
||||
--cov=backend --cov-branch --cov-report term-missing --cov-report xml
|
||||
else
|
||||
poetry run pytest -s -vv
|
||||
poetry run pytest -s -vv \
|
||||
--cov=backend --cov-branch --cov-report term-missing --cov-report xml
|
||||
fi
|
||||
env:
|
||||
LOG_LEVEL: ${{ runner.debug && 'DEBUG' || 'INFO' }}
|
||||
@@ -287,11 +289,13 @@ jobs:
|
||||
REDIS_PORT: "6379"
|
||||
ENCRYPTION_KEY: "dvziYgz0KSK8FENhju0ZYi8-fRTfAdlz6YLhdB_jhNw=" # DO NOT USE IN PRODUCTION!!
|
||||
|
||||
# - name: Upload coverage reports to Codecov
|
||||
# uses: codecov/codecov-action@v4
|
||||
# with:
|
||||
# token: ${{ secrets.CODECOV_TOKEN }}
|
||||
# flags: backend,${{ runner.os }}
|
||||
- name: Upload coverage reports to Codecov
|
||||
if: ${{ !cancelled() }}
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
flags: platform-backend
|
||||
files: ./autogpt_platform/backend/coverage.xml
|
||||
|
||||
env:
|
||||
CI: true
|
||||
|
||||
8
.github/workflows/platform-frontend-ci.yml
vendored
8
.github/workflows/platform-frontend-ci.yml
vendored
@@ -148,3 +148,11 @@ jobs:
|
||||
|
||||
- name: Run Integration Tests
|
||||
run: pnpm test:unit
|
||||
|
||||
- name: Upload coverage reports to Codecov
|
||||
if: ${{ !cancelled() }}
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
flags: platform-frontend
|
||||
files: ./autogpt_platform/frontend/coverage/cobertura-coverage.xml
|
||||
|
||||
10
.gitignore
vendored
10
.gitignore
vendored
@@ -3,6 +3,7 @@
|
||||
classic/original_autogpt/keys.py
|
||||
classic/original_autogpt/*.json
|
||||
auto_gpt_workspace/*
|
||||
.autogpt/
|
||||
*.mpeg
|
||||
.env
|
||||
# Root .env files
|
||||
@@ -16,6 +17,7 @@ log-ingestion.txt
|
||||
/logs
|
||||
*.log
|
||||
*.mp3
|
||||
!autogpt_platform/frontend/public/notification.mp3
|
||||
mem.sqlite3
|
||||
venvAutoGPT
|
||||
|
||||
@@ -159,6 +161,10 @@ CURRENT_BULLETIN.md
|
||||
|
||||
# AgBenchmark
|
||||
classic/benchmark/agbenchmark/reports/
|
||||
classic/reports/
|
||||
classic/direct_benchmark/reports/
|
||||
classic/.benchmark_workspaces/
|
||||
classic/direct_benchmark/.benchmark_workspaces/
|
||||
|
||||
# Nodejs
|
||||
package-lock.json
|
||||
@@ -177,9 +183,13 @@ autogpt_platform/backend/settings.py
|
||||
|
||||
*.ign.*
|
||||
.test-contents
|
||||
**/.claude/settings.local.json
|
||||
.claude/settings.local.json
|
||||
CLAUDE.local.md
|
||||
/autogpt_platform/backend/logs
|
||||
|
||||
# Test database
|
||||
test.db
|
||||
.next
|
||||
# Implementation plans (generated by AI agents)
|
||||
plans/
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +0,0 @@
|
||||
[submodule "classic/forge/tests/vcr_cassettes"]
|
||||
path = classic/forge/tests/vcr_cassettes
|
||||
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
|
||||
@@ -84,51 +84,16 @@ repos:
|
||||
stages: [pre-commit, post-checkout]
|
||||
|
||||
- id: poetry-install
|
||||
name: Check & Install dependencies - Classic - AutoGPT
|
||||
alias: poetry-install-classic-autogpt
|
||||
name: Check & Install dependencies - Classic
|
||||
alias: poetry-install-classic
|
||||
entry: >
|
||||
bash -c '
|
||||
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
|
||||
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
|
||||
else
|
||||
git diff --cached --name-only
|
||||
fi | grep -qE "^classic/(original_autogpt|forge)/poetry\.lock$" || exit 0;
|
||||
poetry -C classic/original_autogpt install
|
||||
'
|
||||
# include forge source (since it's a path dependency)
|
||||
always_run: true
|
||||
language: system
|
||||
pass_filenames: false
|
||||
stages: [pre-commit, post-checkout]
|
||||
|
||||
- id: poetry-install
|
||||
name: Check & Install dependencies - Classic - Forge
|
||||
alias: poetry-install-classic-forge
|
||||
entry: >
|
||||
bash -c '
|
||||
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
|
||||
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
|
||||
else
|
||||
git diff --cached --name-only
|
||||
fi | grep -qE "^classic/forge/poetry\.lock$" || exit 0;
|
||||
poetry -C classic/forge install
|
||||
'
|
||||
always_run: true
|
||||
language: system
|
||||
pass_filenames: false
|
||||
stages: [pre-commit, post-checkout]
|
||||
|
||||
- id: poetry-install
|
||||
name: Check & Install dependencies - Classic - Benchmark
|
||||
alias: poetry-install-classic-benchmark
|
||||
entry: >
|
||||
bash -c '
|
||||
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
|
||||
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
|
||||
else
|
||||
git diff --cached --name-only
|
||||
fi | grep -qE "^classic/benchmark/poetry\.lock$" || exit 0;
|
||||
poetry -C classic/benchmark install
|
||||
fi | grep -qE "^classic/poetry\.lock$" || exit 0;
|
||||
poetry -C classic install
|
||||
'
|
||||
always_run: true
|
||||
language: system
|
||||
@@ -223,26 +188,10 @@ repos:
|
||||
language: system
|
||||
|
||||
- id: isort
|
||||
name: Lint (isort) - Classic - AutoGPT
|
||||
alias: isort-classic-autogpt
|
||||
entry: poetry -P classic/original_autogpt run isort -p autogpt
|
||||
files: ^classic/original_autogpt/
|
||||
types: [file, python]
|
||||
language: system
|
||||
|
||||
- id: isort
|
||||
name: Lint (isort) - Classic - Forge
|
||||
alias: isort-classic-forge
|
||||
entry: poetry -P classic/forge run isort -p forge
|
||||
files: ^classic/forge/
|
||||
types: [file, python]
|
||||
language: system
|
||||
|
||||
- id: isort
|
||||
name: Lint (isort) - Classic - Benchmark
|
||||
alias: isort-classic-benchmark
|
||||
entry: poetry -P classic/benchmark run isort -p agbenchmark
|
||||
files: ^classic/benchmark/
|
||||
name: Lint (isort) - Classic
|
||||
alias: isort-classic
|
||||
entry: bash -c 'cd classic && poetry run isort $(echo "$@" | sed "s|classic/||g")' --
|
||||
files: ^classic/(original_autogpt|forge|direct_benchmark)/
|
||||
types: [file, python]
|
||||
language: system
|
||||
|
||||
@@ -256,26 +205,13 @@ repos:
|
||||
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 7.0.0
|
||||
# To have flake8 load the config of the individual subprojects, we have to call
|
||||
# them separately.
|
||||
# Use consolidated flake8 config at classic/.flake8
|
||||
hooks:
|
||||
- id: flake8
|
||||
name: Lint (Flake8) - Classic - AutoGPT
|
||||
alias: flake8-classic-autogpt
|
||||
files: ^classic/original_autogpt/(autogpt|scripts|tests)/
|
||||
args: [--config=classic/original_autogpt/.flake8]
|
||||
|
||||
- id: flake8
|
||||
name: Lint (Flake8) - Classic - Forge
|
||||
alias: flake8-classic-forge
|
||||
files: ^classic/forge/(forge|tests)/
|
||||
args: [--config=classic/forge/.flake8]
|
||||
|
||||
- id: flake8
|
||||
name: Lint (Flake8) - Classic - Benchmark
|
||||
alias: flake8-classic-benchmark
|
||||
files: ^classic/benchmark/(agbenchmark|tests)/((?!reports).)*[/.]
|
||||
args: [--config=classic/benchmark/.flake8]
|
||||
name: Lint (Flake8) - Classic
|
||||
alias: flake8-classic
|
||||
files: ^classic/(original_autogpt|forge|direct_benchmark)/
|
||||
args: [--config=classic/.flake8]
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
@@ -311,29 +247,10 @@ repos:
|
||||
pass_filenames: false
|
||||
|
||||
- id: pyright
|
||||
name: Typecheck - Classic - AutoGPT
|
||||
alias: pyright-classic-autogpt
|
||||
entry: poetry -C classic/original_autogpt run pyright
|
||||
# include forge source (since it's a path dependency) but exclude *_test.py files:
|
||||
files: ^(classic/original_autogpt/((autogpt|scripts|tests)/|poetry\.lock$)|classic/forge/(forge/.*(?<!_test)\.py|poetry\.lock)$)
|
||||
types: [file]
|
||||
language: system
|
||||
pass_filenames: false
|
||||
|
||||
- id: pyright
|
||||
name: Typecheck - Classic - Forge
|
||||
alias: pyright-classic-forge
|
||||
entry: poetry -C classic/forge run pyright
|
||||
files: ^classic/forge/(forge/|poetry\.lock$)
|
||||
types: [file]
|
||||
language: system
|
||||
pass_filenames: false
|
||||
|
||||
- id: pyright
|
||||
name: Typecheck - Classic - Benchmark
|
||||
alias: pyright-classic-benchmark
|
||||
entry: poetry -C classic/benchmark run pyright
|
||||
files: ^classic/benchmark/(agbenchmark/|tests/|poetry\.lock$)
|
||||
name: Typecheck - Classic
|
||||
alias: pyright-classic
|
||||
entry: poetry -C classic run pyright
|
||||
files: ^classic/(original_autogpt|forge|direct_benchmark)/.*\.py$|^classic/poetry\.lock$
|
||||
types: [file]
|
||||
language: system
|
||||
pass_filenames: false
|
||||
@@ -360,26 +277,9 @@ repos:
|
||||
# pass_filenames: false
|
||||
|
||||
# - id: pytest
|
||||
# name: Run tests - Classic - AutoGPT (excl. slow tests)
|
||||
# alias: pytest-classic-autogpt
|
||||
# entry: bash -c 'cd classic/original_autogpt && poetry run pytest --cov=autogpt -m "not slow" tests/unit tests/integration'
|
||||
# # include forge source (since it's a path dependency) but exclude *_test.py files:
|
||||
# files: ^(classic/original_autogpt/((autogpt|tests)/|poetry\.lock$)|classic/forge/(forge/.*(?<!_test)\.py|poetry\.lock)$)
|
||||
# language: system
|
||||
# pass_filenames: false
|
||||
|
||||
# - id: pytest
|
||||
# name: Run tests - Classic - Forge (excl. slow tests)
|
||||
# alias: pytest-classic-forge
|
||||
# entry: bash -c 'cd classic/forge && poetry run pytest --cov=forge -m "not slow"'
|
||||
# files: ^classic/forge/(forge/|tests/|poetry\.lock$)
|
||||
# language: system
|
||||
# pass_filenames: false
|
||||
|
||||
# - id: pytest
|
||||
# name: Run tests - Classic - Benchmark
|
||||
# alias: pytest-classic-benchmark
|
||||
# entry: bash -c 'cd classic/benchmark && poetry run pytest --cov=benchmark'
|
||||
# files: ^classic/benchmark/(agbenchmark/|tests/|poetry\.lock$)
|
||||
# name: Run tests - Classic (excl. slow tests)
|
||||
# alias: pytest-classic
|
||||
# entry: bash -c 'cd classic && poetry run pytest -m "not slow"'
|
||||
# files: ^classic/(original_autogpt|forge|direct_benchmark)/
|
||||
# language: system
|
||||
# pass_filenames: false
|
||||
|
||||
42
README.md
42
README.md
@@ -130,7 +130,7 @@ These examples show just a glimpse of what you can achieve with AutoGPT! You can
|
||||
All code and content within the `autogpt_platform` folder is licensed under the Polyform Shield License. This new project is our in-developlemt platform for building, deploying and managing agents.</br>_[Read more about this effort](https://agpt.co/blog/introducing-the-autogpt-platform)_
|
||||
|
||||
🦉 **MIT License:**
|
||||
All other portions of the AutoGPT repository (i.e., everything outside the `autogpt_platform` folder) are licensed under the MIT License. This includes the original stand-alone AutoGPT Agent, along with projects such as [Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge), [agbenchmark](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/benchmark) and the [AutoGPT Classic GUI](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/frontend).</br>We also publish additional work under the MIT Licence in other repositories, such as [GravitasML](https://github.com/Significant-Gravitas/gravitasml) which is developed for and used in the AutoGPT Platform. See also our MIT Licenced [Code Ability](https://github.com/Significant-Gravitas/AutoGPT-Code-Ability) project.
|
||||
All other portions of the AutoGPT repository (i.e., everything outside the `autogpt_platform` folder) are licensed under the MIT License. This includes the original stand-alone AutoGPT Agent, along with projects such as [Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge) and the [Direct Benchmark](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/direct_benchmark).</br>We also publish additional work under the MIT Licence in other repositories, such as [GravitasML](https://github.com/Significant-Gravitas/gravitasml) which is developed for and used in the AutoGPT Platform. See also our MIT Licenced [Code Ability](https://github.com/Significant-Gravitas/AutoGPT-Code-Ability) project.
|
||||
|
||||
---
|
||||
### Mission
|
||||
@@ -150,7 +150,7 @@ Be part of the revolution! **AutoGPT** is here to stay, at the forefront of AI i
|
||||
## 🤖 AutoGPT Classic
|
||||
> Below is information about the classic version of AutoGPT.
|
||||
|
||||
**🛠️ [Build your own Agent - Quickstart](classic/FORGE-QUICKSTART.md)**
|
||||
**🛠️ [Build your own Agent - Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge)**
|
||||
|
||||
### 🏗️ Forge
|
||||
|
||||
@@ -161,46 +161,26 @@ This guide will walk you through the process of creating your own agent and usin
|
||||
|
||||
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge) about Forge
|
||||
|
||||
### 🎯 Benchmark
|
||||
### 🎯 Direct Benchmark
|
||||
|
||||
**Measure your agent's performance!** The `agbenchmark` can be used with any agent that supports the agent protocol, and the integration with the project's [CLI] makes it even easier to use with AutoGPT and forge-based agents. The benchmark offers a stringent testing environment. Our framework allows for autonomous, objective performance evaluations, ensuring your agents are primed for real-world action.
|
||||
**Measure your agent's performance!** The `direct_benchmark` harness tests agents directly without the agent protocol overhead. It supports multiple prompt strategies (one_shot, reflexion, plan_execute, tree_of_thoughts, etc.) and model configurations, with parallel execution and detailed reporting.
|
||||
|
||||
<!-- TODO: insert visual demonstrating the benchmark -->
|
||||
|
||||
📦 [`agbenchmark`](https://pypi.org/project/agbenchmark/) on Pypi
|
||||
 | 
|
||||
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/benchmark) about the Benchmark
|
||||
|
||||
### 💻 UI
|
||||
|
||||
**Makes agents easy to use!** The `frontend` gives you a user-friendly interface to control and monitor your agents. It connects to agents through the [agent protocol](#-agent-protocol), ensuring compatibility with many agents from both inside and outside of our ecosystem.
|
||||
|
||||
<!-- TODO: insert screenshot of front end -->
|
||||
|
||||
The frontend works out-of-the-box with all agents in the repo. Just use the [CLI] to run your agent of choice!
|
||||
|
||||
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/frontend) about the Frontend
|
||||
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/direct_benchmark) about the Benchmark
|
||||
|
||||
### ⌨️ CLI
|
||||
|
||||
[CLI]: #-cli
|
||||
|
||||
To make it as easy as possible to use all of the tools offered by the repository, a CLI is included at the root of the repo:
|
||||
AutoGPT Classic is run via Poetry from the `classic/` directory:
|
||||
|
||||
```shell
|
||||
$ ./run
|
||||
Usage: cli.py [OPTIONS] COMMAND [ARGS]...
|
||||
|
||||
Options:
|
||||
--help Show this message and exit.
|
||||
|
||||
Commands:
|
||||
agent Commands to create, start and stop agents
|
||||
benchmark Commands to start the benchmark and list tests and categories
|
||||
setup Installs dependencies needed for your system.
|
||||
cd classic
|
||||
poetry install
|
||||
poetry run autogpt # Interactive CLI mode
|
||||
poetry run serve --debug # Agent Protocol server
|
||||
```
|
||||
|
||||
Just clone the repo, install dependencies with `./run setup`, and you should be good to go!
|
||||
See the [classic README](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic) for full setup instructions.
|
||||
|
||||
## 🤔 Questions? Problems? Suggestions?
|
||||
|
||||
|
||||
@@ -550,6 +550,8 @@ async def reset_copilot_usage(
|
||||
|
||||
try:
|
||||
# Verify the user is actually at or over their daily limit.
|
||||
# (rate_limit_reset_cost intentionally omitted — this object is only
|
||||
# used for limit checks, not returned to the client.)
|
||||
usage_status = await get_usage_status(
|
||||
user_id=user_id,
|
||||
daily_token_limit=daily_limit,
|
||||
|
||||
@@ -481,6 +481,11 @@ async def create_library_agent(
|
||||
sensitive_action_safe_mode=sensitive_action_safe_mode,
|
||||
).model_dump()
|
||||
),
|
||||
**(
|
||||
{"Folder": {"connect": {"id": folder_id}}}
|
||||
if folder_id and graph_entry is graph
|
||||
else {}
|
||||
),
|
||||
},
|
||||
},
|
||||
include=library_agent_include(
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import fastapi
|
||||
import fastapi.testclient
|
||||
import pytest
|
||||
|
||||
from backend.api.features.v1 import v1_router
|
||||
|
||||
app = fastapi.FastAPI()
|
||||
app.include_router(v1_router)
|
||||
client = fastapi.testclient.TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_app_auth(mock_jwt_user):
|
||||
from autogpt_libs.auth.jwt_utils import get_jwt_payload
|
||||
|
||||
app.dependency_overrides[get_jwt_payload] = mock_jwt_user["get_jwt_payload"]
|
||||
yield
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def test_onboarding_profile_success(mocker):
|
||||
mock_extract = mocker.patch(
|
||||
"backend.api.features.v1.extract_business_understanding",
|
||||
new_callable=AsyncMock,
|
||||
)
|
||||
mock_upsert = mocker.patch(
|
||||
"backend.api.features.v1.upsert_business_understanding",
|
||||
new_callable=AsyncMock,
|
||||
)
|
||||
|
||||
from backend.data.understanding import BusinessUnderstandingInput
|
||||
|
||||
mock_extract.return_value = BusinessUnderstandingInput.model_construct(
|
||||
user_name="John",
|
||||
user_role="Founder/CEO",
|
||||
pain_points=["Finding leads"],
|
||||
suggested_prompts={"Learn": ["How do I automate lead gen?"]},
|
||||
)
|
||||
mock_upsert.return_value = AsyncMock()
|
||||
|
||||
response = client.post(
|
||||
"/onboarding/profile",
|
||||
json={
|
||||
"user_name": "John",
|
||||
"user_role": "Founder/CEO",
|
||||
"pain_points": ["Finding leads", "Email & outreach"],
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
mock_extract.assert_awaited_once()
|
||||
mock_upsert.assert_awaited_once()
|
||||
|
||||
|
||||
def test_onboarding_profile_missing_fields():
|
||||
response = client.post(
|
||||
"/onboarding/profile",
|
||||
json={"user_name": "John"},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
@@ -63,12 +63,17 @@ from backend.data.onboarding import (
|
||||
UserOnboardingUpdate,
|
||||
complete_onboarding_step,
|
||||
complete_re_run_agent,
|
||||
format_onboarding_for_extraction,
|
||||
get_recommended_agents,
|
||||
get_user_onboarding,
|
||||
onboarding_enabled,
|
||||
reset_user_onboarding,
|
||||
update_user_onboarding,
|
||||
)
|
||||
from backend.data.tally import extract_business_understanding
|
||||
from backend.data.understanding import (
|
||||
BusinessUnderstandingInput,
|
||||
upsert_business_understanding,
|
||||
)
|
||||
from backend.data.user import (
|
||||
get_or_create_user,
|
||||
get_user_by_id,
|
||||
@@ -282,35 +287,33 @@ async def get_onboarding_agents(
|
||||
return await get_recommended_agents(user_id)
|
||||
|
||||
|
||||
class OnboardingStatusResponse(pydantic.BaseModel):
|
||||
"""Response for onboarding status check."""
|
||||
class OnboardingProfileRequest(pydantic.BaseModel):
|
||||
"""Request body for onboarding profile submission."""
|
||||
|
||||
is_onboarding_enabled: bool
|
||||
is_chat_enabled: bool
|
||||
user_name: str = pydantic.Field(min_length=1, max_length=100)
|
||||
user_role: str = pydantic.Field(min_length=1, max_length=100)
|
||||
pain_points: list[str] = pydantic.Field(default_factory=list, max_length=20)
|
||||
|
||||
|
||||
class OnboardingStatusResponse(pydantic.BaseModel):
|
||||
"""Response for onboarding completion check."""
|
||||
|
||||
is_completed: bool
|
||||
|
||||
|
||||
@v1_router.get(
|
||||
"/onboarding/enabled",
|
||||
summary="Is onboarding enabled",
|
||||
"/onboarding/completed",
|
||||
summary="Check if onboarding is completed",
|
||||
tags=["onboarding", "public"],
|
||||
response_model=OnboardingStatusResponse,
|
||||
dependencies=[Security(requires_user)],
|
||||
)
|
||||
async def is_onboarding_enabled(
|
||||
async def is_onboarding_completed(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> OnboardingStatusResponse:
|
||||
# Check if chat is enabled for user
|
||||
is_chat_enabled = await is_feature_enabled(Flag.CHAT, user_id, False)
|
||||
|
||||
# If chat is enabled, skip legacy onboarding
|
||||
if is_chat_enabled:
|
||||
return OnboardingStatusResponse(
|
||||
is_onboarding_enabled=False,
|
||||
is_chat_enabled=True,
|
||||
)
|
||||
|
||||
user_onboarding = await get_user_onboarding(user_id)
|
||||
return OnboardingStatusResponse(
|
||||
is_onboarding_enabled=await onboarding_enabled(),
|
||||
is_chat_enabled=False,
|
||||
is_completed=OnboardingStep.VISIT_COPILOT in user_onboarding.completedSteps,
|
||||
)
|
||||
|
||||
|
||||
@@ -325,6 +328,38 @@ async def reset_onboarding(user_id: Annotated[str, Security(get_user_id)]):
|
||||
return await reset_user_onboarding(user_id)
|
||||
|
||||
|
||||
@v1_router.post(
|
||||
"/onboarding/profile",
|
||||
summary="Submit onboarding profile",
|
||||
tags=["onboarding"],
|
||||
dependencies=[Security(requires_user)],
|
||||
)
|
||||
async def submit_onboarding_profile(
|
||||
data: OnboardingProfileRequest,
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
):
|
||||
formatted = format_onboarding_for_extraction(
|
||||
user_name=data.user_name,
|
||||
user_role=data.user_role,
|
||||
pain_points=data.pain_points,
|
||||
)
|
||||
|
||||
try:
|
||||
understanding_input = await extract_business_understanding(formatted)
|
||||
except Exception:
|
||||
understanding_input = BusinessUnderstandingInput.model_construct()
|
||||
|
||||
# Ensure the direct fields are set even if LLM missed them
|
||||
understanding_input.user_name = data.user_name
|
||||
understanding_input.user_role = data.user_role
|
||||
if not understanding_input.pain_points:
|
||||
understanding_input.pain_points = data.pain_points
|
||||
|
||||
await upsert_business_understanding(user_id, understanding_input)
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
########################################################
|
||||
##################### Blocks ###########################
|
||||
########################################################
|
||||
|
||||
@@ -698,13 +698,30 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
if should_pause:
|
||||
return
|
||||
|
||||
# Validate the input data (original or reviewer-modified) once
|
||||
if error := self.input_schema.validate_data(input_data):
|
||||
raise BlockInputError(
|
||||
message=f"Unable to execute block with invalid input data: {error}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
# Validate the input data (original or reviewer-modified) once.
|
||||
# In dry-run mode, credential fields may contain sentinel None values
|
||||
# that would fail JSON schema required checks. We still validate the
|
||||
# non-credential fields so blocks that execute for real during dry-run
|
||||
# (e.g. AgentExecutorBlock) get proper input validation.
|
||||
is_dry_run = getattr(kwargs.get("execution_context"), "dry_run", False)
|
||||
if is_dry_run:
|
||||
cred_field_names = set(self.input_schema.get_credentials_fields().keys())
|
||||
non_cred_data = {
|
||||
k: v for k, v in input_data.items() if k not in cred_field_names
|
||||
}
|
||||
if error := self.input_schema.validate_data(non_cred_data):
|
||||
raise BlockInputError(
|
||||
message=f"Unable to execute block with invalid input data: {error}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
else:
|
||||
if error := self.input_schema.validate_data(input_data):
|
||||
raise BlockInputError(
|
||||
message=f"Unable to execute block with invalid input data: {error}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
|
||||
# Use the validated input data
|
||||
async for output_name, output_data in self.run(
|
||||
|
||||
@@ -49,11 +49,17 @@ class AgentExecutorBlock(Block):
|
||||
@classmethod
|
||||
def get_missing_input(cls, data: BlockInput) -> set[str]:
|
||||
required_fields = cls.get_input_schema(data).get("required", [])
|
||||
return set(required_fields) - set(data)
|
||||
# Check against the nested `inputs` dict, not the top-level node
|
||||
# data — required fields like "topic" live inside data["inputs"],
|
||||
# not at data["topic"].
|
||||
provided = data.get("inputs", {})
|
||||
return set(required_fields) - set(provided)
|
||||
|
||||
@classmethod
|
||||
def get_mismatch_error(cls, data: BlockInput) -> str | None:
|
||||
return validate_with_jsonschema(cls.get_input_schema(data), data)
|
||||
return validate_with_jsonschema(
|
||||
cls.get_input_schema(data), data.get("inputs", {})
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
# Use BlockSchema to avoid automatic error field that could clash with graph outputs
|
||||
@@ -88,6 +94,7 @@ class AgentExecutorBlock(Block):
|
||||
execution_context=execution_context.model_copy(
|
||||
update={"parent_execution_id": graph_exec_id},
|
||||
),
|
||||
dry_run=execution_context.dry_run,
|
||||
)
|
||||
|
||||
logger = execution_utils.LogMetadata(
|
||||
@@ -149,14 +156,19 @@ class AgentExecutorBlock(Block):
|
||||
ExecutionStatus.TERMINATED,
|
||||
ExecutionStatus.FAILED,
|
||||
]:
|
||||
logger.debug(
|
||||
f"Execution {log_id} received event {event.event_type} with status {event.status}"
|
||||
logger.info(
|
||||
f"Execution {log_id} skipping event {event.event_type} status={event.status} "
|
||||
f"node={getattr(event, 'node_exec_id', '?')}"
|
||||
)
|
||||
continue
|
||||
|
||||
if event.event_type == ExecutionEventType.GRAPH_EXEC_UPDATE:
|
||||
# If the graph execution is COMPLETED, TERMINATED, or FAILED,
|
||||
# we can stop listening for further events.
|
||||
logger.info(
|
||||
f"Execution {log_id} graph completed with status {event.status}, "
|
||||
f"yielded {len(yielded_node_exec_ids)} outputs"
|
||||
)
|
||||
self.merge_stats(
|
||||
NodeExecutionStats(
|
||||
extra_cost=event.stats.cost if event.stats else 0,
|
||||
|
||||
@@ -2,6 +2,8 @@ import copy
|
||||
from datetime import date, time
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import AliasChoices, Field
|
||||
|
||||
from backend.blocks._base import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
@@ -467,7 +469,8 @@ class AgentFileInputBlock(AgentInputBlock):
|
||||
|
||||
class AgentDropdownInputBlock(AgentInputBlock):
|
||||
"""
|
||||
A specialized text input block that relies on placeholder_values to present a dropdown.
|
||||
A specialized text input block that presents a dropdown selector
|
||||
restricted to a fixed set of values.
|
||||
"""
|
||||
|
||||
class Input(AgentInputBlock.Input):
|
||||
@@ -477,16 +480,23 @@ class AgentDropdownInputBlock(AgentInputBlock):
|
||||
advanced=False,
|
||||
title="Default Value",
|
||||
)
|
||||
placeholder_values: list = SchemaField(
|
||||
description="Possible values for the dropdown.",
|
||||
# Use Field() directly (not SchemaField) to pass validation_alias,
|
||||
# which handles backward compat for legacy "placeholder_values" across
|
||||
# all construction paths (model_construct, __init__, model_validate).
|
||||
options: list = Field(
|
||||
default_factory=list,
|
||||
advanced=False,
|
||||
title="Dropdown Options",
|
||||
description=(
|
||||
"If provided, renders the input as a dropdown selector "
|
||||
"restricted to these values. Leave empty for free-text input."
|
||||
),
|
||||
validation_alias=AliasChoices("options", "placeholder_values"),
|
||||
json_schema_extra={"advanced": False, "secret": False},
|
||||
)
|
||||
|
||||
def generate_schema(self):
|
||||
schema = super().generate_schema()
|
||||
if possible_values := self.placeholder_values:
|
||||
if possible_values := self.options:
|
||||
schema["enum"] = possible_values
|
||||
return schema
|
||||
|
||||
@@ -504,13 +514,13 @@ class AgentDropdownInputBlock(AgentInputBlock):
|
||||
{
|
||||
"value": "Option A",
|
||||
"name": "dropdown_1",
|
||||
"placeholder_values": ["Option A", "Option B", "Option C"],
|
||||
"options": ["Option A", "Option B", "Option C"],
|
||||
"description": "Dropdown example 1",
|
||||
},
|
||||
{
|
||||
"value": "Option C",
|
||||
"name": "dropdown_2",
|
||||
"placeholder_values": ["Option A", "Option B", "Option C"],
|
||||
"options": ["Option A", "Option B", "Option C"],
|
||||
"description": "Dropdown example 2",
|
||||
},
|
||||
],
|
||||
|
||||
@@ -89,6 +89,12 @@ class MCPToolBlock(Block):
|
||||
default={},
|
||||
hidden=True,
|
||||
)
|
||||
tool_description: str = SchemaField(
|
||||
description="Description of the selected MCP tool. "
|
||||
"Populated automatically when a tool is selected.",
|
||||
default="",
|
||||
hidden=True,
|
||||
)
|
||||
|
||||
tool_arguments: dict[str, Any] = SchemaField(
|
||||
description="Arguments to pass to the selected MCP tool. "
|
||||
|
||||
323
autogpt_platform/backend/backend/blocks/sql_query_block.py
Normal file
323
autogpt_platform/backend/backend/blocks/sql_query_block.py
Normal file
@@ -0,0 +1,323 @@
|
||||
import asyncio
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import SecretStr
|
||||
from sqlalchemy.engine.url import URL
|
||||
from sqlalchemy.exc import DBAPIError, OperationalError, ProgrammingError
|
||||
|
||||
from backend.blocks._base import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.blocks.sql_query_helpers import (
|
||||
_DATABASE_TYPE_DEFAULT_PORT,
|
||||
_DATABASE_TYPE_TO_DRIVER,
|
||||
DatabaseType,
|
||||
_execute_query,
|
||||
_sanitize_error,
|
||||
_validate_query_is_read_only,
|
||||
_validate_single_statement,
|
||||
)
|
||||
from backend.data.model import (
|
||||
CredentialsField,
|
||||
CredentialsMetaInput,
|
||||
SchemaField,
|
||||
UserPasswordCredentials,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.request import resolve_and_check_blocked
|
||||
|
||||
# Static mock credentials used only by the block test harness (see
# SQLQueryBlock.__init__'s test_credentials); never used at runtime.
TEST_CREDENTIALS = UserPasswordCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="database",
    username=SecretStr("test_user"),
    password=SecretStr("test_pass"),
    title="Mock Database credentials",
)

# Credentials metadata matching TEST_CREDENTIALS, shaped like the block's
# "credentials" input field.
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    "title": TEST_CREDENTIALS.title,
}

# Type aliases pinning this block to username/password credentials for the
# "database" provider.
DatabaseCredentials = UserPasswordCredentials
DatabaseCredentialsInput = CredentialsMetaInput[
    Literal[ProviderName.DATABASE],
    Literal["user_password"],
]
|
||||
|
||||
|
||||
def DatabaseCredentialsField() -> DatabaseCredentialsInput:
    """Build the credentials schema field for database username/password.

    Thin wrapper so every use of database credentials shares one field
    description.
    """
    return CredentialsField(
        description="Database username and password",
    )
|
||||
|
||||
|
||||
class SQLQueryBlock(Block):
    """Run a single SQL statement against PostgreSQL, MySQL, or MSSQL.

    Safety layers applied before/around execution:

    - query validation (exactly one statement; SELECT-only when ``read_only``)
    - SSRF protection: the host is resolved and the connection is pinned to
      the validated IP (``check_host_allowed``)
    - error sanitization so credentials and infrastructure details never
      reach block output (``_sanitize_error``)
    """

    class Input(BlockSchemaInput):
        database_type: DatabaseType = SchemaField(
            default=DatabaseType.POSTGRES,
            description="Database engine",
            advanced=False,
        )
        host: SecretStr = SchemaField(
            description=(
                "Database hostname or IP address. "
                "Treated as a secret to avoid leaking infrastructure details. "
                "Private/internal IPs are blocked (SSRF protection)."
            ),
            placeholder="db.example.com",
            secret=True,
        )
        port: int | None = SchemaField(
            default=None,
            description=(
                "Database port (leave empty for default: "
                "PostgreSQL: 5432, MySQL: 3306, MSSQL: 1433)"
            ),
            ge=1,
            le=65535,
        )
        database: str = SchemaField(
            description="Name of the database to connect to",
            placeholder="my_database",
        )
        query: str = SchemaField(
            description="SQL query to execute",
            placeholder="SELECT * FROM analytics.daily_active_users LIMIT 10",
        )
        read_only: bool = SchemaField(
            default=True,
            description=(
                "When enabled (default), only SELECT queries are allowed "
                "and the database session is set to read-only mode. "
                "Disable to allow write operations (INSERT, UPDATE, DELETE, etc.)."
            ),
        )
        timeout: int = SchemaField(
            default=30,
            description="Query timeout in seconds (max 120)",
            ge=1,
            le=120,
        )
        max_rows: int = SchemaField(
            default=1000,
            description="Maximum number of rows to return (max 10000)",
            ge=1,
            le=10000,
        )
        credentials: DatabaseCredentialsInput = DatabaseCredentialsField()

    class Output(BlockSchemaOutput):
        results: list[dict[str, Any]] = SchemaField(
            description="Query results as a list of row dictionaries"
        )
        columns: list[str] = SchemaField(
            description="Column names from the query result"
        )
        row_count: int = SchemaField(description="Number of rows returned")
        truncated: bool = SchemaField(
            description=(
                "True when the result set was capped by max_rows, "
                "indicating additional rows exist in the database"
            )
        )
        affected_rows: int = SchemaField(
            description="Number of rows affected by a write query (INSERT/UPDATE/DELETE)"
        )
        error: str = SchemaField(description="Error message if the query failed")

    def __init__(self):
        super().__init__(
            id="4dc35c0f-4fd8-465e-9616-5a216f1ba2bc",
            description=(
                "Execute a SQL query. Read-only by default for safety "
                "-- disable to allow write operations. "
                "Supports PostgreSQL, MySQL, and MSSQL via SQLAlchemy."
            ),
            categories={BlockCategory.DATA},
            input_schema=SQLQueryBlock.Input,
            output_schema=SQLQueryBlock.Output,
            test_input={
                "query": "SELECT 1 AS test_col",
                "database_type": DatabaseType.POSTGRES,
                "host": "localhost",
                "database": "test_db",
                "timeout": 30,
                "max_rows": 1000,
                "credentials": TEST_CREDENTIALS_INPUT,
            },
            test_credentials=TEST_CREDENTIALS,
            test_output=[
                ("results", [{"test_col": 1}]),
                ("columns", ["test_col"]),
                ("row_count", 1),
                ("truncated", False),
            ],
            # Mocks replace the two methods that touch the network, so block
            # tests run without a real database or DNS resolution.
            test_mock={
                "execute_query": lambda *_args, **_kwargs: (
                    [{"test_col": 1}],
                    ["test_col"],
                    -1,
                    False,
                ),
                "check_host_allowed": lambda *_args, **_kwargs: ["127.0.0.1"],
            },
        )

    @staticmethod
    async def check_host_allowed(host: str) -> list[str]:
        """Validate that the given host is not a private/blocked address.

        Returns the list of resolved IP addresses so the caller can pin the
        connection to the validated IP (preventing DNS rebinding / TOCTOU).
        Raises ValueError or OSError if the host is blocked.
        Extracted as a method so it can be mocked during block tests.
        """
        return await resolve_and_check_blocked(host)

    @staticmethod
    def execute_query(
        connection_url: URL | str,
        query: str,
        timeout: int,
        max_rows: int,
        read_only: bool = True,
        database_type: DatabaseType = DatabaseType.POSTGRES,
    ) -> tuple[list[dict[str, Any]], list[str], int, bool]:
        """Execute a SQL query and return (rows, columns, affected_rows, truncated).

        Delegates to ``_execute_query`` in ``sql_query_helpers``.
        Extracted as a method so it can be mocked during block tests.
        """
        return _execute_query(
            connection_url=connection_url,
            query=query,
            timeout=timeout,
            max_rows=max_rows,
            read_only=read_only,
            database_type=database_type,
        )

    async def run(
        self,
        input_data: Input,
        *,
        credentials: DatabaseCredentials,
        **_kwargs: Any,
    ) -> BlockOutput:
        """Validate, connect, execute, and yield outputs (async generator).

        Errors are yielded on the "error" output rather than raised, after
        being scrubbed of credentials and host details.
        """
        # Validate query structure and read-only constraints.
        error = self._validate_query(input_data)
        if error:
            yield "error", error
            return

        # Validate host and resolve for SSRF protection.
        host, pinned_host, error = await self._resolve_host(input_data)
        if error:
            yield "error", error
            return

        # Build connection URL and execute. The URL uses the pinned
        # (pre-resolved) IP, not the user-supplied hostname.
        port = input_data.port or _DATABASE_TYPE_DEFAULT_PORT[input_data.database_type]
        username = credentials.username.get_secret_value()
        connection_url = URL.create(
            drivername=_DATABASE_TYPE_TO_DRIVER[input_data.database_type],
            username=username,
            password=credentials.password.get_secret_value(),
            host=pinned_host,
            port=port,
            database=input_data.database,
        )
        conn_str = connection_url.render_as_string(hide_password=True)
        db_name = input_data.database

        def _sanitize(err: Exception) -> str:
            # Scrub connection string, host(s), user, port, and db name.
            return _sanitize_error(
                str(err).strip(),
                conn_str,
                host=pinned_host,
                original_host=host,
                username=username,
                port=port,
                database=db_name,
            )

        try:
            # The sync SQLAlchemy call runs in a worker thread so the event
            # loop is not blocked for up to `timeout` seconds.
            results, columns, affected, truncated = await asyncio.to_thread(
                self.execute_query,
                connection_url=connection_url,
                query=input_data.query,
                timeout=input_data.timeout,
                max_rows=input_data.max_rows,
                read_only=input_data.read_only,
                database_type=input_data.database_type,
            )
            yield "results", results
            yield "columns", columns
            yield "row_count", len(results)
            yield "truncated", truncated
            if affected >= 0:
                yield "affected_rows", affected
        # More specific SQLAlchemy errors first; DBAPIError is their common
        # base class, so it must come last of the three.
        except OperationalError as e:
            yield (
                "error",
                self._classify_operational_error(
                    _sanitize(e),
                    input_data.timeout,
                ),
            )
        except ProgrammingError as e:
            yield "error", f"SQL error: {_sanitize(e)}"
        except DBAPIError as e:
            yield "error", f"Database error: {_sanitize(e)}"
        except ModuleNotFoundError:
            # Raised when the DBAPI driver package is not installed.
            yield (
                "error",
                (
                    f"Database driver not available for "
                    f"{input_data.database_type.value}. "
                    f"Please contact the platform administrator."
                ),
            )

    @staticmethod
    def _validate_query(input_data: "SQLQueryBlock.Input") -> str | None:
        """Validate query structure and read-only constraints."""
        stmt_error, parsed_stmt = _validate_single_statement(input_data.query)
        if stmt_error:
            return stmt_error
        assert parsed_stmt is not None
        if input_data.read_only:
            return _validate_query_is_read_only(parsed_stmt)
        return None

    async def _resolve_host(
        self, input_data: "SQLQueryBlock.Input"
    ) -> tuple[str, str, str | None]:
        """Validate and resolve the database host. Returns (host, pinned_ip, error)."""
        host = input_data.host.get_secret_value().strip()
        if not host:
            return "", "", "Database host is required."
        if host.startswith("/"):
            return host, "", "Unix socket connections are not allowed."
        try:
            resolved_ips = await self.check_host_allowed(host)
        except (ValueError, OSError) as e:
            return host, "", f"Blocked host: {str(e).strip()}"
        return host, resolved_ips[0], None

    @staticmethod
    def _classify_operational_error(sanitized_msg: str, timeout: int) -> str:
        """Classify an already-sanitized OperationalError for user display."""
        lower = sanitized_msg.lower()
        # Heuristic keyword match on the driver's message text.
        if "timeout" in lower or "cancel" in lower:
            return f"Query timed out after {timeout}s."
        if "connect" in lower:
            return f"Failed to connect to database: {sanitized_msg}"
        return f"Database error: {sanitized_msg}"
|
||||
1851
autogpt_platform/backend/backend/blocks/sql_query_block_test.py
Normal file
1851
autogpt_platform/backend/backend/blocks/sql_query_block_test.py
Normal file
File diff suppressed because it is too large
Load Diff
430
autogpt_platform/backend/backend/blocks/sql_query_helpers.py
Normal file
430
autogpt_platform/backend/backend/blocks/sql_query_helpers.py
Normal file
@@ -0,0 +1,430 @@
|
||||
import re
|
||||
from datetime import date, datetime, time
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import sqlparse
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.engine.url import URL
|
||||
|
||||
|
||||
class DatabaseType(str, Enum):
    """Supported database engines (str-valued so it serializes cleanly)."""

    POSTGRES = "postgres"
    MYSQL = "mysql"
    MSSQL = "mssql"
|
||||
|
||||
|
||||
# Defense-in-depth: reject queries containing data-modifying keywords.
# These are checked against parsed SQL tokens (not raw text) so column names
# and string literals do not cause false positives.
# Matching compares a keyword token's first word (see
# _validate_query_is_read_only), so e.g. "SET LOCAL" is caught by "SET".
_DISALLOWED_KEYWORDS = {
    "INSERT",
    "UPDATE",
    "DELETE",
    "DROP",
    "ALTER",
    "CREATE",
    "TRUNCATE",
    "GRANT",
    "REVOKE",
    "COPY",
    "EXECUTE",
    "CALL",
    "SET",
    "RESET",
    "DISCARD",
    "NOTIFY",
    "DO",
    # MySQL file exfiltration: LOAD DATA LOCAL INFILE reads server/client files
    "LOAD",
    # MySQL REPLACE is INSERT-or-UPDATE; data modification
    "REPLACE",
    # ANSI MERGE (UPSERT) modifies data
    "MERGE",
    # MSSQL BULK INSERT loads external files into tables
    "BULK",
    # MSSQL EXEC / EXEC sp_name runs stored procedures (arbitrary code)
    "EXEC",
}

# Map DatabaseType enum values to the expected SQLAlchemy driver prefix.
_DATABASE_TYPE_TO_DRIVER = {
    DatabaseType.POSTGRES: "postgresql",
    DatabaseType.MYSQL: "mysql+pymysql",
    DatabaseType.MSSQL: "mssql+pymssql",
}

# Connection timeout in seconds passed to the DBAPI driver (connect_timeout /
# login_timeout). This bounds how long the driver waits to establish a TCP
# connection to the database server. It is separate from the per-statement
# timeout configured via SET commands inside _configure_session().
_CONNECT_TIMEOUT_SECONDS = 10

# Default ports for each database type.
_DATABASE_TYPE_DEFAULT_PORT = {
    DatabaseType.POSTGRES: 5432,
    DatabaseType.MYSQL: 3306,
    DatabaseType.MSSQL: 1433,
}
|
||||
|
||||
|
||||
def _sanitize_error(
|
||||
error_msg: str,
|
||||
connection_string: str,
|
||||
*,
|
||||
host: str = "",
|
||||
original_host: str = "",
|
||||
username: str = "",
|
||||
port: int = 0,
|
||||
database: str = "",
|
||||
) -> str:
|
||||
"""Remove connection string, credentials, and infrastructure details
|
||||
from error messages so they are safe to expose to the LLM.
|
||||
|
||||
Scrubs:
|
||||
- The full connection string
|
||||
- URL-embedded credentials (``://user:pass@``)
|
||||
- ``password=<value>`` key-value pairs
|
||||
- The database hostname / IP used for the connection
|
||||
- The original (pre-resolution) hostname provided by the user
|
||||
- Any IPv4 addresses that appear in the message
|
||||
- Any bracketed IPv6 addresses (e.g. ``[::1]``, ``[fe80::1%eth0]``)
|
||||
- The database username
|
||||
- The database port number
|
||||
- The database name
|
||||
"""
|
||||
sanitized = error_msg.replace(connection_string, "<connection_string>")
|
||||
sanitized = re.sub(r"password=[^\s&]+", "password=***", sanitized)
|
||||
sanitized = re.sub(r"://[^@]+@", "://***:***@", sanitized)
|
||||
|
||||
# Replace the known host (may be an IP already) before the generic IP pass.
|
||||
# Also replace the original (pre-DNS-resolution) hostname if it differs.
|
||||
if original_host and original_host != host:
|
||||
sanitized = sanitized.replace(original_host, "<host>")
|
||||
if host:
|
||||
sanitized = sanitized.replace(host, "<host>")
|
||||
|
||||
# Replace any remaining IPv4 addresses (e.g. resolved IPs the driver logs)
|
||||
sanitized = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", "<ip>", sanitized)
|
||||
|
||||
# Replace bracketed IPv6 addresses (e.g. "[::1]", "[fe80::1%eth0]")
|
||||
sanitized = re.sub(r"\[[0-9a-fA-F:]+(?:%[^\]]+)?\]", "<ip>", sanitized)
|
||||
|
||||
# Replace the database username (handles double-quoted, single-quoted,
|
||||
# and unquoted formats across PostgreSQL, MySQL, and MSSQL error messages).
|
||||
if username:
|
||||
sanitized = re.sub(
|
||||
r"""for user ["']?""" + re.escape(username) + r"""["']?""",
|
||||
"for user <user>",
|
||||
sanitized,
|
||||
)
|
||||
# Catch remaining bare occurrences in various quote styles:
|
||||
# - PostgreSQL: "FATAL: role "myuser" does not exist"
|
||||
# - MySQL: "Access denied for user 'myuser'@'host'"
|
||||
# - MSSQL: "Login failed for user 'myuser'"
|
||||
sanitized = sanitized.replace(f'"{username}"', "<user>")
|
||||
sanitized = sanitized.replace(f"'{username}'", "<user>")
|
||||
|
||||
# Replace the port number (handles "port 5432" and ":5432" formats)
|
||||
if port:
|
||||
port_str = re.escape(str(port))
|
||||
sanitized = re.sub(
|
||||
r"(?:port |:)" + port_str + r"(?![0-9])",
|
||||
lambda m: ("port " if m.group().startswith("p") else ":") + "<port>",
|
||||
sanitized,
|
||||
)
|
||||
|
||||
# Replace the database name to avoid leaking internal infrastructure names.
|
||||
# Use word-boundary regex to prevent mangling when the database name is a
|
||||
# common substring (e.g. "test", "data", "on").
|
||||
if database:
|
||||
sanitized = re.sub(r"\b" + re.escape(database) + r"\b", "<database>", sanitized)
|
||||
|
||||
return sanitized
|
||||
|
||||
|
||||
def _extract_keyword_tokens(parsed: sqlparse.sql.Statement) -> list[str]:
    """Collect normalized (upper-cased) keyword tokens from a parsed statement.

    Relies on sqlparse token-type classification (Keyword / DML / DDL / DCL),
    so identifiers and string literals — which carry different token types —
    never appear in the result.
    """
    keyword_types = (
        sqlparse.tokens.Keyword,
        sqlparse.tokens.Keyword.DML,
        sqlparse.tokens.Keyword.DDL,
        sqlparse.tokens.Keyword.DCL,
    )
    keywords: list[str] = []
    for tok in parsed.flatten():
        if tok.ttype in keyword_types:
            keywords.append(tok.normalized.upper())
    return keywords
|
||||
|
||||
|
||||
def _has_disallowed_into(stmt: sqlparse.sql.Statement) -> bool:
    """Check if a statement contains a disallowed ``INTO`` clause.

    ``SELECT ... INTO @variable`` is a valid read-only MySQL syntax that stores
    a query result into a session-scoped user variable. All other forms of
    ``INTO`` are data-modifying or file-writing and must be blocked:

    * ``SELECT ... INTO new_table`` (PostgreSQL / MSSQL – creates a table)
    * ``SELECT ... INTO OUTFILE`` (MySQL – writes to the filesystem)
    * ``SELECT ... INTO DUMPFILE`` (MySQL – writes to the filesystem)
    * ``INSERT INTO ...`` (already blocked by INSERT being in the
      disallowed set, but we reject INTO as well for defense-in-depth)

    Returns ``True`` if the statement contains a disallowed ``INTO``.
    """
    # Work on the flattened leaf-token stream so nested token groups are
    # examined too.
    flat = list(stmt.flatten())
    for i, token in enumerate(flat):
        if not (
            token.ttype in (sqlparse.tokens.Keyword,)
            and token.normalized.upper() == "INTO"
        ):
            continue

        # Look at the first non-whitespace token after INTO.
        j = i + 1
        while j < len(flat) and flat[j].ttype is sqlparse.tokens.Text.Whitespace:
            j += 1

        if j >= len(flat):
            # INTO at the very end – malformed, block it.
            return True

        next_token = flat[j]
        # MySQL user variable: either a single Name starting with "@"
        # (e.g. ``@total``) or a bare ``@`` Operator token followed by a Name.
        if next_token.ttype is sqlparse.tokens.Name and next_token.value.startswith(
            "@"
        ):
            continue
        if next_token.ttype is sqlparse.tokens.Operator and next_token.value == "@":
            continue

        # Everything else (table name, OUTFILE, DUMPFILE, etc.) is disallowed.
        return True

    return False
|
||||
|
||||
|
||||
def _validate_query_is_read_only(stmt: sqlparse.sql.Statement) -> str | None:
    """Validate that an already-parsed SQL statement is read-only.

    Takes the statement produced by ``_validate_single_statement`` so the
    query is not parsed twice. Three checks are applied:

    1. the statement type must be SELECT (sqlparse reports WITH...SELECT
       as SELECT as well)
    2. no keyword from the disallowed set (INSERT, UPDATE, DROP, ...)
    3. no disallowed INTO clause (MySQL ``SELECT ... INTO @var`` is allowed)

    Returns an error string when the statement is not read-only, else None.
    """
    if stmt.get_type() != "SELECT":
        return "Only SELECT queries are allowed."

    # Defense-in-depth: scan the parsed keyword tokens.
    for keyword in _extract_keyword_tokens(stmt):
        # Multi-word tokens (e.g. "SET LOCAL") match on their first word.
        head = keyword.split()[0] if " " in keyword else keyword
        if head in _DISALLOWED_KEYWORDS:
            return f"Disallowed SQL keyword: {keyword}"

    # INTO needs context: the MySQL @variable form is read-only, everything
    # else (new table, OUTFILE, DUMPFILE) is not.
    if _has_disallowed_into(stmt):
        return "Disallowed SQL keyword: INTO"

    return None
|
||||
|
||||
|
||||
def _validate_single_statement(
    query: str,
) -> tuple[str | None, sqlparse.sql.Statement | None]:
    """Validate that the query is exactly one non-empty SQL statement.

    Returns ``(error_message, parsed_statement)``; when the error message is
    not None the query is invalid and the statement slot is None.
    """
    trimmed = query.strip().rstrip(";").strip()
    if not trimmed:
        return "Query is empty.", None

    def _is_substantive(stmt: sqlparse.sql.Statement) -> bool:
        # Drop statements that are empty, or made only of whitespace/comments.
        if not stmt.tokens or not str(stmt).strip():
            return False
        return any(
            not (t.is_whitespace or t.ttype in sqlparse.tokens.Comment)
            for t in stmt.flatten()
        )

    # Tokenize with sqlparse and keep only real statements.
    real_statements = [s for s in sqlparse.parse(trimmed) if _is_substantive(s)]

    if not real_statements:
        return "Query is empty.", None

    # Semicolon-separated batches are rejected to prevent injection.
    if len(real_statements) > 1:
        return "Only single statements are allowed.", None

    return None, real_statements[0]
|
||||
|
||||
|
||||
def _serialize_value(value: Any) -> Any:
|
||||
"""Convert database-specific types to JSON-serializable Python types."""
|
||||
if isinstance(value, Decimal):
|
||||
# NaN / Infinity are not valid JSON numbers; serialize as strings.
|
||||
if value.is_nan() or value.is_infinite():
|
||||
return str(value)
|
||||
# Use int for whole numbers; use str for fractional to preserve exact
|
||||
# precision (float would silently round high-precision analytics values).
|
||||
if value == value.to_integral_value():
|
||||
return int(value)
|
||||
return str(value)
|
||||
if isinstance(value, (datetime, date, time)):
|
||||
return value.isoformat()
|
||||
if isinstance(value, memoryview):
|
||||
return bytes(value).hex()
|
||||
if isinstance(value, bytes):
|
||||
return value.hex()
|
||||
return value
|
||||
|
||||
|
||||
def _configure_session(
    conn: Any,
    dialect_name: str,
    timeout_ms: str,
    read_only: bool,
) -> None:
    """Set session-level timeout and read-only mode for the given dialect.

    Timeout limitations by database:

    * **PostgreSQL** – ``statement_timeout`` reliably cancels any running
      statement (SELECT or DML) after the configured duration.
    * **MySQL** – ``MAX_EXECUTION_TIME`` only applies to **read-only SELECT**
      statements. DML (INSERT/UPDATE/DELETE) and DDL are *not* bounded by
      this hint; they rely on the server's ``wait_timeout`` /
      ``interactive_timeout`` instead. There is no session-level setting in
      MySQL that reliably cancels long-running writes.
    * **MSSQL** – ``SET LOCK_TIMEOUT`` only limits how long the server waits
      to acquire a **lock**. CPU-bound queries (e.g. large scans, hash
      joins) that do not block on locks will *not* be cancelled. MSSQL has
      no session-level ``statement_timeout`` equivalent; the closest
      mechanism is Resource Governor (requires sysadmin configuration) or
      ``CONTEXT_INFO``-based external monitoring.

    Note: SQLite is not supported by this block. The ``_configure_session``
    function is a no-op for unrecognised dialect names, so an SQLite engine
    would skip all SET commands silently. The block's ``DatabaseType`` enum
    intentionally excludes SQLite.
    """
    # timeout_ms is produced via str(int(...)) by the caller, so direct
    # string concatenation into the SET commands is safe (SET does not
    # accept bind parameters in most databases).
    if dialect_name == "postgresql":
        conn.execute(text("SET statement_timeout = " + timeout_ms))
        if read_only:
            conn.execute(text("SET default_transaction_read_only = ON"))
    elif dialect_name == "mysql":
        # NOTE: MAX_EXECUTION_TIME only applies to SELECT statements.
        # Write queries (INSERT/UPDATE/DELETE) are not bounded by this
        # setting; they rely on the database's wait_timeout instead.
        # See docstring above for full limitations.
        conn.execute(text("SET SESSION MAX_EXECUTION_TIME = " + timeout_ms))
        if read_only:
            conn.execute(text("SET SESSION TRANSACTION READ ONLY"))
    elif dialect_name == "mssql":
        # MSSQL: SET LOCK_TIMEOUT limits lock-wait time (ms) only.
        # CPU-bound queries without lock contention are NOT cancelled.
        # See docstring above for full limitations.
        conn.execute(text("SET LOCK_TIMEOUT " + timeout_ms))
        # MSSQL lacks a session-level read-only mode like
        # PostgreSQL/MySQL. Read-only enforcement is handled by
        # the SQL validation layer (_validate_query_is_read_only)
        # and the ROLLBACK in the finally block.
|
||||
|
||||
|
||||
def _run_in_transaction(
    conn: Any,
    dialect_name: str,
    query: str,
    max_rows: int,
    read_only: bool,
) -> tuple[list[dict[str, Any]], list[str], int, bool]:
    """Execute a query inside an explicit transaction, returning results.

    Returns ``(rows, columns, affected_rows, truncated)`` where *truncated*
    is ``True`` when ``fetchmany`` returned exactly ``max_rows`` rows,
    indicating that additional rows may exist in the result set.
    """
    # The caller puts the connection in AUTOCOMMIT mode, so transaction
    # boundaries are issued manually with explicit BEGIN/COMMIT/ROLLBACK.
    # MSSQL uses T-SQL "BEGIN TRANSACTION"; others use "BEGIN".
    begin_stmt = "BEGIN TRANSACTION" if dialect_name == "mssql" else "BEGIN"
    conn.execute(text(begin_stmt))
    try:
        result = conn.execute(text(query))
        # rowcount is only meaningful for non-row-returning (write)
        # statements; -1 marks "not applicable" for SELECTs.
        affected = result.rowcount if not result.returns_rows else -1
        columns = list(result.keys()) if result.returns_rows else []
        rows = result.fetchmany(max_rows) if result.returns_rows else []
        # Fetching exactly max_rows implies the result may have been capped.
        truncated = len(rows) == max_rows
        results = [
            {col: _serialize_value(val) for col, val in zip(columns, row)}
            for row in rows
        ]
    except Exception:
        # Best-effort rollback; always re-raise the original error.
        try:
            conn.execute(text("ROLLBACK"))
        except Exception:
            pass
        raise
    else:
        # Read-only sessions always roll back so no state can be persisted.
        conn.execute(text("ROLLBACK" if read_only else "COMMIT"))
        return results, columns, affected, truncated
|
||||
|
||||
|
||||
def _execute_query(
    connection_url: URL | str,
    query: str,
    timeout: int,
    max_rows: int,
    read_only: bool = True,
    database_type: DatabaseType = DatabaseType.POSTGRES,
) -> tuple[list[dict[str, Any]], list[str], int, bool]:
    """Execute a SQL query and return (rows, columns, affected_rows, truncated).

    Uses SQLAlchemy to connect to any supported database.
    For SELECT queries, rows are limited to ``max_rows`` via DBAPI fetchmany.
    ``truncated`` is ``True`` when the result set was capped by ``max_rows``.
    For write queries, affected_rows contains the rowcount from the driver.
    When ``read_only`` is True, the database session is set to read-only
    mode and the transaction is always rolled back.
    """
    # Determine driver-specific connection timeout argument.
    # pymssql uses "login_timeout", while PostgreSQL/MySQL use "connect_timeout".
    timeout_key = (
        "login_timeout" if database_type == DatabaseType.MSSQL else "connect_timeout"
    )
    engine = create_engine(
        connection_url, connect_args={timeout_key: _CONNECT_TIMEOUT_SECONDS}
    )
    try:
        with engine.connect() as conn:
            # Use AUTOCOMMIT so SET commands take effect immediately.
            conn = conn.execution_options(isolation_level="AUTOCOMMIT")

            # Compute timeout in milliseconds. The value is Pydantic-validated
            # (ge=1, le=120), but we use int() as defense-in-depth.
            # NOTE: SET commands do not support bind parameters in most
            # databases, so we use str(int(...)) for safe interpolation.
            timeout_ms = str(int(timeout * 1000))

            _configure_session(conn, engine.dialect.name, timeout_ms, read_only)
            return _run_in_transaction(
                conn, engine.dialect.name, query, max_rows, read_only
            )
    finally:
        # Dispose the throwaway engine so no pooled connections linger.
        engine.dispose()
|
||||
@@ -300,13 +300,27 @@ def test_agent_input_block_ignores_legacy_placeholder_values():
|
||||
|
||||
|
||||
def test_dropdown_input_block_produces_enum():
|
||||
"""Verify AgentDropdownInputBlock.Input.generate_schema() produces enum."""
|
||||
options = ["Option A", "Option B"]
|
||||
"""Verify AgentDropdownInputBlock.Input.generate_schema() produces enum
|
||||
using the canonical 'options' field name."""
|
||||
opts = ["Option A", "Option B"]
|
||||
instance = AgentDropdownInputBlock.Input.model_construct(
|
||||
name="choice", value=None, placeholder_values=options
|
||||
name="choice", value=None, options=opts
|
||||
)
|
||||
schema = instance.generate_schema()
|
||||
assert schema.get("enum") == options
|
||||
assert schema.get("enum") == opts
|
||||
|
||||
|
||||
def test_dropdown_input_block_legacy_placeholder_values_produces_enum():
    """Verify backward compat: passing legacy 'placeholder_values' to
    AgentDropdownInputBlock still produces enum via model_construct remap."""
    opts = ["Option A", "Option B"]
    # NOTE(review): model_construct bypasses pydantic validation, so the
    # legacy-key remap is presumably implemented in a model_construct
    # override — confirm against the Input class.
    instance = AgentDropdownInputBlock.Input.model_construct(
        name="choice", value=None, placeholder_values=opts
    )
    schema = instance.generate_schema()
    assert (
        schema.get("enum") == opts
    ), "Legacy placeholder_values should be remapped to options"
|
||||
|
||||
|
||||
def test_generate_schema_integration_legacy_placeholder_values():
|
||||
@@ -329,11 +343,11 @@ def test_generate_schema_integration_legacy_placeholder_values():
|
||||
|
||||
def test_generate_schema_integration_dropdown_produces_enum():
|
||||
"""Test the full Graph._generate_schema path with AgentDropdownInputBlock
|
||||
— verifies enum IS produced for dropdown blocks."""
|
||||
— verifies enum IS produced for dropdown blocks using canonical field name."""
|
||||
dropdown_input_default = {
|
||||
"name": "color",
|
||||
"value": None,
|
||||
"placeholder_values": ["Red", "Green", "Blue"],
|
||||
"options": ["Red", "Green", "Blue"],
|
||||
}
|
||||
result = BaseGraph._generate_schema(
|
||||
(AgentDropdownInputBlock.Input, dropdown_input_default),
|
||||
@@ -344,3 +358,36 @@ def test_generate_schema_integration_dropdown_produces_enum():
|
||||
"Green",
|
||||
"Blue",
|
||||
], "Graph schema should contain enum from AgentDropdownInputBlock"
|
||||
|
||||
|
||||
def test_generate_schema_integration_dropdown_legacy_placeholder_values():
|
||||
"""Test the full Graph._generate_schema path with AgentDropdownInputBlock
|
||||
using legacy 'placeholder_values' — verifies backward compat produces enum."""
|
||||
legacy_dropdown_input_default = {
|
||||
"name": "color",
|
||||
"value": None,
|
||||
"placeholder_values": ["Red", "Green", "Blue"],
|
||||
}
|
||||
result = BaseGraph._generate_schema(
|
||||
(AgentDropdownInputBlock.Input, legacy_dropdown_input_default),
|
||||
)
|
||||
color_props = result["properties"]["color"]
|
||||
assert color_props.get("enum") == [
|
||||
"Red",
|
||||
"Green",
|
||||
"Blue",
|
||||
], "Legacy placeholder_values should still produce enum via model_construct remap"
|
||||
|
||||
|
||||
def test_dropdown_input_block_init_legacy_placeholder_values():
|
||||
"""Verify backward compat: constructing AgentDropdownInputBlock.Input via
|
||||
model_validate with legacy 'placeholder_values' correctly maps to 'options'."""
|
||||
opts = ["Option A", "Option B"]
|
||||
instance = AgentDropdownInputBlock.Input.model_validate(
|
||||
{"name": "choice", "value": None, "placeholder_values": opts}
|
||||
)
|
||||
assert (
|
||||
instance.options == opts
|
||||
), "Legacy placeholder_values should be remapped to options via model_validate"
|
||||
schema = instance.generate_schema()
|
||||
assert schema.get("enum") == opts
|
||||
|
||||
@@ -20,6 +20,10 @@ class ChatConfig(BaseSettings):
|
||||
default="openai/gpt-4o-mini",
|
||||
description="Model to use for generating session titles (should be fast/cheap)",
|
||||
)
|
||||
simulation_model: str = Field(
|
||||
default="google/gemini-2.5-flash",
|
||||
description="Model for dry-run block simulation (should be fast/cheap with good JSON output)",
|
||||
)
|
||||
api_key: str | None = Field(default=None, description="OpenAI API key")
|
||||
base_url: str | None = Field(
|
||||
default=OPENROUTER_BASE_URL,
|
||||
|
||||
@@ -149,7 +149,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
|
||||
|
||||
Allowed:
|
||||
- Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
|
||||
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``.
|
||||
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``
|
||||
or ``tool-outputs/...``.
|
||||
The SDK nests tool-results under a conversation UUID directory;
|
||||
the UUID segment is validated with ``_UUID_RE``.
|
||||
"""
|
||||
@@ -174,17 +175,20 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
|
||||
# Defence-in-depth: ensure project_dir didn't escape the base.
|
||||
if not project_dir.startswith(SDK_PROJECTS_DIR + os.sep):
|
||||
return False
|
||||
# Only allow: <encoded-cwd>/<uuid>/tool-results/<file>
|
||||
# Only allow: <encoded-cwd>/<uuid>/<tool-dir>/<file>
|
||||
# The SDK always creates a conversation UUID directory between
|
||||
# the project dir and tool-results/.
|
||||
# the project dir and the tool directory.
|
||||
# Accept both "tool-results" (SDK's persisted outputs) and
|
||||
# "tool-outputs" (the model sometimes confuses workspace paths
|
||||
# with filesystem paths and generates this variant).
|
||||
if resolved.startswith(project_dir + os.sep):
|
||||
relative = resolved[len(project_dir) + 1 :]
|
||||
parts = relative.split(os.sep)
|
||||
# Require exactly: [<uuid>, "tool-results", <file>, ...]
|
||||
# Require exactly: [<uuid>, "tool-results"|"tool-outputs", <file>, ...]
|
||||
if (
|
||||
len(parts) >= 3
|
||||
and _UUID_RE.match(parts[0])
|
||||
and parts[1] == "tool-results"
|
||||
and parts[1] in ("tool-results", "tool-outputs")
|
||||
):
|
||||
return True
|
||||
|
||||
|
||||
@@ -134,6 +134,21 @@ def test_is_allowed_local_path_tool_results_with_uuid():
|
||||
_current_project_dir.set("")
|
||||
|
||||
|
||||
def test_is_allowed_local_path_tool_outputs_with_uuid():
|
||||
"""Files under <encoded-cwd>/<uuid>/tool-outputs/ are also allowed."""
|
||||
encoded = "test-encoded-dir"
|
||||
conv_uuid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
|
||||
path = os.path.join(
|
||||
SDK_PROJECTS_DIR, encoded, conv_uuid, "tool-outputs", "output.json"
|
||||
)
|
||||
|
||||
_current_project_dir.set(encoded)
|
||||
try:
|
||||
assert is_allowed_local_path(path, sdk_cwd=None)
|
||||
finally:
|
||||
_current_project_dir.set("")
|
||||
|
||||
|
||||
def test_is_allowed_local_path_tool_results_without_uuid_rejected():
|
||||
"""Direct <encoded-cwd>/tool-results/ (no UUID) is rejected."""
|
||||
encoded = "test-encoded-dir"
|
||||
@@ -159,7 +174,7 @@ def test_is_allowed_local_path_sibling_of_tool_results_is_rejected():
|
||||
|
||||
|
||||
def test_is_allowed_local_path_valid_uuid_wrong_segment_name_rejected():
|
||||
"""A valid UUID dir but non-'tool-results' second segment is rejected."""
|
||||
"""A valid UUID dir but non-'tool-results'/'tool-outputs' second segment is rejected."""
|
||||
encoded = "test-encoded-dir"
|
||||
uuid_str = "12345678-1234-5678-9abc-def012345678"
|
||||
path = os.path.join(
|
||||
|
||||
@@ -59,6 +59,16 @@ _null_cache: TTLCache[tuple[str, str], bool] = TTLCache(
|
||||
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
|
||||
)
|
||||
|
||||
# GitHub user identity caches (keyed by user_id only, not provider tuple).
|
||||
# Declared here so invalidate_user_provider_cache() can reference them.
|
||||
_GH_IDENTITY_CACHE_TTL = 600.0 # 10 min — profile data rarely changes
|
||||
_gh_identity_cache: TTLCache[str, dict[str, str]] = TTLCache(
|
||||
maxsize=_CACHE_MAX_SIZE, ttl=_GH_IDENTITY_CACHE_TTL
|
||||
)
|
||||
_gh_identity_null_cache: TTLCache[str, bool] = TTLCache(
|
||||
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
|
||||
)
|
||||
|
||||
|
||||
def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
|
||||
"""Remove the cached entry for *user_id*/*provider* from both caches.
|
||||
@@ -66,11 +76,19 @@ def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
|
||||
Call this after storing new credentials so that the next
|
||||
``get_provider_token()`` call performs a fresh DB lookup instead of
|
||||
serving a stale TTL-cached result.
|
||||
|
||||
For GitHub specifically, also clears the git-identity caches so that
|
||||
``get_github_user_git_identity()`` re-fetches the user's profile on
|
||||
the next call instead of serving stale identity data.
|
||||
"""
|
||||
key = (user_id, provider)
|
||||
_token_cache.pop(key, None)
|
||||
_null_cache.pop(key, None)
|
||||
|
||||
if provider == "github":
|
||||
_gh_identity_cache.pop(user_id, None)
|
||||
_gh_identity_null_cache.pop(user_id, None)
|
||||
|
||||
|
||||
# Register this module's cache-bust function with the credentials manager so
|
||||
# that any create/update/delete operation immediately evicts stale cache
|
||||
@@ -123,6 +141,7 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
|
||||
[c for c in creds_list if c.type == "oauth2"],
|
||||
key=lambda c: 0 if "repo" in (cast(OAuth2Credentials, c).scopes or []) else 1,
|
||||
)
|
||||
refresh_failed = False
|
||||
for creds in oauth2_creds:
|
||||
if creds.type == "oauth2":
|
||||
try:
|
||||
@@ -141,6 +160,7 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
|
||||
# Do NOT fall back to the stale token — it is likely expired
|
||||
# or revoked. Returning None forces the caller to re-auth,
|
||||
# preventing the LLM from receiving a non-functional token.
|
||||
refresh_failed = True
|
||||
continue
|
||||
_token_cache[cache_key] = token
|
||||
return token
|
||||
@@ -152,8 +172,12 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
|
||||
_token_cache[cache_key] = token
|
||||
return token
|
||||
|
||||
# No credentials found — cache to avoid repeated DB hits.
|
||||
_null_cache[cache_key] = True
|
||||
# Only cache "not connected" when the user truly has no credentials for this
|
||||
# provider. If we had OAuth credentials but refresh failed (e.g. transient
|
||||
# network error, event-loop mismatch), do NOT cache the negative result —
|
||||
# the next call should retry the refresh instead of being blocked for 60 s.
|
||||
if not refresh_failed:
|
||||
_null_cache[cache_key] = True
|
||||
return None
|
||||
|
||||
|
||||
@@ -171,3 +195,76 @@ async def get_integration_env_vars(user_id: str) -> dict[str, str]:
|
||||
for var in var_names:
|
||||
env[var] = token
|
||||
return env
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GitHub user identity (for git committer env vars)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def get_github_user_git_identity(user_id: str) -> dict[str, str] | None:
|
||||
"""Fetch the GitHub user's name and email for git committer env vars.
|
||||
|
||||
Uses the ``/user`` GitHub API endpoint with the user's stored token.
|
||||
Returns a dict with ``GIT_AUTHOR_NAME``, ``GIT_AUTHOR_EMAIL``,
|
||||
``GIT_COMMITTER_NAME``, and ``GIT_COMMITTER_EMAIL`` if the user has a
|
||||
connected GitHub account. Returns ``None`` otherwise.
|
||||
|
||||
Results are cached for 10 minutes; "not connected" results are cached for
|
||||
60 s (same as null-token cache).
|
||||
"""
|
||||
if user_id in _gh_identity_null_cache:
|
||||
return None
|
||||
if cached := _gh_identity_cache.get(user_id):
|
||||
return cached
|
||||
|
||||
token = await get_provider_token(user_id, "github")
|
||||
if not token:
|
||||
_gh_identity_null_cache[user_id] = True
|
||||
return None
|
||||
|
||||
import aiohttp
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
"https://api.github.com/user",
|
||||
headers={
|
||||
"Authorization": f"token {token}",
|
||||
"Accept": "application/vnd.github+json",
|
||||
},
|
||||
timeout=aiohttp.ClientTimeout(total=5),
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
logger.warning(
|
||||
"[git-identity] GitHub /user returned %s for user %s",
|
||||
resp.status,
|
||||
user_id,
|
||||
)
|
||||
return None
|
||||
data = await resp.json()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[git-identity] Failed to fetch GitHub profile for user %s: %s",
|
||||
user_id,
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
name = data.get("name") or data.get("login") or "AutoGPT User"
|
||||
# GitHub may return email=null if the user has set their email to private.
|
||||
# Fall back to the noreply address GitHub generates for every account.
|
||||
email = data.get("email")
|
||||
if not email:
|
||||
gh_id = data.get("id", "")
|
||||
login = data.get("login", "user")
|
||||
email = f"{gh_id}+{login}@users.noreply.github.com"
|
||||
|
||||
identity = {
|
||||
"GIT_AUTHOR_NAME": name,
|
||||
"GIT_AUTHOR_EMAIL": email,
|
||||
"GIT_COMMITTER_NAME": name,
|
||||
"GIT_COMMITTER_EMAIL": email,
|
||||
}
|
||||
_gh_identity_cache[user_id] = identity
|
||||
return identity
|
||||
|
||||
@@ -9,6 +9,8 @@ from backend.copilot.integration_creds import (
|
||||
_NULL_CACHE_TTL,
|
||||
_TOKEN_CACHE_TTL,
|
||||
PROVIDER_ENV_VARS,
|
||||
_gh_identity_cache,
|
||||
_gh_identity_null_cache,
|
||||
_null_cache,
|
||||
_token_cache,
|
||||
get_integration_env_vars,
|
||||
@@ -49,9 +51,13 @@ def clear_caches():
|
||||
"""Ensure clean caches before and after every test."""
|
||||
_token_cache.clear()
|
||||
_null_cache.clear()
|
||||
_gh_identity_cache.clear()
|
||||
_gh_identity_null_cache.clear()
|
||||
yield
|
||||
_token_cache.clear()
|
||||
_null_cache.clear()
|
||||
_gh_identity_cache.clear()
|
||||
_gh_identity_null_cache.clear()
|
||||
|
||||
|
||||
class TestInvalidateUserProviderCache:
|
||||
@@ -77,6 +83,34 @@ class TestInvalidateUserProviderCache:
|
||||
invalidate_user_provider_cache(_USER, _PROVIDER)
|
||||
assert other_key in _token_cache
|
||||
|
||||
def test_clears_gh_identity_cache_for_github_provider(self):
|
||||
"""When provider is 'github', identity caches must also be cleared."""
|
||||
_gh_identity_cache[_USER] = {
|
||||
"GIT_AUTHOR_NAME": "Old Name",
|
||||
"GIT_AUTHOR_EMAIL": "old@example.com",
|
||||
"GIT_COMMITTER_NAME": "Old Name",
|
||||
"GIT_COMMITTER_EMAIL": "old@example.com",
|
||||
}
|
||||
invalidate_user_provider_cache(_USER, "github")
|
||||
assert _USER not in _gh_identity_cache
|
||||
|
||||
def test_clears_gh_identity_null_cache_for_github_provider(self):
|
||||
"""When provider is 'github', the identity null-cache must also be cleared."""
|
||||
_gh_identity_null_cache[_USER] = True
|
||||
invalidate_user_provider_cache(_USER, "github")
|
||||
assert _USER not in _gh_identity_null_cache
|
||||
|
||||
def test_does_not_clear_gh_identity_cache_for_other_providers(self):
|
||||
"""When provider is NOT 'github', identity caches must be left alone."""
|
||||
_gh_identity_cache[_USER] = {
|
||||
"GIT_AUTHOR_NAME": "Some Name",
|
||||
"GIT_AUTHOR_EMAIL": "some@example.com",
|
||||
"GIT_COMMITTER_NAME": "Some Name",
|
||||
"GIT_COMMITTER_EMAIL": "some@example.com",
|
||||
}
|
||||
invalidate_user_provider_cache(_USER, "some-other-provider")
|
||||
assert _USER in _gh_identity_cache
|
||||
|
||||
|
||||
class TestGetProviderToken:
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@@ -129,8 +163,15 @@ class TestGetProviderToken:
|
||||
assert result == "oauth-tok"
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_oauth2_refresh_failure_returns_none(self):
|
||||
"""On refresh failure, return None instead of caching a stale token."""
|
||||
async def test_oauth2_refresh_failure_returns_none_without_null_cache(self):
|
||||
"""On refresh failure, return None but do NOT cache in null_cache.
|
||||
|
||||
The user has credentials — they just couldn't be refreshed right now
|
||||
(e.g. transient network error or event-loop mismatch in the copilot
|
||||
executor). Caching a negative result would block all credential
|
||||
lookups for 60 s even though the creds exist and may refresh fine
|
||||
on the next attempt.
|
||||
"""
|
||||
oauth_creds = _make_oauth2_creds("stale-oauth-tok")
|
||||
mock_manager = MagicMock()
|
||||
mock_manager.store.get_creds_by_provider = AsyncMock(return_value=[oauth_creds])
|
||||
@@ -141,6 +182,8 @@ class TestGetProviderToken:
|
||||
|
||||
# Stale tokens must NOT be returned — forces re-auth.
|
||||
assert result is None
|
||||
# Must NOT cache negative result when refresh failed — next call retries.
|
||||
assert (_USER, _PROVIDER) not in _null_cache
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_no_credentials_caches_null_entry(self):
|
||||
@@ -176,6 +219,96 @@ class TestGetProviderToken:
|
||||
assert _NULL_CACHE_TTL < _TOKEN_CACHE_TTL
|
||||
|
||||
|
||||
class TestThreadSafetyLocks:
|
||||
"""Bug reproduction: shared AsyncRedisKeyedMutex across threads caused
|
||||
'Future attached to a different loop' when copilot workers accessed
|
||||
credentials from different event loops."""
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_locks_returns_per_thread_instance(self):
|
||||
"""IntegrationCredentialsStore.locks() must return different instances
|
||||
for different threads (via @thread_cached)."""
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
|
||||
from backend.integrations.credentials_store import IntegrationCredentialsStore
|
||||
|
||||
store = IntegrationCredentialsStore()
|
||||
|
||||
async def get_locks_id():
|
||||
mock_redis = AsyncMock()
|
||||
with patch(
|
||||
"backend.integrations.credentials_store.get_redis_async",
|
||||
return_value=mock_redis,
|
||||
):
|
||||
locks = await store.locks()
|
||||
return id(locks)
|
||||
|
||||
# Get locks from main thread
|
||||
main_id = await get_locks_id()
|
||||
|
||||
# Get locks from a worker thread
|
||||
def run_in_thread():
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
return loop.run_until_complete(get_locks_id())
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
worker_id = await asyncio.get_event_loop().run_in_executor(
|
||||
pool, run_in_thread
|
||||
)
|
||||
|
||||
assert main_id != worker_id, (
|
||||
"Store.locks() returned the same instance across threads. "
|
||||
"This would cause 'Future attached to a different loop' errors."
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_manager_delegates_to_store_locks(self):
|
||||
"""IntegrationCredentialsManager.locks() should delegate to store."""
|
||||
from backend.integrations.creds_manager import IntegrationCredentialsManager
|
||||
|
||||
manager = IntegrationCredentialsManager()
|
||||
mock_redis = AsyncMock()
|
||||
|
||||
with patch(
|
||||
"backend.integrations.credentials_store.get_redis_async",
|
||||
return_value=mock_redis,
|
||||
):
|
||||
locks = await manager.locks()
|
||||
|
||||
# Should have gotten it from the store
|
||||
assert locks is not None
|
||||
|
||||
|
||||
class TestRefreshUnlockedPath:
|
||||
"""Bug reproduction: copilot worker threads need lock-free refresh because
|
||||
Redis-backed asyncio.Lock created on one event loop can't be used on another."""
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_refresh_if_needed_lock_false_skips_redis(self):
|
||||
"""refresh_if_needed(lock=False) must not touch Redis locks at all."""
|
||||
from backend.integrations.creds_manager import IntegrationCredentialsManager
|
||||
|
||||
manager = IntegrationCredentialsManager()
|
||||
creds = _make_oauth2_creds()
|
||||
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.needs_refresh = MagicMock(return_value=False)
|
||||
|
||||
with patch(
|
||||
"backend.integrations.creds_manager._get_provider_oauth_handler",
|
||||
new_callable=AsyncMock,
|
||||
return_value=mock_handler,
|
||||
):
|
||||
result = await manager.refresh_if_needed(_USER, creds, lock=False)
|
||||
|
||||
# Should return credentials without touching locks
|
||||
assert result.id == creds.id
|
||||
|
||||
|
||||
class TestGetIntegrationEnvVars:
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_injects_all_env_vars_for_provider(self):
|
||||
|
||||
@@ -66,6 +66,7 @@ from pydantic import BaseModel, PrivateAttr
|
||||
ToolName = Literal[
|
||||
# Platform tools (must match keys in TOOL_REGISTRY)
|
||||
"add_understanding",
|
||||
"ask_question",
|
||||
"bash_exec",
|
||||
"browser_act",
|
||||
"browser_navigate",
|
||||
@@ -102,6 +103,7 @@ ToolName = Literal[
|
||||
"web_fetch",
|
||||
"write_workspace_file",
|
||||
# SDK built-ins
|
||||
"Agent",
|
||||
"Edit",
|
||||
"Glob",
|
||||
"Grep",
|
||||
|
||||
@@ -544,6 +544,7 @@ class TestApplyToolPermissions:
|
||||
class TestSdkBuiltinToolNames:
|
||||
def test_expected_builtins_present(self):
|
||||
expected = {
|
||||
"Agent",
|
||||
"Read",
|
||||
"Write",
|
||||
"Edit",
|
||||
|
||||
@@ -18,6 +18,18 @@ After `write_workspace_file`, embed the `download_url` in Markdown:
|
||||
- Image: ``
|
||||
- Video: ``
|
||||
|
||||
### Handling binary/image data in tool outputs — CRITICAL
|
||||
When a tool output contains base64-encoded binary data (images, PDFs, etc.):
|
||||
1. **NEVER** try to inline or render the base64 content in your response.
|
||||
2. **Save** the data to workspace using `write_workspace_file` (pass the base64 data URI as content).
|
||||
3. **Show** the result via the workspace download URL in Markdown: ``.
|
||||
|
||||
### Passing large data between tools — CRITICAL
|
||||
When tool outputs produce large text that you need to feed into another tool:
|
||||
- **NEVER** copy-paste the full text into the next tool call argument.
|
||||
- **Save** the output to a file (workspace or local), then use `@@agptfile:` references.
|
||||
- This avoids token limits and ensures data integrity.
|
||||
|
||||
### File references — @@agptfile:
|
||||
Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
|
||||
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
|
||||
@@ -138,6 +150,11 @@ parent autopilot handles orchestration.
|
||||
# E2B-only notes — E2B has full internet access so gh CLI works there.
|
||||
# Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
|
||||
_E2B_TOOL_NOTES = """
|
||||
### SDK tool-result files in E2B
|
||||
When you `Read` an SDK tool-result file, it is automatically copied into the
|
||||
sandbox so `bash_exec` can access it for further processing.
|
||||
The exact sandbox path is shown in the `[Sandbox copy available at ...]` note.
|
||||
|
||||
### GitHub CLI (`gh`) and git
|
||||
- If the user has connected their GitHub account, both `gh` and `git` are
|
||||
pre-authenticated — use them directly without any manual login step.
|
||||
@@ -203,19 +220,22 @@ def _build_storage_supplement(
|
||||
- Files here **survive across sessions indefinitely**
|
||||
|
||||
### Moving files between storages
|
||||
- **{file_move_name_1_to_2}**: Copy to persistent workspace
|
||||
- **{file_move_name_2_to_1}**: Download for processing
|
||||
- **{file_move_name_1_to_2}**: `write_workspace_file(filename="output.json", source_path="/path/to/local/file")`
|
||||
- **{file_move_name_2_to_1}**: `read_workspace_file(path="tool-outputs/data.json", save_to_path="{working_dir}/data.json")`
|
||||
|
||||
### File persistence
|
||||
Important files (code, configs, outputs) should be saved to workspace to ensure they persist.
|
||||
|
||||
### SDK tool-result files
|
||||
When tool outputs are large, the SDK truncates them and saves the full output to
|
||||
a local file under `~/.claude/projects/.../tool-results/`. To read these files,
|
||||
always use `Read` (NOT `bash_exec`, NOT `read_workspace_file`).
|
||||
These files are on the host filesystem — `bash_exec` runs in the sandbox and
|
||||
CANNOT access them. `read_workspace_file` reads from cloud workspace storage,
|
||||
where SDK tool-results are NOT stored.
|
||||
a local file under `~/.claude/projects/.../tool-results/` (or `tool-outputs/`).
|
||||
To read these files, use `Read` — it reads from the host filesystem.
|
||||
|
||||
### Large tool outputs saved to workspace
|
||||
When a tool output contains `<tool-output-truncated workspace_path="...">`, the
|
||||
full output is in workspace storage (NOT on the local filesystem). To access it:
|
||||
- Use `read_workspace_file(path="...", offset=..., length=50000)` for reading sections.
|
||||
- To process in the sandbox, use `read_workspace_file(path="...", save_to_path="{working_dir}/file.json")` first, then use `bash_exec` on the local copy.
|
||||
{_SHARED_TOOL_NOTES}{extra_notes}"""
|
||||
|
||||
|
||||
|
||||
@@ -6,16 +6,23 @@ from pathlib import Path
|
||||
class TestAgentGenerationGuideContainsClarifySection:
|
||||
"""The agent generation guide must include the clarification section."""
|
||||
|
||||
def test_guide_includes_clarify_before_building(self):
|
||||
def test_guide_includes_clarify_section(self):
|
||||
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
|
||||
content = guide_path.read_text(encoding="utf-8")
|
||||
assert "Clarifying Before Building" in content
|
||||
assert "Before or During Building" in content
|
||||
|
||||
def test_guide_mentions_find_block_for_clarification(self):
|
||||
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
|
||||
content = guide_path.read_text(encoding="utf-8")
|
||||
# find_block must appear in the clarification section (before the workflow)
|
||||
clarify_section = content.split("Clarifying Before Building")[1].split(
|
||||
clarify_section = content.split("Before or During Building")[1].split(
|
||||
"### Workflow"
|
||||
)[0]
|
||||
assert "find_block" in clarify_section
|
||||
|
||||
def test_guide_mentions_ask_question_tool(self):
|
||||
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
|
||||
content = guide_path.read_text(encoding="utf-8")
|
||||
clarify_section = content.split("Before or During Building")[1].split(
|
||||
"### Workflow"
|
||||
)[0]
|
||||
assert "ask_question" in clarify_section
|
||||
|
||||
@@ -161,8 +161,9 @@ async def reset_daily_usage(user_id: str, daily_token_limit: int = 0) -> bool:
|
||||
daily_token_limit: The configured daily token limit. When positive,
|
||||
the weekly counter is reduced by this amount.
|
||||
|
||||
Fails open: returns False if Redis is unavailable (consistent with
|
||||
the fail-open design of this module).
|
||||
Returns False if Redis is unavailable so the caller can handle
|
||||
compensation (fail-closed for billed operations, unlike the read-only
|
||||
rate-limit checks which fail-open).
|
||||
"""
|
||||
now = datetime.now(UTC)
|
||||
try:
|
||||
|
||||
@@ -70,6 +70,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", _make_config(daily_token_limit=0)),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(0, 12_500_000)),
|
||||
),
|
||||
):
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
await reset_copilot_usage(user_id="user-1")
|
||||
@@ -83,6 +87,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
|
||||
@@ -112,6 +120,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
|
||||
@@ -141,6 +153,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),
|
||||
@@ -171,6 +187,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=3)),
|
||||
):
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
@@ -208,6 +228,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
|
||||
@@ -228,6 +252,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", _make_config()),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=None)),
|
||||
):
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
@@ -245,6 +273,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),
|
||||
@@ -275,6 +307,10 @@ class TestResetCopilotUsage:
|
||||
with (
|
||||
patch(f"{_MODULE}.config", cfg),
|
||||
patch(f"{_MODULE}.settings", _mock_settings()),
|
||||
patch(
|
||||
f"{_MODULE}.get_global_rate_limits",
|
||||
AsyncMock(return_value=(2_500_000, 12_500_000)),
|
||||
),
|
||||
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
|
||||
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
|
||||
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),
|
||||
|
||||
@@ -3,40 +3,55 @@
|
||||
You can create, edit, and customize agents directly. You ARE the brain —
|
||||
generate the agent JSON yourself using block schemas, then validate and save.
|
||||
|
||||
### Clarifying Before Building
|
||||
### Clarifying — Before or During Building
|
||||
|
||||
Before starting the workflow below, check whether the user's goal is
|
||||
**ambiguous** — missing the output format, delivery channel, data source,
|
||||
or trigger. If so:
|
||||
1. Call `find_block` with a query targeting the ambiguous dimension to
|
||||
discover what the platform actually supports.
|
||||
2. Ask the user **one concrete question** grounded in the discovered
|
||||
Use `ask_question` whenever the user's intent is ambiguous — whether
|
||||
that's before starting or midway through the workflow. Common moments:
|
||||
|
||||
- **Before building**: output format, delivery channel, data source, or
|
||||
trigger is unspecified.
|
||||
- **During block discovery**: multiple blocks could fit and the user
|
||||
should choose.
|
||||
- **During JSON generation**: a wiring decision depends on user
|
||||
preference.
|
||||
|
||||
Steps:
|
||||
1. Call `find_block` (or another discovery tool) to learn what the
|
||||
platform actually supports for the ambiguous dimension.
|
||||
2. Call `ask_question` with a concrete question listing the discovered
|
||||
options (e.g. "The platform supports Gmail, Slack, and Google Docs —
|
||||
which should the agent use for delivery?").
|
||||
3. **Wait for the user's answer** before proceeding.
|
||||
3. **Wait for the user's answer** before continuing.
|
||||
|
||||
**Skip this** when the goal already specifies all dimensions (e.g.
|
||||
"scrape prices from Amazon and email me daily").
|
||||
|
||||
### Workflow for Creating/Editing Agents
|
||||
|
||||
1. **Discover blocks**: Call `find_block(query, include_schemas=true)` to
|
||||
1. **If editing**: First narrow to the specific agent by UUID, then fetch its
|
||||
graph: `find_library_agent(query="<agent_id>", include_graph=true)`. This
|
||||
returns the full graph structure (nodes + links). **Never edit blindly** —
|
||||
always inspect the current graph first so you know exactly what to change.
|
||||
Avoid using `include_graph=true` with broad keyword searches, as fetching
|
||||
multiple graphs at once is expensive and consumes LLM context budget.
|
||||
2. **Discover blocks**: Call `find_block(query, include_schemas=true)` to
|
||||
search for relevant blocks. This returns block IDs, names, descriptions,
|
||||
and full input/output schemas.
|
||||
2. **Find library agents**: Call `find_library_agent` to discover reusable
|
||||
3. **Find library agents**: Call `find_library_agent` to discover reusable
|
||||
agents that can be composed as sub-agents via `AgentExecutorBlock`.
|
||||
3. **Generate JSON**: Build the agent JSON using block schemas:
|
||||
- Use block IDs from step 1 as `block_id` in nodes
|
||||
4. **Generate/modify JSON**: Build or modify the agent JSON using block schemas:
|
||||
- Use block IDs from step 2 as `block_id` in nodes
|
||||
- Wire outputs to inputs using links
|
||||
- Set design-time config in `input_default`
|
||||
- Use `AgentInputBlock` for values the user provides at runtime
|
||||
4. **Write to workspace**: Save the JSON to a workspace file so the user
|
||||
- When editing, apply targeted changes and preserve unchanged parts
|
||||
5. **Write to workspace**: Save the JSON to a workspace file so the user
|
||||
can review it: `write_workspace_file(filename="agent.json", content=...)`
|
||||
5. **Validate**: Call `validate_agent_graph` with the agent JSON to check
|
||||
6. **Validate**: Call `validate_agent_graph` with the agent JSON to check
|
||||
for errors
|
||||
6. **Fix if needed**: Call `fix_agent_graph` to auto-fix common issues,
|
||||
7. **Fix if needed**: Call `fix_agent_graph` to auto-fix common issues,
|
||||
or fix manually based on the error descriptions. Iterate until valid.
|
||||
7. **Save**: Call `create_agent` (new) or `edit_agent` (existing) with
|
||||
8. **Save**: Call `create_agent` (new) or `edit_agent` (existing) with
|
||||
the final `agent_json`
|
||||
|
||||
### Agent JSON Structure
|
||||
@@ -89,8 +104,8 @@ These define the agent's interface — what it accepts and what it produces.
|
||||
|
||||
**AgentDropdownInputBlock** (ID: `655d6fdf-a334-421c-b733-520549c07cd1`):
|
||||
- Specialized input block that presents a dropdown/select to the user
|
||||
- Required `input_default` fields: `name` (str), `placeholder_values` (list of options, must have at least one)
|
||||
- Optional: `title`, `description`, `value` (default selection)
|
||||
- Required `input_default` fields: `name` (str)
|
||||
- Optional: `options` (list of dropdown values; when omitted/empty, input behaves as free-text), `title`, `description`, `value` (default selection)
|
||||
- Output: `result` — the user-selected value at runtime
|
||||
- Use this instead of AgentInputBlock when the user should pick from a fixed set of options
|
||||
|
||||
@@ -245,6 +260,17 @@ real API calls, credentials, or credits:
|
||||
3. **Iterate**: If the dry run reveals wiring issues or missing inputs, fix
|
||||
the agent JSON and re-save before suggesting a real execution.
|
||||
|
||||
**Special block behaviour in dry-run mode:**
|
||||
- **OrchestratorBlock** and **AgentExecutorBlock** execute for real so the
|
||||
orchestrator can make LLM calls and agent executors can spawn child graphs.
|
||||
Their downstream tool blocks and child-graph blocks are still simulated.
|
||||
Note: real LLM inference calls are made (consuming API quota), even though
|
||||
platform credits are not charged. Agent-mode iterations are capped at 1 in
|
||||
dry-run to keep it fast.
|
||||
- **MCPToolBlock** is simulated using the selected tool's name and JSON Schema
|
||||
so the LLM can produce a realistic mock response without connecting to the
|
||||
MCP server.
|
||||
|
||||
### Example: Simple AI Text Processor
|
||||
|
||||
A minimal agent with input, processing, and output:
|
||||
|
||||
@@ -2,14 +2,30 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from backend.util import json
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session", loop_scope="session", name="server")
|
||||
async def _server_noop() -> None:
|
||||
"""No-op server stub — SDK tests don't need the full backend."""
|
||||
return None
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(
|
||||
scope="session", loop_scope="session", autouse=True, name="graph_cleanup"
|
||||
)
|
||||
async def _graph_cleanup_noop() -> AsyncIterator[None]:
|
||||
"""No-op graph cleanup stub."""
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_chat_config():
|
||||
"""Mock ChatConfig so compact_transcript tests skip real config lookup."""
|
||||
|
||||
@@ -8,6 +8,9 @@ SDK-internal paths (``~/.claude/projects/…/tool-results/``) are handled
|
||||
by the separate ``Read`` MCP tool registered in ``tool_adapter.py``.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
@@ -28,6 +31,12 @@ from backend.copilot.context import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default number of lines returned by ``read_file`` when the caller does not
|
||||
# specify a limit. Also used as the threshold in ``bridge_to_sandbox`` to
|
||||
# decide whether the model is requesting the full file (and thus whether the
|
||||
# bridge copy is worthwhile).
|
||||
_DEFAULT_READ_LIMIT = 2000
|
||||
|
||||
|
||||
async def _check_sandbox_symlink_escape(
|
||||
sandbox: Any,
|
||||
@@ -89,7 +98,7 @@ def _get_sandbox_and_path(
|
||||
return sandbox, remote
|
||||
|
||||
|
||||
async def _sandbox_write(sandbox: Any, path: str, content: str) -> None:
|
||||
async def _sandbox_write(sandbox: Any, path: str, content: str | bytes) -> None:
|
||||
"""Write *content* to *path* inside the sandbox.
|
||||
|
||||
The E2B filesystem API (``sandbox.files.write``) and the command API
|
||||
@@ -102,11 +111,14 @@ async def _sandbox_write(sandbox: Any, path: str, content: str) -> None:
|
||||
To work around this, writes targeting ``/tmp`` are performed via
|
||||
``tee`` through the command API, which runs as the sandbox ``user``
|
||||
and can therefore always overwrite user-owned files.
|
||||
|
||||
*content* may be ``str`` (text) or ``bytes`` (binary). Both paths
|
||||
are handled correctly: text is encoded to bytes for the base64 shell
|
||||
pipe, and raw bytes are passed through without any encoding.
|
||||
"""
|
||||
if path == "/tmp" or path.startswith("/tmp/"):
|
||||
import base64 as _b64
|
||||
|
||||
encoded = _b64.b64encode(content.encode()).decode()
|
||||
raw = content.encode() if isinstance(content, str) else content
|
||||
encoded = base64.b64encode(raw).decode()
|
||||
result = await sandbox.commands.run(
|
||||
f"echo {shlex.quote(encoded)} | base64 -d > {shlex.quote(path)}",
|
||||
cwd=E2B_WORKDIR,
|
||||
@@ -128,14 +140,25 @@ async def _handle_read_file(args: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Read lines from a sandbox file, falling back to the local host for SDK-internal paths."""
|
||||
file_path: str = args.get("file_path", "")
|
||||
offset: int = max(0, int(args.get("offset", 0)))
|
||||
limit: int = max(1, int(args.get("limit", 2000)))
|
||||
limit: int = max(1, int(args.get("limit", _DEFAULT_READ_LIMIT)))
|
||||
|
||||
if not file_path:
|
||||
return _mcp("file_path is required", error=True)
|
||||
|
||||
# SDK-internal paths (tool-results, ephemeral working dir) stay on the host.
|
||||
# SDK-internal paths (tool-results/tool-outputs, ephemeral working dir)
|
||||
# stay on the host. When E2B is active, also copy the file into the
|
||||
# sandbox so bash_exec can access it for further processing.
|
||||
if _is_allowed_local(file_path):
|
||||
return _read_local(file_path, offset, limit)
|
||||
result = _read_local(file_path, offset, limit)
|
||||
if not result.get("isError"):
|
||||
sandbox = _get_sandbox()
|
||||
if sandbox is not None:
|
||||
annotation = await bridge_and_annotate(
|
||||
sandbox, file_path, offset, limit
|
||||
)
|
||||
if annotation:
|
||||
result["content"][0]["text"] += annotation
|
||||
return result
|
||||
|
||||
result = _get_sandbox_and_path(file_path)
|
||||
if isinstance(result, dict):
|
||||
@@ -302,6 +325,103 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:
|
||||
return _mcp(output if output else "No matches found.")
|
||||
|
||||
|
||||
# Bridging: copy SDK-internal files into E2B sandbox
|
||||
|
||||
# Files larger than this are written to /home/user/ via sandbox.files.write()
|
||||
# instead of /tmp/ via shell base64, to avoid shell argument length limits
|
||||
# and E2B command timeouts. Base64 expands content by ~33%, so keep this
|
||||
# well under the typical Linux ARG_MAX (128 KB).
|
||||
_BRIDGE_SHELL_MAX_BYTES = 32 * 1024 # 32 KB
|
||||
# Files larger than this are skipped entirely to avoid excessive transfer times.
|
||||
_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024 # 50 MB
|
||||
|
||||
|
||||
async def bridge_to_sandbox(
|
||||
sandbox: Any, file_path: str, offset: int, limit: int
|
||||
) -> str | None:
|
||||
"""Best-effort copy of a host-side SDK file into the E2B sandbox.
|
||||
|
||||
When the model reads an SDK-internal file (e.g. tool-results), it often
|
||||
wants to process the data with bash. Copying the file into the sandbox
|
||||
under a stable name lets ``bash_exec`` access it without extra steps.
|
||||
|
||||
Only copies when offset=0 and limit is large enough to indicate the model
|
||||
wants the full file. Errors are logged but never propagated.
|
||||
|
||||
Returns the sandbox path on success, or ``None`` on skip/failure.
|
||||
|
||||
Size handling:
|
||||
- <= 32 KB: written to ``/tmp/<hash>-<basename>`` via shell base64
|
||||
(``_sandbox_write``). Kept small to stay within ARG_MAX.
|
||||
- 32 KB - 50 MB: written to ``/home/user/<hash>-<basename>`` via
|
||||
``sandbox.files.write()`` to avoid shell argument length limits.
|
||||
- > 50 MB: skipped entirely with a warning.
|
||||
|
||||
The sandbox filename is prefixed with a short hash of the full source
|
||||
path to avoid collisions when different source files share the same
|
||||
basename (e.g. multiple ``result.json`` files).
|
||||
"""
|
||||
if offset != 0 or limit < _DEFAULT_READ_LIMIT:
|
||||
return None
|
||||
try:
|
||||
expanded = os.path.realpath(os.path.expanduser(file_path))
|
||||
basename = os.path.basename(expanded)
|
||||
source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
|
||||
unique_name = f"{source_id}-{basename}"
|
||||
file_size = os.path.getsize(expanded)
|
||||
if file_size > _BRIDGE_SKIP_BYTES:
|
||||
logger.warning(
|
||||
"[E2B] Skipping bridge for large file (%d bytes): %s",
|
||||
file_size,
|
||||
basename,
|
||||
)
|
||||
return None
|
||||
|
||||
def _read_bytes() -> bytes:
|
||||
with open(expanded, "rb") as fh:
|
||||
return fh.read()
|
||||
|
||||
raw_content = await asyncio.to_thread(_read_bytes)
|
||||
try:
|
||||
text_content: str | None = raw_content.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
text_content = None
|
||||
data: str | bytes = text_content if text_content is not None else raw_content
|
||||
if file_size <= _BRIDGE_SHELL_MAX_BYTES:
|
||||
sandbox_path = f"/tmp/{unique_name}"
|
||||
await _sandbox_write(sandbox, sandbox_path, data)
|
||||
else:
|
||||
sandbox_path = f"/home/user/{unique_name}"
|
||||
await sandbox.files.write(sandbox_path, data)
|
||||
logger.info(
|
||||
"[E2B] Bridged SDK file to sandbox: %s -> %s", basename, sandbox_path
|
||||
)
|
||||
return sandbox_path
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"[E2B] Failed to bridge SDK file to sandbox: %s",
|
||||
file_path,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def bridge_and_annotate(
|
||||
sandbox: Any, file_path: str, offset: int, limit: int
|
||||
) -> str | None:
|
||||
"""Bridge a host file to the sandbox and return a newline-prefixed annotation.
|
||||
|
||||
Combines ``bridge_to_sandbox`` with the standard annotation suffix so
|
||||
callers don't need to duplicate the pattern. Returns a string like
|
||||
``"\\n[Sandbox copy available at /tmp/abc-file.txt]"`` on success, or
|
||||
``None`` if bridging was skipped or failed.
|
||||
"""
|
||||
sandbox_path = await bridge_to_sandbox(sandbox, file_path, offset, limit)
|
||||
if sandbox_path is None:
|
||||
return None
|
||||
return f"\n[Sandbox copy available at {sandbox_path}]"
|
||||
|
||||
|
||||
# Local read (for SDK-internal paths)
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
Pure unit tests with no external dependencies (no E2B, no sandbox).
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
from types import SimpleNamespace
|
||||
@@ -13,12 +14,26 @@ import pytest
|
||||
from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir
|
||||
|
||||
from .e2b_file_tools import (
|
||||
_BRIDGE_SHELL_MAX_BYTES,
|
||||
_BRIDGE_SKIP_BYTES,
|
||||
_DEFAULT_READ_LIMIT,
|
||||
_check_sandbox_symlink_escape,
|
||||
_read_local,
|
||||
_sandbox_write,
|
||||
bridge_and_annotate,
|
||||
bridge_to_sandbox,
|
||||
resolve_sandbox_path,
|
||||
)
|
||||
|
||||
|
||||
def _expected_bridge_path(file_path: str, prefix: str = "/tmp") -> str:
|
||||
"""Compute the expected sandbox path for a bridged file."""
|
||||
expanded = os.path.realpath(os.path.expanduser(file_path))
|
||||
basename = os.path.basename(expanded)
|
||||
source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
|
||||
return f"{prefix}/{source_id}-{basename}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# resolve_sandbox_path — sandbox path normalisation & boundary enforcement
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -91,9 +106,9 @@ class TestResolveSandboxPath:
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_local — host filesystem reads with allowlist enforcement
|
||||
#
|
||||
# In E2B mode, _read_local only allows tool-results paths (via
|
||||
# is_allowed_local_path without sdk_cwd). Regular files live on the
|
||||
# sandbox, not the host.
|
||||
# In E2B mode, _read_local only allows tool-results/tool-outputs paths
|
||||
# (via is_allowed_local_path without sdk_cwd). Regular files live on
|
||||
# the sandbox, not the host.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -119,7 +134,7 @@ class TestReadLocal:
|
||||
)
|
||||
token = _current_project_dir.set(encoded)
|
||||
try:
|
||||
result = _read_local(filepath, offset=0, limit=2000)
|
||||
result = _read_local(filepath, offset=0, limit=_DEFAULT_READ_LIMIT)
|
||||
assert result["isError"] is False
|
||||
assert "line 1" in result["content"][0]["text"]
|
||||
assert "line 2" in result["content"][0]["text"]
|
||||
@@ -127,6 +142,25 @@ class TestReadLocal:
|
||||
_current_project_dir.reset(token)
|
||||
os.unlink(filepath)
|
||||
|
||||
def test_read_tool_outputs_file(self):
|
||||
"""Reading a tool-outputs file should also succeed."""
|
||||
encoded = "-tmp-copilot-e2b-test-read-outputs"
|
||||
tool_outputs_dir = os.path.join(
|
||||
SDK_PROJECTS_DIR, encoded, self._CONV_UUID, "tool-outputs"
|
||||
)
|
||||
os.makedirs(tool_outputs_dir, exist_ok=True)
|
||||
filepath = os.path.join(tool_outputs_dir, "sdk-abc123.json")
|
||||
with open(filepath, "w") as f:
|
||||
f.write('{"data": "test"}\n')
|
||||
token = _current_project_dir.set(encoded)
|
||||
try:
|
||||
result = _read_local(filepath, offset=0, limit=_DEFAULT_READ_LIMIT)
|
||||
assert result["isError"] is False
|
||||
assert "test" in result["content"][0]["text"]
|
||||
finally:
|
||||
_current_project_dir.reset(token)
|
||||
shutil.rmtree(os.path.join(SDK_PROJECTS_DIR, encoded), ignore_errors=True)
|
||||
|
||||
def test_read_disallowed_path_blocked(self):
|
||||
"""Reading /etc/passwd should be blocked by the allowlist."""
|
||||
result = _read_local("/etc/passwd", offset=0, limit=10)
|
||||
@@ -335,3 +369,199 @@ class TestSandboxWrite:
|
||||
encoded_in_cmd = call_args.split("echo ")[1].split(" |")[0].strip("'")
|
||||
decoded = base64.b64decode(encoded_in_cmd).decode()
|
||||
assert decoded == content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# bridge_to_sandbox — copy SDK-internal files into E2B sandbox
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_bridge_sandbox() -> SimpleNamespace:
|
||||
"""Build a sandbox mock suitable for bridge_to_sandbox tests."""
|
||||
run_result = SimpleNamespace(stdout="", stderr="", exit_code=0)
|
||||
commands = SimpleNamespace(run=AsyncMock(return_value=run_result))
|
||||
files = SimpleNamespace(write=AsyncMock())
|
||||
return SimpleNamespace(commands=commands, files=files)
|
||||
|
||||
|
||||
class TestBridgeToSandbox:
|
||||
@pytest.mark.asyncio
|
||||
async def test_happy_path_small_file(self, tmp_path):
|
||||
"""A small file is bridged to /tmp/<hash>-<basename> via _sandbox_write."""
|
||||
f = tmp_path / "result.json"
|
||||
f.write_text('{"ok": true}')
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
expected = _expected_bridge_path(str(f))
|
||||
assert result == expected
|
||||
sandbox.commands.run.assert_called_once()
|
||||
cmd = sandbox.commands.run.call_args[0][0]
|
||||
assert "result.json" in cmd
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skip_when_offset_nonzero(self, tmp_path):
|
||||
"""Bridging is skipped when offset != 0 (partial read)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=10, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
assert result is None
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skip_when_limit_too_small(self, tmp_path):
|
||||
"""Bridging is skipped when limit < _DEFAULT_READ_LIMIT (partial read)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await bridge_to_sandbox(sandbox, str(f), offset=0, limit=100)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_nonexistent_file_does_not_raise(self, tmp_path):
|
||||
"""Bridging a non-existent file logs but does not propagate errors."""
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await bridge_to_sandbox(
|
||||
sandbox, str(tmp_path / "ghost.txt"), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sandbox_write_failure_returns_none(self, tmp_path):
|
||||
"""If sandbox write fails, returns None (best-effort)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
sandbox.commands.run.side_effect = RuntimeError("E2B timeout")
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_large_file_uses_files_api(self, tmp_path):
|
||||
"""Files > 32 KB but <= 50 MB are written to /home/user/ via files.write."""
|
||||
f = tmp_path / "big.json"
|
||||
f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
expected = _expected_bridge_path(str(f), prefix="/home/user")
|
||||
assert result == expected
|
||||
sandbox.files.write.assert_called_once()
|
||||
call_args = sandbox.files.write.call_args[0]
|
||||
assert call_args[0] == expected
|
||||
sandbox.commands.run.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_small_binary_file_preserves_bytes(self, tmp_path):
|
||||
"""A small binary file is bridged to /tmp via base64 without corruption."""
|
||||
binary_data = bytes(range(256))
|
||||
f = tmp_path / "image.png"
|
||||
f.write_bytes(binary_data)
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
expected = _expected_bridge_path(str(f))
|
||||
assert result == expected
|
||||
sandbox.commands.run.assert_called_once()
|
||||
cmd = sandbox.commands.run.call_args[0][0]
|
||||
assert "base64" in cmd
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_large_binary_file_writes_raw_bytes(self, tmp_path):
|
||||
"""A large binary file is bridged to /home/user/ as raw bytes."""
|
||||
binary_data = bytes(range(256)) * 200
|
||||
f = tmp_path / "photo.jpg"
|
||||
f.write_bytes(binary_data)
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
expected = _expected_bridge_path(str(f), prefix="/home/user")
|
||||
assert result == expected
|
||||
sandbox.files.write.assert_called_once()
|
||||
call_args = sandbox.files.write.call_args[0]
|
||||
assert call_args[0] == expected
|
||||
assert call_args[1] == binary_data
|
||||
sandbox.commands.run.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_very_large_file_skipped(self, tmp_path):
|
||||
"""Files > 50 MB are skipped entirely."""
|
||||
f = tmp_path / "huge.bin"
|
||||
# Create a sparse file to avoid actually writing 50 MB
|
||||
with open(f, "wb") as fh:
|
||||
fh.seek(_BRIDGE_SKIP_BYTES + 1)
|
||||
fh.write(b"\0")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
result = await bridge_to_sandbox(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# bridge_and_annotate — shared helper wrapping bridge_to_sandbox + annotation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBridgeAndAnnotate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_annotation_on_success(self, tmp_path):
|
||||
"""On success, returns a newline-prefixed annotation with the sandbox path."""
|
||||
f = tmp_path / "data.json"
|
||||
f.write_text('{"ok": true}')
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
annotation = await bridge_and_annotate(
|
||||
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
expected_path = _expected_bridge_path(str(f))
|
||||
assert annotation == f"\n[Sandbox copy available at {expected_path}]"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_none_when_skipped(self, tmp_path):
|
||||
"""When bridging is skipped (e.g. offset != 0), returns None."""
|
||||
f = tmp_path / "data.json"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
annotation = await bridge_and_annotate(
|
||||
sandbox, str(f), offset=10, limit=_DEFAULT_READ_LIMIT
|
||||
)
|
||||
|
||||
assert annotation is None
|
||||
|
||||
@@ -20,6 +20,7 @@ config = ChatConfig()
|
||||
def build_sdk_env(
|
||||
session_id: str | None = None,
|
||||
user_id: str | None = None,
|
||||
sdk_cwd: str | None = None,
|
||||
) -> dict[str, str]:
|
||||
"""Build env vars for the SDK CLI subprocess.
|
||||
|
||||
@@ -29,25 +30,35 @@ def build_sdk_env(
|
||||
``ANTHROPIC_API_KEY`` from the parent environment.
|
||||
3. **OpenRouter** (default) — overrides base URL and auth token to
|
||||
route through the proxy, with Langfuse trace headers.
|
||||
|
||||
When *sdk_cwd* is provided, ``CLAUDE_CODE_TMPDIR`` is set so that
|
||||
the CLI writes temp/sub-agent output inside the per-session workspace
|
||||
directory rather than an inaccessible system temp path.
|
||||
"""
|
||||
# --- Mode 1: Claude Code subscription auth ---
|
||||
if config.use_claude_code_subscription:
|
||||
validate_subscription()
|
||||
return {
|
||||
env: dict[str, str] = {
|
||||
"ANTHROPIC_API_KEY": "",
|
||||
"ANTHROPIC_AUTH_TOKEN": "",
|
||||
"ANTHROPIC_BASE_URL": "",
|
||||
}
|
||||
if sdk_cwd:
|
||||
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
|
||||
return env
|
||||
|
||||
# --- Mode 2: Direct Anthropic (no proxy hop) ---
|
||||
if not config.openrouter_active:
|
||||
return {}
|
||||
env = {}
|
||||
if sdk_cwd:
|
||||
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
|
||||
return env
|
||||
|
||||
# --- Mode 3: OpenRouter proxy ---
|
||||
base = (config.base_url or "").rstrip("/")
|
||||
if base.endswith("/v1"):
|
||||
base = base[:-3]
|
||||
env: dict[str, str] = {
|
||||
env = {
|
||||
"ANTHROPIC_BASE_URL": base,
|
||||
"ANTHROPIC_AUTH_TOKEN": config.api_key or "",
|
||||
"ANTHROPIC_API_KEY": "", # force CLI to use AUTH_TOKEN
|
||||
@@ -65,4 +76,7 @@ def build_sdk_env(
|
||||
if parts:
|
||||
env["ANTHROPIC_CUSTOM_HEADERS"] = "\n".join(parts)
|
||||
|
||||
if sdk_cwd:
|
||||
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
|
||||
|
||||
return env
|
||||
|
||||
@@ -240,3 +240,54 @@ class TestBuildSdkEnvModePriority:
|
||||
"ANTHROPIC_AUTH_TOKEN": "",
|
||||
"ANTHROPIC_BASE_URL": "",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLAUDE_CODE_TMPDIR integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClaudeCodeTmpdir:
|
||||
"""Verify build_sdk_env() sets CLAUDE_CODE_TMPDIR from *sdk_cwd*."""
|
||||
|
||||
def test_tmpdir_set_when_sdk_cwd_is_truthy(self):
|
||||
"""CLAUDE_CODE_TMPDIR is set to sdk_cwd when sdk_cwd is truthy."""
|
||||
cfg = _make_config(use_openrouter=False)
|
||||
with patch("backend.copilot.sdk.env.config", cfg):
|
||||
from backend.copilot.sdk.env import build_sdk_env
|
||||
|
||||
result = build_sdk_env(sdk_cwd="/tmp/copilot-workspace")
|
||||
|
||||
assert result["CLAUDE_CODE_TMPDIR"] == "/tmp/copilot-workspace"
|
||||
|
||||
def test_tmpdir_not_set_when_sdk_cwd_is_none(self):
|
||||
"""CLAUDE_CODE_TMPDIR is NOT in the env when sdk_cwd is None."""
|
||||
cfg = _make_config(use_openrouter=False)
|
||||
with patch("backend.copilot.sdk.env.config", cfg):
|
||||
from backend.copilot.sdk.env import build_sdk_env
|
||||
|
||||
result = build_sdk_env(sdk_cwd=None)
|
||||
|
||||
assert "CLAUDE_CODE_TMPDIR" not in result
|
||||
|
||||
def test_tmpdir_not_set_when_sdk_cwd_is_empty_string(self):
|
||||
"""CLAUDE_CODE_TMPDIR is NOT in the env when sdk_cwd is empty string."""
|
||||
cfg = _make_config(use_openrouter=False)
|
||||
with patch("backend.copilot.sdk.env.config", cfg):
|
||||
from backend.copilot.sdk.env import build_sdk_env
|
||||
|
||||
result = build_sdk_env(sdk_cwd="")
|
||||
|
||||
assert "CLAUDE_CODE_TMPDIR" not in result
|
||||
|
||||
@patch("backend.copilot.sdk.env.validate_subscription")
|
||||
def test_tmpdir_set_in_subscription_mode(self, mock_validate):
|
||||
"""CLAUDE_CODE_TMPDIR is set even in subscription mode."""
|
||||
cfg = _make_config(use_claude_code_subscription=True)
|
||||
with patch("backend.copilot.sdk.env.config", cfg):
|
||||
from backend.copilot.sdk.env import build_sdk_env
|
||||
|
||||
result = build_sdk_env(sdk_cwd="/tmp/sub-workspace")
|
||||
|
||||
assert result["CLAUDE_CODE_TMPDIR"] == "/tmp/sub-workspace"
|
||||
assert result["ANTHROPIC_API_KEY"] == ""
|
||||
|
||||
@@ -29,6 +29,7 @@ from backend.copilot.response_model import (
|
||||
StreamToolOutputAvailable,
|
||||
)
|
||||
|
||||
from .compaction import compaction_events
|
||||
from .response_adapter import SDKResponseAdapter
|
||||
from .tool_adapter import MCP_TOOL_PREFIX
|
||||
from .tool_adapter import _pending_tool_outputs as _pto
|
||||
@@ -689,3 +690,102 @@ def test_already_resolved_tool_skipped_in_user_message():
|
||||
assert (
|
||||
len(output_events) == 0
|
||||
), "Already-resolved tool should not emit duplicate output"
|
||||
|
||||
|
||||
# -- _end_text_if_open before compaction -------------------------------------
|
||||
|
||||
|
||||
def test_end_text_if_open_emits_text_end_before_finish_step():
|
||||
"""StreamTextEnd must be emitted before StreamFinishStep during compaction.
|
||||
|
||||
When ``emit_end_if_ready`` fires compaction events while a text block is
|
||||
still open, ``_end_text_if_open`` must close it first. If StreamFinishStep
|
||||
arrives before StreamTextEnd, the Vercel AI SDK clears ``activeTextParts``
|
||||
and raises "Received text-end for missing text part".
|
||||
"""
|
||||
adapter = _adapter()
|
||||
|
||||
# Open a text block by processing an AssistantMessage with text
|
||||
msg = AssistantMessage(content=[TextBlock(text="partial response")], model="test")
|
||||
adapter.convert_message(msg)
|
||||
assert adapter.has_started_text
|
||||
assert not adapter.has_ended_text
|
||||
|
||||
# Simulate what service.py does before yielding compaction events
|
||||
pre_close: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(pre_close)
|
||||
combined = pre_close + list(compaction_events("Compacted transcript"))
|
||||
|
||||
text_end_idx = next(
|
||||
(i for i, e in enumerate(combined) if isinstance(e, StreamTextEnd)), None
|
||||
)
|
||||
finish_step_idx = next(
|
||||
(i for i, e in enumerate(combined) if isinstance(e, StreamFinishStep)), None
|
||||
)
|
||||
|
||||
assert text_end_idx is not None, "StreamTextEnd must be present"
|
||||
assert finish_step_idx is not None, "StreamFinishStep must be present"
|
||||
assert text_end_idx < finish_step_idx, (
|
||||
f"StreamTextEnd (idx={text_end_idx}) must precede "
|
||||
f"StreamFinishStep (idx={finish_step_idx}) — otherwise the Vercel AI SDK "
|
||||
"clears activeTextParts before text-end arrives"
|
||||
)
|
||||
|
||||
|
||||
def test_step_open_must_reset_after_compaction_finish_step():
|
||||
"""Adapter step_open must be reset when compaction emits StreamFinishStep.
|
||||
|
||||
Compaction events bypass the adapter, so service.py must explicitly clear
|
||||
step_open after yielding a StreamFinishStep from compaction. Without this,
|
||||
the next AssistantMessage skips StreamStartStep because the adapter still
|
||||
thinks a step is open.
|
||||
"""
|
||||
adapter = _adapter()
|
||||
|
||||
# Open a step + text block via an AssistantMessage
|
||||
msg = AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
|
||||
adapter.convert_message(msg)
|
||||
assert adapter.step_open is True
|
||||
|
||||
# Simulate what service.py does: close text, then check compaction events
|
||||
pre_close: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(pre_close)
|
||||
|
||||
events = list(compaction_events("Compacted transcript"))
|
||||
if any(isinstance(ev, StreamFinishStep) for ev in events):
|
||||
adapter.step_open = False
|
||||
|
||||
assert (
|
||||
adapter.step_open is False
|
||||
), "step_open must be False after compaction emits StreamFinishStep"
|
||||
|
||||
# Next AssistantMessage must open a new step
|
||||
msg2 = AssistantMessage(content=[TextBlock(text="continued")], model="test")
|
||||
results = adapter.convert_message(msg2)
|
||||
assert any(
|
||||
isinstance(r, StreamStartStep) for r in results
|
||||
), "A new StreamStartStep must be emitted after compaction closed the step"
|
||||
|
||||
|
||||
def test_end_text_if_open_no_op_when_no_text_open():
|
||||
"""_end_text_if_open emits nothing when no text block is open."""
|
||||
adapter = _adapter()
|
||||
results: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(results)
|
||||
assert results == []
|
||||
|
||||
|
||||
def test_end_text_if_open_no_op_after_text_already_ended():
|
||||
"""_end_text_if_open emits nothing when the text block is already closed."""
|
||||
adapter = _adapter()
|
||||
msg = AssistantMessage(content=[TextBlock(text="hello")], model="test")
|
||||
adapter.convert_message(msg)
|
||||
# Close it once
|
||||
first: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(first)
|
||||
assert len(first) == 1
|
||||
assert isinstance(first[0], StreamTextEnd)
|
||||
# Second call must be a no-op
|
||||
second: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(second)
|
||||
assert second == []
|
||||
|
||||
@@ -1010,7 +1010,7 @@ def _make_sdk_patches(
|
||||
(f"{_SVC}.create_security_hooks", dict(return_value=MagicMock())),
|
||||
(f"{_SVC}.get_copilot_tool_names", dict(return_value=[])),
|
||||
(f"{_SVC}.get_sdk_disallowed_tools", dict(return_value=[])),
|
||||
(f"{_SVC}.build_sdk_env", dict(return_value=None)),
|
||||
(f"{_SVC}.build_sdk_env", dict(return_value={})),
|
||||
(f"{_SVC}._resolve_sdk_model", dict(return_value=None)),
|
||||
(f"{_SVC}.set_execution_context", {}),
|
||||
(
|
||||
@@ -1487,3 +1487,188 @@ class TestStreamChatCompletionRetryIntegration:
|
||||
errors = [e for e in events if isinstance(e, StreamError)]
|
||||
assert not errors, f"Unexpected StreamError: {errors}"
|
||||
assert any(isinstance(e, StreamStart) for e in events)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_result_message_success_subtype_prompt_too_long_triggers_compaction(
|
||||
self,
|
||||
):
|
||||
"""CLI returns ResultMessage(subtype="success") with result="Prompt is too long".
|
||||
|
||||
The SDK internally compacts but the transcript is still too long. It
|
||||
returns subtype="success" (process completed) with result="Prompt is
|
||||
too long" (the actual rejection message). The retry loop must detect
|
||||
this as a context-length error and trigger compaction — the subtype
|
||||
"success" must not fool it into treating this as a real response.
|
||||
"""
|
||||
import contextlib
|
||||
|
||||
from claude_agent_sdk import ResultMessage
|
||||
|
||||
from backend.copilot.response_model import StreamError, StreamStart
|
||||
from backend.copilot.sdk.service import stream_chat_completion_sdk
|
||||
|
||||
session = self._make_session()
|
||||
success_result = self._make_result_message()
|
||||
attempt_count = [0]
|
||||
|
||||
error_result = ResultMessage(
|
||||
subtype="success",
|
||||
result="Prompt is too long",
|
||||
duration_ms=100,
|
||||
duration_api_ms=0,
|
||||
is_error=False,
|
||||
num_turns=1,
|
||||
session_id="test-session-id",
|
||||
)
|
||||
|
||||
def _client_factory(*args, **kwargs):
|
||||
attempt_count[0] += 1
|
||||
|
||||
async def _receive_error():
|
||||
yield error_result
|
||||
|
||||
async def _receive_success():
|
||||
yield success_result
|
||||
|
||||
client = MagicMock()
|
||||
client._transport = MagicMock()
|
||||
client._transport.write = AsyncMock()
|
||||
client.query = AsyncMock()
|
||||
if attempt_count[0] == 1:
|
||||
client.receive_response = _receive_error
|
||||
else:
|
||||
client.receive_response = _receive_success
|
||||
cm = AsyncMock()
|
||||
cm.__aenter__.return_value = client
|
||||
cm.__aexit__.return_value = None
|
||||
return cm
|
||||
|
||||
original_transcript = _build_transcript(
|
||||
[("user", "prior question"), ("assistant", "prior answer")]
|
||||
)
|
||||
compacted_transcript = _build_transcript(
|
||||
[("user", "[summary]"), ("assistant", "summary reply")]
|
||||
)
|
||||
|
||||
patches = _make_sdk_patches(
|
||||
session,
|
||||
original_transcript=original_transcript,
|
||||
compacted_transcript=compacted_transcript,
|
||||
client_side_effect=_client_factory,
|
||||
)
|
||||
|
||||
events = []
|
||||
with contextlib.ExitStack() as stack:
|
||||
for target, kwargs in patches:
|
||||
stack.enter_context(patch(target, **kwargs))
|
||||
async for event in stream_chat_completion_sdk(
|
||||
session_id="test-session-id",
|
||||
message="hello",
|
||||
is_user_message=True,
|
||||
user_id="test-user",
|
||||
session=session,
|
||||
):
|
||||
events.append(event)
|
||||
|
||||
assert attempt_count[0] == 2, (
|
||||
f"Expected 2 SDK attempts (subtype='success' with 'Prompt is too long' "
|
||||
f"result should trigger compaction retry), got {attempt_count[0]}"
|
||||
)
|
||||
errors = [e for e in events if isinstance(e, StreamError)]
|
||||
assert not errors, f"Unexpected StreamError: {errors}"
|
||||
assert any(isinstance(e, StreamStart) for e in events)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_assistant_message_error_content_prompt_too_long_triggers_compaction(
|
||||
self,
|
||||
):
|
||||
"""AssistantMessage.error="invalid_request" with content "Prompt is too long".
|
||||
|
||||
The SDK returns error type "invalid_request" but puts the actual
|
||||
rejection message ("Prompt is too long") in the content blocks.
|
||||
The retry loop must detect this via content inspection (sdk_error
|
||||
being set confirms it's an error message, not user content).
|
||||
"""
|
||||
import contextlib
|
||||
|
||||
from claude_agent_sdk import AssistantMessage, ResultMessage, TextBlock
|
||||
|
||||
from backend.copilot.response_model import StreamError, StreamStart
|
||||
from backend.copilot.sdk.service import stream_chat_completion_sdk
|
||||
|
||||
session = self._make_session()
|
||||
success_result = self._make_result_message()
|
||||
attempt_count = [0]
|
||||
|
||||
def _client_factory(*args, **kwargs):
|
||||
attempt_count[0] += 1
|
||||
|
||||
async def _receive_error():
|
||||
# SDK returns invalid_request with "Prompt is too long" in content.
|
||||
# ResultMessage.result is a non-PTL value ("done") to isolate
|
||||
# the AssistantMessage content detection path exclusively.
|
||||
yield AssistantMessage(
|
||||
content=[TextBlock(text="Prompt is too long")],
|
||||
model="<synthetic>",
|
||||
error="invalid_request",
|
||||
)
|
||||
yield ResultMessage(
|
||||
subtype="success",
|
||||
result="done",
|
||||
duration_ms=100,
|
||||
duration_api_ms=0,
|
||||
is_error=False,
|
||||
num_turns=1,
|
||||
session_id="test-session-id",
|
||||
)
|
||||
|
||||
async def _receive_success():
|
||||
yield success_result
|
||||
|
||||
client = MagicMock()
|
||||
client._transport = MagicMock()
|
||||
client._transport.write = AsyncMock()
|
||||
client.query = AsyncMock()
|
||||
if attempt_count[0] == 1:
|
||||
client.receive_response = _receive_error
|
||||
else:
|
||||
client.receive_response = _receive_success
|
||||
cm = AsyncMock()
|
||||
cm.__aenter__.return_value = client
|
||||
cm.__aexit__.return_value = None
|
||||
return cm
|
||||
|
||||
original_transcript = _build_transcript(
|
||||
[("user", "prior question"), ("assistant", "prior answer")]
|
||||
)
|
||||
compacted_transcript = _build_transcript(
|
||||
[("user", "[summary]"), ("assistant", "summary reply")]
|
||||
)
|
||||
|
||||
patches = _make_sdk_patches(
|
||||
session,
|
||||
original_transcript=original_transcript,
|
||||
compacted_transcript=compacted_transcript,
|
||||
client_side_effect=_client_factory,
|
||||
)
|
||||
|
||||
events = []
|
||||
with contextlib.ExitStack() as stack:
|
||||
for target, kwargs in patches:
|
||||
stack.enter_context(patch(target, **kwargs))
|
||||
async for event in stream_chat_completion_sdk(
|
||||
session_id="test-session-id",
|
||||
message="hello",
|
||||
is_user_message=True,
|
||||
user_id="test-user",
|
||||
session=session,
|
||||
):
|
||||
events.append(event)
|
||||
|
||||
assert attempt_count[0] == 2, (
|
||||
f"Expected 2 SDK attempts (AssistantMessage error content 'Prompt is "
|
||||
f"too long' should trigger compaction retry), got {attempt_count[0]}"
|
||||
)
|
||||
errors = [e for e in events if isinstance(e, StreamError)]
|
||||
assert not errors, f"Unexpected StreamError: {errors}"
|
||||
assert any(isinstance(e, StreamStart) for e in events)
|
||||
|
||||
@@ -22,6 +22,38 @@ from .tool_adapter import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# The SDK CLI uses "Task" in older versions and "Agent" in v2.x+.
|
||||
# Shared across all sessions — used by security hooks for sub-agent detection.
|
||||
_SUBAGENT_TOOLS: frozenset[str] = frozenset({"Task", "Agent"})
|
||||
|
||||
# Unicode ranges stripped by _sanitize():
|
||||
# - BiDi overrides (U+202A-U+202E, U+2066-U+2069) can trick reviewers
|
||||
# into misreading code/logs.
|
||||
# - Zero-width characters (U+200B-U+200F, U+FEFF) can hide content.
|
||||
_BIDI_AND_ZW_CHARS = set(
|
||||
chr(c)
|
||||
for r in (range(0x202A, 0x202F), range(0x2066, 0x206A), range(0x200B, 0x2010))
|
||||
for c in r
|
||||
) | {"\ufeff"}
|
||||
|
||||
|
||||
def _sanitize(value: str, max_len: int = 200) -> str:
|
||||
"""Strip control characters and truncate for safe logging.
|
||||
|
||||
Removes C0 (U+0000-U+001F), DEL (U+007F), C1 (U+0080-U+009F),
|
||||
Unicode BiDi overrides, and zero-width characters to prevent
|
||||
log injection and visual spoofing.
|
||||
"""
|
||||
cleaned = "".join(
|
||||
c
|
||||
for c in value
|
||||
if c >= " "
|
||||
and c != "\x7f"
|
||||
and not ("\x80" <= c <= "\x9f")
|
||||
and c not in _BIDI_AND_ZW_CHARS
|
||||
)
|
||||
return cleaned[:max_len]
|
||||
|
||||
|
||||
def _deny(reason: str) -> dict[str, Any]:
|
||||
"""Return a hook denial response."""
|
||||
@@ -136,11 +168,13 @@ def create_security_hooks(
|
||||
- PostToolUse: Log successful tool executions
|
||||
- PostToolUseFailure: Log and handle failed tool executions
|
||||
- PreCompact: Log context compaction events (SDK handles compaction automatically)
|
||||
- SubagentStart: Log sub-agent lifecycle start
|
||||
- SubagentStop: Log sub-agent lifecycle end
|
||||
|
||||
Args:
|
||||
user_id: Current user ID for isolation validation
|
||||
sdk_cwd: SDK working directory for workspace-scoped tool validation
|
||||
max_subtasks: Maximum concurrent Task (sub-agent) spawns allowed per session
|
||||
max_subtasks: Maximum concurrent sub-agent spawns allowed per session
|
||||
on_compact: Callback invoked when SDK starts compacting context.
|
||||
Receives the transcript_path from the hook input.
|
||||
|
||||
@@ -151,9 +185,19 @@ def create_security_hooks(
|
||||
from claude_agent_sdk import HookMatcher
|
||||
from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
|
||||
|
||||
# Per-session tracking for Task sub-agent concurrency.
|
||||
# Per-session tracking for sub-agent concurrency.
|
||||
# Set of tool_use_ids that consumed a slot — len() is the active count.
|
||||
task_tool_use_ids: set[str] = set()
|
||||
#
|
||||
# LIMITATION: For background (async) agents the SDK returns the
|
||||
# Agent/Task tool immediately with {isAsync: true}, which triggers
|
||||
# PostToolUse and releases the slot while the agent is still running.
|
||||
# SubagentStop fires later when the background process finishes but
|
||||
# does not currently hold a slot. This means the concurrency limit
|
||||
# only gates *launches*, not true concurrent execution. To fix this
|
||||
# we would need to track background agent_ids separately and release
|
||||
# in SubagentStop, but the SDK does not guarantee SubagentStop fires
|
||||
# for every background agent (e.g. on session abort).
|
||||
subagent_tool_use_ids: set[str] = set()
|
||||
|
||||
async def pre_tool_use_hook(
|
||||
input_data: HookInput,
|
||||
@@ -165,29 +209,22 @@ def create_security_hooks(
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
|
||||
|
||||
# Rate-limit Task (sub-agent) spawns per session
|
||||
if tool_name == "Task":
|
||||
# Block background task execution first — denied calls
|
||||
# should not consume a subtask slot.
|
||||
if tool_input.get("run_in_background"):
|
||||
logger.info(f"[SDK] Blocked background Task, user={user_id}")
|
||||
return cast(
|
||||
SyncHookJSONOutput,
|
||||
_deny(
|
||||
"Background task execution is not supported. "
|
||||
"Run tasks in the foreground instead "
|
||||
"(remove the run_in_background parameter)."
|
||||
),
|
||||
)
|
||||
if len(task_tool_use_ids) >= max_subtasks:
|
||||
# Rate-limit sub-agent spawns per session.
|
||||
# The SDK CLI renamed "Task" → "Agent" in v2.x; handle both.
|
||||
if tool_name in _SUBAGENT_TOOLS:
|
||||
# Background agents are allowed — the SDK returns immediately
|
||||
# with {isAsync: true} and the model polls via TaskOutput.
|
||||
# Still count them against the concurrency limit.
|
||||
if len(subagent_tool_use_ids) >= max_subtasks:
|
||||
logger.warning(
|
||||
f"[SDK] Task limit reached ({max_subtasks}), user={user_id}"
|
||||
f"[SDK] Sub-agent limit reached ({max_subtasks}), "
|
||||
f"user={user_id}"
|
||||
)
|
||||
return cast(
|
||||
SyncHookJSONOutput,
|
||||
_deny(
|
||||
f"Maximum {max_subtasks} concurrent sub-tasks. "
|
||||
"Wait for running sub-tasks to finish, "
|
||||
f"Maximum {max_subtasks} concurrent sub-agents. "
|
||||
"Wait for running sub-agents to finish, "
|
||||
"or continue in the main conversation."
|
||||
),
|
||||
)
|
||||
@@ -208,20 +245,20 @@ def create_security_hooks(
|
||||
if result:
|
||||
return cast(SyncHookJSONOutput, result)
|
||||
|
||||
# Reserve the Task slot only after all validations pass
|
||||
if tool_name == "Task" and tool_use_id is not None:
|
||||
task_tool_use_ids.add(tool_use_id)
|
||||
# Reserve the sub-agent slot only after all validations pass
|
||||
if tool_name in _SUBAGENT_TOOLS and tool_use_id is not None:
|
||||
subagent_tool_use_ids.add(tool_use_id)
|
||||
|
||||
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
def _release_task_slot(tool_name: str, tool_use_id: str | None) -> None:
|
||||
"""Release a Task concurrency slot if one was reserved."""
|
||||
if tool_name == "Task" and tool_use_id in task_tool_use_ids:
|
||||
task_tool_use_ids.discard(tool_use_id)
|
||||
def _release_subagent_slot(tool_name: str, tool_use_id: str | None) -> None:
|
||||
"""Release a sub-agent concurrency slot if one was reserved."""
|
||||
if tool_name in _SUBAGENT_TOOLS and tool_use_id in subagent_tool_use_ids:
|
||||
subagent_tool_use_ids.discard(tool_use_id)
|
||||
logger.info(
|
||||
"[SDK] Task slot released, active=%d/%d, user=%s",
|
||||
len(task_tool_use_ids),
|
||||
"[SDK] Sub-agent slot released, active=%d/%d, user=%s",
|
||||
len(subagent_tool_use_ids),
|
||||
max_subtasks,
|
||||
user_id,
|
||||
)
|
||||
@@ -241,13 +278,14 @@ def create_security_hooks(
|
||||
_ = context
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
|
||||
_release_task_slot(tool_name, tool_use_id)
|
||||
_release_subagent_slot(tool_name, tool_use_id)
|
||||
is_builtin = not tool_name.startswith(MCP_TOOL_PREFIX)
|
||||
safe_tool_use_id = _sanitize(str(tool_use_id or ""), max_len=12)
|
||||
logger.info(
|
||||
"[SDK] PostToolUse: %s (builtin=%s, tool_use_id=%s)",
|
||||
tool_name,
|
||||
is_builtin,
|
||||
(tool_use_id or "")[:12],
|
||||
safe_tool_use_id,
|
||||
)
|
||||
|
||||
# Stash output for SDK built-in tools so the response adapter can
|
||||
@@ -256,7 +294,7 @@ def create_security_hooks(
|
||||
if is_builtin:
|
||||
tool_response = input_data.get("tool_response")
|
||||
if tool_response is not None:
|
||||
resp_preview = str(tool_response)[:100]
|
||||
resp_preview = _sanitize(str(tool_response), max_len=100)
|
||||
logger.info(
|
||||
"[SDK] Stashing builtin output for %s (%d chars): %s...",
|
||||
tool_name,
|
||||
@@ -280,13 +318,17 @@ def create_security_hooks(
|
||||
"""Log failed tool executions for debugging."""
|
||||
_ = context
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
error = input_data.get("error", "Unknown error")
|
||||
error = _sanitize(str(input_data.get("error", "Unknown error")))
|
||||
safe_tool_use_id = _sanitize(str(tool_use_id or ""))
|
||||
logger.warning(
|
||||
f"[SDK] Tool failed: {tool_name}, error={error}, "
|
||||
f"user={user_id}, tool_use_id={tool_use_id}"
|
||||
"[SDK] Tool failed: %s, error=%s, user=%s, tool_use_id=%s",
|
||||
tool_name,
|
||||
error,
|
||||
user_id,
|
||||
safe_tool_use_id,
|
||||
)
|
||||
|
||||
_release_task_slot(tool_name, tool_use_id)
|
||||
_release_subagent_slot(tool_name, tool_use_id)
|
||||
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
@@ -301,16 +343,14 @@ def create_security_hooks(
|
||||
This hook provides visibility into when compaction happens.
|
||||
"""
|
||||
_ = context, tool_use_id
|
||||
trigger = input_data.get("trigger", "auto")
|
||||
trigger = _sanitize(str(input_data.get("trigger", "auto")), max_len=50)
|
||||
# Sanitize untrusted input: strip control chars for logging AND
|
||||
# for the value passed downstream. read_compacted_entries()
|
||||
# validates against _projects_base() as defence-in-depth, but
|
||||
# sanitizing here prevents log injection and rejects obviously
|
||||
# malformed paths early.
|
||||
transcript_path = (
|
||||
str(input_data.get("transcript_path", ""))
|
||||
.replace("\n", "")
|
||||
.replace("\r", "")
|
||||
transcript_path = _sanitize(
|
||||
str(input_data.get("transcript_path", "")), max_len=500
|
||||
)
|
||||
logger.info(
|
||||
"[SDK] Context compaction triggered: %s, user=%s, transcript_path=%s",
|
||||
@@ -322,6 +362,44 @@ def create_security_hooks(
|
||||
on_compact(transcript_path)
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
async def subagent_start_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
context: HookContext,
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Log when a sub-agent starts execution."""
|
||||
_ = context, tool_use_id
|
||||
agent_id = _sanitize(str(input_data.get("agent_id", "?")))
|
||||
agent_type = _sanitize(str(input_data.get("agent_type", "?")))
|
||||
logger.info(
|
||||
"[SDK] SubagentStart: agent_id=%s, type=%s, user=%s",
|
||||
agent_id,
|
||||
agent_type,
|
||||
user_id,
|
||||
)
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
async def subagent_stop_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
context: HookContext,
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Log when a sub-agent stops."""
|
||||
_ = context, tool_use_id
|
||||
agent_id = _sanitize(str(input_data.get("agent_id", "?")))
|
||||
agent_type = _sanitize(str(input_data.get("agent_type", "?")))
|
||||
transcript = _sanitize(
|
||||
str(input_data.get("agent_transcript_path", "")), max_len=500
|
||||
)
|
||||
logger.info(
|
||||
"[SDK] SubagentStop: agent_id=%s, type=%s, user=%s, transcript=%s",
|
||||
agent_id,
|
||||
agent_type,
|
||||
user_id,
|
||||
transcript,
|
||||
)
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
hooks: dict[str, Any] = {
|
||||
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
|
||||
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
|
||||
@@ -329,6 +407,8 @@ def create_security_hooks(
|
||||
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
|
||||
],
|
||||
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
|
||||
"SubagentStart": [HookMatcher(matcher="*", hooks=[subagent_start_hook])],
|
||||
"SubagentStop": [HookMatcher(matcher="*", hooks=[subagent_stop_hook])],
|
||||
}
|
||||
|
||||
return hooks
|
||||
|
||||
@@ -5,6 +5,7 @@ They validate that the security hooks correctly block unauthorized paths,
|
||||
tool access, and dangerous input patterns.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import pytest
|
||||
@@ -136,8 +137,20 @@ def test_read_tool_results_allowed():
|
||||
_current_project_dir.reset(token)
|
||||
|
||||
|
||||
def test_read_tool_outputs_allowed():
|
||||
"""tool-outputs/ paths should be allowed, same as tool-results/."""
|
||||
home = os.path.expanduser("~")
|
||||
path = f"{home}/.claude/projects/-tmp-copilot-abc123/a1b2c3d4-e5f6-7890-abcd-ef1234567890/tool-outputs/12345.txt"
|
||||
token = _current_project_dir.set("-tmp-copilot-abc123")
|
||||
try:
|
||||
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
|
||||
assert result == {}
|
||||
finally:
|
||||
_current_project_dir.reset(token)
|
||||
|
||||
|
||||
def test_read_claude_projects_settings_json_denied():
|
||||
"""SDK-internal artifacts like settings.json are NOT accessible — only tool-results/ is."""
|
||||
"""SDK-internal artifacts like settings.json are NOT accessible — only tool-results/tool-outputs is."""
|
||||
home = os.path.expanduser("~")
|
||||
path = f"{home}/.claude/projects/-tmp-copilot-abc123/settings.json"
|
||||
token = _current_project_dir.set("-tmp-copilot-abc123")
|
||||
@@ -233,16 +246,15 @@ def _hooks():
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_task_background_blocked(_hooks):
|
||||
"""Task with run_in_background=true must be denied."""
|
||||
async def test_task_background_allowed(_hooks):
|
||||
"""Task with run_in_background=true is allowed (SDK handles async lifecycle)."""
|
||||
pre, _, _ = _hooks
|
||||
result = await pre(
|
||||
{"tool_name": "Task", "tool_input": {"run_in_background": True, "prompt": "x"}},
|
||||
tool_use_id=None,
|
||||
tool_use_id="tu-bg-1",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
assert "foreground" in _reason(result).lower()
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@@ -356,3 +368,303 @@ async def test_task_slot_released_on_failure(_hooks):
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# "Agent" tool name (SDK v2.x+ renamed "Task" → "Agent")
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_agent_background_allowed(_hooks):
|
||||
"""Agent with run_in_background=true is allowed (SDK handles async lifecycle)."""
|
||||
pre, _, _ = _hooks
|
||||
result = await pre(
|
||||
{
|
||||
"tool_name": "Agent",
|
||||
"tool_input": {"run_in_background": True, "prompt": "x"},
|
||||
},
|
||||
tool_use_id="tu-agent-bg-1",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_agent_foreground_allowed(_hooks):
|
||||
"""Agent without run_in_background should be allowed."""
|
||||
pre, _, _ = _hooks
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "do stuff"}},
|
||||
tool_use_id="tu-agent-1",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_background_agent_counts_against_limit(_hooks):
|
||||
"""Background agents still consume concurrency slots."""
|
||||
pre, _, _ = _hooks
|
||||
# Two background agents fill the limit
|
||||
for i in range(2):
|
||||
result = await pre(
|
||||
{
|
||||
"tool_name": "Agent",
|
||||
"tool_input": {"run_in_background": True, "prompt": "bg"},
|
||||
},
|
||||
tool_use_id=f"tu-bglimit-{i}",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
# Third (background or foreground) should be denied
|
||||
result = await pre(
|
||||
{
|
||||
"tool_name": "Agent",
|
||||
"tool_input": {"run_in_background": True, "prompt": "over"},
|
||||
},
|
||||
tool_use_id="tu-bglimit-2",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
assert "Maximum" in _reason(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_agent_limit_enforced(_hooks):
|
||||
"""Agent spawns beyond max_subtasks should be denied."""
|
||||
pre, _, _ = _hooks
|
||||
# First two should pass
|
||||
for i in range(2):
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
|
||||
tool_use_id=f"tu-agent-limit-{i}",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
# Third should be denied (limit=2)
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "over limit"}},
|
||||
tool_use_id="tu-agent-limit-2",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
assert "Maximum" in _reason(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_agent_slot_released_on_completion(_hooks):
|
||||
"""Completing an Agent should free a slot so new Agents can be spawned."""
|
||||
pre, post, _ = _hooks
|
||||
# Fill both slots
|
||||
for i in range(2):
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
|
||||
tool_use_id=f"tu-agent-comp-{i}",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
# Third should be denied — at capacity
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
|
||||
tool_use_id="tu-agent-comp-2",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
|
||||
# Complete first agent — frees a slot
|
||||
await post(
|
||||
{"tool_name": "Agent", "tool_input": {}},
|
||||
tool_use_id="tu-agent-comp-0",
|
||||
context={},
|
||||
)
|
||||
|
||||
# Now a new Agent should be allowed
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "after release"}},
|
||||
tool_use_id="tu-agent-comp-3",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_agent_slot_released_on_failure(_hooks):
|
||||
"""A failed Agent should also free its concurrency slot."""
|
||||
pre, _, post_failure = _hooks
|
||||
# Fill both slots
|
||||
for i in range(2):
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
|
||||
tool_use_id=f"tu-agent-fail-{i}",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
# At capacity
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
|
||||
tool_use_id="tu-agent-fail-2",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
|
||||
# Fail first agent — should free a slot
|
||||
await post_failure(
|
||||
{"tool_name": "Agent", "tool_input": {}, "error": "something broke"},
|
||||
tool_use_id="tu-agent-fail-0",
|
||||
context={},
|
||||
)
|
||||
|
||||
# New Agent should be allowed
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "after failure"}},
|
||||
tool_use_id="tu-agent-fail-3",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_mixed_task_agent_share_slots(_hooks):
|
||||
"""Task and Agent share the same concurrency pool."""
|
||||
pre, post, _ = _hooks
|
||||
# Fill one slot with Task, one with Agent
|
||||
result = await pre(
|
||||
{"tool_name": "Task", "tool_input": {"prompt": "ok"}},
|
||||
tool_use_id="tu-mix-task",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
|
||||
tool_use_id="tu-mix-agent",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
# Third (either name) should be denied
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
|
||||
tool_use_id="tu-mix-over",
|
||||
context={},
|
||||
)
|
||||
assert _is_denied(result)
|
||||
|
||||
# Release the Task slot
|
||||
await post(
|
||||
{"tool_name": "Task", "tool_input": {}},
|
||||
tool_use_id="tu-mix-task",
|
||||
context={},
|
||||
)
|
||||
|
||||
# Now an Agent should be allowed
|
||||
result = await pre(
|
||||
{"tool_name": "Agent", "tool_input": {"prompt": "after task release"}},
|
||||
tool_use_id="tu-mix-new",
|
||||
context={},
|
||||
)
|
||||
assert not _is_denied(result)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SubagentStart / SubagentStop hooks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def _subagent_hooks():
|
||||
"""Create hooks and return (subagent_start, subagent_stop) handlers."""
|
||||
hooks = create_security_hooks(user_id="u1", sdk_cwd=SDK_CWD, max_subtasks=2)
|
||||
start = hooks["SubagentStart"][0].hooks[0]
|
||||
stop = hooks["SubagentStop"][0].hooks[0]
|
||||
return start, stop
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_subagent_start_hook_returns_empty(_subagent_hooks):
|
||||
"""SubagentStart hook should return an empty dict (logging only)."""
|
||||
start, _ = _subagent_hooks
|
||||
result = await start(
|
||||
{"agent_id": "sa-123", "agent_type": "research"},
|
||||
tool_use_id=None,
|
||||
context={},
|
||||
)
|
||||
assert result == {}
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_subagent_stop_hook_returns_empty(_subagent_hooks):
|
||||
"""SubagentStop hook should return an empty dict (logging only)."""
|
||||
_, stop = _subagent_hooks
|
||||
result = await stop(
|
||||
{
|
||||
"agent_id": "sa-123",
|
||||
"agent_type": "research",
|
||||
"agent_transcript_path": "/tmp/transcript.txt",
|
||||
},
|
||||
tool_use_id=None,
|
||||
context={},
|
||||
)
|
||||
assert result == {}
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
|
||||
@pytest.mark.asyncio
|
||||
async def test_subagent_hooks_sanitize_inputs(_subagent_hooks, caplog):
|
||||
"""SubagentStart/Stop should sanitize control chars from inputs."""
|
||||
start, stop = _subagent_hooks
|
||||
# Inject control characters (C0, DEL, C1, BiDi overrides, zero-width)
|
||||
# — hook should not raise AND logs must be clean
|
||||
with caplog.at_level(logging.DEBUG, logger="backend.copilot.sdk.security_hooks"):
|
||||
result = await start(
|
||||
{
|
||||
"agent_id": "sa\n-injected\r\x00\x7f",
|
||||
"agent_type": "safe\x80_type\x9f\ttab",
|
||||
},
|
||||
tool_use_id=None,
|
||||
context={},
|
||||
)
|
||||
assert result == {}
|
||||
# Control chars must be stripped from the logged values
|
||||
for record in caplog.records:
|
||||
assert "\x00" not in record.message
|
||||
assert "\r" not in record.message
|
||||
assert "\n" not in record.message
|
||||
assert "\x7f" not in record.message
|
||||
assert "\x80" not in record.message
|
||||
assert "\x9f" not in record.message
|
||||
assert "safe_type" in caplog.text
|
||||
|
||||
caplog.clear()
|
||||
with caplog.at_level(logging.DEBUG, logger="backend.copilot.sdk.security_hooks"):
|
||||
result = await stop(
|
||||
{
|
||||
"agent_id": "sa\n-injected\x7f",
|
||||
"agent_type": "type\r\x80\x9f",
|
||||
"agent_transcript_path": "/tmp/\x00malicious\npath\u202a\u200b",
|
||||
},
|
||||
tool_use_id=None,
|
||||
context={},
|
||||
)
|
||||
assert result == {}
|
||||
for record in caplog.records:
|
||||
assert "\x00" not in record.message
|
||||
assert "\r" not in record.message
|
||||
assert "\n" not in record.message
|
||||
assert "\x7f" not in record.message
|
||||
assert "\u202a" not in record.message
|
||||
assert "\u200b" not in record.message
|
||||
assert "/tmp/maliciouspath" in caplog.text
|
||||
|
||||
@@ -1310,10 +1310,16 @@ async def _run_stream_attempt(
|
||||
# AssistantMessage.error (not as a Python exception).
|
||||
# Re-raise so the outer retry loop can compact the
|
||||
# transcript and retry with reduced context.
|
||||
# Only check error_text (the error field), not the
|
||||
# content preview — content may contain arbitrary text
|
||||
# that false-positives the pattern match.
|
||||
if _is_prompt_too_long(Exception(error_text)):
|
||||
# Check both error_text and error_preview: sdk_error
|
||||
# being set confirms this is an error message (not user
|
||||
# content), so checking content is safe. The actual
|
||||
# error description (e.g. "Prompt is too long") may be
|
||||
# in the content, not the error type field
|
||||
# (e.g. error="invalid_request", content="Prompt is
|
||||
# too long").
|
||||
if _is_prompt_too_long(Exception(error_text)) or _is_prompt_too_long(
|
||||
Exception(error_preview)
|
||||
):
|
||||
logger.warning(
|
||||
"%s Prompt-too-long detected via AssistantMessage "
|
||||
"error — raising for retry",
|
||||
@@ -1414,13 +1420,16 @@ async def _run_stream_attempt(
|
||||
ctx.log_prefix,
|
||||
sdk_msg.result or "(no error message provided)",
|
||||
)
|
||||
# If the CLI itself rejected the prompt as too long
|
||||
# (pre-API check, duration_api_ms=0), re-raise as an
|
||||
# exception so the retry loop can trigger compaction.
|
||||
# Without this, the ResultMessage is silently consumed
|
||||
# and the retry/compaction mechanism is never invoked.
|
||||
if _is_prompt_too_long(RuntimeError(sdk_msg.result or "")):
|
||||
raise RuntimeError("Prompt is too long")
|
||||
|
||||
# Check for prompt-too-long regardless of subtype — the
|
||||
# SDK may return subtype="success" with result="Prompt is
|
||||
# too long" when the CLI rejects the prompt before calling
|
||||
# the API (cost_usd=0, no tokens consumed). If we only
|
||||
# check the "error" subtype path, the stream appears to
|
||||
# complete normally, the synthetic error text is stored
|
||||
# in the transcript, and the session grows without bound.
|
||||
if _is_prompt_too_long(RuntimeError(sdk_msg.result or "")):
|
||||
raise RuntimeError("Prompt is too long")
|
||||
|
||||
# Capture token usage from ResultMessage.
|
||||
# Anthropic reports cached tokens separately:
|
||||
@@ -1453,6 +1462,23 @@ async def _run_stream_attempt(
|
||||
# Emit compaction end if SDK finished compacting.
|
||||
# Sync TranscriptBuilder with the CLI's active context.
|
||||
compact_result = await ctx.compaction.emit_end_if_ready(ctx.session)
|
||||
if compact_result.events:
|
||||
# Compaction events end with StreamFinishStep, which maps to
|
||||
# Vercel AI SDK's "finish-step" — that clears activeTextParts.
|
||||
# Close any open text block BEFORE the compaction events so
|
||||
# the text-end arrives before finish-step, preventing
|
||||
# "text-end for missing text part" errors on the frontend.
|
||||
pre_close: list[StreamBaseResponse] = []
|
||||
state.adapter._end_text_if_open(pre_close)
|
||||
# Compaction events bypass the adapter, so sync step state
|
||||
# when a StreamFinishStep is present — otherwise the adapter
|
||||
# will skip StreamStartStep on the next AssistantMessage.
|
||||
if any(
|
||||
isinstance(ev, StreamFinishStep) for ev in compact_result.events
|
||||
):
|
||||
state.adapter.step_open = False
|
||||
for r in pre_close:
|
||||
yield r
|
||||
for ev in compact_result.events:
|
||||
yield ev
|
||||
entries_replaced = False
|
||||
@@ -1858,7 +1884,10 @@ async def stream_chat_completion_sdk(
|
||||
)
|
||||
|
||||
# Fail fast when no API credentials are available at all.
|
||||
sdk_env = build_sdk_env(session_id=session_id, user_id=user_id)
|
||||
# sdk_cwd routes the CLI's temp dir into the per-session workspace
|
||||
# so sub-agent output files land inside sdk_cwd (see build_sdk_env).
|
||||
sdk_env = build_sdk_env(session_id=session_id, user_id=user_id, sdk_cwd=sdk_cwd)
|
||||
|
||||
if not config.api_key and not config.use_claude_code_subscription:
|
||||
raise RuntimeError(
|
||||
"No API key configured. Set OPEN_ROUTER_API_KEY, "
|
||||
|
||||
@@ -38,7 +38,7 @@ from backend.copilot.tools import TOOL_REGISTRY
|
||||
from backend.copilot.tools.base import BaseTool
|
||||
from backend.util.truncate import truncate
|
||||
|
||||
from .e2b_file_tools import E2B_FILE_TOOL_NAMES, E2B_FILE_TOOLS
|
||||
from .e2b_file_tools import E2B_FILE_TOOL_NAMES, E2B_FILE_TOOLS, bridge_and_annotate
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from e2b import AsyncSandbox
|
||||
@@ -387,7 +387,16 @@ async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
|
||||
selected = list(itertools.islice(f, offset, offset + limit))
|
||||
# Cleanup happens in _cleanup_sdk_tool_results after session ends;
|
||||
# don't delete here — the SDK may read in multiple chunks.
|
||||
return _mcp_ok("".join(selected))
|
||||
#
|
||||
# When E2B is active, also copy the file into the sandbox so
|
||||
# bash_exec can process it (the model often uses Read then bash).
|
||||
text = "".join(selected)
|
||||
sandbox = _current_sandbox.get(None)
|
||||
if sandbox is not None:
|
||||
annotation = await bridge_and_annotate(sandbox, resolved, offset, limit)
|
||||
if annotation:
|
||||
text += annotation
|
||||
return _mcp_ok(text)
|
||||
except FileNotFoundError:
|
||||
return _mcp_err(f"File not found: {file_path}")
|
||||
except Exception as e:
|
||||
@@ -581,13 +590,14 @@ def create_copilot_mcp_server(*, use_e2b: bool = False):
|
||||
# Security hooks validate that file paths stay within sdk_cwd.
|
||||
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
|
||||
# which provides kernel-level network isolation via unshare --net.
|
||||
# Task allows spawning sub-agents (rate-limited by security hooks).
|
||||
# Task/Agent allows spawning sub-agents (rate-limited by security hooks).
|
||||
# The CLI renamed "Task" → "Agent" in v2.x; both are listed for compat.
|
||||
# WebSearch uses Brave Search via Anthropic's API — safe, no SSRF risk.
|
||||
# TodoWrite manages the task checklist shown in the UI — no security concern.
|
||||
# In E2B mode, all five are disabled — MCP equivalents provide direct sandbox
|
||||
# access. read_file also handles local tool-results and ephemeral reads.
|
||||
_SDK_BUILTIN_FILE_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep"]
|
||||
_SDK_BUILTIN_ALWAYS = ["Task", "WebSearch", "TodoWrite"]
|
||||
_SDK_BUILTIN_ALWAYS = ["Task", "Agent", "WebSearch", "TodoWrite"]
|
||||
_SDK_BUILTIN_TOOLS = [*_SDK_BUILTIN_FILE_TOOLS, *_SDK_BUILTIN_ALWAYS]
|
||||
|
||||
# SDK built-in tools that must be explicitly blocked.
|
||||
|
||||
@@ -619,3 +619,95 @@ class TestSDKDisallowedTools:
|
||||
def test_webfetch_tool_is_disallowed(self):
|
||||
"""WebFetch is disallowed due to SSRF risk."""
|
||||
assert "WebFetch" in SDK_DISALLOWED_TOOLS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_file_handler — bridge_and_annotate integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestReadFileHandlerBridge:
|
||||
"""Verify that _read_file_handler calls bridge_and_annotate when a sandbox is active."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _init_context(self):
|
||||
set_execution_context(
|
||||
user_id="test",
|
||||
session=None, # type: ignore[arg-type]
|
||||
sandbox=None,
|
||||
sdk_cwd="/tmp/copilot-bridge-test",
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bridge_called_when_sandbox_active(self, tmp_path, monkeypatch):
|
||||
"""When a sandbox is set, bridge_and_annotate is called and its annotation appended."""
|
||||
from backend.copilot.context import _current_sandbox
|
||||
|
||||
from .tool_adapter import _read_file_handler
|
||||
|
||||
test_file = tmp_path / "tool-results" / "data.json"
|
||||
test_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
test_file.write_text('{"ok": true}\n')
|
||||
|
||||
monkeypatch.setattr(
|
||||
"backend.copilot.sdk.tool_adapter.is_allowed_local_path",
|
||||
lambda path, cwd: True,
|
||||
)
|
||||
|
||||
fake_sandbox = object()
|
||||
token = _current_sandbox.set(fake_sandbox) # type: ignore[arg-type]
|
||||
try:
|
||||
bridge_calls: list[tuple] = []
|
||||
|
||||
async def fake_bridge_and_annotate(sandbox, file_path, offset, limit):
|
||||
bridge_calls.append((sandbox, file_path, offset, limit))
|
||||
return "\n[Sandbox copy available at /tmp/abc-data.json]"
|
||||
|
||||
monkeypatch.setattr(
|
||||
"backend.copilot.sdk.tool_adapter.bridge_and_annotate",
|
||||
fake_bridge_and_annotate,
|
||||
)
|
||||
|
||||
result = await _read_file_handler(
|
||||
{"file_path": str(test_file), "offset": 0, "limit": 2000}
|
||||
)
|
||||
|
||||
assert result["isError"] is False
|
||||
assert len(bridge_calls) == 1
|
||||
assert bridge_calls[0][0] is fake_sandbox
|
||||
assert "/tmp/abc-data.json" in result["content"][0]["text"]
|
||||
finally:
|
||||
_current_sandbox.reset(token)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bridge_not_called_without_sandbox(self, tmp_path, monkeypatch):
|
||||
"""When no sandbox is set, bridge_and_annotate is not called."""
|
||||
from .tool_adapter import _read_file_handler
|
||||
|
||||
test_file = tmp_path / "tool-results" / "data.json"
|
||||
test_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
test_file.write_text('{"ok": true}\n')
|
||||
|
||||
monkeypatch.setattr(
|
||||
"backend.copilot.sdk.tool_adapter.is_allowed_local_path",
|
||||
lambda path, cwd: True,
|
||||
)
|
||||
|
||||
bridge_calls: list[tuple] = []
|
||||
|
||||
async def fake_bridge_and_annotate(sandbox, file_path, offset, limit):
|
||||
bridge_calls.append((sandbox, file_path, offset, limit))
|
||||
return "\n[Sandbox copy available at /tmp/abc-data.json]"
|
||||
|
||||
monkeypatch.setattr(
|
||||
"backend.copilot.sdk.tool_adapter.bridge_and_annotate",
|
||||
fake_bridge_and_annotate,
|
||||
)
|
||||
|
||||
result = await _read_file_handler(
|
||||
{"file_path": str(test_file), "offset": 0, "limit": 2000}
|
||||
)
|
||||
|
||||
assert result["isError"] is False
|
||||
assert len(bridge_calls) == 0
|
||||
assert "Sandbox copy" not in result["content"][0]["text"]
|
||||
|
||||
@@ -10,6 +10,7 @@ from backend.copilot.tracking import track_tool_called
|
||||
from .add_understanding import AddUnderstandingTool
|
||||
from .agent_browser import BrowserActTool, BrowserNavigateTool, BrowserScreenshotTool
|
||||
from .agent_output import AgentOutputTool
|
||||
from .ask_question import AskQuestionTool
|
||||
from .base import BaseTool
|
||||
from .bash_exec import BashExecTool
|
||||
from .connect_integration import ConnectIntegrationTool
|
||||
@@ -55,6 +56,7 @@ logger = logging.getLogger(__name__)
|
||||
# Single source of truth for all tools
|
||||
TOOL_REGISTRY: dict[str, BaseTool] = {
|
||||
"add_understanding": AddUnderstandingTool(),
|
||||
"ask_question": AskQuestionTool(),
|
||||
"create_agent": CreateAgentTool(),
|
||||
"customize_agent": CustomizeAgentTool(),
|
||||
"edit_agent": EditAgentTool(),
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
|
||||
from backend.api.features.library.model import LibraryAgent
|
||||
from backend.api.features.store.model import StoreAgent, StoreAgentDetails
|
||||
|
||||
from backend.data.db_accessors import library_db, store_db
|
||||
from backend.data.db_accessors import graph_db, library_db, store_db
|
||||
from backend.util.exceptions import DatabaseError, NotFoundError
|
||||
|
||||
from .models import (
|
||||
@@ -34,12 +35,13 @@ async def search_agents(
|
||||
source: SearchSource,
|
||||
session_id: str | None = None,
|
||||
user_id: str | None = None,
|
||||
include_graph: bool = False,
|
||||
) -> ToolResponseBase:
|
||||
"""Search for agents in marketplace or user library."""
|
||||
if source == "marketplace":
|
||||
return await _search_marketplace(query, session_id)
|
||||
else:
|
||||
return await _search_library(query, session_id, user_id)
|
||||
return await _search_library(query, session_id, user_id, include_graph)
|
||||
|
||||
|
||||
async def _search_marketplace(query: str, session_id: str | None) -> ToolResponseBase:
|
||||
@@ -105,7 +107,10 @@ async def _search_marketplace(query: str, session_id: str | None) -> ToolRespons
|
||||
|
||||
|
||||
async def _search_library(
|
||||
query: str, session_id: str | None, user_id: str | None
|
||||
query: str,
|
||||
session_id: str | None,
|
||||
user_id: str | None,
|
||||
include_graph: bool = False,
|
||||
) -> ToolResponseBase:
|
||||
"""Search user's library agents, with direct UUID lookup fallback."""
|
||||
if not user_id:
|
||||
@@ -149,6 +154,10 @@ async def _search_library(
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
truncation_notice: str | None = None
|
||||
if include_graph and agents:
|
||||
truncation_notice = await _enrich_agents_with_graph(agents, user_id)
|
||||
|
||||
if not agents:
|
||||
if not query:
|
||||
return NoResultsResponse(
|
||||
@@ -182,13 +191,17 @@ async def _search_library(
|
||||
else:
|
||||
title = f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} in your library for '{query}'"
|
||||
|
||||
message = (
|
||||
"Found agents in the user's library. You can provide a link to view "
|
||||
"an agent at: /library/agents/{agent_id}. Use agent_output to get "
|
||||
"execution results, or run_agent to execute. Let the user know we can "
|
||||
"create a custom agent for them based on their needs."
|
||||
)
|
||||
if truncation_notice:
|
||||
message = f"{message}\n\nNote: {truncation_notice}"
|
||||
|
||||
return AgentsFoundResponse(
|
||||
message=(
|
||||
"Found agents in the user's library. You can provide a link to view "
|
||||
"an agent at: /library/agents/{agent_id}. Use agent_output to get "
|
||||
"execution results, or run_agent to execute. Let the user know we can "
|
||||
"create a custom agent for them based on their needs."
|
||||
),
|
||||
message=message,
|
||||
title=title,
|
||||
agents=agents,
|
||||
count=len(agents),
|
||||
@@ -196,6 +209,81 @@ async def _search_library(
|
||||
)
|
||||
|
||||
|
||||
_MAX_GRAPH_FETCHES = 10
|
||||
|
||||
|
||||
_GRAPH_FETCH_TIMEOUT = 15 # seconds
|
||||
|
||||
|
||||
async def _enrich_agents_with_graph(
|
||||
agents: list[AgentInfo], user_id: str
|
||||
) -> str | None:
|
||||
"""Fetch and attach full Graph (nodes + links) to each agent in-place.
|
||||
|
||||
Only the first ``_MAX_GRAPH_FETCHES`` agents with a ``graph_id`` are
|
||||
enriched. If some agents are skipped, a truncation notice is returned
|
||||
so the caller can surface it to the copilot.
|
||||
|
||||
Graphs are fetched with ``for_export=True`` so that credentials, API keys,
|
||||
and other secrets in ``input_default`` are stripped before the data reaches
|
||||
the LLM context.
|
||||
|
||||
Returns a truncation notice string when some agents were skipped, or
|
||||
``None`` when all eligible agents were enriched.
|
||||
"""
|
||||
with_graph_id = [a for a in agents if a.graph_id]
|
||||
fetchable = with_graph_id[:_MAX_GRAPH_FETCHES]
|
||||
if not fetchable:
|
||||
return None
|
||||
|
||||
gdb = graph_db()
|
||||
|
||||
async def _fetch(agent: AgentInfo) -> None:
|
||||
graph_id = agent.graph_id
|
||||
if not graph_id:
|
||||
return
|
||||
try:
|
||||
graph = await gdb.get_graph(
|
||||
graph_id,
|
||||
version=agent.graph_version,
|
||||
user_id=user_id,
|
||||
for_export=True,
|
||||
)
|
||||
if graph is None:
|
||||
logger.warning("Graph not found for agent %s", graph_id)
|
||||
agent.graph = graph
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch graph for agent %s: %s", graph_id, e)
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
asyncio.gather(*[_fetch(a) for a in fetchable]),
|
||||
timeout=_GRAPH_FETCH_TIMEOUT,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"include_graph: timed out after %ds fetching graphs", _GRAPH_FETCH_TIMEOUT
|
||||
)
|
||||
|
||||
skipped = len(with_graph_id) - len(fetchable)
|
||||
if skipped > 0:
|
||||
logger.warning(
|
||||
"include_graph: fetched graphs for %d/%d agents "
|
||||
"(_MAX_GRAPH_FETCHES=%d, %d skipped)",
|
||||
len(fetchable),
|
||||
len(with_graph_id),
|
||||
_MAX_GRAPH_FETCHES,
|
||||
skipped,
|
||||
)
|
||||
return (
|
||||
f"Graph data included for {len(fetchable)} of "
|
||||
f"{len(with_graph_id)} eligible agents (limit: {_MAX_GRAPH_FETCHES}). "
|
||||
f"To fetch graphs for remaining agents, narrow your search to a "
|
||||
f"specific agent by UUID."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _marketplace_agent_to_info(agent: StoreAgent | StoreAgentDetails) -> AgentInfo:
|
||||
"""Convert a marketplace agent (StoreAgent or StoreAgentDetails) to an AgentInfo."""
|
||||
return AgentInfo(
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""Tests for agent search direct lookup functionality."""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from .agent_search import search_agents
|
||||
from .models import AgentsFoundResponse, NoResultsResponse
|
||||
from .agent_search import _enrich_agents_with_graph, search_agents
|
||||
from .models import AgentInfo, AgentsFoundResponse, NoResultsResponse
|
||||
|
||||
_TEST_USER_ID = "test-user-agent-search"
|
||||
|
||||
@@ -133,10 +134,10 @@ class TestMarketplaceSlugLookup:
|
||||
class TestLibraryUUIDLookup:
|
||||
"""Tests for UUID direct lookup in library search."""
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_uuid_lookup_found_by_graph_id(self):
|
||||
"""UUID query matching a graph_id returns the agent directly."""
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
@staticmethod
|
||||
def _make_mock_library_agent(
|
||||
agent_id: str = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
|
||||
) -> MagicMock:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.id = "lib-agent-id"
|
||||
mock_agent.name = "My Library Agent"
|
||||
@@ -150,6 +151,13 @@ class TestLibraryUUIDLookup:
|
||||
mock_agent.graph_version = 1
|
||||
mock_agent.input_schema = {}
|
||||
mock_agent.output_schema = {}
|
||||
return mock_agent
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_uuid_lookup_found_by_graph_id(self):
|
||||
"""UUID query matching a graph_id returns the agent directly."""
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
mock_agent = self._make_mock_library_agent(agent_id)
|
||||
|
||||
mock_lib_db = MagicMock()
|
||||
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
|
||||
@@ -168,3 +176,427 @@ class TestLibraryUUIDLookup:
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.count == 1
|
||||
assert response.agents[0].name == "My Library Agent"
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_include_graph_fetches_graph(self):
|
||||
"""include_graph=True attaches BaseGraph to agent results."""
|
||||
from backend.data.graph import BaseGraph
|
||||
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
mock_agent = self._make_mock_library_agent(agent_id)
|
||||
mock_lib_db = MagicMock()
|
||||
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
|
||||
|
||||
fake_graph = BaseGraph(id=agent_id, name="My Library Agent", description="test")
|
||||
mock_graph_db = MagicMock()
|
||||
mock_graph_db.get_graph = AsyncMock(return_value=fake_graph)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_graph_db,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query=agent_id,
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is not None
|
||||
assert response.agents[0].graph.id == agent_id
|
||||
mock_graph_db.get_graph.assert_awaited_once_with(
|
||||
agent_id,
|
||||
version=1,
|
||||
user_id=_TEST_USER_ID,
|
||||
for_export=True,
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_include_graph_false_skips_fetch(self):
|
||||
"""include_graph=False (default) does not fetch graph data."""
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
mock_agent = self._make_mock_library_agent(agent_id)
|
||||
mock_lib_db = MagicMock()
|
||||
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
|
||||
|
||||
mock_graph_db = MagicMock()
|
||||
mock_graph_db.get_graph = AsyncMock()
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_graph_db,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query=agent_id,
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=False,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is None
|
||||
mock_graph_db.get_graph.assert_not_awaited()
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_include_graph_handles_fetch_failure(self):
|
||||
"""include_graph=True still returns agents when graph fetch fails."""
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
mock_agent = self._make_mock_library_agent(agent_id)
|
||||
mock_lib_db = MagicMock()
|
||||
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
|
||||
|
||||
mock_graph_db = MagicMock()
|
||||
mock_graph_db.get_graph = AsyncMock(side_effect=Exception("DB down"))
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_graph_db,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query=agent_id,
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is None
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_include_graph_handles_none_return(self):
|
||||
"""include_graph=True handles get_graph returning None."""
|
||||
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
|
||||
mock_agent = self._make_mock_library_agent(agent_id)
|
||||
mock_lib_db = MagicMock()
|
||||
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
|
||||
|
||||
mock_graph_db = MagicMock()
|
||||
mock_graph_db.get_graph = AsyncMock(return_value=None)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_graph_db,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query=agent_id,
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is None
|
||||
|
||||
|
||||
class TestEnrichAgentsWithGraph:
|
||||
"""Tests for _enrich_agents_with_graph edge cases."""
|
||||
|
||||
@staticmethod
|
||||
def _make_mock_library_agent(
|
||||
agent_id: str = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
|
||||
graph_id: str | None = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
|
||||
) -> MagicMock:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.id = f"lib-{agent_id[:8]}"
|
||||
mock_agent.name = f"Agent {agent_id[:8]}"
|
||||
mock_agent.description = "A library agent"
|
||||
mock_agent.creator_name = "testuser"
|
||||
mock_agent.status.value = "HEALTHY"
|
||||
mock_agent.can_access_graph = True
|
||||
mock_agent.has_external_trigger = False
|
||||
mock_agent.new_output = False
|
||||
mock_agent.graph_id = graph_id
|
||||
mock_agent.graph_version = 1
|
||||
mock_agent.input_schema = {}
|
||||
mock_agent.output_schema = {}
|
||||
return mock_agent
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_truncation_surfaces_in_response(self):
|
||||
"""When >_MAX_GRAPH_FETCHES agents have graphs, the response contains a truncation notice."""
|
||||
from backend.copilot.tools.agent_search import _MAX_GRAPH_FETCHES
|
||||
from backend.data.graph import BaseGraph
|
||||
|
||||
agent_count = _MAX_GRAPH_FETCHES + 5
|
||||
mock_agents = []
|
||||
for i in range(agent_count):
|
||||
uid = f"a1b2c3d4-e5f6-4a7b-8c9d-{i:012d}"
|
||||
mock_agents.append(self._make_mock_library_agent(uid, uid))
|
||||
|
||||
mock_lib_db = MagicMock()
|
||||
mock_search_results = MagicMock()
|
||||
mock_search_results.agents = mock_agents
|
||||
mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
|
||||
|
||||
fake_graph = BaseGraph(id="x", name="g", description="d")
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_gdb,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query="",
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert mock_gdb.get_graph.await_count == _MAX_GRAPH_FETCHES
|
||||
enriched = [a for a in response.agents if a.graph is not None]
|
||||
assert len(enriched) == _MAX_GRAPH_FETCHES
|
||||
assert "Graph data included for" in response.message
|
||||
assert str(_MAX_GRAPH_FETCHES) in response.message
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_mixed_graph_id_presence(self):
|
||||
"""Agents without graph_id are skipped during enrichment."""
|
||||
from backend.data.graph import BaseGraph
|
||||
|
||||
agent_with = self._make_mock_library_agent(
|
||||
"aaaa0000-0000-0000-0000-000000000001",
|
||||
"aaaa0000-0000-0000-0000-000000000001",
|
||||
)
|
||||
agent_without = self._make_mock_library_agent(
|
||||
"bbbb0000-0000-0000-0000-000000000002",
|
||||
graph_id=None,
|
||||
)
|
||||
|
||||
mock_lib_db = MagicMock()
|
||||
mock_search_results = MagicMock()
|
||||
mock_search_results.agents = [agent_with, agent_without]
|
||||
mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
|
||||
|
||||
fake_graph = BaseGraph(
|
||||
id="aaaa0000-0000-0000-0000-000000000001", name="g", description="d"
|
||||
)
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_gdb,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query="",
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert len(response.agents) == 2
|
||||
assert response.agents[0].graph is not None
|
||||
assert response.agents[1].graph is None
|
||||
mock_gdb.get_graph.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_partial_failure_across_multiple_agents(self):
|
||||
"""When some graph fetches fail, successful ones still have graphs attached."""
|
||||
from backend.data.graph import BaseGraph
|
||||
|
||||
id_ok = "aaaa0000-0000-0000-0000-000000000001"
|
||||
id_fail = "bbbb0000-0000-0000-0000-000000000002"
|
||||
agent_ok = self._make_mock_library_agent(id_ok, id_ok)
|
||||
agent_fail = self._make_mock_library_agent(id_fail, id_fail)
|
||||
|
||||
mock_lib_db = MagicMock()
|
||||
mock_search_results = MagicMock()
|
||||
mock_search_results.agents = [agent_ok, agent_fail]
|
||||
mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
|
||||
|
||||
fake_graph = BaseGraph(id=id_ok, name="g", description="d")
|
||||
|
||||
async def _side_effect(graph_id, **kwargs):
|
||||
if graph_id == id_fail:
|
||||
raise Exception("DB error")
|
||||
return fake_graph
|
||||
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(side_effect=_side_effect)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_gdb,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query="",
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is not None
|
||||
assert response.agents[0].graph.id == id_ok
|
||||
assert response.agents[1].graph is None
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_keyword_search_with_include_graph(self):
|
||||
"""include_graph works via keyword search (non-UUID path)."""
|
||||
from backend.data.graph import BaseGraph
|
||||
|
||||
agent_id = "cccc0000-0000-0000-0000-000000000003"
|
||||
mock_agent = self._make_mock_library_agent(agent_id, agent_id)
|
||||
|
||||
mock_lib_db = MagicMock()
|
||||
mock_search_results = MagicMock()
|
||||
mock_search_results.agents = [mock_agent]
|
||||
mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
|
||||
|
||||
fake_graph = BaseGraph(id=agent_id, name="g", description="d")
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.library_db",
|
||||
return_value=mock_lib_db,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.agent_search.graph_db",
|
||||
return_value=mock_gdb,
|
||||
),
|
||||
):
|
||||
response = await search_agents(
|
||||
query="email",
|
||||
source="library",
|
||||
session_id="s",
|
||||
user_id=_TEST_USER_ID,
|
||||
include_graph=True,
|
||||
)
|
||||
|
||||
assert isinstance(response, AgentsFoundResponse)
|
||||
assert response.agents[0].graph is not None
|
||||
assert response.agents[0].graph.id == agent_id
|
||||
mock_gdb.get_graph.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_timeout_preserves_successful_fetches(self):
|
||||
"""On timeout, agents that already fetched their graph keep the result."""
|
||||
fast_agent = AgentInfo(
|
||||
id="a1",
|
||||
name="Fast",
|
||||
description="d",
|
||||
source="library",
|
||||
graph_id="fast-graph",
|
||||
)
|
||||
slow_agent = AgentInfo(
|
||||
id="a2",
|
||||
name="Slow",
|
||||
description="d",
|
||||
source="library",
|
||||
graph_id="slow-graph",
|
||||
)
|
||||
fake_graph = MagicMock()
|
||||
fake_graph.id = "graph-1"
|
||||
|
||||
async def mock_get_graph(
|
||||
graph_id, *, version=None, user_id=None, for_export=False
|
||||
):
|
||||
if graph_id == "fast-graph":
|
||||
return fake_graph
|
||||
await asyncio.sleep(999)
|
||||
return MagicMock()
|
||||
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(side_effect=mock_get_graph)
|
||||
|
||||
with (
|
||||
patch("backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb),
|
||||
patch("backend.copilot.tools.agent_search._GRAPH_FETCH_TIMEOUT", 0.1),
|
||||
):
|
||||
await _enrich_agents_with_graph([fast_agent, slow_agent], _TEST_USER_ID)
|
||||
|
||||
assert fast_agent.graph is fake_graph
|
||||
assert slow_agent.graph is None
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_enrich_success(self):
|
||||
"""All agents get their graphs when no timeout occurs."""
|
||||
agent = AgentInfo(
|
||||
id="a1", name="Test", description="d", source="library", graph_id="g1"
|
||||
)
|
||||
fake_graph = MagicMock()
|
||||
fake_graph.id = "graph-1"
|
||||
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
|
||||
|
||||
with patch(
|
||||
"backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb
|
||||
):
|
||||
result = await _enrich_agents_with_graph([agent], _TEST_USER_ID)
|
||||
|
||||
assert agent.graph is fake_graph
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_enrich_skips_agents_without_graph_id(self):
|
||||
"""Agents without graph_id are not fetched."""
|
||||
agent_no_id = AgentInfo(
|
||||
id="a1", name="Test", description="d", source="library", graph_id=None
|
||||
)
|
||||
|
||||
mock_gdb = MagicMock()
|
||||
mock_gdb.get_graph = AsyncMock()
|
||||
|
||||
with patch(
|
||||
"backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb
|
||||
):
|
||||
result = await _enrich_agents_with_graph([agent_no_id], _TEST_USER_ID)
|
||||
|
||||
mock_gdb.get_graph.assert_not_called()
|
||||
assert result is None
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
"""AskQuestionTool - Ask the user a clarifying question before proceeding."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import ClarificationNeededResponse, ClarifyingQuestion, ToolResponseBase
|
||||
|
||||
|
||||
class AskQuestionTool(BaseTool):
|
||||
"""Ask the user a clarifying question and wait for their answer.
|
||||
|
||||
Use this tool when the user's request is ambiguous and you need more
|
||||
information before proceeding. Call find_block or other discovery tools
|
||||
first to ground your question in real platform options, then call this
|
||||
tool with a concrete question listing those options.
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "ask_question"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Ask the user a clarifying question. Use when the request is "
|
||||
"ambiguous and you need to confirm intent, choose between options, "
|
||||
"or gather missing details before proceeding."
|
||||
)
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"The concrete question to ask the user. Should list "
|
||||
"real options when applicable."
|
||||
),
|
||||
},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": (
|
||||
"Options for the user to choose from "
|
||||
"(e.g. ['Email', 'Slack', 'Google Docs'])."
|
||||
),
|
||||
},
|
||||
"keyword": {
|
||||
"type": "string",
|
||||
"description": "Short label identifying what the question is about.",
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return False
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs: Any,
|
||||
) -> ToolResponseBase:
|
||||
del user_id # unused; required by BaseTool contract
|
||||
question_raw = kwargs.get("question")
|
||||
if not isinstance(question_raw, str) or not question_raw.strip():
|
||||
raise ValueError("ask_question requires a non-empty 'question' string")
|
||||
question = question_raw.strip()
|
||||
raw_options = kwargs.get("options", [])
|
||||
if not isinstance(raw_options, list):
|
||||
raw_options = []
|
||||
options: list[str] = [str(o) for o in raw_options if o]
|
||||
raw_keyword = kwargs.get("keyword", "")
|
||||
keyword: str = str(raw_keyword) if raw_keyword else ""
|
||||
session_id = session.session_id if session else None
|
||||
|
||||
example = ", ".join(options) if options else None
|
||||
clarifying_question = ClarifyingQuestion(
|
||||
question=question,
|
||||
keyword=keyword,
|
||||
example=example,
|
||||
)
|
||||
return ClarificationNeededResponse(
|
||||
message=question,
|
||||
session_id=session_id,
|
||||
questions=[clarifying_question],
|
||||
)
|
||||
@@ -0,0 +1,99 @@
|
||||
"""Tests for AskQuestionTool."""
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.copilot.tools.ask_question import AskQuestionTool
|
||||
from backend.copilot.tools.models import ClarificationNeededResponse
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tool() -> AskQuestionTool:
|
||||
return AskQuestionTool()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def session() -> ChatSession:
|
||||
return ChatSession.new(user_id="test-user", dry_run=False)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_with_options(tool: AskQuestionTool, session: ChatSession):
|
||||
result = await tool._execute(
|
||||
user_id=None,
|
||||
session=session,
|
||||
question="Which channel?",
|
||||
options=["Email", "Slack", "Google Docs"],
|
||||
keyword="channel",
|
||||
)
|
||||
|
||||
assert isinstance(result, ClarificationNeededResponse)
|
||||
assert result.message == "Which channel?"
|
||||
assert result.session_id == session.session_id
|
||||
assert len(result.questions) == 1
|
||||
|
||||
q = result.questions[0]
|
||||
assert q.question == "Which channel?"
|
||||
assert q.keyword == "channel"
|
||||
assert q.example == "Email, Slack, Google Docs"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_without_options(tool: AskQuestionTool, session: ChatSession):
|
||||
result = await tool._execute(
|
||||
user_id=None,
|
||||
session=session,
|
||||
question="What format do you want?",
|
||||
)
|
||||
|
||||
assert isinstance(result, ClarificationNeededResponse)
|
||||
assert result.message == "What format do you want?"
|
||||
assert len(result.questions) == 1
|
||||
|
||||
q = result.questions[0]
|
||||
assert q.question == "What format do you want?"
|
||||
assert q.keyword == ""
|
||||
assert q.example is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_with_keyword_only(tool: AskQuestionTool, session: ChatSession):
|
||||
result = await tool._execute(
|
||||
user_id=None,
|
||||
session=session,
|
||||
question="How often should it run?",
|
||||
keyword="trigger",
|
||||
)
|
||||
|
||||
assert isinstance(result, ClarificationNeededResponse)
|
||||
q = result.questions[0]
|
||||
assert q.keyword == "trigger"
|
||||
assert q.example is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_rejects_empty_question(
|
||||
tool: AskQuestionTool, session: ChatSession
|
||||
):
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
await tool._execute(user_id=None, session=session, question="")
|
||||
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
await tool._execute(user_id=None, session=session, question=" ")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_coerces_invalid_options(
|
||||
tool: AskQuestionTool, session: ChatSession
|
||||
):
|
||||
"""LLM may send options as a string instead of a list; should not crash."""
|
||||
result = await tool._execute(
|
||||
user_id=None,
|
||||
session=session,
|
||||
question="Pick one",
|
||||
options="not-a-list", # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
assert isinstance(result, ClarificationNeededResponse)
|
||||
q = result.questions[0]
|
||||
assert q.example is None
|
||||
@@ -91,10 +91,16 @@ async def _persist_and_summarize(
|
||||
f"\nFull output ({total:,} chars) saved to workspace. "
|
||||
f"Use read_workspace_file("
|
||||
f'path="{file_path}", offset=<char_offset>, length=50000) '
|
||||
f"to read any section."
|
||||
f"to read any section. "
|
||||
f"To process the file in the sandbox/working dir, use "
|
||||
f"read_workspace_file("
|
||||
f'path="{file_path}", save_to_path="<working_dir>/{tool_call_id}.json") '
|
||||
f"first, then use bash_exec to work with the local copy."
|
||||
)
|
||||
# Use workspace:// prefix so the model doesn't confuse the workspace path
|
||||
# with a local filesystem path (e.g. ~/.claude/projects/.../tool-outputs/).
|
||||
return (
|
||||
f'<tool-output-truncated total_chars={total} path="{file_path}">\n'
|
||||
f'<tool-output-truncated total_chars={total} workspace_path="{file_path}">\n'
|
||||
f"{preview}\n"
|
||||
f"{retrieval}\n"
|
||||
f"</tool-output-truncated>"
|
||||
|
||||
@@ -67,7 +67,7 @@ class TestPersistAndSummarize:
|
||||
assert "<tool-output-truncated" in result
|
||||
assert "</tool-output-truncated>" in result
|
||||
assert "total_chars=200000" in result
|
||||
assert 'path="tool-outputs/tc-123.json"' in result
|
||||
assert 'workspace_path="tool-outputs/tc-123.json"' in result
|
||||
assert "read_workspace_file" in result
|
||||
# Middle-out sentinel from truncate()
|
||||
assert "omitted" in result
|
||||
|
||||
@@ -22,7 +22,10 @@ from e2b import AsyncSandbox
|
||||
from e2b.exceptions import TimeoutException
|
||||
|
||||
from backend.copilot.context import E2B_WORKDIR, get_current_sandbox
|
||||
from backend.copilot.integration_creds import get_integration_env_vars
|
||||
from backend.copilot.integration_creds import (
|
||||
get_github_user_git_identity,
|
||||
get_integration_env_vars,
|
||||
)
|
||||
from backend.copilot.model import ChatSession
|
||||
|
||||
from .base import BaseTool
|
||||
@@ -159,6 +162,12 @@ class BashExecTool(BaseTool):
|
||||
secret_values = [v for v in integration_env.values() if v]
|
||||
envs.update(integration_env)
|
||||
|
||||
# Set git author/committer identity from the user's GitHub profile
|
||||
# so commits made in the sandbox are attributed correctly.
|
||||
git_identity = await get_github_user_git_identity(user_id)
|
||||
if git_identity:
|
||||
envs.update(git_identity)
|
||||
|
||||
try:
|
||||
result = await sandbox.commands.run(
|
||||
f"bash -c {shlex.quote(command)}",
|
||||
|
||||
@@ -38,7 +38,10 @@ class TestBashExecE2BTokenInjection:
|
||||
with patch(
|
||||
"backend.copilot.tools.bash_exec.get_integration_env_vars",
|
||||
new=AsyncMock(return_value=env_vars),
|
||||
) as mock_get_env:
|
||||
) as mock_get_env, patch(
|
||||
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
|
||||
new=AsyncMock(return_value=None),
|
||||
):
|
||||
result = await tool._execute_on_e2b(
|
||||
sandbox=sandbox,
|
||||
command="echo hi",
|
||||
@@ -53,6 +56,66 @@ class TestBashExecE2BTokenInjection:
|
||||
assert call_kwargs["envs"]["GITHUB_TOKEN"] == "gh-secret"
|
||||
assert isinstance(result, BashExecResponse)
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_git_identity_set_from_github_profile(self):
|
||||
"""When user has a connected GitHub account, git env vars are set from their profile."""
|
||||
tool = _make_tool()
|
||||
session = make_session(user_id=_USER)
|
||||
sandbox = _make_sandbox(stdout="ok")
|
||||
identity = {
|
||||
"GIT_AUTHOR_NAME": "Test User",
|
||||
"GIT_AUTHOR_EMAIL": "test@example.com",
|
||||
"GIT_COMMITTER_NAME": "Test User",
|
||||
"GIT_COMMITTER_EMAIL": "test@example.com",
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.tools.bash_exec.get_integration_env_vars",
|
||||
new=AsyncMock(return_value={}),
|
||||
), patch(
|
||||
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
|
||||
new=AsyncMock(return_value=identity),
|
||||
):
|
||||
await tool._execute_on_e2b(
|
||||
sandbox=sandbox,
|
||||
command="git commit -m test",
|
||||
timeout=10,
|
||||
session_id=session.session_id,
|
||||
user_id=_USER,
|
||||
)
|
||||
|
||||
call_kwargs = sandbox.commands.run.call_args[1]
|
||||
assert call_kwargs["envs"]["GIT_AUTHOR_NAME"] == "Test User"
|
||||
assert call_kwargs["envs"]["GIT_AUTHOR_EMAIL"] == "test@example.com"
|
||||
assert call_kwargs["envs"]["GIT_COMMITTER_NAME"] == "Test User"
|
||||
assert call_kwargs["envs"]["GIT_COMMITTER_EMAIL"] == "test@example.com"
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_no_git_identity_when_github_not_connected(self):
|
||||
"""When user has no GitHub account, git identity env vars are absent."""
|
||||
tool = _make_tool()
|
||||
session = make_session(user_id=_USER)
|
||||
sandbox = _make_sandbox(stdout="ok")
|
||||
|
||||
with patch(
|
||||
"backend.copilot.tools.bash_exec.get_integration_env_vars",
|
||||
new=AsyncMock(return_value={}),
|
||||
), patch(
|
||||
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
|
||||
new=AsyncMock(return_value=None),
|
||||
):
|
||||
await tool._execute_on_e2b(
|
||||
sandbox=sandbox,
|
||||
command="echo hi",
|
||||
timeout=10,
|
||||
session_id=session.session_id,
|
||||
user_id=_USER,
|
||||
)
|
||||
|
||||
call_kwargs = sandbox.commands.run.call_args[1]
|
||||
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
|
||||
assert "GIT_COMMITTER_EMAIL" not in call_kwargs["envs"]
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_no_token_injection_when_user_id_is_none(self):
|
||||
"""When user_id is None, get_integration_env_vars must NOT be called."""
|
||||
@@ -63,7 +126,10 @@ class TestBashExecE2BTokenInjection:
|
||||
with patch(
|
||||
"backend.copilot.tools.bash_exec.get_integration_env_vars",
|
||||
new=AsyncMock(return_value={"GH_TOKEN": "should-not-appear"}),
|
||||
) as mock_get_env:
|
||||
) as mock_get_env, patch(
|
||||
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
|
||||
new=AsyncMock(return_value=None),
|
||||
) as mock_get_identity:
|
||||
result = await tool._execute_on_e2b(
|
||||
sandbox=sandbox,
|
||||
command="echo hi",
|
||||
@@ -73,6 +139,8 @@ class TestBashExecE2BTokenInjection:
|
||||
)
|
||||
|
||||
mock_get_env.assert_not_called()
|
||||
mock_get_identity.assert_not_called()
|
||||
call_kwargs = sandbox.commands.run.call_args[1]
|
||||
assert "GH_TOKEN" not in call_kwargs["envs"]
|
||||
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
|
||||
assert isinstance(result, BashExecResponse)
|
||||
|
||||
@@ -20,7 +20,8 @@ class FindLibraryAgentTool(BaseTool):
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Search user's library agents. Returns graph_id, schemas for sub-agent composition. "
|
||||
"Omit query to list all."
|
||||
"Omit query to list all. Set include_graph=true to also fetch the full "
|
||||
"graph structure (nodes + links) for debugging or editing."
|
||||
)
|
||||
|
||||
@property
|
||||
@@ -32,6 +33,15 @@ class FindLibraryAgentTool(BaseTool):
|
||||
"type": "string",
|
||||
"description": "Search by name/description. Omit to list all.",
|
||||
},
|
||||
"include_graph": {
|
||||
"type": "boolean",
|
||||
"description": (
|
||||
"When true, includes the full graph structure "
|
||||
"(nodes + links) for each found agent. "
|
||||
"Use when you need to inspect, debug, or edit an agent."
|
||||
),
|
||||
"default": False,
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
@@ -45,6 +55,7 @@ class FindLibraryAgentTool(BaseTool):
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
query: str = "",
|
||||
include_graph: bool = False,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
return await search_agents(
|
||||
@@ -52,4 +63,5 @@ class FindLibraryAgentTool(BaseTool):
|
||||
source="library",
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
include_graph=include_graph,
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.data.graph import BaseGraph
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
|
||||
|
||||
@@ -122,6 +123,10 @@ class AgentInfo(BaseModel):
|
||||
default=None,
|
||||
description="Input schema for the agent, including field names, types, and defaults",
|
||||
)
|
||||
graph: BaseGraph | None = Field(
|
||||
default=None,
|
||||
description="Full graph structure (nodes + links) when include_graph is requested",
|
||||
)
|
||||
|
||||
|
||||
class AgentsFoundResponse(ToolResponseBase):
|
||||
|
||||
@@ -10,7 +10,11 @@ import backend.copilot.tools.run_block as run_block_module
|
||||
from backend.copilot.tools.helpers import execute_block
|
||||
from backend.copilot.tools.models import BlockOutputResponse, ErrorResponse
|
||||
from backend.copilot.tools.run_block import RunBlockTool
|
||||
from backend.executor.simulator import build_simulation_prompt, simulate_block
|
||||
from backend.executor.simulator import (
|
||||
build_simulation_prompt,
|
||||
prepare_dry_run,
|
||||
simulate_block,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
@@ -75,7 +79,8 @@ def make_openai_response(
|
||||
async def test_simulate_block_basic():
|
||||
"""simulate_block returns correct (output_name, output_data) tuples.
|
||||
|
||||
Empty "error" pins are dropped at source — only non-empty errors are yielded.
|
||||
Empty error pins should be omitted (not yielded) — only pins with
|
||||
meaningful values are forwarded.
|
||||
"""
|
||||
mock_block = make_mock_block()
|
||||
mock_client = AsyncMock()
|
||||
@@ -85,13 +90,14 @@ async def test_simulate_block_basic():
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator.get_openai_client", return_value=mock_client
|
||||
):
|
||||
) as mock_get_client:
|
||||
outputs = []
|
||||
async for name, data in simulate_block(mock_block, {"query": "test"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
mock_get_client.assert_called_once_with(prefer_openrouter=True)
|
||||
assert ("result", "simulated output") in outputs
|
||||
# Empty error pin is dropped at the simulator level
|
||||
# Empty error pin should NOT be yielded — the simulator omits empty values
|
||||
assert ("error", "") not in outputs
|
||||
|
||||
|
||||
@@ -147,7 +153,7 @@ async def test_simulate_block_all_retries_exhausted():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_block_missing_output_pins():
|
||||
"""LLM response missing some output pins; verify non-error pins filled with None."""
|
||||
"""LLM response missing some output pins; they are omitted (not yielded)."""
|
||||
mock_block = make_mock_block(
|
||||
output_props={
|
||||
"result": {"type": "string"},
|
||||
@@ -169,30 +175,9 @@ async def test_simulate_block_missing_output_pins():
|
||||
outputs[name] = data
|
||||
|
||||
assert outputs["result"] == "hello"
|
||||
assert outputs["count"] is None # missing pin filled with None
|
||||
assert "error" not in outputs # missing error pin is omitted entirely
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_block_keeps_nonempty_error():
|
||||
"""simulate_block keeps non-empty error pins (simulated logical errors)."""
|
||||
mock_block = make_mock_block()
|
||||
mock_client = AsyncMock()
|
||||
mock_client.chat.completions.create = AsyncMock(
|
||||
return_value=make_openai_response(
|
||||
'{"result": "", "error": "API rate limit exceeded"}'
|
||||
)
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator.get_openai_client", return_value=mock_client
|
||||
):
|
||||
outputs = []
|
||||
async for name, data in simulate_block(mock_block, {"query": "test"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert ("result", "") in outputs
|
||||
assert ("error", "API rate limit exceeded") in outputs
|
||||
# Missing pins are omitted — only pins with meaningful values are yielded
|
||||
assert "count" not in outputs
|
||||
assert "error" not in outputs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -228,17 +213,19 @@ async def test_simulate_block_truncates_long_inputs():
|
||||
assert len(parsed["text"]) < 25000
|
||||
|
||||
|
||||
def test_build_simulation_prompt_excludes_error_from_must_include():
|
||||
"""The 'MUST include' prompt line should NOT list 'error' — the prompt
|
||||
already instructs the LLM to OMIT error unless simulating a logical error.
|
||||
Including it in 'MUST include' would be contradictory."""
|
||||
def test_build_simulation_prompt_lists_available_output_pins():
|
||||
"""The prompt should list available output pins (excluding error) so the LLM
|
||||
knows which keys it MUST include. Error is excluded because the prompt
|
||||
tells the LLM to omit it unless simulating a logical failure."""
|
||||
block = make_mock_block() # default output_props has "result" and "error"
|
||||
system_prompt, _ = build_simulation_prompt(block, {"query": "test"})
|
||||
must_include_line = [
|
||||
line for line in system_prompt.splitlines() if "MUST include" in line
|
||||
available_line = [
|
||||
line for line in system_prompt.splitlines() if "Available output pins" in line
|
||||
][0]
|
||||
assert '"result"' in must_include_line
|
||||
assert '"error"' not in must_include_line
|
||||
assert '"result"' in available_line
|
||||
# "error" is intentionally excluded from the required output pins list
|
||||
# since the prompt instructs the LLM to omit it unless simulating errors
|
||||
assert '"error"' not in available_line
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -493,3 +480,146 @@ async def test_execute_block_dry_run_simulator_error_returns_error_response():
|
||||
|
||||
assert isinstance(response, ErrorResponse)
|
||||
assert "[SIMULATOR ERROR" in response.message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# prepare_dry_run tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_prepare_dry_run_orchestrator_block():
|
||||
"""prepare_dry_run caps iterations and overrides model to simulation model."""
|
||||
from backend.blocks.orchestrator import OrchestratorBlock
|
||||
|
||||
block = OrchestratorBlock()
|
||||
input_data = {"prompt": "hello", "model": "gpt-4o", "agent_mode_max_iterations": 10}
|
||||
with patch(
|
||||
"backend.executor.simulator._get_platform_openrouter_key",
|
||||
return_value="sk-or-test-key",
|
||||
):
|
||||
result = prepare_dry_run(block, input_data)
|
||||
|
||||
assert result is not None
|
||||
# Model is overridden to the simulation model (not the user's model).
|
||||
assert result["model"] != "gpt-4o"
|
||||
assert result["agent_mode_max_iterations"] == 1
|
||||
assert result["_dry_run_api_key"] == "sk-or-test-key"
|
||||
# Original input_data should not be mutated.
|
||||
assert input_data["model"] == "gpt-4o"
|
||||
|
||||
|
||||
def test_prepare_dry_run_agent_executor_block():
|
||||
"""prepare_dry_run returns a copy of input_data for AgentExecutorBlock.
|
||||
|
||||
AgentExecutorBlock must execute for real during dry-run so it can spawn
|
||||
a child graph execution (whose blocks are then simulated). Its Output
|
||||
schema has no properties, so LLM simulation would yield zero outputs.
|
||||
"""
|
||||
from backend.blocks.agent import AgentExecutorBlock
|
||||
|
||||
block = AgentExecutorBlock()
|
||||
input_data = {
|
||||
"user_id": "u1",
|
||||
"graph_id": "g1",
|
||||
"graph_version": 1,
|
||||
"inputs": {"text": "hello"},
|
||||
"input_schema": {},
|
||||
"output_schema": {},
|
||||
}
|
||||
result = prepare_dry_run(block, input_data)
|
||||
|
||||
assert result is not None
|
||||
# Input data is returned as-is (no model swap needed).
|
||||
assert result["user_id"] == "u1"
|
||||
assert result["graph_id"] == "g1"
|
||||
# Original input_data should not be mutated.
|
||||
assert result is not input_data
|
||||
|
||||
|
||||
def test_prepare_dry_run_regular_block_returns_none():
|
||||
"""prepare_dry_run returns None for a regular block (use simulator)."""
|
||||
mock_block = make_mock_block()
|
||||
assert prepare_dry_run(mock_block, {"query": "test"}) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Input/output block passthrough tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_agent_input_block_passthrough():
|
||||
"""AgentInputBlock should pass through the value directly, no LLM call."""
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block, {"value": "hello world", "name": "q"}
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "hello world")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_agent_dropdown_input_block_passthrough():
|
||||
"""AgentDropdownInputBlock (subclass of AgentInputBlock) should pass through."""
|
||||
from backend.blocks.io import AgentDropdownInputBlock
|
||||
|
||||
block = AgentDropdownInputBlock()
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block,
|
||||
{
|
||||
"value": "Option B",
|
||||
"name": "sev",
|
||||
"options": ["Option A", "Option B"],
|
||||
},
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "Option B")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_agent_input_block_none_value_falls_back_to_name():
|
||||
"""AgentInputBlock with value=None falls back to the input name."""
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"value": None, "name": "q"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
# When value is None, the simulator falls back to the "name" field
|
||||
assert outputs == [("result", "q")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_agent_output_block_passthrough():
|
||||
"""AgentOutputBlock should pass through value as output."""
|
||||
from backend.blocks.io import AgentOutputBlock
|
||||
|
||||
block = AgentOutputBlock()
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block, {"value": "result text", "name": "out1"}
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert ("output", "result text") in outputs
|
||||
assert ("name", "out1") in outputs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_simulate_agent_output_block_no_name():
|
||||
"""AgentOutputBlock without name in input should still yield output."""
|
||||
from backend.blocks.io import AgentOutputBlock
|
||||
|
||||
block = AgentOutputBlock()
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"value": 42}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("output", 42)]
|
||||
|
||||
@@ -436,6 +436,28 @@ async def get_recommended_agents(user_id: str) -> list[StoreAgentDetails]:
|
||||
return [StoreAgentDetails.from_db(agent) for agent in recommended_agents]
|
||||
|
||||
|
||||
def format_onboarding_for_extraction(
|
||||
user_name: str,
|
||||
user_role: str,
|
||||
pain_points: list[str],
|
||||
) -> str:
|
||||
"""Format onboarding wizard answers as Q&A text for LLM extraction."""
|
||||
|
||||
def normalize(value: str) -> str:
|
||||
return " ".join(value.strip().split())
|
||||
|
||||
name = normalize(user_name)
|
||||
role = normalize(user_role)
|
||||
points = [normalize(p) for p in pain_points if normalize(p)]
|
||||
|
||||
lines = [
|
||||
f"Q: What is your name?\nA: {name}",
|
||||
f"Q: What best describes your role?\nA: {role}",
|
||||
f"Q: What tasks are eating your time?\nA: {', '.join(points)}",
|
||||
]
|
||||
return "\n\n".join(lines)
|
||||
|
||||
|
||||
@cached(maxsize=1, ttl_seconds=300) # Cache for 5 minutes since this rarely changes
|
||||
async def onboarding_enabled() -> bool:
|
||||
"""
|
||||
|
||||
27
autogpt_platform/backend/backend/data/onboarding_test.py
Normal file
27
autogpt_platform/backend/backend/data/onboarding_test.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from backend.data.onboarding import format_onboarding_for_extraction
|
||||
|
||||
|
||||
def test_format_onboarding_for_extraction_basic():
|
||||
result = format_onboarding_for_extraction(
|
||||
user_name="John",
|
||||
user_role="Founder/CEO",
|
||||
pain_points=["Finding leads", "Email & outreach"],
|
||||
)
|
||||
assert "Q: What is your name?" in result
|
||||
assert "A: John" in result
|
||||
assert "Q: What best describes your role?" in result
|
||||
assert "A: Founder/CEO" in result
|
||||
assert "Q: What tasks are eating your time?" in result
|
||||
assert "Finding leads" in result
|
||||
assert "Email & outreach" in result
|
||||
|
||||
|
||||
def test_format_onboarding_for_extraction_with_other():
|
||||
result = format_onboarding_for_extraction(
|
||||
user_name="Jane",
|
||||
user_role="Data Scientist",
|
||||
pain_points=["Research", "Building dashboards"],
|
||||
)
|
||||
assert "A: Jane" in result
|
||||
assert "A: Data Scientist" in result
|
||||
assert "Research, Building dashboards" in result
|
||||
@@ -81,7 +81,7 @@ from backend.util.settings import Settings
|
||||
from .activity_status_generator import generate_activity_status_for_execution
|
||||
from .automod.manager import automod_manager
|
||||
from .cluster_lock import ClusterLock
|
||||
from .simulator import simulate_block
|
||||
from .simulator import get_dry_run_credentials, prepare_dry_run, simulate_block
|
||||
from .utils import (
|
||||
GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS,
|
||||
GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
|
||||
@@ -279,6 +279,21 @@ async def execute_node(
|
||||
"nodes_to_skip": nodes_to_skip or set(),
|
||||
}
|
||||
|
||||
# For special blocks in dry-run, prepare_dry_run returns a (possibly
|
||||
# modified) copy of input_data so the block executes for real. For all
|
||||
# other blocks it returns None -> use LLM simulator.
|
||||
# OrchestratorBlock uses the platform's simulation model + OpenRouter key
|
||||
# so no user credentials are needed.
|
||||
_dry_run_input: dict[str, Any] | None = None
|
||||
if execution_context.dry_run:
|
||||
_dry_run_input = prepare_dry_run(node_block, input_data)
|
||||
if _dry_run_input is not None:
|
||||
input_data = _dry_run_input
|
||||
|
||||
# Check for dry-run platform credentials (OrchestratorBlock uses the
|
||||
# platform's OpenRouter key instead of user credentials).
|
||||
_dry_run_creds = get_dry_run_credentials(input_data) if _dry_run_input else None
|
||||
|
||||
# Last-minute fetch credentials + acquire a system-wide read-write lock to prevent
|
||||
# changes during execution. ⚠️ This means a set of credentials can only be used by
|
||||
# one (running) block at a time; simultaneous execution of blocks using same
|
||||
@@ -288,6 +303,12 @@ async def execute_node(
|
||||
|
||||
# Handle regular credentials fields
|
||||
for field_name, input_type in input_model.get_credentials_fields().items():
|
||||
# Dry-run platform credentials bypass the credential store
|
||||
if _dry_run_creds is not None:
|
||||
input_data[field_name] = None
|
||||
extra_exec_kwargs[field_name] = _dry_run_creds
|
||||
continue
|
||||
|
||||
field_value = input_data.get(field_name)
|
||||
if not field_value or (
|
||||
isinstance(field_value, dict) and not field_value.get("id")
|
||||
@@ -375,7 +396,7 @@ async def execute_node(
|
||||
scope.set_tag(f"execution_context.{k}", v)
|
||||
|
||||
try:
|
||||
if execution_context.dry_run:
|
||||
if execution_context.dry_run and _dry_run_input is None:
|
||||
block_iter = simulate_block(node_block, input_data)
|
||||
else:
|
||||
block_iter = node_block.execute(input_data, **extra_exec_kwargs)
|
||||
|
||||
@@ -2,58 +2,65 @@
|
||||
LLM-powered block simulator for dry-run execution.
|
||||
|
||||
When dry_run=True, instead of calling the real block, this module
|
||||
role-plays the block's execution using an LLM. No real API calls,
|
||||
no side effects. The LLM is grounded by:
|
||||
role-plays the block's execution using an LLM. For most blocks no real
|
||||
API calls or side effects occur.
|
||||
|
||||
Special cases (no LLM simulation needed):
|
||||
- OrchestratorBlock executes for real with the platform's simulation model
|
||||
(iterations capped to 1). Uses the platform OpenRouter key so no user
|
||||
credentials are required. Falls back to LLM simulation if the platform
|
||||
key is unavailable.
|
||||
- AgentExecutorBlock executes for real so it can spawn child graph executions
|
||||
(whose blocks are then simulated). No credentials needed.
|
||||
- AgentInputBlock (and all subclasses) and AgentOutputBlock are pure
|
||||
passthrough -- they forward their input values directly.
|
||||
- MCPToolBlock is simulated via the generic LLM prompt (with run() source code).
|
||||
|
||||
OrchestratorBlock and AgentExecutorBlock are handled in manager.py via
|
||||
``prepare_dry_run``.
|
||||
|
||||
The LLM simulation is grounded by:
|
||||
- Block name and description
|
||||
- Input/output schemas (from block.input_schema.jsonschema() / output_schema.jsonschema())
|
||||
- The block's run() source code (via inspect.getsource)
|
||||
- The actual input values
|
||||
|
||||
Inspired by https://github.com/Significant-Gravitas/agent-simulator
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
|
||||
from backend.blocks.agent import AgentExecutorBlock
|
||||
from backend.blocks.io import AgentInputBlock, AgentOutputBlock
|
||||
from backend.blocks.orchestrator import OrchestratorBlock
|
||||
from backend.util.clients import get_openai_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Use the same fast/cheap model the copilot uses for non-primary tasks.
|
||||
# Overridable via ChatConfig.title_model if ChatConfig is available.
|
||||
# Default simulator model — Gemini 2.5 Flash via OpenRouter (fast, cheap, good at
|
||||
# JSON generation). Configurable via ChatConfig.simulation_model
|
||||
# (CHAT_SIMULATION_MODEL env var).
|
||||
_DEFAULT_SIMULATOR_MODEL = "google/gemini-2.5-flash"
|
||||
|
||||
|
||||
def _simulator_model() -> str:
|
||||
try:
|
||||
from backend.copilot.config import ChatConfig # noqa: PLC0415
|
||||
|
||||
model = ChatConfig().title_model
|
||||
return ChatConfig().simulation_model or _DEFAULT_SIMULATOR_MODEL
|
||||
except Exception:
|
||||
model = "openai/gpt-4o-mini"
|
||||
|
||||
# get_openai_client() may return a direct OpenAI client (not OpenRouter).
|
||||
# Direct OpenAI expects bare model names ("gpt-4o-mini"), not the
|
||||
# OpenRouter-prefixed form ("openai/gpt-4o-mini"). Strip the prefix when
|
||||
# the internal OpenAI key is configured (i.e. not going through OpenRouter).
|
||||
try:
|
||||
from backend.util.settings import Settings # noqa: PLC0415
|
||||
|
||||
secrets = Settings().secrets
|
||||
# get_openai_client() uses the direct OpenAI client whenever
|
||||
# openai_internal_api_key is set, regardless of open_router_api_key.
|
||||
# Strip the provider prefix (e.g. "openai/gpt-4o-mini" → "gpt-4o-mini")
|
||||
# so the model name is valid for the direct OpenAI API.
|
||||
if secrets.openai_internal_api_key and "/" in model:
|
||||
model = model.split("/", 1)[1]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return model
|
||||
return _DEFAULT_SIMULATOR_MODEL
|
||||
|
||||
|
||||
_TEMPERATURE = 0.2
|
||||
_MAX_JSON_RETRIES = 5
|
||||
_MAX_INPUT_VALUE_CHARS = 20000
|
||||
_COMMON_CRED_KEYS = frozenset({"credentials", "api_key", "token", "secret"})
|
||||
|
||||
|
||||
def _truncate_value(value: Any) -> Any:
|
||||
@@ -88,73 +95,31 @@ def _describe_schema_pins(schema: dict[str, Any]) -> str:
|
||||
return "\n".join(lines) if lines else "(no output pins defined)"
|
||||
|
||||
|
||||
def build_simulation_prompt(block: Any, input_data: dict[str, Any]) -> tuple[str, str]:
|
||||
"""Build (system_prompt, user_prompt) for block simulation."""
|
||||
input_schema = block.input_schema.jsonschema()
|
||||
output_schema = block.output_schema.jsonschema()
|
||||
|
||||
input_pins = _describe_schema_pins(input_schema)
|
||||
output_pins = _describe_schema_pins(output_schema)
|
||||
output_properties = list(output_schema.get("properties", {}).keys())
|
||||
# Build a separate list for the "MUST include" instruction that excludes
|
||||
# "error" — the prompt already tells the LLM to OMIT the error pin unless
|
||||
# simulating a logical error. Including it in "MUST include" is contradictory.
|
||||
required_output_properties = [k for k in output_properties if k != "error"]
|
||||
|
||||
block_name = getattr(block, "name", type(block).__name__)
|
||||
block_description = getattr(block, "description", "No description available.")
|
||||
|
||||
system_prompt = f"""You are simulating the execution of a software block called "{block_name}".
|
||||
|
||||
## Block Description
|
||||
{block_description}
|
||||
|
||||
## Input Schema
|
||||
{input_pins}
|
||||
|
||||
## Output Schema (what you must return)
|
||||
{output_pins}
|
||||
|
||||
Your task: given the current inputs, produce realistic simulated outputs for this block.
|
||||
|
||||
Rules:
|
||||
- Respond with a single JSON object whose keys are EXACTLY the output pin names listed above.
|
||||
- Assume all credentials and authentication are present and valid. Never simulate authentication failures.
|
||||
- Make the simulated outputs realistic and consistent with the inputs.
|
||||
- If there is an "error" pin, OMIT it entirely unless you are simulating a logical error. Only include the "error" pin when there is a genuine error message to report.
|
||||
- Do not include any extra keys beyond the output pins.
|
||||
|
||||
Output pin names you MUST include: {json.dumps(required_output_properties)}
|
||||
"""
|
||||
|
||||
safe_inputs = _truncate_input_values(input_data)
|
||||
user_prompt = f"## Current Inputs\n{json.dumps(safe_inputs, indent=2)}"
|
||||
|
||||
return system_prompt, user_prompt
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared LLM call helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def simulate_block(
|
||||
block: Any,
|
||||
input_data: dict[str, Any],
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
"""Simulate block execution using an LLM.
|
||||
async def _call_llm_for_simulation(
|
||||
system_prompt: str,
|
||||
user_prompt: str,
|
||||
*,
|
||||
label: str = "simulate",
|
||||
) -> dict[str, Any]:
|
||||
"""Send a simulation prompt to the LLM and return the parsed JSON dict.
|
||||
|
||||
Yields (output_name, output_data) tuples matching the Block.execute() interface.
|
||||
On unrecoverable failure, yields a single ("error", "[SIMULATOR ERROR ...") tuple.
|
||||
Handles client acquisition, retries on invalid JSON, and logging.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If no LLM client is available.
|
||||
ValueError: If all retry attempts are exhausted.
|
||||
"""
|
||||
client = get_openai_client()
|
||||
client = get_openai_client(prefer_openrouter=True)
|
||||
if client is None:
|
||||
yield (
|
||||
"error",
|
||||
raise RuntimeError(
|
||||
"[SIMULATOR ERROR — NOT A BLOCK FAILURE] No LLM client available "
|
||||
"(missing OpenAI/OpenRouter API key).",
|
||||
"(missing OpenAI/OpenRouter API key)."
|
||||
)
|
||||
return
|
||||
|
||||
output_schema = block.output_schema.jsonschema()
|
||||
output_properties: dict[str, Any] = output_schema.get("properties", {})
|
||||
|
||||
system_prompt, user_prompt = build_simulation_prompt(block, input_data)
|
||||
|
||||
model = _simulator_model()
|
||||
last_error: Exception | None = None
|
||||
@@ -176,60 +141,366 @@ async def simulate_block(
|
||||
if not isinstance(parsed, dict):
|
||||
raise ValueError(f"LLM returned non-object JSON: {raw[:200]}")
|
||||
|
||||
# Fill missing output pins with defaults.
|
||||
# Skip empty "error" pins — an empty string means "no error" and
|
||||
# would only confuse downstream consumers (LLM, frontend).
|
||||
result: dict[str, Any] = {}
|
||||
for pin_name in output_properties:
|
||||
if pin_name in parsed:
|
||||
value = parsed[pin_name]
|
||||
# Drop empty/blank error pins: they carry no information.
|
||||
# Uses strip() intentionally so whitespace-only strings
|
||||
# (e.g. " ", "\n") are also treated as empty.
|
||||
if (
|
||||
pin_name == "error"
|
||||
and isinstance(value, str)
|
||||
and not value.strip()
|
||||
):
|
||||
continue
|
||||
result[pin_name] = value
|
||||
elif pin_name != "error":
|
||||
# Only fill non-error missing pins with None
|
||||
result[pin_name] = None
|
||||
|
||||
logger.debug(
|
||||
"simulate_block: block=%s attempt=%d tokens=%s/%s",
|
||||
getattr(block, "name", "?"),
|
||||
"simulate(%s): attempt=%d tokens=%s/%s",
|
||||
label,
|
||||
attempt + 1,
|
||||
getattr(getattr(response, "usage", None), "prompt_tokens", "?"),
|
||||
getattr(getattr(response, "usage", None), "completion_tokens", "?"),
|
||||
)
|
||||
|
||||
for pin_name, pin_value in result.items():
|
||||
yield pin_name, pin_value
|
||||
return
|
||||
return parsed
|
||||
|
||||
except (json.JSONDecodeError, ValueError) as e:
|
||||
last_error = e
|
||||
logger.warning(
|
||||
"simulate_block: JSON parse error on attempt %d/%d: %s",
|
||||
"simulate(%s): JSON parse error on attempt %d/%d: %s",
|
||||
label,
|
||||
attempt + 1,
|
||||
_MAX_JSON_RETRIES,
|
||||
e,
|
||||
)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
logger.error("simulate_block: LLM call failed: %s", e, exc_info=True)
|
||||
logger.error("simulate(%s): LLM call failed: %s", label, e, exc_info=True)
|
||||
break
|
||||
|
||||
logger.error(
|
||||
"simulate_block: all %d retries exhausted for block=%s; last_error=%s",
|
||||
_MAX_JSON_RETRIES,
|
||||
getattr(block, "name", "?"),
|
||||
last_error,
|
||||
)
|
||||
yield (
|
||||
"error",
|
||||
msg = (
|
||||
f"[SIMULATOR ERROR — NOT A BLOCK FAILURE] Failed after {_MAX_JSON_RETRIES} "
|
||||
f"attempts: {last_error}",
|
||||
f"attempts: {last_error}"
|
||||
)
|
||||
logger.error(
|
||||
"simulate(%s): all retries exhausted; last_error=%s", label, last_error
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt builders
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_simulation_prompt(block: Any, input_data: dict[str, Any]) -> tuple[str, str]:
|
||||
"""Build (system_prompt, user_prompt) for block simulation."""
|
||||
input_schema = block.input_schema.jsonschema()
|
||||
output_schema = block.output_schema.jsonschema()
|
||||
|
||||
input_pins = _describe_schema_pins(input_schema)
|
||||
output_pins = _describe_schema_pins(output_schema)
|
||||
output_properties = list(output_schema.get("properties", {}).keys())
|
||||
# Build a separate list for the "MUST include" instruction that excludes
|
||||
# "error" — the prompt already tells the LLM to OMIT the error pin unless
|
||||
# simulating a logical error. Including it in "MUST include" is contradictory.
|
||||
required_output_properties = [k for k in output_properties if k != "error"]
|
||||
|
||||
block_name = getattr(block, "name", type(block).__name__)
|
||||
block_description = getattr(block, "description", "No description available.")
|
||||
|
||||
# Include the block's run() source code so the LLM knows exactly how
|
||||
# inputs are transformed to outputs. Truncate to avoid blowing up the
|
||||
# prompt for very large blocks.
|
||||
try:
|
||||
run_source = inspect.getsource(block.run)
|
||||
if len(run_source) > _MAX_INPUT_VALUE_CHARS:
|
||||
run_source = run_source[:_MAX_INPUT_VALUE_CHARS] + "\n# ... [TRUNCATED]"
|
||||
except (TypeError, OSError):
|
||||
run_source = ""
|
||||
|
||||
implementation_section = ""
|
||||
if run_source:
|
||||
implementation_section = (
|
||||
"\n## Block Implementation (run function source code)\n"
|
||||
"```python\n"
|
||||
f"{run_source}\n"
|
||||
"```\n"
|
||||
)
|
||||
|
||||
system_prompt = f"""You are simulating the execution of a software block called "{block_name}".
|
||||
|
||||
## Block Description
|
||||
{block_description}
|
||||
|
||||
## Input Schema
|
||||
{input_pins}
|
||||
|
||||
## Output Schema (what you must return)
|
||||
{output_pins}
|
||||
{implementation_section}
|
||||
Your task: given the current inputs, produce realistic simulated outputs for this block.
|
||||
{"Study the block's run() source code above to understand exactly how inputs are transformed to outputs." if run_source else "Use the block description and schemas to infer realistic outputs."}
|
||||
|
||||
Rules:
|
||||
- Respond with a single JSON object.
|
||||
- Only include output pins that have meaningful values. Omit pins with no relevant output.
|
||||
- Assume all credentials and API keys are present and valid. Do not simulate auth failures.
|
||||
- Generate REALISTIC, useful outputs: real-looking URLs, plausible text, valid data structures.
|
||||
- Never return empty strings, null, or "N/A" for pins that should have content.
|
||||
- You MAY simulate logical errors (e.g., invalid input format, unsupported operation) when the inputs warrant it — use the "error" pin for these. But do NOT simulate auth/credential errors.
|
||||
- Do not include extra keys beyond the defined output pins.
|
||||
|
||||
Available output pins: {json.dumps(required_output_properties)}
|
||||
"""
|
||||
|
||||
# Strip credentials from input so the LLM doesn't see null/empty creds
|
||||
# and incorrectly simulate auth failures. Use the block's schema to
|
||||
# detect credential fields when available, falling back to common names.
|
||||
try:
|
||||
cred_fields = set(block.input_schema.get_credentials_fields())
|
||||
except (AttributeError, TypeError):
|
||||
cred_fields = set()
|
||||
exclude_keys = cred_fields | _COMMON_CRED_KEYS
|
||||
safe_inputs = {
|
||||
k: v
|
||||
for k, v in _truncate_input_values(input_data).items()
|
||||
if k not in exclude_keys
|
||||
}
|
||||
user_prompt = f"## Current Inputs\n{json.dumps(safe_inputs, indent=2)}"
|
||||
|
||||
return system_prompt, user_prompt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public simulation functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _get_platform_openrouter_key() -> str | None:
|
||||
"""Return the platform's OpenRouter API key, or None if unavailable."""
|
||||
try:
|
||||
from backend.util.settings import Settings # noqa: PLC0415
|
||||
|
||||
key = Settings().secrets.open_router_api_key
|
||||
return key if key else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def prepare_dry_run(block: Any, input_data: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Prepare *input_data* for a dry-run execution of *block*.
|
||||
|
||||
Returns a **modified copy** of *input_data* for blocks that should execute
|
||||
for real with cheap settings, or ``None`` when the block should be
|
||||
LLM-simulated instead.
|
||||
|
||||
- **OrchestratorBlock** executes for real with the platform's simulation
|
||||
model (iterations capped to 1). Uses the platform OpenRouter key so no
|
||||
user credentials are needed. Falls back to LLM simulation if the
|
||||
platform key is unavailable.
|
||||
- **AgentExecutorBlock** executes for real so it can spawn a child graph
|
||||
execution. The child graph inherits ``dry_run=True`` and its blocks
|
||||
are simulated. No credentials are needed.
|
||||
"""
|
||||
if isinstance(block, OrchestratorBlock):
|
||||
or_key = _get_platform_openrouter_key()
|
||||
if not or_key:
|
||||
logger.info(
|
||||
"Dry-run: no platform OpenRouter key, "
|
||||
"falling back to LLM simulation for OrchestratorBlock"
|
||||
)
|
||||
return None
|
||||
|
||||
original = input_data.get("agent_mode_max_iterations", 0)
|
||||
max_iters = 1 if original != 0 else 0
|
||||
sim_model = _simulator_model()
|
||||
|
||||
# Keep the original credentials dict in input_data so the block's
|
||||
# JSON schema validation passes (validate_data strips None values,
|
||||
# making the field absent and failing the "required" check).
|
||||
# The actual credentials are injected via extra_exec_kwargs in
|
||||
# manager.py using _dry_run_api_key.
|
||||
return {
|
||||
**input_data,
|
||||
"agent_mode_max_iterations": max_iters,
|
||||
"model": sim_model,
|
||||
"_dry_run_api_key": or_key,
|
||||
}
|
||||
|
||||
if isinstance(block, AgentExecutorBlock):
|
||||
return {**input_data}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_dry_run_credentials(
|
||||
input_data: dict[str, Any],
|
||||
) -> Any | None:
|
||||
"""Build an ``APIKeyCredentials`` for dry-run OrchestratorBlock execution.
|
||||
|
||||
Returns credentials using the platform's OpenRouter key (injected by
|
||||
``prepare_dry_run``), or ``None`` if not a dry-run override.
|
||||
"""
|
||||
api_key = input_data.pop("_dry_run_api_key", None)
|
||||
if not api_key:
|
||||
return None
|
||||
|
||||
try:
|
||||
from backend.blocks.llm import APIKeyCredentials # noqa: PLC0415
|
||||
from backend.integrations.providers import ProviderName # noqa: PLC0415
|
||||
|
||||
return APIKeyCredentials(
|
||||
id="dry-run-platform",
|
||||
provider=ProviderName.OPEN_ROUTER,
|
||||
api_key=api_key,
|
||||
title="Dry-run simulation",
|
||||
expires_at=None,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to create dry-run credentials", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
def _default_for_input_result(result_schema: dict[str, Any], name: str | None) -> Any:
|
||||
"""Return a type-appropriate sample value for an AgentInputBlock's result pin.
|
||||
|
||||
Typed subclasses (AgentNumberInputBlock, AgentDateInputBlock, etc.)
|
||||
declare a specific type/format on their ``result`` output. When dry-run
|
||||
has no user-supplied value, this generates a fallback that matches the
|
||||
expected type so downstream validation doesn't fail with a plain string.
|
||||
"""
|
||||
pin_type = result_schema.get("type", "string")
|
||||
fmt = result_schema.get("format")
|
||||
|
||||
if pin_type == "integer":
|
||||
return 0
|
||||
if pin_type == "number":
|
||||
return 0.0
|
||||
if pin_type == "boolean":
|
||||
return False
|
||||
if pin_type == "array":
|
||||
return []
|
||||
if pin_type == "object":
|
||||
return {}
|
||||
if fmt == "date":
|
||||
from datetime import date as _date # noqa: PLC0415
|
||||
|
||||
return _date.today().isoformat()
|
||||
if fmt == "time":
|
||||
return "00:00:00"
|
||||
# Default: use the block's name as a sample string.
|
||||
return name or "sample input"
|
||||
|
||||
|
||||
async def simulate_block(
|
||||
block: Any,
|
||||
input_data: dict[str, Any],
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
"""Simulate block execution using an LLM.
|
||||
|
||||
All block types (including MCPToolBlock) use the same generic LLM prompt
|
||||
which includes the block's run() source code for accurate simulation.
|
||||
|
||||
Note: callers should check ``prepare_dry_run(block, input_data)`` first.
|
||||
OrchestratorBlock and AgentExecutorBlock execute for real in dry-run mode
|
||||
(see manager.py).
|
||||
|
||||
Yields (output_name, output_data) tuples matching the Block.execute() interface.
|
||||
On unrecoverable failure, yields a single ("error", "[SIMULATOR ERROR ...") tuple.
|
||||
"""
|
||||
# Input/output blocks are pure passthrough -- they just forward their
|
||||
# input values. No LLM simulation needed.
|
||||
if isinstance(block, AgentInputBlock):
|
||||
value = input_data.get("value")
|
||||
if value is None:
|
||||
# Dry-run with no user input: use first dropdown option or name,
|
||||
# then coerce to a type-appropriate fallback so typed subclasses
|
||||
# (e.g. AgentNumberInputBlock → int, AgentDateInputBlock → date)
|
||||
# don't fail validation with a plain string.
|
||||
placeholder = input_data.get("options") or input_data.get(
|
||||
"placeholder_values"
|
||||
)
|
||||
if placeholder and isinstance(placeholder, list) and placeholder:
|
||||
value = placeholder[0]
|
||||
else:
|
||||
result_schema = (
|
||||
block.output_schema.jsonschema()
|
||||
.get("properties", {})
|
||||
.get("result", {})
|
||||
)
|
||||
value = _default_for_input_result(
|
||||
result_schema, input_data.get("name", "sample input")
|
||||
)
|
||||
yield "result", value
|
||||
return
|
||||
|
||||
if isinstance(block, AgentOutputBlock):
|
||||
# Mirror AgentOutputBlock.run(): if a format string is provided,
|
||||
# apply Jinja2 formatting and yield only "output"; otherwise yield
|
||||
# both "output" (raw value) and "name".
|
||||
fmt = input_data.get("format", "")
|
||||
value = input_data.get("value")
|
||||
name = input_data.get("name", "")
|
||||
if fmt:
|
||||
try:
|
||||
from backend.util.text import TextFormatter # noqa: PLC0415
|
||||
|
||||
escape_html = input_data.get("escape_html", False)
|
||||
formatter = TextFormatter(autoescape=escape_html)
|
||||
formatted = await formatter.format_string(fmt, {name: value})
|
||||
yield "output", formatted
|
||||
except Exception as e:
|
||||
yield "output", f"Error: {e}, {value}"
|
||||
else:
|
||||
yield "output", value
|
||||
if name:
|
||||
yield "name", name
|
||||
return
|
||||
|
||||
output_schema = block.output_schema.jsonschema()
|
||||
output_properties: dict[str, Any] = output_schema.get("properties", {})
|
||||
|
||||
system_prompt, user_prompt = build_simulation_prompt(block, input_data)
|
||||
label = getattr(block, "name", "?")
|
||||
|
||||
try:
|
||||
parsed = await _call_llm_for_simulation(system_prompt, user_prompt, label=label)
|
||||
|
||||
# Track which pins were yielded so we can fill in missing required
|
||||
# ones afterwards — downstream nodes connected to unyielded pins
|
||||
# would otherwise stall in INCOMPLETE state.
|
||||
yielded_pins: set[str] = set()
|
||||
|
||||
# Yield pins present in the LLM response with meaningful values.
|
||||
# We skip None and empty strings but preserve valid falsy values
|
||||
# like False, 0, and [].
|
||||
for pin_name in output_properties:
|
||||
if pin_name not in parsed:
|
||||
continue
|
||||
value = parsed[pin_name]
|
||||
if value is None or value == "":
|
||||
continue
|
||||
yield pin_name, value
|
||||
yielded_pins.add(pin_name)
|
||||
|
||||
# For any required output pins the LLM omitted (excluding "error"),
|
||||
# yield a type-appropriate default so downstream nodes still fire.
|
||||
required_pins = set(output_schema.get("required", []))
|
||||
for pin_name in required_pins - yielded_pins - {"error"}:
|
||||
pin_schema = output_properties.get(pin_name, {})
|
||||
default = _default_for_schema(pin_schema)
|
||||
logger.debug(
|
||||
"simulate(%s): filling missing required pin %r with default %r",
|
||||
label,
|
||||
pin_name,
|
||||
default,
|
||||
)
|
||||
yield pin_name, default
|
||||
|
||||
except (RuntimeError, ValueError) as e:
|
||||
yield "error", str(e)
|
||||
|
||||
|
||||
def _default_for_schema(pin_schema: dict[str, Any]) -> Any:
|
||||
"""Return a sensible default value for a JSON schema type."""
|
||||
pin_type = pin_schema.get("type", "string")
|
||||
if pin_type == "string":
|
||||
return ""
|
||||
if pin_type == "integer":
|
||||
return 0
|
||||
if pin_type == "number":
|
||||
return 0.0
|
||||
if pin_type == "boolean":
|
||||
return False
|
||||
if pin_type == "array":
|
||||
return []
|
||||
if pin_type == "object":
|
||||
return {}
|
||||
return ""
|
||||
|
||||
475
autogpt_platform/backend/backend/executor/simulator_test.py
Normal file
475
autogpt_platform/backend/backend/executor/simulator_test.py
Normal file
@@ -0,0 +1,475 @@
|
||||
"""Tests for the LLM-powered block simulator (dry-run execution).
|
||||
|
||||
Covers:
|
||||
- Prompt building (credential stripping, realistic-output instructions)
|
||||
- Input/output block passthrough
|
||||
- prepare_dry_run routing
|
||||
- simulate_block output-pin filling
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.executor.simulator import (
|
||||
_truncate_input_values,
|
||||
_truncate_value,
|
||||
build_simulation_prompt,
|
||||
prepare_dry_run,
|
||||
simulate_block,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_block(
|
||||
*,
|
||||
name: str = "TestBlock",
|
||||
description: str = "A test block.",
|
||||
input_schema: dict[str, Any] | None = None,
|
||||
output_schema: dict[str, Any] | None = None,
|
||||
) -> Any:
|
||||
"""Create a minimal mock block for testing."""
|
||||
block = MagicMock()
|
||||
block.name = name
|
||||
block.description = description
|
||||
block.input_schema.jsonschema.return_value = input_schema or {
|
||||
"properties": {"query": {"type": "string"}},
|
||||
"required": ["query"],
|
||||
}
|
||||
block.output_schema.jsonschema.return_value = output_schema or {
|
||||
"properties": {
|
||||
"result": {"type": "string"},
|
||||
"error": {"type": "string"},
|
||||
},
|
||||
"required": ["result"],
|
||||
}
|
||||
return block
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Truncation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTruncation:
|
||||
def test_short_string_unchanged(self) -> None:
|
||||
assert _truncate_value("hello") == "hello"
|
||||
|
||||
def test_long_string_truncated(self) -> None:
|
||||
long_str = "x" * 30000
|
||||
result = _truncate_value(long_str)
|
||||
assert result.endswith("... [TRUNCATED]")
|
||||
assert len(result) < 25000
|
||||
|
||||
def test_nested_dict_truncation(self) -> None:
|
||||
data = {"key": "y" * 30000}
|
||||
result = _truncate_input_values(data)
|
||||
assert result["key"].endswith("... [TRUNCATED]")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt building
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBuildSimulationPrompt:
|
||||
def test_system_prompt_contains_block_name(self) -> None:
|
||||
block = _make_block(name="WebSearchBlock")
|
||||
system, _user = build_simulation_prompt(block, {"query": "test"})
|
||||
assert "WebSearchBlock" in system
|
||||
|
||||
def test_system_prompt_contains_realistic_instruction(self) -> None:
|
||||
block = _make_block()
|
||||
system, _ = build_simulation_prompt(block, {})
|
||||
assert "REALISTIC" in system
|
||||
assert "Never return empty strings" in system
|
||||
|
||||
def test_system_prompt_contains_no_auth_failure_instruction(self) -> None:
|
||||
block = _make_block()
|
||||
system, _ = build_simulation_prompt(block, {})
|
||||
assert "Do not simulate auth failures" in system
|
||||
|
||||
def test_credentials_stripped_from_user_prompt(self) -> None:
|
||||
block = _make_block()
|
||||
_, user = build_simulation_prompt(
|
||||
block,
|
||||
{
|
||||
"query": "test",
|
||||
"credentials": {"api_key": "sk-secret"},
|
||||
"api_key": "sk-secret",
|
||||
"token": "tok-secret",
|
||||
"secret": "shh",
|
||||
"normal_field": "visible",
|
||||
},
|
||||
)
|
||||
assert "sk-secret" not in user
|
||||
assert "tok-secret" not in user
|
||||
assert "shh" not in user
|
||||
assert "visible" in user
|
||||
|
||||
def test_error_pin_always_empty_instruction(self) -> None:
|
||||
block = _make_block()
|
||||
system, _ = build_simulation_prompt(block, {})
|
||||
assert "error" in system.lower()
|
||||
assert "empty string" in system.lower()
|
||||
|
||||
def test_output_pin_names_in_prompt(self) -> None:
|
||||
block = _make_block(
|
||||
output_schema={
|
||||
"properties": {
|
||||
"url": {"type": "string"},
|
||||
"status_code": {"type": "integer"},
|
||||
},
|
||||
}
|
||||
)
|
||||
system, _ = build_simulation_prompt(block, {})
|
||||
assert "url" in system
|
||||
assert "status_code" in system
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# prepare_dry_run routing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPrepareDryRun:
|
||||
def test_orchestrator_uses_simulation_model(self) -> None:
|
||||
"""OrchestratorBlock should use the simulation model and cap iterations."""
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.blocks.orchestrator import OrchestratorBlock
|
||||
|
||||
block = OrchestratorBlock()
|
||||
with patch(
|
||||
"backend.executor.simulator._get_platform_openrouter_key",
|
||||
return_value="sk-or-test-key",
|
||||
):
|
||||
result = prepare_dry_run(
|
||||
block,
|
||||
{"agent_mode_max_iterations": 10, "model": "gpt-4o", "other": "val"},
|
||||
)
|
||||
assert result is not None
|
||||
assert result["agent_mode_max_iterations"] == 1
|
||||
assert result["other"] == "val"
|
||||
assert result["model"] != "gpt-4o" # overridden to simulation model
|
||||
# credentials left as-is so block schema validation passes —
|
||||
# actual creds injected via extra_exec_kwargs in manager.py
|
||||
assert "credentials" not in result
|
||||
assert result["_dry_run_api_key"] == "sk-or-test-key"
|
||||
|
||||
def test_orchestrator_zero_stays_zero(self) -> None:
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.blocks.orchestrator import OrchestratorBlock
|
||||
|
||||
block = OrchestratorBlock()
|
||||
with patch(
|
||||
"backend.executor.simulator._get_platform_openrouter_key",
|
||||
return_value="sk-or-test-key",
|
||||
):
|
||||
result = prepare_dry_run(block, {"agent_mode_max_iterations": 0})
|
||||
assert result is not None
|
||||
assert result["agent_mode_max_iterations"] == 0
|
||||
|
||||
def test_orchestrator_falls_back_without_key(self) -> None:
|
||||
"""Without platform OpenRouter key, OrchestratorBlock falls back
|
||||
to LLM simulation (returns None)."""
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.blocks.orchestrator import OrchestratorBlock
|
||||
|
||||
block = OrchestratorBlock()
|
||||
with patch(
|
||||
"backend.executor.simulator._get_platform_openrouter_key",
|
||||
return_value=None,
|
||||
):
|
||||
result = prepare_dry_run(block, {"agent_mode_max_iterations": 5})
|
||||
assert result is None
|
||||
|
||||
def test_agent_executor_block_passthrough(self) -> None:
|
||||
from backend.blocks.agent import AgentExecutorBlock
|
||||
|
||||
block = AgentExecutorBlock()
|
||||
result = prepare_dry_run(block, {"graph_id": "abc"})
|
||||
assert result is not None
|
||||
assert result["graph_id"] == "abc"
|
||||
|
||||
def test_agent_executor_block_returns_identical_copy(self) -> None:
|
||||
"""AgentExecutorBlock must execute for real during dry-run so it can
|
||||
spawn a child graph execution. ``prepare_dry_run`` returns a shallow
|
||||
copy of input_data with no modifications -- every key/value must be
|
||||
identical, but the returned dict must be a *different* object so
|
||||
callers can mutate it without affecting the original."""
|
||||
from backend.blocks.agent import AgentExecutorBlock
|
||||
|
||||
block = AgentExecutorBlock()
|
||||
input_data = {
|
||||
"user_id": "user-42",
|
||||
"graph_id": "graph-99",
|
||||
"graph_version": 3,
|
||||
"inputs": {"text": "hello"},
|
||||
"input_schema": {"props": "a"},
|
||||
"output_schema": {"props": "b"},
|
||||
}
|
||||
result = prepare_dry_run(block, input_data)
|
||||
|
||||
assert result is not None
|
||||
# Must be a different object (copy, not alias)
|
||||
assert result is not input_data
|
||||
# Every key/value must be identical -- no modifications
|
||||
assert result == input_data
|
||||
# Mutating the copy must not affect the original
|
||||
result["extra"] = "added"
|
||||
assert "extra" not in input_data
|
||||
|
||||
def test_regular_block_returns_none(self) -> None:
|
||||
block = _make_block()
|
||||
result = prepare_dry_run(block, {"query": "test"})
|
||||
assert result is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# simulate_block – input/output passthrough
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSimulateBlockPassthrough:
|
||||
@pytest.mark.asyncio
|
||||
async def test_input_block_passthrough_with_value(self) -> None:
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"value": "hello world"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "hello world")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_input_block_passthrough_without_value_uses_name(self) -> None:
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"name": "user_query"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "user_query")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_input_block_passthrough_uses_placeholder(self) -> None:
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block, {"options": ["option1", "option2"]}
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "option1")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_output_block_passthrough_no_format(self) -> None:
|
||||
from backend.blocks.io import AgentOutputBlock
|
||||
|
||||
block = AgentOutputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block, {"value": "result data", "name": "output_name"}
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert ("output", "result data") in outputs
|
||||
assert ("name", "output_name") in outputs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_output_block_with_format_applies_jinja2(self) -> None:
|
||||
"""When a format string is provided, AgentOutputBlock simulation should
|
||||
apply Jinja2 formatting and yield only 'output' (no 'name' pin)."""
|
||||
from backend.blocks.io import AgentOutputBlock
|
||||
|
||||
block = AgentOutputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block,
|
||||
{
|
||||
"value": "Hello, World!",
|
||||
"name": "output_1",
|
||||
"format": "{{ output_1 }}!!",
|
||||
},
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert len(outputs) == 1
|
||||
assert outputs[0] == ("output", "Hello, World!!!")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_output_block_with_format_no_name_pin(self) -> None:
|
||||
"""When format is provided, the 'name' pin must NOT be yielded."""
|
||||
from backend.blocks.io import AgentOutputBlock
|
||||
|
||||
block = AgentOutputBlock()
|
||||
|
||||
output_names = []
|
||||
async for name, data in simulate_block(
|
||||
block,
|
||||
{
|
||||
"value": "42",
|
||||
"name": "output_2",
|
||||
"format": "{{ output_2 }}",
|
||||
},
|
||||
):
|
||||
output_names.append(name)
|
||||
|
||||
assert "name" not in output_names
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_input_block_no_value_no_name_empty_options(self) -> None:
|
||||
"""AgentInputBlock with value=None, name=None, and empty
|
||||
options list must not crash.
|
||||
|
||||
When the ``name`` key is present but explicitly ``None``,
|
||||
``dict.get("name", "sample input")`` returns ``None`` (the key
|
||||
exists), so the fallback sentinel is *not* used. The test verifies
|
||||
the code does not raise and yields a single result."""
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(
|
||||
block, {"value": None, "name": None, "options": []}
|
||||
):
|
||||
outputs.append((name, data))
|
||||
|
||||
# Does not crash; yields exactly one output
|
||||
assert len(outputs) == 1
|
||||
assert outputs[0][0] == "result"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_input_block_missing_all_fields_uses_sentinel(self) -> None:
|
||||
"""AgentInputBlock with no value, name, or placeholders at all should
|
||||
fall back to the ``"sample input"`` sentinel."""
|
||||
from backend.blocks.io import AgentInputBlock
|
||||
|
||||
block = AgentInputBlock()
|
||||
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert outputs == [("result", "sample input")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generic_block_zero_outputs_handled(self) -> None:
|
||||
"""When the LLM returns a valid JSON object but none of the output pins
|
||||
have meaningful values, ``simulate_block`` should still yield defaults
|
||||
for required output pins so downstream nodes don't stall."""
|
||||
block = _make_block()
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator._call_llm_for_simulation",
|
||||
new_callable=AsyncMock,
|
||||
# All output pin values are None or empty -- nothing to yield
|
||||
return_value={"result": None, "error": ""},
|
||||
):
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"query": "test"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
# "result" is required, so a default empty string is yielded
|
||||
assert outputs == [("result", "")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generic_block_calls_llm(self) -> None:
|
||||
"""Generic blocks should call _call_llm_for_simulation."""
|
||||
block = _make_block()
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator._call_llm_for_simulation",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"result": "simulated result", "error": ""},
|
||||
) as mock_llm:
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"query": "test"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
mock_llm.assert_called_once()
|
||||
assert ("result", "simulated result") in outputs
|
||||
# Empty error pin is omitted — not yielded
|
||||
assert ("error", "") not in outputs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generic_block_omits_missing_pins(self) -> None:
|
||||
"""Missing output pins are omitted (not yielded)."""
|
||||
block = _make_block()
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator._call_llm_for_simulation",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"result": "data"}, # missing "error" pin
|
||||
):
|
||||
outputs: dict[str, Any] = {}
|
||||
async for name, data in simulate_block(block, {"query": "test"}):
|
||||
outputs[name] = data
|
||||
|
||||
assert outputs["result"] == "data"
|
||||
# Missing pins are omitted — only meaningful values are yielded
|
||||
assert "error" not in outputs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_generic_block_preserves_falsy_values(self) -> None:
|
||||
"""Valid falsy values like False, 0, and [] must be yielded, not dropped."""
|
||||
block = _make_block(
|
||||
output_schema={
|
||||
"properties": {
|
||||
"flag": {"type": "boolean"},
|
||||
"count": {"type": "integer"},
|
||||
"items": {"type": "array"},
|
||||
},
|
||||
"required": ["flag", "count", "items"],
|
||||
}
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator._call_llm_for_simulation",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"flag": False, "count": 0, "items": []},
|
||||
):
|
||||
outputs: dict[str, Any] = {}
|
||||
async for name, data in simulate_block(block, {"query": "test"}):
|
||||
outputs[name] = data
|
||||
|
||||
assert outputs["flag"] is False
|
||||
assert outputs["count"] == 0
|
||||
assert outputs["items"] == []
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_llm_failure_yields_error(self) -> None:
|
||||
"""When LLM fails, should yield an error tuple."""
|
||||
block = _make_block()
|
||||
|
||||
with patch(
|
||||
"backend.executor.simulator._call_llm_for_simulation",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=RuntimeError("No client"),
|
||||
):
|
||||
outputs = []
|
||||
async for name, data in simulate_block(block, {"query": "test"}):
|
||||
outputs.append((name, data))
|
||||
|
||||
assert len(outputs) == 1
|
||||
assert outputs[0][0] == "error"
|
||||
assert "No client" in outputs[0][1]
|
||||
@@ -923,6 +923,11 @@ async def add_graph_execution(
|
||||
execution_context.parent_execution_id if execution_context else None
|
||||
)
|
||||
|
||||
# When execution_context is provided (e.g. from AgentExecutorBlock),
|
||||
# inherit dry_run so child-graph validation skips credential checks.
|
||||
if execution_context and execution_context.dry_run:
|
||||
dry_run = True
|
||||
|
||||
# Create new execution
|
||||
graph, starting_nodes_input, compiled_nodes_input_masks, nodes_to_skip = (
|
||||
await validate_and_construct_node_execution_input(
|
||||
|
||||
@@ -19,6 +19,7 @@ from backend.data.model import (
|
||||
UserPasswordCredentials,
|
||||
)
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.util.cache import thread_cached
|
||||
from backend.util.settings import Settings
|
||||
|
||||
settings = Settings()
|
||||
@@ -304,15 +305,12 @@ def is_system_provider(provider: str) -> bool:
|
||||
|
||||
|
||||
class IntegrationCredentialsStore:
|
||||
def __init__(self):
|
||||
self._locks = None
|
||||
|
||||
@thread_cached
|
||||
async def locks(self) -> AsyncRedisKeyedMutex:
|
||||
if self._locks:
|
||||
return self._locks
|
||||
|
||||
self._locks = AsyncRedisKeyedMutex(await get_redis_async())
|
||||
return self._locks
|
||||
# Per-thread: copilot executor runs worker threads with separate event
|
||||
# loops; AsyncRedisKeyedMutex's internal asyncio.Lock is bound to the
|
||||
# loop it was created on.
|
||||
return AsyncRedisKeyedMutex(await get_redis_async())
|
||||
|
||||
@property
|
||||
def db_manager(self):
|
||||
|
||||
@@ -8,7 +8,6 @@ from autogpt_libs.utils.synchronize import AsyncRedisKeyedMutex
|
||||
from redis.asyncio.lock import Lock as AsyncRedisLock
|
||||
|
||||
from backend.data.model import Credentials, OAuth2Credentials
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.integrations.credentials_store import (
|
||||
IntegrationCredentialsStore,
|
||||
provider_matches,
|
||||
@@ -106,14 +105,13 @@ class IntegrationCredentialsManager:
|
||||
|
||||
def __init__(self):
|
||||
self.store = IntegrationCredentialsStore()
|
||||
self._locks = None
|
||||
|
||||
async def locks(self) -> AsyncRedisKeyedMutex:
|
||||
if self._locks:
|
||||
return self._locks
|
||||
|
||||
self._locks = AsyncRedisKeyedMutex(await get_redis_async())
|
||||
return self._locks
|
||||
# Delegate to store's @thread_cached locks. Manager uses these for
|
||||
# fine-grained per-credential locking (refresh, acquire); the store
|
||||
# uses its own for coarse per-user integrations locking. Same mutex
|
||||
# type, different key spaces — no collision.
|
||||
return await self.store.locks()
|
||||
|
||||
async def create(self, user_id: str, credentials: Credentials) -> None:
|
||||
result = await self.store.add_creds(user_id, credentials)
|
||||
@@ -188,35 +186,74 @@ class IntegrationCredentialsManager:
|
||||
|
||||
async def refresh_if_needed(
|
||||
self, user_id: str, credentials: OAuth2Credentials, lock: bool = True
|
||||
) -> OAuth2Credentials:
|
||||
# When lock=False, skip ALL Redis locking (both the outer "refresh" scope
|
||||
# lock and the inner credential lock). This is used by the copilot's
|
||||
# integration_creds module which runs across multiple threads with separate
|
||||
# event loops; acquiring a Redis lock whose asyncio.Lock() was created on
|
||||
# a different loop raises "Future attached to a different loop".
|
||||
if lock:
|
||||
return await self._refresh_locked(user_id, credentials)
|
||||
return await self._refresh_unlocked(user_id, credentials)
|
||||
|
||||
async def _get_oauth_handler(
|
||||
self, credentials: OAuth2Credentials
|
||||
) -> "BaseOAuthHandler":
|
||||
"""Resolve the appropriate OAuth handler for the given credentials."""
|
||||
if provider_matches(credentials.provider, ProviderName.MCP.value):
|
||||
return create_mcp_oauth_handler(credentials)
|
||||
return await _get_provider_oauth_handler(credentials.provider)
|
||||
|
||||
async def _refresh_locked(
|
||||
self, user_id: str, credentials: OAuth2Credentials
|
||||
) -> OAuth2Credentials:
|
||||
async with self._locked(user_id, credentials.id, "refresh"):
|
||||
if provider_matches(credentials.provider, ProviderName.MCP.value):
|
||||
oauth_handler = create_mcp_oauth_handler(credentials)
|
||||
else:
|
||||
oauth_handler = await _get_provider_oauth_handler(credentials.provider)
|
||||
oauth_handler = await self._get_oauth_handler(credentials)
|
||||
if oauth_handler.needs_refresh(credentials):
|
||||
logger.debug(
|
||||
f"Refreshing '{credentials.provider}' credentials #{credentials.id}"
|
||||
"Refreshing '%s' credentials #%s",
|
||||
credentials.provider,
|
||||
credentials.id,
|
||||
)
|
||||
_lock = None
|
||||
if lock:
|
||||
# Wait until the credentials are no longer in use anywhere
|
||||
_lock = await self._acquire_lock(user_id, credentials.id)
|
||||
# Wait until the credentials are no longer in use anywhere
|
||||
_lock = await self._acquire_lock(user_id, credentials.id)
|
||||
try:
|
||||
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
|
||||
await self.store.update_creds(user_id, fresh_credentials)
|
||||
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
|
||||
credentials = fresh_credentials
|
||||
finally:
|
||||
if (await _lock.locked()) and (await _lock.owned()):
|
||||
try:
|
||||
await _lock.release()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to release OAuth refresh lock",
|
||||
exc_info=True,
|
||||
)
|
||||
return credentials
|
||||
|
||||
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
|
||||
await self.store.update_creds(user_id, fresh_credentials)
|
||||
# Notify listeners so the refreshed token is picked up immediately.
|
||||
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
|
||||
if _lock and (await _lock.locked()) and (await _lock.owned()):
|
||||
try:
|
||||
await _lock.release()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to release OAuth refresh lock",
|
||||
exc_info=True,
|
||||
)
|
||||
async def _refresh_unlocked(
|
||||
self, user_id: str, credentials: OAuth2Credentials
|
||||
) -> OAuth2Credentials:
|
||||
"""Best-effort token refresh without any Redis locking.
|
||||
|
||||
credentials = fresh_credentials
|
||||
Safe for use from multi-threaded contexts (e.g. copilot workers) where
|
||||
each thread has its own event loop and sharing Redis-backed asyncio locks
|
||||
is not possible. Concurrent refreshes are tolerated: the last writer
|
||||
wins, and stale tokens are overwritten.
|
||||
"""
|
||||
oauth_handler = await self._get_oauth_handler(credentials)
|
||||
if oauth_handler.needs_refresh(credentials):
|
||||
logger.debug(
|
||||
"Refreshing '%s' credentials #%s (lock-free)",
|
||||
credentials.provider,
|
||||
credentials.id,
|
||||
)
|
||||
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
|
||||
await self.store.update_creds(user_id, fresh_credentials)
|
||||
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
|
||||
credentials = fresh_credentials
|
||||
return credentials
|
||||
|
||||
async def update(self, user_id: str, updated: Credentials) -> None:
|
||||
@@ -264,7 +301,6 @@ class IntegrationCredentialsManager:
|
||||
|
||||
async def release_all_locks(self):
|
||||
"""Call this on process termination to ensure all locks are released"""
|
||||
await (await self.locks()).release_all_locks()
|
||||
await (await self.store.locks()).release_all_locks()
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ class ProviderName(str, Enum):
|
||||
ANTHROPIC = "anthropic"
|
||||
APOLLO = "apollo"
|
||||
COMPASS = "compass"
|
||||
DATABASE = "database"
|
||||
DISCORD = "discord"
|
||||
D_ID = "d_id"
|
||||
E2B = "e2b"
|
||||
|
||||
@@ -163,23 +163,31 @@ async def get_async_supabase() -> "AClient":
|
||||
|
||||
|
||||
@cached(ttl_seconds=3600)
|
||||
def get_openai_client() -> "AsyncOpenAI | None":
|
||||
def get_openai_client(*, prefer_openrouter: bool = False) -> "AsyncOpenAI | None":
|
||||
"""
|
||||
Get a process-cached async OpenAI client for embeddings.
|
||||
Get a process-cached async OpenAI client.
|
||||
|
||||
Prefers openai_internal_api_key (direct OpenAI). Falls back to
|
||||
open_router_api_key via OpenRouter's OpenAI-compatible endpoint.
|
||||
Returns None if neither key is configured.
|
||||
By default prefers openai_internal_api_key (direct OpenAI) and falls back
|
||||
to open_router_api_key via OpenRouter.
|
||||
|
||||
When ``prefer_openrouter=True``, returns an OpenRouter client or None —
|
||||
does **not** fall back to direct OpenAI (which can't route non-OpenAI
|
||||
models like ``google/gemini-2.5-flash``).
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
if settings.secrets.openai_internal_api_key:
|
||||
return AsyncOpenAI(api_key=settings.secrets.openai_internal_api_key)
|
||||
if settings.secrets.open_router_api_key:
|
||||
return AsyncOpenAI(
|
||||
api_key=settings.secrets.open_router_api_key,
|
||||
base_url=OPENROUTER_BASE_URL,
|
||||
)
|
||||
openai_key = settings.secrets.openai_internal_api_key
|
||||
openrouter_key = settings.secrets.open_router_api_key
|
||||
|
||||
if prefer_openrouter:
|
||||
if openrouter_key:
|
||||
return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
|
||||
return None
|
||||
else:
|
||||
if openai_key:
|
||||
return AsyncOpenAI(api_key=openai_key)
|
||||
if openrouter_key:
|
||||
return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
69
autogpt_platform/backend/backend/util/clients_test.py
Normal file
69
autogpt_platform/backend/backend/util/clients_test.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Tests for get_openai_client prefer_openrouter parameter."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.util.clients import get_openai_client
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_client_cache():
|
||||
"""Clear the @cached singleton between tests."""
|
||||
get_openai_client.cache_clear()
|
||||
yield
|
||||
get_openai_client.cache_clear()
|
||||
|
||||
|
||||
def _mock_secrets(*, openai_key: str = "", openrouter_key: str = "") -> MagicMock:
|
||||
secrets = MagicMock()
|
||||
secrets.openai_internal_api_key = openai_key
|
||||
secrets.open_router_api_key = openrouter_key
|
||||
return secrets
|
||||
|
||||
|
||||
class TestGetOpenaiClientDefault:
|
||||
def test_prefers_openai_key(self):
|
||||
secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
client = get_openai_client()
|
||||
assert client is not None
|
||||
assert client.api_key == "sk-openai"
|
||||
assert "openrouter" not in str(client.base_url or "")
|
||||
|
||||
def test_falls_back_to_openrouter(self):
|
||||
secrets = _mock_secrets(openrouter_key="sk-or")
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
client = get_openai_client()
|
||||
assert client is not None
|
||||
assert client.api_key == "sk-or"
|
||||
|
||||
def test_returns_none_when_no_keys(self):
|
||||
secrets = _mock_secrets()
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
assert get_openai_client() is None
|
||||
|
||||
|
||||
class TestGetOpenaiClientPreferOpenrouter:
|
||||
def test_returns_openrouter_client(self):
|
||||
secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
client = get_openai_client(prefer_openrouter=True)
|
||||
assert client is not None
|
||||
assert client.api_key == "sk-or"
|
||||
|
||||
def test_returns_none_without_openrouter_key(self):
|
||||
secrets = _mock_secrets(openai_key="sk-openai")
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
assert get_openai_client(prefer_openrouter=True) is None
|
||||
|
||||
def test_returns_none_when_no_keys(self):
|
||||
secrets = _mock_secrets()
|
||||
with patch("backend.util.clients.settings") as mock_settings:
|
||||
mock_settings.secrets = secrets
|
||||
assert get_openai_client(prefer_openrouter=True) is None
|
||||
@@ -22,7 +22,6 @@ function generateTestGraph(name = null) {
|
||||
input_default: {
|
||||
name: "Load Test Input",
|
||||
description: "Test input for load testing",
|
||||
placeholder_values: {},
|
||||
},
|
||||
input_nodes: [],
|
||||
output_nodes: ["output_node"],
|
||||
@@ -59,11 +58,7 @@ function generateExecutionInputs() {
|
||||
"Load Test Input": {
|
||||
name: "Load Test Input",
|
||||
description: "Test input for load testing",
|
||||
placeholder_values: {
|
||||
test_data: `Test execution at ${new Date().toISOString()}`,
|
||||
test_parameter: Math.random().toString(36).substr(2, 9),
|
||||
numeric_value: Math.floor(Math.random() * 1000),
|
||||
},
|
||||
value: `Test execution at ${new Date().toISOString()}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
229
autogpt_platform/backend/poetry.lock
generated
229
autogpt_platform/backend/poetry.lock
generated
@@ -974,6 +974,128 @@ files = [
|
||||
]
|
||||
markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
|
||||
|
||||
[[package]]
|
||||
name = "coverage"
|
||||
version = "7.13.5"
|
||||
description = "Code coverage measurement for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"},
|
||||
{file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"},
|
||||
{file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"},
|
||||
{file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"},
|
||||
{file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"},
|
||||
{file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"},
|
||||
{file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"},
|
||||
{file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"},
|
||||
{file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"},
|
||||
{file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
|
||||
|
||||
[package.extras]
|
||||
toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
|
||||
|
||||
[[package]]
|
||||
name = "crashtest"
|
||||
version = "0.4.1"
|
||||
@@ -5720,6 +5842,75 @@ dev = ["coverage[toml] (==7.10.7)", "cryptography (>=3.4.0)", "pre-commit", "pyt
|
||||
docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
|
||||
tests = ["coverage[toml] (==7.10.7)", "pytest (>=8.4.2,<9.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pymssql"
|
||||
version = "2.3.13"
|
||||
description = "DB-API interface to Microsoft SQL Server for Python. (new Cython-based version)"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pymssql-2.3.13-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:476f6f06b2ae5dfbfa0b169a6ecdd0d9ddfedb07f2d6dc97d2dd630ff2d6789a"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:17942dc9474693ab2229a8a6013e5b9cb1312a5251207552141bb85fcce8c131"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d87237500def5f743a52e415cd369d632907212154fcc7b4e13f264b4e30021"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:612ac062027d2118879f11a5986e9d9d82d07ca3545bb98c93200b68826ea687"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f1897c1b767cc143e77d285123ae5fd4fa7379a1bfec5c515d38826caf084eb6"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:48631c7b9fd14a1bd5675c521b6082590bf700b7961c65638d237817b3fde735"},
|
||||
{file = "pymssql-2.3.13-cp310-cp310-win_amd64.whl", hash = "sha256:79c759db6e991eeae473b000c2e0a7fb8da799b2da469fe5a10d30916315e0b5"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:152be40c0d7f5e4b1323f7728b0a01f3ee0082190cfbadf84b2c2e930d57e00e"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-macosx_15_0_x86_64.whl", hash = "sha256:d94da3a55545c5b6926cb4d1c6469396f0ae32ad5d6932c513f7a0bf569b4799"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51e42c5defc3667f0803c7ade85db0e6f24b9a1c5a18fcdfa2d09c36bff9b065"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4aa18944a121f996178e26cadc598abdbf73759f03dc3cd74263fdab1b28cd96"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:910404e0ec85c4cc7c633ec3df9b04a35f23bb74a844dd377a387026ae635e3a"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4b834c34e7600369eee7bc877948b53eb0fe6f3689f0888d005ae47dd53c0a66"},
|
||||
{file = "pymssql-2.3.13-cp311-cp311-win_amd64.whl", hash = "sha256:5c2e55b6513f9c5a2f58543233ed40baaa7f91c79e64a5f961ea3fc57a700b80"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cf4f32b4a05b66f02cb7d55a0f3bcb0574a6f8cf0bee4bea6f7b104038364733"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:2b056eb175955f7fb715b60dc1c0c624969f4d24dbdcf804b41ab1e640a2b131"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:319810b89aa64b99d9c5c01518752c813938df230496fa2c4c6dda0603f04c4c"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0ea72641cb0f8bce7ad8565dbdbda4a7437aa58bce045f2a3a788d71af2e4be"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1493f63d213607f708a5722aa230776ada726ccdb94097fab090a1717a2534e0"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb3275985c23479e952d6462ae6c8b2b6993ab6b99a92805a9c17942cf3d5b3d"},
|
||||
{file = "pymssql-2.3.13-cp312-cp312-win_amd64.whl", hash = "sha256:a930adda87bdd8351a5637cf73d6491936f34e525a5e513068a6eac742f69cdb"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:30918bb044242865c01838909777ef5e0f1b9ecd7f5882346aefa57f4414b29c"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-macosx_15_0_x86_64.whl", hash = "sha256:1c6d0b2d7961f159a07e4f0d8cc81f70ceab83f5e7fd1e832a2d069e1d67ee4e"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16c5957a3c9e51a03276bfd76a22431e2bc4c565e2e95f2cbb3559312edda230"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0fddd24efe9d18bbf174fab7c6745b0927773718387f5517cf8082241f721a68"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:123c55ee41bc7a82c76db12e2eb189b50d0d7a11222b4f8789206d1cda3b33b9"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e053b443e842f9e1698fcb2b23a4bff1ff3d410894d880064e754ad823d541e5"},
|
||||
{file = "pymssql-2.3.13-cp313-cp313-win_amd64.whl", hash = "sha256:5c045c0f1977a679cc30d5acd9da3f8aeb2dc6e744895b26444b4a2f20dad9a0"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:fc5482969c813b0a45ce51c41844ae5bfa8044ad5ef8b4820ef6de7d4545b7f2"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-macosx_15_0_x86_64.whl", hash = "sha256:ff5be7ab1d643dbce2ee3424d2ef9ae8e4146cf75bd20946bc7a6108e3ad1e47"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8d66ce0a249d2e3b57369048d71e1f00d08dfb90a758d134da0250ae7bc739c1"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d663c908414a6a032f04d17628138b1782af916afc0df9fefac4751fa394c3ac"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aa5e07eff7e6e8bd4ba22c30e4cb8dd073e138cd272090603609a15cc5dbc75b"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:db77da1a3fc9b5b5c5400639d79d7658ba7ad620957100c5b025be608b562193"},
|
||||
{file = "pymssql-2.3.13-cp314-cp314-win_amd64.whl", hash = "sha256:7d7037d2b5b907acc7906d0479924db2935a70c720450c41339146a4ada2b93d"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:b0af51904764811da0bfe4b057b1d72dee11a399ce9ed5770875162772740c8a"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-macosx_15_0_x86_64.whl", hash = "sha256:0a7e6431925572bc75fb47929ae8ca5b0aac26abfe8b98d4c08daf117b5657f1"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9b1d5aef2b5f47a7f9d9733caee4d66772681e8f798a0f5e4739a8bdab408c"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c690f1869dadbf4201b7f51317fceff6e5d8f5175cec6a4a813e06b0dca2d6ed"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e7c31f192da9d30f0e03ad99e548120a8740a675302e2f04fa8c929f7cbee771"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f5d995a80996235ed32102a93067ce6a7143cce3bfd4e5042bf600020fc08456"},
|
||||
{file = "pymssql-2.3.13-cp39-cp39-win_amd64.whl", hash = "sha256:6a6c0783d97f57133573a03aad3017917dbdf7831a65e0d84ccf2a85e183ca66"},
|
||||
{file = "pymssql-2.3.13.tar.gz", hash = "sha256:2137e904b1a65546be4ccb96730a391fcd5a85aab8a0632721feb5d7e39cfbce"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pymysql"
|
||||
version = "1.1.2"
|
||||
description = "Pure Python MySQL Driver"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9"},
|
||||
{file = "pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
ed25519 = ["PyNaCl (>=1.4.0)"]
|
||||
rsa = ["cryptography"]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.3.2"
|
||||
@@ -5919,6 +6110,26 @@ typing-extensions = {version = ">=4.12", markers = "python_version < \"3.13\""}
|
||||
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
|
||||
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-cov"
|
||||
version = "7.1.0"
|
||||
description = "Pytest plugin for measuring coverage."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678"},
|
||||
{file = "pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
coverage = {version = ">=7.10.6", extras = ["toml"]}
|
||||
pluggy = ">=1.2"
|
||||
pytest = ">=7"
|
||||
|
||||
[package.extras]
|
||||
testing = ["process-tests", "pytest-xdist", "virtualenv"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-mock"
|
||||
version = "3.15.1"
|
||||
@@ -7009,6 +7220,22 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
|
||||
pymysql = ["pymysql"]
|
||||
sqlcipher = ["sqlcipher3_binary"]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparse"
|
||||
version = "0.5.5"
|
||||
description = "A non-validating SQL parser."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba"},
|
||||
{file = "sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["build"]
|
||||
doc = ["sphinx"]
|
||||
|
||||
[[package]]
|
||||
name = "sse-starlette"
|
||||
version = "3.2.0"
|
||||
@@ -8630,4 +8857,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.14"
|
||||
content-hash = "1dd10577184ebff0d10997f4c6ba49484de79b7fa090946e8e5ce5c5bac3cdeb"
|
||||
content-hash = "8dd9db689a2dd57fc3cccea02e596a522f334f6b5ed18e92252555f61835d71d"
|
||||
|
||||
@@ -94,7 +94,10 @@ posthog = "^7.6.0"
|
||||
fpdf2 = "^2.8.6"
|
||||
langsmith = "^0.7.7"
|
||||
openpyxl = "^3.1.5"
|
||||
pymssql = "^2.3.2"
|
||||
pymysql = "^1.1.1"
|
||||
pyarrow = "^23.0.0"
|
||||
sqlparse = "^0.5.5"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
aiohappyeyeballs = "^2.6.1"
|
||||
@@ -105,6 +108,7 @@ isort = "^5.13.2"
|
||||
poethepoet = "^0.41.0"
|
||||
pre-commit = "^4.4.0"
|
||||
pyright = "^1.1.407"
|
||||
pytest-cov = "^7.1.0"
|
||||
pytest-mock = "^3.15.1"
|
||||
pytest-watcher = "^0.6.3"
|
||||
requests = "^2.32.5"
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"types": "tsc --noEmit",
|
||||
"test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test",
|
||||
"test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test --ui",
|
||||
"test:unit": "vitest run",
|
||||
"test:unit": "vitest run --coverage",
|
||||
"test:unit:watch": "vitest",
|
||||
"test:no-build": "playwright test",
|
||||
"gentests": "playwright codegen http://localhost:3000",
|
||||
@@ -122,6 +122,7 @@
|
||||
"tailwind-merge": "2.6.0",
|
||||
"tailwind-scrollbar": "3.1.0",
|
||||
"tailwindcss-animate": "1.0.7",
|
||||
"twemoji": "14.0.2",
|
||||
"use-stick-to-bottom": "1.1.2",
|
||||
"uuid": "11.1.0",
|
||||
"vaul": "1.1.2",
|
||||
@@ -150,6 +151,7 @@
|
||||
"@types/react-modal": "3.16.3",
|
||||
"@types/react-window": "2.0.0",
|
||||
"@vitejs/plugin-react": "5.1.2",
|
||||
"@vitest/coverage-v8": "4.0.17",
|
||||
"axe-playwright": "2.2.2",
|
||||
"chromatic": "13.3.3",
|
||||
"concurrently": "9.2.1",
|
||||
|
||||
177
autogpt_platform/frontend/pnpm-lock.yaml
generated
177
autogpt_platform/frontend/pnpm-lock.yaml
generated
@@ -288,6 +288,9 @@ importers:
|
||||
tailwindcss-animate:
|
||||
specifier: 1.0.7
|
||||
version: 1.0.7(tailwindcss@3.4.17)
|
||||
twemoji:
|
||||
specifier: 14.0.2
|
||||
version: 14.0.2
|
||||
use-stick-to-bottom:
|
||||
specifier: 1.1.2
|
||||
version: 1.1.2(react@18.3.1)
|
||||
@@ -367,6 +370,9 @@ importers:
|
||||
'@vitejs/plugin-react':
|
||||
specifier: 5.1.2
|
||||
version: 5.1.2(vite@7.3.1(@types/node@24.10.0)(jiti@2.6.1)(terser@5.44.1)(yaml@2.8.2))
|
||||
'@vitest/coverage-v8':
|
||||
specifier: 4.0.17
|
||||
version: 4.0.17(vitest@4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2))
|
||||
axe-playwright:
|
||||
specifier: 2.2.2
|
||||
version: 2.2.2(playwright@1.56.1)
|
||||
@@ -629,6 +635,11 @@ packages:
|
||||
engines: {node: '>=6.0.0'}
|
||||
hasBin: true
|
||||
|
||||
'@babel/parser@7.29.2':
|
||||
resolution: {integrity: sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==}
|
||||
engines: {node: '>=6.0.0'}
|
||||
hasBin: true
|
||||
|
||||
'@babel/plugin-bugfix-firefox-class-in-computed-class-key@7.28.5':
|
||||
resolution: {integrity: sha512-87GDMS3tsmMSi/3bWOte1UblL+YUTFMV8SZPZ2eSEL17s74Cw/l63rR6NmGVKMYW2GYi85nE+/d6Hw5N0bEk2Q==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
@@ -1098,6 +1109,14 @@ packages:
|
||||
resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
|
||||
'@babel/types@7.29.0':
|
||||
resolution: {integrity: sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
|
||||
'@bcoe/v8-coverage@1.0.2':
|
||||
resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@braintree/sanitize-url@7.1.2':
|
||||
resolution: {integrity: sha512-jigsZK+sMF/cuiB7sERuo9V7N9jx+dhmHHnQyDSVdpZwVutaBu7WvNYqMDLSgFgfB30n452TP3vjDAvFC973mA==}
|
||||
|
||||
@@ -3917,6 +3936,15 @@ packages:
|
||||
peerDependencies:
|
||||
vite: ^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0
|
||||
|
||||
'@vitest/coverage-v8@4.0.17':
|
||||
resolution: {integrity: sha512-/6zU2FLGg0jsd+ePZcwHRy3+WpNTBBhDY56P4JTRqUN/Dp6CvOEa9HrikcQ4KfV2b2kAHUFB4dl1SuocWXSFEw==}
|
||||
peerDependencies:
|
||||
'@vitest/browser': 4.0.17
|
||||
vitest: 4.0.17
|
||||
peerDependenciesMeta:
|
||||
'@vitest/browser':
|
||||
optional: true
|
||||
|
||||
'@vitest/expect@3.2.4':
|
||||
resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==}
|
||||
|
||||
@@ -4229,6 +4257,9 @@ packages:
|
||||
resolution: {integrity: sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg==}
|
||||
engines: {node: '>=4'}
|
||||
|
||||
ast-v8-to-istanbul@0.3.12:
|
||||
resolution: {integrity: sha512-BRRC8VRZY2R4Z4lFIL35MwNXmwVqBityvOIwETtsCSwvjl0IdgFsy9NhdaA6j74nUdtJJlIypeRhpDam19Wq3g==}
|
||||
|
||||
astring@1.9.0:
|
||||
resolution: {integrity: sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg==}
|
||||
hasBin: true
|
||||
@@ -5470,6 +5501,10 @@ packages:
|
||||
resolution: {integrity: sha512-VWSRii4t0AFm6ixFFmLLx1t7wS1gh+ckoa84aOeapGum0h+EZd1EhEumSB+ZdDLnEPuucsVB9oB7cxJHap6Afg==}
|
||||
engines: {node: '>=14.14'}
|
||||
|
||||
fs-extra@8.1.0:
|
||||
resolution: {integrity: sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==}
|
||||
engines: {node: '>=6 <7 || >=8'}
|
||||
|
||||
fs-monkey@1.1.0:
|
||||
resolution: {integrity: sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==}
|
||||
|
||||
@@ -5709,6 +5744,9 @@ packages:
|
||||
html-entities@2.6.0:
|
||||
resolution: {integrity: sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==}
|
||||
|
||||
html-escaper@2.0.2:
|
||||
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
||||
|
||||
html-minifier-terser@6.1.0:
|
||||
resolution: {integrity: sha512-YXxSlJBZTP7RS3tWnQw74ooKa6L9b9i9QYXY21eUEvhZ3u9XLfv6OnFsQq6RxkhHygsaUMvYsZRV5rU/OVNZxw==}
|
||||
engines: {node: '>=12'}
|
||||
@@ -6004,6 +6042,18 @@ packages:
|
||||
isexe@2.0.0:
|
||||
resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
|
||||
|
||||
istanbul-lib-coverage@3.2.2:
|
||||
resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
istanbul-lib-report@3.0.1:
|
||||
resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
istanbul-reports@3.2.0:
|
||||
resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
iterator.prototype@1.1.5:
|
||||
resolution: {integrity: sha512-H0dkQoCa3b2VEeKQBOxFph+JAbcrQdE7KC0UkqwpLmv2EC4P41QXP+rqo9wYodACiG5/WM5s9oDApTU8utwj9g==}
|
||||
engines: {node: '>= 0.4'}
|
||||
@@ -6044,6 +6094,9 @@ packages:
|
||||
react:
|
||||
optional: true
|
||||
|
||||
js-tokens@10.0.0:
|
||||
resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==}
|
||||
|
||||
js-tokens@4.0.0:
|
||||
resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
|
||||
|
||||
@@ -6103,6 +6156,12 @@ packages:
|
||||
jsonc-parser@2.2.1:
|
||||
resolution: {integrity: sha512-o6/yDBYccGvTz1+QFevz6l6OBZ2+fMVu2JZ9CIhzsYRX4mjaK5IyX9eldUdCmga16zlgQxyrj5pt9kzuj2C02w==}
|
||||
|
||||
jsonfile@4.0.0:
|
||||
resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==}
|
||||
|
||||
jsonfile@5.0.0:
|
||||
resolution: {integrity: sha512-NQRZ5CRo74MhMMC3/3r5g2k4fjodJ/wh8MxjFbCViWKFjxrnudWSY5vomh+23ZaXzAS7J3fBZIR2dV6WbmfM0w==}
|
||||
|
||||
jsonfile@6.2.0:
|
||||
resolution: {integrity: sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==}
|
||||
|
||||
@@ -6299,10 +6358,17 @@ packages:
|
||||
resolution: {integrity: sha512-ISQTe55T2ao7XtlAStud6qwYPZjE4GK1S/BeVPus4jrq6JuOnQ00YKQC581RWhR122W7msZV263KzVeLoqidyQ==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
magicast@0.5.2:
|
||||
resolution: {integrity: sha512-E3ZJh4J3S9KfwdjZhe2afj6R9lGIN5Pher1pF39UGrXRqq/VDaGVIGN13BjHd2u8B61hArAGOnso7nBOouW3TQ==}
|
||||
|
||||
make-dir@3.1.0:
|
||||
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
make-dir@4.0.0:
|
||||
resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
markdown-it@14.1.0:
|
||||
resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==}
|
||||
hasBin: true
|
||||
@@ -8165,6 +8231,12 @@ packages:
|
||||
tty-browserify@0.0.1:
|
||||
resolution: {integrity: sha512-C3TaO7K81YvjCgQH9Q1S3R3P3BtN3RIM8n+OvX4il1K1zgE8ZhI0op7kClgkxtutIE8hQrcrHBXvIheqKUUCxw==}
|
||||
|
||||
twemoji-parser@14.0.0:
|
||||
resolution: {integrity: sha512-9DUOTGLOWs0pFWnh1p6NF+C3CkQ96PWmEFwhOVmT3WbecRC+68AIqpsnJXygfkFcp4aXbOp8Dwbhh/HQgvoRxA==}
|
||||
|
||||
twemoji@14.0.2:
|
||||
resolution: {integrity: sha512-BzOoXIe1QVdmsUmZ54xbEH+8AgtOKUiG53zO5vVP2iUu6h5u9lN15NcuS6te4OY96qx0H7JK9vjjl9WQbkTRuA==}
|
||||
|
||||
type-check@0.4.0:
|
||||
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
|
||||
engines: {node: '>= 0.8.0'}
|
||||
@@ -8289,6 +8361,10 @@ packages:
|
||||
unist-util-visit@5.0.0:
|
||||
resolution: {integrity: sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==}
|
||||
|
||||
universalify@0.1.2:
|
||||
resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==}
|
||||
engines: {node: '>= 4.0.0'}
|
||||
|
||||
universalify@2.0.1:
|
||||
resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
@@ -8998,6 +9074,10 @@ snapshots:
|
||||
dependencies:
|
||||
'@babel/types': 7.28.5
|
||||
|
||||
'@babel/parser@7.29.2':
|
||||
dependencies:
|
||||
'@babel/types': 7.29.0
|
||||
|
||||
'@babel/plugin-bugfix-firefox-class-in-computed-class-key@7.28.5(@babel/core@7.28.5)':
|
||||
dependencies:
|
||||
'@babel/core': 7.28.5
|
||||
@@ -9599,6 +9679,13 @@ snapshots:
|
||||
'@babel/helper-string-parser': 7.27.1
|
||||
'@babel/helper-validator-identifier': 7.28.5
|
||||
|
||||
'@babel/types@7.29.0':
|
||||
dependencies:
|
||||
'@babel/helper-string-parser': 7.27.1
|
||||
'@babel/helper-validator-identifier': 7.28.5
|
||||
|
||||
'@bcoe/v8-coverage@1.0.2': {}
|
||||
|
||||
'@braintree/sanitize-url@7.1.2': {}
|
||||
|
||||
'@chevrotain/cst-dts-gen@11.0.3':
|
||||
@@ -12628,6 +12715,20 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@vitest/coverage-v8@4.0.17(vitest@4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2))':
|
||||
dependencies:
|
||||
'@bcoe/v8-coverage': 1.0.2
|
||||
'@vitest/utils': 4.0.17
|
||||
ast-v8-to-istanbul: 0.3.12
|
||||
istanbul-lib-coverage: 3.2.2
|
||||
istanbul-lib-report: 3.0.1
|
||||
istanbul-reports: 3.2.0
|
||||
magicast: 0.5.2
|
||||
obug: 2.1.1
|
||||
std-env: 3.10.0
|
||||
tinyrainbow: 3.0.3
|
||||
vitest: 4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2)
|
||||
|
||||
'@vitest/expect@3.2.4':
|
||||
dependencies:
|
||||
'@types/chai': 5.2.3
|
||||
@@ -13019,6 +13120,12 @@ snapshots:
|
||||
dependencies:
|
||||
tslib: 2.8.1
|
||||
|
||||
ast-v8-to-istanbul@0.3.12:
|
||||
dependencies:
|
||||
'@jridgewell/trace-mapping': 0.3.31
|
||||
estree-walker: 3.0.3
|
||||
js-tokens: 10.0.0
|
||||
|
||||
astring@1.9.0: {}
|
||||
|
||||
async-function@1.0.0: {}
|
||||
@@ -14114,8 +14221,8 @@ snapshots:
|
||||
'@typescript-eslint/parser': 8.52.0(eslint@8.57.1)(typescript@5.9.3)
|
||||
eslint: 8.57.1
|
||||
eslint-import-resolver-node: 0.3.9
|
||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1)
|
||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
|
||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1)
|
||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
|
||||
eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1)
|
||||
eslint-plugin-react: 7.37.5(eslint@8.57.1)
|
||||
eslint-plugin-react-hooks: 5.2.0(eslint@8.57.1)
|
||||
@@ -14134,7 +14241,7 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1):
|
||||
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1):
|
||||
dependencies:
|
||||
'@nolyfill/is-core-module': 1.0.39
|
||||
debug: 4.4.3
|
||||
@@ -14145,22 +14252,22 @@ snapshots:
|
||||
tinyglobby: 0.2.15
|
||||
unrs-resolver: 1.11.1
|
||||
optionalDependencies:
|
||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
|
||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
|
||||
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1):
|
||||
dependencies:
|
||||
debug: 3.2.7
|
||||
optionalDependencies:
|
||||
'@typescript-eslint/parser': 8.52.0(eslint@8.57.1)(typescript@5.9.3)
|
||||
eslint: 8.57.1
|
||||
eslint-import-resolver-node: 0.3.9
|
||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1)
|
||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1)
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
|
||||
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1):
|
||||
dependencies:
|
||||
'@rtsao/scc': 1.1.0
|
||||
array-includes: 3.1.9
|
||||
@@ -14171,7 +14278,7 @@ snapshots:
|
||||
doctrine: 2.1.0
|
||||
eslint: 8.57.1
|
||||
eslint-import-resolver-node: 0.3.9
|
||||
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
|
||||
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
|
||||
hasown: 2.0.2
|
||||
is-core-module: 2.16.1
|
||||
is-glob: 4.0.3
|
||||
@@ -14511,6 +14618,12 @@ snapshots:
|
||||
jsonfile: 6.2.0
|
||||
universalify: 2.0.1
|
||||
|
||||
fs-extra@8.1.0:
|
||||
dependencies:
|
||||
graceful-fs: 4.2.11
|
||||
jsonfile: 4.0.0
|
||||
universalify: 0.1.2
|
||||
|
||||
fs-monkey@1.1.0: {}
|
||||
|
||||
fs.realpath@1.0.0: {}
|
||||
@@ -14848,6 +14961,8 @@ snapshots:
|
||||
|
||||
html-entities@2.6.0: {}
|
||||
|
||||
html-escaper@2.0.2: {}
|
||||
|
||||
html-minifier-terser@6.1.0:
|
||||
dependencies:
|
||||
camel-case: 4.1.2
|
||||
@@ -15135,6 +15250,19 @@ snapshots:
|
||||
|
||||
isexe@2.0.0: {}
|
||||
|
||||
istanbul-lib-coverage@3.2.2: {}
|
||||
|
||||
istanbul-lib-report@3.0.1:
|
||||
dependencies:
|
||||
istanbul-lib-coverage: 3.2.2
|
||||
make-dir: 4.0.0
|
||||
supports-color: 7.2.0
|
||||
|
||||
istanbul-reports@3.2.0:
|
||||
dependencies:
|
||||
html-escaper: 2.0.2
|
||||
istanbul-lib-report: 3.0.1
|
||||
|
||||
iterator.prototype@1.1.5:
|
||||
dependencies:
|
||||
define-data-property: 1.1.4
|
||||
@@ -15169,6 +15297,8 @@ snapshots:
|
||||
'@types/react': 18.3.17
|
||||
react: 18.3.1
|
||||
|
||||
js-tokens@10.0.0: {}
|
||||
|
||||
js-tokens@4.0.0: {}
|
||||
|
||||
js-yaml@4.1.0:
|
||||
@@ -15232,6 +15362,16 @@ snapshots:
|
||||
|
||||
jsonc-parser@2.2.1: {}
|
||||
|
||||
jsonfile@4.0.0:
|
||||
optionalDependencies:
|
||||
graceful-fs: 4.2.11
|
||||
|
||||
jsonfile@5.0.0:
|
||||
dependencies:
|
||||
universalify: 0.1.2
|
||||
optionalDependencies:
|
||||
graceful-fs: 4.2.11
|
||||
|
||||
jsonfile@6.2.0:
|
||||
dependencies:
|
||||
universalify: 2.0.1
|
||||
@@ -15420,10 +15560,20 @@ snapshots:
|
||||
dependencies:
|
||||
'@jridgewell/sourcemap-codec': 1.5.5
|
||||
|
||||
magicast@0.5.2:
|
||||
dependencies:
|
||||
'@babel/parser': 7.29.2
|
||||
'@babel/types': 7.29.0
|
||||
source-map-js: 1.2.1
|
||||
|
||||
make-dir@3.1.0:
|
||||
dependencies:
|
||||
semver: 6.3.1
|
||||
|
||||
make-dir@4.0.0:
|
||||
dependencies:
|
||||
semver: 7.7.3
|
||||
|
||||
markdown-it@14.1.0:
|
||||
dependencies:
|
||||
argparse: 2.0.1
|
||||
@@ -17782,6 +17932,15 @@ snapshots:
|
||||
|
||||
tty-browserify@0.0.1: {}
|
||||
|
||||
twemoji-parser@14.0.0: {}
|
||||
|
||||
twemoji@14.0.2:
|
||||
dependencies:
|
||||
fs-extra: 8.1.0
|
||||
jsonfile: 5.0.0
|
||||
twemoji-parser: 14.0.0
|
||||
universalify: 0.1.2
|
||||
|
||||
type-check@0.4.0:
|
||||
dependencies:
|
||||
prelude-ls: 1.2.1
|
||||
@@ -17919,6 +18078,8 @@ snapshots:
|
||||
unist-util-is: 6.0.1
|
||||
unist-util-visit-parents: 6.0.2
|
||||
|
||||
universalify@0.1.2: {}
|
||||
|
||||
universalify@2.0.1: {}
|
||||
|
||||
unplugin@1.0.1:
|
||||
|
||||
BIN
autogpt_platform/frontend/public/notification.mp3
Normal file
BIN
autogpt_platform/frontend/public/notification.mp3
Normal file
Binary file not shown.
@@ -1,33 +0,0 @@
|
||||
"use client";
|
||||
import { OnboardingText } from "../components/OnboardingText";
|
||||
import OnboardingButton from "../components/OnboardingButton";
|
||||
import Image from "next/image";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
|
||||
export default function Page() {
|
||||
useOnboarding(1);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Image
|
||||
src="/gpt_dark_RGB.svg"
|
||||
alt="GPT Dark Logo"
|
||||
className="-mb-2"
|
||||
width={300}
|
||||
height={300}
|
||||
/>
|
||||
<OnboardingText className="mb-3" variant="header" center>
|
||||
Welcome to AutoGPT
|
||||
</OnboardingText>
|
||||
<OnboardingText className="mb-12" center>
|
||||
Think of AutoGPT as your digital teammate, working intelligently to
|
||||
<br />
|
||||
complete tasks based on your directions. Let's learn a bit about
|
||||
you to
|
||||
<br />
|
||||
tailor your experience.
|
||||
</OnboardingText>
|
||||
<OnboardingButton href="/onboarding/2-reason">Continue</OnboardingButton>
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
"use client";
|
||||
import OnboardingButton from "../components/OnboardingButton";
|
||||
import {
|
||||
OnboardingFooter,
|
||||
OnboardingHeader,
|
||||
OnboardingStep,
|
||||
} from "../components/OnboardingStep";
|
||||
import { OnboardingText } from "../components/OnboardingText";
|
||||
import OnboardingList from "../components/OnboardingList";
|
||||
import { isEmptyOrWhitespace } from "@/lib/utils";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
|
||||
const reasons = [
|
||||
{
|
||||
label: "Content & Marketing",
|
||||
text: "Content creation, social media management, blogging, creative writing",
|
||||
id: "content_marketing",
|
||||
},
|
||||
{
|
||||
label: "Business & Workflow Automation",
|
||||
text: "Operations, task management, productivity",
|
||||
id: "business_workflow_automation",
|
||||
},
|
||||
{
|
||||
label: "Data & Research",
|
||||
text: "Data analysis, insights, research, financial operation",
|
||||
id: "data_research",
|
||||
},
|
||||
{
|
||||
label: "AI & Innovation",
|
||||
text: "AI experimentation, automation testing, advanced AI applications",
|
||||
id: "ai_innovation",
|
||||
},
|
||||
{
|
||||
label: "Personal productivity",
|
||||
text: "Automating daily tasks, organizing information, personal workflows",
|
||||
id: "personal_productivity",
|
||||
},
|
||||
];
|
||||
|
||||
export default function Page() {
|
||||
const { state, updateState } = useOnboarding(2, "WELCOME");
|
||||
|
||||
return (
|
||||
<OnboardingStep>
|
||||
<OnboardingHeader backHref={"/onboarding/1-welcome"}>
|
||||
<OnboardingText className="mt-4" variant="header" center>
|
||||
What's your main reason for using AutoGPT?
|
||||
</OnboardingText>
|
||||
<OnboardingText className="mt-1" center>
|
||||
Select the option that best matches your needs
|
||||
</OnboardingText>
|
||||
</OnboardingHeader>
|
||||
<OnboardingList
|
||||
elements={reasons}
|
||||
selectedId={state?.usageReason}
|
||||
onSelect={(usageReason) => updateState({ usageReason })}
|
||||
/>
|
||||
<OnboardingFooter>
|
||||
<OnboardingButton
|
||||
href="/onboarding/3-services"
|
||||
disabled={isEmptyOrWhitespace(state?.usageReason)}
|
||||
>
|
||||
Next
|
||||
</OnboardingButton>
|
||||
</OnboardingFooter>
|
||||
</OnboardingStep>
|
||||
);
|
||||
}
|
||||
@@ -1,171 +0,0 @@
|
||||
"use client";
|
||||
import OnboardingButton from "../components/OnboardingButton";
|
||||
import {
|
||||
OnboardingStep,
|
||||
OnboardingHeader,
|
||||
OnboardingFooter,
|
||||
} from "../components/OnboardingStep";
|
||||
import { OnboardingText } from "../components/OnboardingText";
|
||||
import { OnboardingGrid } from "../components/OnboardingGrid";
|
||||
import { useCallback } from "react";
|
||||
import OnboardingInput from "../components/OnboardingInput";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
|
||||
const services = [
|
||||
{
|
||||
name: "D-ID",
|
||||
text: "Generate AI-powered avatars and videos for dynamic content creation.",
|
||||
icon: "/integrations/d-id.png",
|
||||
},
|
||||
{
|
||||
name: "Discord",
|
||||
text: "A chat platform for communities and teams, supporting text, voice, and video.",
|
||||
icon: "/integrations/discord.png",
|
||||
},
|
||||
{
|
||||
name: "GitHub",
|
||||
text: "AutoGPT can track issues, manage repos, and automate workflows with GitHub.",
|
||||
icon: "/integrations/github.png",
|
||||
},
|
||||
{
|
||||
name: "Google Workspace",
|
||||
text: "Automate emails, calendar events, and document management in AutoGPT with Google Workspace.",
|
||||
icon: "/integrations/google.png",
|
||||
},
|
||||
{
|
||||
name: "Google Maps",
|
||||
text: "Fetch locations, directions, and real-time geodata for navigation.",
|
||||
icon: "/integrations/maps.png",
|
||||
},
|
||||
{
|
||||
name: "HubSpot",
|
||||
text: "Manage customer relationships, automate marketing, and track sales.",
|
||||
icon: "/integrations/hubspot.png",
|
||||
},
|
||||
{
|
||||
name: "Linear",
|
||||
text: "Streamline project management and issue tracking with a modern workflow.",
|
||||
icon: "/integrations/linear.png",
|
||||
},
|
||||
{
|
||||
name: "Medium",
|
||||
text: "Publish and explore insightful content with a powerful writing platform.",
|
||||
icon: "/integrations/medium.png",
|
||||
},
|
||||
{
|
||||
name: "Mem0",
|
||||
text: "AI-powered memory assistant for smarter data organization and recall.",
|
||||
icon: "/integrations/mem0.png",
|
||||
},
|
||||
{
|
||||
name: "Notion",
|
||||
text: "Organize work, notes, and databases in an all-in-one workspace.",
|
||||
icon: "/integrations/notion.png",
|
||||
},
|
||||
{
|
||||
name: "NVIDIA",
|
||||
text: "Accelerate AI, graphics, and computing with cutting-edge technology.",
|
||||
icon: "/integrations/nvidia.jpg",
|
||||
},
|
||||
{
|
||||
name: "OpenWeatherMap",
|
||||
text: "Access real-time weather data and forecasts worldwide.",
|
||||
icon: "/integrations/openweathermap.png",
|
||||
},
|
||||
{
|
||||
name: "Pinecone",
|
||||
text: "Store and search vector data for AI-driven applications.",
|
||||
icon: "/integrations/pinecone.png",
|
||||
},
|
||||
{
|
||||
name: "Reddit",
|
||||
text: "Explore trending discussions and engage with online communities.",
|
||||
icon: "/integrations/reddit.png",
|
||||
},
|
||||
{
|
||||
name: "Slant3D",
|
||||
text: "Automate and optimize 3D printing workflows with AI.",
|
||||
icon: "/integrations/slant3d.jpeg",
|
||||
},
|
||||
{
|
||||
name: "SMTP",
|
||||
text: "Send and manage emails with secure and reliable delivery.",
|
||||
icon: "/integrations/smtp.png",
|
||||
},
|
||||
{
|
||||
name: "Todoist",
|
||||
text: "Organize tasks and projects with a simple, intuitive to-do list.",
|
||||
icon: "/integrations/todoist.png",
|
||||
},
|
||||
{
|
||||
name: "Twitter (X)",
|
||||
text: "Stay connected and share updates on the world's biggest conversation platform.",
|
||||
icon: "/integrations/x.png",
|
||||
},
|
||||
{
|
||||
name: "Unreal Speech",
|
||||
text: "Generate natural-sounding AI voices for speech applications.",
|
||||
icon: "/integrations/unreal-speech.png",
|
||||
},
|
||||
];
|
||||
|
||||
export default function Page() {
|
||||
const { state, updateState } = useOnboarding(3, "USAGE_REASON");
|
||||
|
||||
const switchIntegration = useCallback(
|
||||
(name: string) => {
|
||||
if (!state) {
|
||||
return;
|
||||
}
|
||||
|
||||
const integrations = state.integrations.includes(name)
|
||||
? state.integrations.filter((i) => i !== name)
|
||||
: [...state.integrations, name];
|
||||
|
||||
updateState({ integrations });
|
||||
},
|
||||
[state, updateState],
|
||||
);
|
||||
|
||||
return (
|
||||
<OnboardingStep>
|
||||
<OnboardingHeader backHref={"/onboarding/2-reason"}>
|
||||
<OnboardingText className="mt-4" variant="header" center>
|
||||
What platforms or services would you like AutoGPT to work with?
|
||||
</OnboardingText>
|
||||
<OnboardingText className="mt-1" center>
|
||||
You can select more than one option
|
||||
</OnboardingText>
|
||||
</OnboardingHeader>
|
||||
|
||||
<div className="w-fit">
|
||||
<OnboardingText className="my-4" variant="subheader">
|
||||
Available integrations
|
||||
</OnboardingText>
|
||||
<OnboardingGrid
|
||||
elements={services}
|
||||
selected={state?.integrations}
|
||||
onSelect={switchIntegration}
|
||||
/>
|
||||
<OnboardingText className="mt-12" variant="subheader">
|
||||
Help us grow our integrations
|
||||
</OnboardingText>
|
||||
<OnboardingText className="my-4">
|
||||
Let us know which partnerships you'd like to see next
|
||||
</OnboardingText>
|
||||
<OnboardingInput
|
||||
className="mb-4"
|
||||
placeholder="Others (please specify)"
|
||||
value={state?.otherIntegrations || ""}
|
||||
onChange={(otherIntegrations) => updateState({ otherIntegrations })}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<OnboardingFooter>
|
||||
<OnboardingButton className="mb-2" href="/onboarding/4-agent">
|
||||
Next
|
||||
</OnboardingButton>
|
||||
</OnboardingFooter>
|
||||
</OnboardingStep>
|
||||
);
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
"use client";
|
||||
import { isEmptyOrWhitespace } from "@/lib/utils";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect, useState } from "react";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
import OnboardingAgentCard from "../components/OnboardingAgentCard";
|
||||
import OnboardingButton from "../components/OnboardingButton";
|
||||
import {
|
||||
OnboardingFooter,
|
||||
OnboardingHeader,
|
||||
OnboardingStep,
|
||||
} from "../components/OnboardingStep";
|
||||
import { OnboardingText } from "../components/OnboardingText";
|
||||
import { getV1RecommendedOnboardingAgents } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
|
||||
import { resolveResponse } from "@/app/api/helpers";
|
||||
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
|
||||
|
||||
export default function Page() {
|
||||
const { state, updateState, completeStep } = useOnboarding(4, "INTEGRATIONS");
|
||||
const [agents, setAgents] = useState<StoreAgentDetails[]>([]);
|
||||
const router = useRouter();
|
||||
|
||||
useEffect(() => {
|
||||
resolveResponse(getV1RecommendedOnboardingAgents()).then((agents) => {
|
||||
if (agents.length < 2) {
|
||||
completeStep("CONGRATS");
|
||||
router.replace("/");
|
||||
}
|
||||
setAgents(agents);
|
||||
});
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
// Deselect agent if it's not in the list of agents
|
||||
if (
|
||||
state?.selectedStoreListingVersionId &&
|
||||
agents.length > 0 &&
|
||||
!agents.some(
|
||||
(agent) =>
|
||||
agent.store_listing_version_id ===
|
||||
state.selectedStoreListingVersionId,
|
||||
)
|
||||
) {
|
||||
updateState({
|
||||
selectedStoreListingVersionId: null,
|
||||
agentInput: {},
|
||||
});
|
||||
}
|
||||
}, [state?.selectedStoreListingVersionId, updateState, agents]);
|
||||
|
||||
return (
|
||||
<OnboardingStep>
|
||||
<OnboardingHeader backHref={"/onboarding/3-services"}>
|
||||
<OnboardingText className="mt-4" variant="header" center>
|
||||
Choose an agent
|
||||
</OnboardingText>
|
||||
<OnboardingText className="mt-1" center>
|
||||
We think these agents are a good match for you based on your answers
|
||||
</OnboardingText>
|
||||
</OnboardingHeader>
|
||||
|
||||
<div className="my-12 flex items-center justify-between gap-5">
|
||||
<OnboardingAgentCard
|
||||
agent={agents[0]}
|
||||
selected={
|
||||
agents[0] !== undefined
|
||||
? state?.selectedStoreListingVersionId ==
|
||||
agents[0]?.store_listing_version_id
|
||||
: false
|
||||
}
|
||||
onClick={() =>
|
||||
updateState({
|
||||
selectedStoreListingVersionId: agents[0].store_listing_version_id,
|
||||
agentInput: {},
|
||||
})
|
||||
}
|
||||
/>
|
||||
<OnboardingAgentCard
|
||||
agent={agents[1]}
|
||||
selected={
|
||||
agents[1] !== undefined
|
||||
? state?.selectedStoreListingVersionId ==
|
||||
agents[1]?.store_listing_version_id
|
||||
: false
|
||||
}
|
||||
onClick={() =>
|
||||
updateState({
|
||||
selectedStoreListingVersionId: agents[1].store_listing_version_id,
|
||||
})
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<OnboardingFooter>
|
||||
<OnboardingButton
|
||||
href="/onboarding/5-run"
|
||||
disabled={isEmptyOrWhitespace(state?.selectedStoreListingVersionId)}
|
||||
>
|
||||
Next
|
||||
</OnboardingButton>
|
||||
</OnboardingFooter>
|
||||
</OnboardingStep>
|
||||
);
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
|
||||
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
|
||||
import { CredentialsInput } from "@/components/contextual/CredentialsInput/CredentialsInput";
|
||||
import { useState } from "react";
|
||||
import { getSchemaDefaultCredentials } from "../../helpers";
|
||||
import { areAllCredentialsSet, getCredentialFields } from "./helpers";
|
||||
|
||||
type Credential = CredentialsMetaInput | undefined;
|
||||
type Credentials = Record<string, Credential>;
|
||||
|
||||
type Props = {
|
||||
agent: GraphModel | null;
|
||||
siblingInputs?: Record<string, any>;
|
||||
onCredentialsChange: (
|
||||
credentials: Record<string, CredentialsMetaInput>,
|
||||
) => void;
|
||||
onValidationChange: (isValid: boolean) => void;
|
||||
onLoadingChange: (isLoading: boolean) => void;
|
||||
};
|
||||
|
||||
export function AgentOnboardingCredentials(props: Props) {
|
||||
const [inputCredentials, setInputCredentials] = useState<Credentials>({});
|
||||
|
||||
const fields = getCredentialFields(props.agent);
|
||||
const required = Object.keys(fields || {}).length > 0;
|
||||
|
||||
if (!required) return null;
|
||||
|
||||
function handleSelectCredentials(key: string, value: Credential) {
|
||||
const updated = { ...inputCredentials, [key]: value };
|
||||
setInputCredentials(updated);
|
||||
|
||||
const sanitized: Record<string, CredentialsMetaInput> = {};
|
||||
for (const [k, v] of Object.entries(updated)) {
|
||||
if (v) sanitized[k] = v;
|
||||
}
|
||||
|
||||
props.onCredentialsChange(sanitized);
|
||||
|
||||
const isValid = !required || areAllCredentialsSet(fields, updated);
|
||||
props.onValidationChange(isValid);
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
{Object.entries(fields).map(([key, inputSubSchema]) => (
|
||||
<div key={key} className="mt-4">
|
||||
<CredentialsInput
|
||||
schema={inputSubSchema}
|
||||
selectedCredentials={
|
||||
inputCredentials[key] ??
|
||||
getSchemaDefaultCredentials(inputSubSchema)
|
||||
}
|
||||
onSelectCredentials={(value) => handleSelectCredentials(key, value)}
|
||||
siblingInputs={props.siblingInputs}
|
||||
onLoaded={(loaded) => props.onLoadingChange(!loaded)}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
|
||||
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
|
||||
import { BlockIOCredentialsSubSchema } from "@/lib/autogpt-server-api/types";
|
||||
|
||||
export function getCredentialFields(
|
||||
agent: GraphModel | null,
|
||||
): AgentCredentialsFields {
|
||||
if (!agent) return {};
|
||||
|
||||
const hasNoInputs =
|
||||
!agent.credentials_input_schema ||
|
||||
typeof agent.credentials_input_schema !== "object" ||
|
||||
!("properties" in agent.credentials_input_schema) ||
|
||||
!agent.credentials_input_schema.properties;
|
||||
|
||||
if (hasNoInputs) return {};
|
||||
|
||||
return agent.credentials_input_schema.properties as AgentCredentialsFields;
|
||||
}
|
||||
|
||||
export type AgentCredentialsFields = Record<
|
||||
string,
|
||||
BlockIOCredentialsSubSchema
|
||||
>;
|
||||
|
||||
export function areAllCredentialsSet(
|
||||
fields: AgentCredentialsFields,
|
||||
inputs: Record<string, CredentialsMetaInput | undefined>,
|
||||
) {
|
||||
const required = Object.keys(fields || {});
|
||||
return required.every((k) => Boolean(inputs[k]));
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
import { OnboardingText } from "../../components/OnboardingText";
|
||||
|
||||
type RunAgentHintProps = {
|
||||
handleNewRun: () => void;
|
||||
};
|
||||
|
||||
export function RunAgentHint(props: RunAgentHintProps) {
|
||||
return (
|
||||
<div className="ml-[104px] w-[481px] pl-5">
|
||||
<div className="flex flex-col">
|
||||
<OnboardingText variant="header">Run your first agent</OnboardingText>
|
||||
<span className="mt-9 text-base font-normal leading-normal text-zinc-600">
|
||||
A 'run' is when your agent starts working on a task
|
||||
</span>
|
||||
<span className="mt-4 text-base font-normal leading-normal text-zinc-600">
|
||||
Click on <b>New Run</b> below to try it out
|
||||
</span>
|
||||
|
||||
<div
|
||||
onClick={props.handleNewRun}
|
||||
className={cn(
|
||||
"mt-16 flex h-[68px] w-[330px] items-center justify-center rounded-xl border-2 border-violet-700 bg-neutral-50",
|
||||
"cursor-pointer transition-all duration-200 ease-in-out hover:bg-violet-50",
|
||||
)}
|
||||
>
|
||||
<svg
|
||||
width="38"
|
||||
height="38"
|
||||
viewBox="0 0 32 32"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
>
|
||||
<g stroke="#6d28d9" strokeWidth="1.2" strokeLinecap="round">
|
||||
<line x1="16" y1="8" x2="16" y2="24" />
|
||||
<line x1="8" y1="16" x2="24" y2="16" />
|
||||
</g>
|
||||
</svg>
|
||||
<span className="ml-3 font-sans text-[19px] font-medium leading-normal text-violet-700">
|
||||
New run
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
|
||||
import StarRating from "../../components/StarRating";
|
||||
import SmartImage from "@/components/__legacy__/SmartImage";
|
||||
|
||||
type Props = {
|
||||
storeAgent: StoreAgentDetails | null;
|
||||
};
|
||||
|
||||
export function SelectedAgentCard(props: Props) {
|
||||
return (
|
||||
<div className="fixed left-1/4 top-1/2 w-[481px] -translate-x-1/2 -translate-y-1/2">
|
||||
<div className="h-[156px] w-[481px] rounded-xl bg-white px-6 pb-5 pt-4">
|
||||
<span className="font-sans text-xs font-medium tracking-wide text-zinc-500">
|
||||
SELECTED AGENT
|
||||
</span>
|
||||
{props.storeAgent ? (
|
||||
<div className="mt-4 flex h-20 rounded-lg bg-violet-50 p-3">
|
||||
{/* Left image */}
|
||||
<SmartImage
|
||||
src={props.storeAgent.agent_image[0]}
|
||||
alt="Agent cover"
|
||||
className="w-[350px] rounded-lg"
|
||||
/>
|
||||
{/* Right content */}
|
||||
<div className="ml-3 flex flex-1 flex-col">
|
||||
<div className="mb-2 flex flex-col items-start">
|
||||
<span className="data-sentry-unmask w-[292px] truncate font-sans text-[14px] font-medium leading-tight text-zinc-800">
|
||||
{props.storeAgent.agent_name}
|
||||
</span>
|
||||
<span className="data-sentry-unmask font-norma w-[292px] truncate font-sans text-xs text-zinc-600">
|
||||
by {props.storeAgent.creator}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex w-[292px] items-center justify-between">
|
||||
<span className="truncate font-sans text-xs font-normal leading-tight text-zinc-600">
|
||||
{props.storeAgent.runs.toLocaleString("en-US")} runs
|
||||
</span>
|
||||
<StarRating
|
||||
className="font-sans text-xs font-normal leading-tight text-zinc-600"
|
||||
starSize={12}
|
||||
rating={props.storeAgent.rating || 0}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="mt-4 flex h-20 animate-pulse rounded-lg bg-gray-300 p-2" />
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
import type {
|
||||
BlockIOCredentialsSubSchema,
|
||||
CredentialsMetaInput,
|
||||
} from "@/lib/autogpt-server-api/types";
|
||||
import type { InputValues } from "./types";
|
||||
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
|
||||
|
||||
export function computeInitialAgentInputs(
|
||||
agent: GraphModel | null,
|
||||
existingInputs?: InputValues | null,
|
||||
): InputValues {
|
||||
const properties = agent?.input_schema?.properties || {};
|
||||
const result: InputValues = {};
|
||||
|
||||
Object.entries(properties).forEach(([key, subSchema]) => {
|
||||
if (
|
||||
existingInputs &&
|
||||
key in existingInputs &&
|
||||
existingInputs[key] != null
|
||||
) {
|
||||
result[key] = existingInputs[key];
|
||||
return;
|
||||
}
|
||||
const def = (subSchema as unknown as { default?: string | number }).default;
|
||||
result[key] = def ?? "";
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
type IsRunDisabledParams = {
|
||||
agent: GraphModel | null;
|
||||
isRunning: boolean;
|
||||
agentInputs: InputValues | null | undefined;
|
||||
};
|
||||
|
||||
export function isRunDisabled({
|
||||
agent,
|
||||
isRunning,
|
||||
agentInputs,
|
||||
}: IsRunDisabledParams) {
|
||||
const hasEmptyInput = Object.values(agentInputs || {}).some(
|
||||
(value) => String(value).trim() === "",
|
||||
);
|
||||
|
||||
if (hasEmptyInput) return true;
|
||||
if (!agent) return true;
|
||||
if (isRunning) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
export function getSchemaDefaultCredentials(
|
||||
schema: BlockIOCredentialsSubSchema,
|
||||
): CredentialsMetaInput | undefined {
|
||||
return schema.default as CredentialsMetaInput | undefined;
|
||||
}
|
||||
@@ -1,124 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/__legacy__/ui/card";
|
||||
import { RunAgentInputs } from "@/components/contextual/RunAgentInputs/RunAgentInputs";
|
||||
import { ErrorCard } from "@/components/molecules/ErrorCard/ErrorCard";
|
||||
import { CircleNotchIcon } from "@phosphor-icons/react/dist/ssr";
|
||||
import { Play } from "lucide-react";
|
||||
import OnboardingButton from "../components/OnboardingButton";
|
||||
import { OnboardingHeader, OnboardingStep } from "../components/OnboardingStep";
|
||||
import { OnboardingText } from "../components/OnboardingText";
|
||||
import { AgentOnboardingCredentials } from "./components/AgentOnboardingCredentials/AgentOnboardingCredentials";
|
||||
import { RunAgentHint } from "./components/RunAgentHint";
|
||||
import { SelectedAgentCard } from "./components/SelectedAgentCard";
|
||||
import { isRunDisabled } from "./helpers";
|
||||
import type { InputValues } from "./types";
|
||||
import { useOnboardingRunStep } from "./useOnboardingRunStep";
|
||||
|
||||
export default function Page() {
|
||||
const {
|
||||
ready,
|
||||
error,
|
||||
showInput,
|
||||
agentGraph,
|
||||
onboarding,
|
||||
storeAgent,
|
||||
runningAgent,
|
||||
handleSetAgentInput,
|
||||
handleRunAgent,
|
||||
handleNewRun,
|
||||
handleCredentialsChange,
|
||||
handleCredentialsValidationChange,
|
||||
handleCredentialsLoadingChange,
|
||||
} = useOnboardingRunStep();
|
||||
|
||||
if (error) {
|
||||
return <ErrorCard responseError={error} />;
|
||||
}
|
||||
|
||||
if (!ready) {
|
||||
return (
|
||||
<div className="flex flex-col gap-8">
|
||||
<CircleNotchIcon className="size-10 animate-spin" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<OnboardingStep dotted>
|
||||
<OnboardingHeader backHref={"/onboarding/4-agent"} transparent />
|
||||
<div className="flex min-h-[80vh] items-center justify-center">
|
||||
<SelectedAgentCard storeAgent={storeAgent} />
|
||||
<div className="w-[481px]" />
|
||||
{!showInput ? (
|
||||
<RunAgentHint handleNewRun={handleNewRun} />
|
||||
) : (
|
||||
<div className="ml-[104px] w-[481px] pl-5">
|
||||
<div className="flex flex-col">
|
||||
<OnboardingText variant="header">
|
||||
Provide details for your agent
|
||||
</OnboardingText>
|
||||
<span className="mt-9 text-base font-normal leading-normal text-zinc-600">
|
||||
Give your agent the details it needs to work—just enter <br />
|
||||
the key information and get started.
|
||||
</span>
|
||||
<span className="mt-4 text-base font-normal leading-normal text-zinc-600">
|
||||
When you're done, click <b>Run Agent</b>.
|
||||
</span>
|
||||
|
||||
<Card className="agpt-box mt-4">
|
||||
<CardHeader>
|
||||
<CardTitle className="font-poppins text-lg">Input</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="flex flex-col gap-4">
|
||||
{Object.entries(
|
||||
agentGraph?.input_schema.properties || {},
|
||||
).map(([key, inputSubSchema]) => (
|
||||
<RunAgentInputs
|
||||
key={key}
|
||||
schema={inputSubSchema}
|
||||
value={onboarding.state?.agentInput?.[key]}
|
||||
placeholder={inputSubSchema.description}
|
||||
onChange={(value) => handleSetAgentInput(key, value)}
|
||||
/>
|
||||
))}
|
||||
<AgentOnboardingCredentials
|
||||
agent={agentGraph}
|
||||
siblingInputs={
|
||||
(onboarding.state?.agentInput as Record<string, any>) ||
|
||||
undefined
|
||||
}
|
||||
onCredentialsChange={handleCredentialsChange}
|
||||
onValidationChange={handleCredentialsValidationChange}
|
||||
onLoadingChange={handleCredentialsLoadingChange}
|
||||
/>
|
||||
</CardContent>
|
||||
</Card>
|
||||
<OnboardingButton
|
||||
variant="violet"
|
||||
className="mt-8 w-[136px]"
|
||||
loading={runningAgent}
|
||||
disabled={isRunDisabled({
|
||||
agent: agentGraph,
|
||||
isRunning: runningAgent,
|
||||
agentInputs:
|
||||
(onboarding.state?.agentInput as unknown as InputValues) ||
|
||||
null,
|
||||
})}
|
||||
onClick={handleRunAgent}
|
||||
icon={<Play className="mr-2" size={18} />}
|
||||
>
|
||||
Run agent
|
||||
</OnboardingButton>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</OnboardingStep>
|
||||
);
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
export type InputPrimitive = string | number;
|
||||
export type InputValues = Record<string, InputPrimitive>;
|
||||
@@ -1,157 +0,0 @@
|
||||
import { useToast } from "@/components/molecules/Toast/use-toast";
|
||||
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
|
||||
import { useOnboarding } from "@/providers/onboarding/onboarding-provider";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect, useState } from "react";
|
||||
import { computeInitialAgentInputs } from "./helpers";
|
||||
import { InputValues } from "./types";
|
||||
import { okData, resolveResponse } from "@/app/api/helpers";
|
||||
import { postV2AddMarketplaceAgent } from "@/app/api/__generated__/endpoints/library/library";
|
||||
import {
|
||||
useGetV2GetAgentByVersion,
|
||||
useGetV2GetAgentGraph,
|
||||
} from "@/app/api/__generated__/endpoints/store/store";
|
||||
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
|
||||
import { GraphID } from "@/lib/autogpt-server-api";
|
||||
|
||||
export function useOnboardingRunStep() {
|
||||
const onboarding = useOnboarding(undefined, "AGENT_CHOICE");
|
||||
|
||||
const [showInput, setShowInput] = useState(false);
|
||||
const [runningAgent, setRunningAgent] = useState(false);
|
||||
|
||||
const [inputCredentials, setInputCredentials] = useState<
|
||||
Record<string, CredentialsMetaInput>
|
||||
>({});
|
||||
|
||||
const [credentialsValid, setCredentialsValid] = useState(true);
|
||||
const [credentialsLoaded, setCredentialsLoaded] = useState(false);
|
||||
|
||||
const { toast } = useToast();
|
||||
const router = useRouter();
|
||||
const api = useBackendAPI();
|
||||
|
||||
const currentAgentVersion =
|
||||
onboarding.state?.selectedStoreListingVersionId ?? "";
|
||||
|
||||
const {
|
||||
data: storeAgent,
|
||||
error: storeAgentQueryError,
|
||||
isSuccess: storeAgentQueryIsSuccess,
|
||||
} = useGetV2GetAgentByVersion(currentAgentVersion, {
|
||||
query: {
|
||||
enabled: !!currentAgentVersion,
|
||||
select: okData,
|
||||
},
|
||||
});
|
||||
|
||||
const {
|
||||
data: agentGraphMeta,
|
||||
error: agentGraphQueryError,
|
||||
isSuccess: agentGraphQueryIsSuccess,
|
||||
} = useGetV2GetAgentGraph(currentAgentVersion, {
|
||||
query: {
|
||||
enabled: !!currentAgentVersion,
|
||||
select: okData,
|
||||
},
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
onboarding.setStep(5);
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (agentGraphMeta && onboarding.state) {
|
||||
const initialAgentInputs = computeInitialAgentInputs(
|
||||
agentGraphMeta,
|
||||
(onboarding.state.agentInput as unknown as InputValues) || null,
|
||||
);
|
||||
|
||||
onboarding.updateState({ agentInput: initialAgentInputs });
|
||||
}
|
||||
}, [agentGraphMeta]);
|
||||
|
||||
function handleNewRun() {
|
||||
if (!onboarding.state) return;
|
||||
|
||||
setShowInput(true);
|
||||
onboarding.setStep(6);
|
||||
onboarding.completeStep("AGENT_NEW_RUN");
|
||||
}
|
||||
|
||||
function handleSetAgentInput(key: string, value: string) {
|
||||
if (!onboarding.state) return;
|
||||
|
||||
onboarding.updateState({
|
||||
agentInput: {
|
||||
...onboarding.state.agentInput,
|
||||
[key]: value,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function handleRunAgent() {
|
||||
if (!agentGraphMeta || !storeAgent || !onboarding.state) {
|
||||
toast({
|
||||
title: "Error getting agent",
|
||||
description:
|
||||
"Either the agent is not available or there was an error getting it.",
|
||||
variant: "destructive",
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
setRunningAgent(true);
|
||||
|
||||
try {
|
||||
const libraryAgent = await resolveResponse(
|
||||
postV2AddMarketplaceAgent({
|
||||
store_listing_version_id: storeAgent?.store_listing_version_id || "",
|
||||
source: "onboarding",
|
||||
}),
|
||||
);
|
||||
|
||||
const { id: runID } = await api.executeGraph(
|
||||
libraryAgent.graph_id as GraphID,
|
||||
libraryAgent.graph_version,
|
||||
onboarding.state.agentInput || {},
|
||||
inputCredentials,
|
||||
"onboarding",
|
||||
);
|
||||
|
||||
onboarding.updateState({ onboardingAgentExecutionId: runID });
|
||||
|
||||
router.push("/onboarding/6-congrats");
|
||||
} catch (error) {
|
||||
console.error("Error running agent:", error);
|
||||
|
||||
toast({
|
||||
title: "Error running agent",
|
||||
description:
|
||||
"There was an error running your agent. Please try again or try choosing a different agent if it still fails.",
|
||||
variant: "destructive",
|
||||
});
|
||||
|
||||
setRunningAgent(false);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
ready: agentGraphQueryIsSuccess && storeAgentQueryIsSuccess,
|
||||
error: agentGraphQueryError || storeAgentQueryError,
|
||||
agentGraph: agentGraphMeta || null,
|
||||
onboarding,
|
||||
showInput,
|
||||
storeAgent: storeAgent || null,
|
||||
runningAgent,
|
||||
credentialsValid,
|
||||
credentialsLoaded,
|
||||
handleSetAgentInput,
|
||||
handleRunAgent,
|
||||
handleNewRun,
|
||||
handleCredentialsChange: setInputCredentials,
|
||||
handleCredentialsValidationChange: setCredentialsValid,
|
||||
handleCredentialsLoadingChange: (v: boolean) => setCredentialsLoaded(!v),
|
||||
};
|
||||
}
|
||||
@@ -1,127 +0,0 @@
|
||||
"use client";
|
||||
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
import { resolveResponse } from "@/app/api/helpers";
|
||||
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
|
||||
import { postV2AddMarketplaceAgent } from "@/app/api/__generated__/endpoints/library/library";
|
||||
import { Confetti } from "@/components/molecules/Confetti/Confetti";
|
||||
import type { ConfettiRef } from "@/components/molecules/Confetti/Confetti";
|
||||
|
||||
export default function Page() {
|
||||
const { completeStep } = useOnboarding(7, "AGENT_INPUT");
|
||||
const router = useRouter();
|
||||
const api = useBackendAPI();
|
||||
const [showText, setShowText] = useState(false);
|
||||
const [showSubtext, setShowSubtext] = useState(false);
|
||||
const confettiRef = useRef<ConfettiRef>(null);
|
||||
|
||||
useEffect(() => {
|
||||
// Fire side cannons for a celebratory effect
|
||||
const duration = 1500;
|
||||
const end = Date.now() + duration;
|
||||
|
||||
function frame() {
|
||||
confettiRef.current?.fire({
|
||||
particleCount: 4,
|
||||
angle: 60,
|
||||
spread: 70,
|
||||
origin: { x: 0, y: 0.6 },
|
||||
shapes: ["square"],
|
||||
scalar: 0.8,
|
||||
gravity: 0.6,
|
||||
decay: 0.93,
|
||||
});
|
||||
confettiRef.current?.fire({
|
||||
particleCount: 4,
|
||||
angle: 120,
|
||||
spread: 70,
|
||||
origin: { x: 1, y: 0.6 },
|
||||
shapes: ["square"],
|
||||
scalar: 0.8,
|
||||
gravity: 0.6,
|
||||
decay: 0.93,
|
||||
});
|
||||
|
||||
if (Date.now() < end) {
|
||||
requestAnimationFrame(frame);
|
||||
}
|
||||
}
|
||||
|
||||
frame();
|
||||
|
||||
const timer0 = setTimeout(() => {
|
||||
setShowText(true);
|
||||
}, 100);
|
||||
|
||||
const timer1 = setTimeout(() => {
|
||||
setShowSubtext(true);
|
||||
}, 500);
|
||||
|
||||
const timer2 = setTimeout(async () => {
|
||||
completeStep("CONGRATS");
|
||||
|
||||
try {
|
||||
const onboarding = await resolveResponse(getV1OnboardingState());
|
||||
if (onboarding?.selectedStoreListingVersionId) {
|
||||
try {
|
||||
const libraryAgent = await resolveResponse(
|
||||
postV2AddMarketplaceAgent({
|
||||
store_listing_version_id:
|
||||
onboarding.selectedStoreListingVersionId,
|
||||
source: "onboarding",
|
||||
}),
|
||||
);
|
||||
router.replace(`/library/agents/${libraryAgent.id}`);
|
||||
} catch (error) {
|
||||
console.error("Failed to add agent to library:", error);
|
||||
router.replace("/library");
|
||||
}
|
||||
} else {
|
||||
router.replace("/library");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Failed to get onboarding data:", error);
|
||||
router.replace("/library");
|
||||
}
|
||||
}, 3000);
|
||||
|
||||
return () => {
|
||||
clearTimeout(timer0);
|
||||
clearTimeout(timer1);
|
||||
clearTimeout(timer2);
|
||||
};
|
||||
}, [completeStep, router, api]);
|
||||
|
||||
return (
|
||||
<div className="flex h-screen w-screen flex-col items-center justify-center bg-violet-100">
|
||||
<Confetti ref={confettiRef} manualstart />
|
||||
<div
|
||||
className={cn(
|
||||
"z-10 -mb-16 text-9xl duration-500",
|
||||
showText ? "opacity-100" : "opacity-0",
|
||||
)}
|
||||
>
|
||||
🎉
|
||||
</div>
|
||||
<h1
|
||||
className={cn(
|
||||
"font-poppins text-9xl font-medium tracking-tighter text-violet-700 duration-500",
|
||||
showText ? "opacity-100" : "opacity-0",
|
||||
)}
|
||||
>
|
||||
Congrats!
|
||||
</h1>
|
||||
<p
|
||||
className={cn(
|
||||
"mb-16 mt-4 font-poppins text-2xl font-medium text-violet-800 transition-opacity duration-500",
|
||||
showSubtext ? "opacity-100" : "opacity-0",
|
||||
)}
|
||||
>
|
||||
You earned 3$ for running your first agent
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
import StarRating from "./StarRating";
|
||||
import SmartImage from "@/components/__legacy__/SmartImage";
|
||||
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
|
||||
|
||||
type OnboardingAgentCardProps = {
|
||||
agent?: StoreAgentDetails;
|
||||
selected?: boolean;
|
||||
onClick: () => void;
|
||||
};
|
||||
|
||||
export default function OnboardingAgentCard({
|
||||
agent,
|
||||
selected,
|
||||
onClick,
|
||||
}: OnboardingAgentCardProps) {
|
||||
if (!agent) {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"relative animate-pulse",
|
||||
"h-[394px] w-[368px] rounded-[20px] border border-transparent bg-zinc-200",
|
||||
)}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
const {
|
||||
agent_image,
|
||||
creator_avatar,
|
||||
agent_name,
|
||||
description,
|
||||
creator,
|
||||
runs,
|
||||
rating,
|
||||
} = agent;
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"relative cursor-pointer transition-all duration-200 ease-in-out",
|
||||
"h-[394px] w-[368px] rounded-[20px] border border-transparent bg-white",
|
||||
selected ? "bg-[#F5F3FF80]" : "hover:border-zinc-400",
|
||||
)}
|
||||
onClick={onClick}
|
||||
>
|
||||
{/* Image container */}
|
||||
<div className="relative">
|
||||
<SmartImage
|
||||
src={agent_image?.[0]}
|
||||
alt="Agent cover"
|
||||
className="m-2 h-[196px] w-[350px] rounded-[16px]"
|
||||
/>
|
||||
{/* Profile picture overlay */}
|
||||
<div className="absolute bottom-2 left-4">
|
||||
<SmartImage
|
||||
src={creator_avatar}
|
||||
alt="Profile picture"
|
||||
className="h-[50px] w-[50px] rounded-full border border-white"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Content container */}
|
||||
<div className="flex h-[180px] flex-col justify-between px-4 pb-3">
|
||||
{/* Text content wrapper */}
|
||||
<div>
|
||||
{/* Title - 2 lines max */}
|
||||
<p className="data-sentry-unmask text-md line-clamp-2 max-h-[50px] font-sans text-base font-medium leading-normal text-zinc-800">
|
||||
{agent_name}
|
||||
</p>
|
||||
|
||||
{/* Author - single line with truncate */}
|
||||
<p className="data-sentry-unmask truncate text-sm font-normal leading-normal text-zinc-600">
|
||||
by {creator}
|
||||
</p>
|
||||
|
||||
{/* Description - 3 lines max */}
|
||||
<p
|
||||
className={cn(
|
||||
"mt-2 line-clamp-3 text-sm leading-5",
|
||||
selected ? "text-zinc-500" : "text-zinc-400",
|
||||
)}
|
||||
>
|
||||
{description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Bottom stats */}
|
||||
<div className="flex w-full items-center justify-between">
|
||||
<span className="mt-1 font-sans text-sm font-medium text-zinc-800">
|
||||
{runs?.toLocaleString("en-US")} runs
|
||||
</span>
|
||||
<StarRating rating={rating} />
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
className={cn(
|
||||
"pointer-events-none absolute inset-0 rounded-[20px] border-2 transition-all duration-200 ease-in-out",
|
||||
selected ? "border-violet-700" : "border-transparent",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
import { ChevronLeft } from "lucide-react";
|
||||
import Link from "next/link";
|
||||
|
||||
interface OnboardingBackButtonProps {
|
||||
href: string;
|
||||
}
|
||||
|
||||
export default function OnboardingBackButton({
|
||||
href,
|
||||
}: OnboardingBackButtonProps) {
|
||||
return (
|
||||
<Link
|
||||
className="flex items-center gap-2 font-sans text-base font-medium text-zinc-700 transition-colors duration-200 hover:text-zinc-800"
|
||||
href={href}
|
||||
>
|
||||
<ChevronLeft size={24} className="-mr-1" />
|
||||
<span>Back</span>
|
||||
</Link>
|
||||
);
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { LoadingSpinner } from "@/components/__legacy__/ui/loading";
|
||||
import { cn } from "@/lib/utils";
|
||||
import Link from "next/link";
|
||||
|
||||
const variants = {
|
||||
default: "bg-zinc-700 hover:bg-zinc-800",
|
||||
violet: "bg-violet-600 hover:bg-violet-700",
|
||||
};
|
||||
|
||||
type OnboardingButtonProps = {
|
||||
className?: string;
|
||||
variant?: keyof typeof variants;
|
||||
children?: React.ReactNode;
|
||||
loading?: boolean;
|
||||
disabled?: boolean;
|
||||
onClick?: () => void;
|
||||
href?: string;
|
||||
icon?: React.ReactNode;
|
||||
};
|
||||
|
||||
export default function OnboardingButton({
|
||||
className,
|
||||
variant = "default",
|
||||
children,
|
||||
loading,
|
||||
disabled,
|
||||
onClick,
|
||||
href,
|
||||
icon,
|
||||
}: OnboardingButtonProps) {
|
||||
const [internalLoading, setInternalLoading] = useState(false);
|
||||
const isLoading = loading !== undefined ? loading : internalLoading;
|
||||
|
||||
const buttonClasses = useMemo(
|
||||
() =>
|
||||
cn(
|
||||
"font-sans text-white text-sm font-medium",
|
||||
"inline-flex justify-center items-center",
|
||||
"h-12 min-w-[100px] rounded-full py-3 px-5",
|
||||
"transition-colors duration-200",
|
||||
className,
|
||||
disabled ? "bg-zinc-300 cursor-not-allowed" : variants[variant],
|
||||
),
|
||||
[disabled, variant, className],
|
||||
);
|
||||
|
||||
const onClickInternal = useCallback(() => {
|
||||
setInternalLoading(true);
|
||||
if (onClick) {
|
||||
onClick();
|
||||
}
|
||||
}, [setInternalLoading, onClick]);
|
||||
|
||||
if (href && !disabled) {
|
||||
return (
|
||||
<Link href={href} onClick={onClickInternal} className={buttonClasses}>
|
||||
{isLoading && <LoadingSpinner className="mr-2 size-5" />}
|
||||
{icon && !isLoading && <>{icon}</>}
|
||||
{children}
|
||||
</Link>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<button
|
||||
onClick={onClickInternal}
|
||||
disabled={disabled}
|
||||
className={buttonClasses}
|
||||
>
|
||||
{isLoading && <LoadingSpinner className="mr-2 size-5" />}
|
||||
{icon && !isLoading && <>{icon}</>}
|
||||
{children}
|
||||
</button>
|
||||
);
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
import SmartImage from "@/components/__legacy__/SmartImage";
|
||||
|
||||
type OnboardingGridElementProps = {
|
||||
name: string;
|
||||
text: string;
|
||||
icon: string;
|
||||
selected: boolean;
|
||||
onClick: () => void;
|
||||
};
|
||||
|
||||
function OnboardingGridElement({
|
||||
name,
|
||||
text,
|
||||
icon,
|
||||
selected,
|
||||
onClick,
|
||||
}: OnboardingGridElementProps) {
|
||||
return (
|
||||
<button
|
||||
className={cn(
|
||||
"relative flex h-[236px] w-[200px] flex-col items-start gap-2 rounded-xl border border-transparent bg-white p-[15px] font-sans",
|
||||
"transition-all duration-200 ease-in-out",
|
||||
selected ? "bg-[#F5F3FF80]" : "hover:border-zinc-400",
|
||||
)}
|
||||
onClick={onClick}
|
||||
>
|
||||
<SmartImage
|
||||
src={icon}
|
||||
alt={`Logo of ${name}`}
|
||||
imageContain
|
||||
className="h-12 w-12 rounded-lg"
|
||||
/>
|
||||
<span className="text-md mt-4 w-full text-left font-medium leading-normal text-[#121212]">
|
||||
{name}
|
||||
</span>
|
||||
<span className="w-full text-left text-[11.5px] font-normal leading-5 text-zinc-500">
|
||||
{text}
|
||||
</span>
|
||||
<div
|
||||
className={cn(
|
||||
"pointer-events-none absolute inset-0 rounded-xl border-2 transition-all duration-200 ease-in-out",
|
||||
selected ? "border-violet-700" : "border-transparent",
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
type OnboardingGridProps = {
|
||||
className?: string;
|
||||
elements: Array<{
|
||||
name: string;
|
||||
text: string;
|
||||
icon: string;
|
||||
}>;
|
||||
selected?: string[];
|
||||
onSelect: (name: string) => void;
|
||||
};
|
||||
|
||||
export function OnboardingGrid({
|
||||
className,
|
||||
elements,
|
||||
selected,
|
||||
onSelect,
|
||||
}: OnboardingGridProps) {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
className,
|
||||
"grid grid-cols-1 gap-3 sm:grid-cols-2 lg:grid-cols-4",
|
||||
)}
|
||||
>
|
||||
{elements.map((element) => (
|
||||
<OnboardingGridElement
|
||||
key={element.name}
|
||||
name={element.name}
|
||||
text={element.text}
|
||||
icon={element.icon}
|
||||
selected={selected?.includes(element.name) || false}
|
||||
onClick={() => onSelect(element.name)}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
interface OnboardingInputProps {
|
||||
className?: string;
|
||||
placeholder: string;
|
||||
value: string;
|
||||
onChange: (value: string) => void;
|
||||
}
|
||||
|
||||
export default function OnboardingInput({
|
||||
className,
|
||||
placeholder,
|
||||
value,
|
||||
onChange,
|
||||
}: OnboardingInputProps) {
|
||||
return (
|
||||
<input
|
||||
className={cn(
|
||||
className,
|
||||
"font-poppin relative h-[50px] w-[512px] rounded-[25px] border border-transparent bg-white px-4 text-sm font-normal leading-normal text-zinc-900",
|
||||
"transition-all duration-200 ease-in-out placeholder:text-zinc-400",
|
||||
"focus:border-transparent focus:bg-[#F5F3FF80] focus:outline-none focus:ring-2 focus:ring-violet-700",
|
||||
)}
|
||||
placeholder={placeholder}
|
||||
value={value}
|
||||
onChange={(e) => onChange(e.target.value)}
|
||||
/>
|
||||
);
|
||||
}
|
||||
@@ -1,135 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Check } from "lucide-react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
type OnboardingListElementProps = {
|
||||
label: string;
|
||||
text: string;
|
||||
selected?: boolean;
|
||||
custom?: boolean;
|
||||
onClick: (content: string) => void;
|
||||
};
|
||||
|
||||
export function OnboardingListElement({
|
||||
label,
|
||||
text,
|
||||
selected,
|
||||
custom,
|
||||
onClick,
|
||||
}: OnboardingListElementProps) {
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
const [content, setContent] = useState(text);
|
||||
|
||||
useEffect(() => {
|
||||
if (selected && custom && inputRef.current) {
|
||||
inputRef.current.focus();
|
||||
}
|
||||
}, [selected, custom]);
|
||||
|
||||
const setCustomText = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
setContent(e.target.value);
|
||||
onClick(e.target.value);
|
||||
};
|
||||
|
||||
return (
|
||||
<button
|
||||
onClick={() => onClick(content)}
|
||||
className={cn(
|
||||
"relative flex h-[78px] w-[530px] items-center rounded-xl border border-transparent px-5 py-4 transition-all duration-200 ease-in-out",
|
||||
selected ? "bg-[#F5F3FF80]" : "bg-white hover:border-zinc-400",
|
||||
)}
|
||||
>
|
||||
<div className="flex w-full flex-col items-start gap-1">
|
||||
<span className="text-sm font-medium text-zinc-700">{label}</span>
|
||||
{custom && selected ? (
|
||||
<input
|
||||
ref={inputRef}
|
||||
className={cn(
|
||||
selected ? "text-zinc-600" : "text-zinc-400",
|
||||
"font-poppin w-full border-0 bg-[#F5F3FF80] text-sm focus:outline-none",
|
||||
)}
|
||||
placeholder="Please specify"
|
||||
value={content}
|
||||
onChange={setCustomText}
|
||||
/>
|
||||
) : (
|
||||
<span
|
||||
className={cn(
|
||||
selected ? "text-zinc-600" : "text-zinc-400",
|
||||
"text-sm",
|
||||
)}
|
||||
>
|
||||
{custom ? "Please specify" : text}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{!custom && (
|
||||
<div className="absolute right-4">
|
||||
<Check
|
||||
size={24}
|
||||
className={cn(
|
||||
"transition-all duration-200 ease-in-out",
|
||||
selected ? "text-violet-700" : "text-transparent",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<div
|
||||
className={cn(
|
||||
"pointer-events-none absolute inset-0 rounded-xl border-2 transition-all duration-200 ease-in-out",
|
||||
selected ? "border-violet-700" : "border-transparent",
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
type OnboardingListProps = {
|
||||
className?: string;
|
||||
elements: Array<{
|
||||
label: string;
|
||||
text: string;
|
||||
id: string;
|
||||
}>;
|
||||
selectedId?: string | null;
|
||||
onSelect: (id: string) => void;
|
||||
};
|
||||
|
||||
function OnboardingList({
|
||||
className,
|
||||
elements,
|
||||
selectedId,
|
||||
onSelect,
|
||||
}: OnboardingListProps) {
|
||||
const isCustom = useCallback(() => {
|
||||
return (
|
||||
selectedId !== null &&
|
||||
!elements.some((element) => element.id === selectedId)
|
||||
);
|
||||
}, [selectedId, elements]);
|
||||
|
||||
return (
|
||||
<div className={cn(className, "flex flex-col gap-2")}>
|
||||
{elements.map((element) => (
|
||||
<OnboardingListElement
|
||||
key={element.id}
|
||||
label={element.label}
|
||||
text={element.text}
|
||||
selected={element.id === selectedId}
|
||||
onClick={() => onSelect(element.id)}
|
||||
/>
|
||||
))}
|
||||
<OnboardingListElement
|
||||
label="Other"
|
||||
text={isCustom() ? selectedId! : ""}
|
||||
selected={isCustom()}
|
||||
custom
|
||||
onClick={(c) => {
|
||||
onSelect(c);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default OnboardingList;
|
||||
@@ -1,45 +0,0 @@
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
|
||||
interface OnboardingProgressProps {
|
||||
totalSteps: number;
|
||||
toStep: number;
|
||||
}
|
||||
|
||||
export default function OnboardingProgress({
|
||||
totalSteps,
|
||||
toStep,
|
||||
}: OnboardingProgressProps) {
|
||||
const [animatedStep, setAnimatedStep] = useState(toStep - 1);
|
||||
const isInitialMount = useRef(true);
|
||||
|
||||
useEffect(() => {
|
||||
if (isInitialMount.current) {
|
||||
// On initial mount, just set the position without animation
|
||||
isInitialMount.current = false;
|
||||
return;
|
||||
}
|
||||
// After initial mount, animate position changes
|
||||
setAnimatedStep(toStep - 1);
|
||||
}, [toStep]);
|
||||
|
||||
return (
|
||||
<div className="relative flex items-center justify-center gap-3">
|
||||
{/* Background circles */}
|
||||
{Array.from({ length: totalSteps + 1 }).map((_, index) => (
|
||||
<div key={index} className="h-2 w-2 rounded-full bg-zinc-400" />
|
||||
))}
|
||||
|
||||
{/* Animated progress indicator */}
|
||||
<div
|
||||
className={`absolute left-0 h-2 w-7 rounded-full bg-zinc-400 ${
|
||||
!isInitialMount.current
|
||||
? "transition-all duration-300 ease-in-out"
|
||||
: ""
|
||||
}`}
|
||||
style={{
|
||||
transform: `translateX(${animatedStep * 20}px)`,
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
"use client";
|
||||
import { ReactNode } from "react";
|
||||
import OnboardingBackButton from "./OnboardingBackButton";
|
||||
import { cn } from "@/lib/utils";
|
||||
import OnboardingProgress from "./OnboardingProgress";
|
||||
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
|
||||
|
||||
export function OnboardingStep({
|
||||
dotted,
|
||||
children,
|
||||
}: {
|
||||
dotted?: boolean;
|
||||
children: ReactNode;
|
||||
}) {
|
||||
return (
|
||||
<div className="relative flex min-h-screen w-full flex-col">
|
||||
{dotted && (
|
||||
<div className="absolute left-1/2 h-full w-1/2 bg-white bg-[radial-gradient(#e5e7eb77_1px,transparent_1px)] [background-size:10px_10px]"></div>
|
||||
)}
|
||||
<div className="z-10 flex flex-col items-center">{children}</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
interface OnboardingHeaderProps {
|
||||
backHref: string;
|
||||
transparent?: boolean;
|
||||
children?: ReactNode;
|
||||
}
|
||||
|
||||
export function OnboardingHeader({
|
||||
backHref,
|
||||
transparent,
|
||||
children,
|
||||
}: OnboardingHeaderProps) {
|
||||
const { step } = useOnboarding();
|
||||
|
||||
return (
|
||||
<div className="sticky top-0 z-10 w-full">
|
||||
<div
|
||||
className={cn(transparent ? "bg-transparent" : "bg-gray-100", "pb-5")}
|
||||
>
|
||||
<div className="flex w-full items-center justify-between px-5 py-4">
|
||||
<OnboardingBackButton href={backHref} />
|
||||
<OnboardingProgress totalSteps={5} toStep={(step || 1) - 1} />
|
||||
</div>
|
||||
{children}
|
||||
</div>
|
||||
|
||||
{!transparent && (
|
||||
<div className="h-4 w-full bg-gradient-to-b from-gray-100 via-gray-100/50 to-transparent" />
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export function OnboardingFooter({ children }: { children?: ReactNode }) {
|
||||
return (
|
||||
<div className="sticky bottom-0 z-10 w-full">
|
||||
<div className="h-4 w-full bg-gradient-to-t from-gray-100 via-gray-100/50 to-transparent" />
|
||||
<div className="flex justify-center bg-gray-100">
|
||||
<div className="px-5 py-5">{children}</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user