Compare commits

..

7 Commits

Author SHA1 Message Date
Zamil Majdy
95c6907ccd fix(frontend): remove test screenshots from repo
Remove binary test screenshots that bloat the repo. Test evidence
should be in the PR description or CI artifacts, not committed.
2026-04-01 18:03:00 +02:00
Zamil Majdy
f4bc3c2012 test: add test screenshots for PR #12598 stream timeout verification 2026-04-01 17:59:17 +02:00
Zamil Majdy
f265ef8ac3 fix(frontend): use type-safe any cast for createSessionMutation call
The generated mutation type differs between local (void) and CI
(requires CreateSessionRequest) due to export-api-schema regeneration.
Use an explicit any cast to handle both generated type variants.
2026-04-01 17:59:17 +02:00
Zamil Majdy
c79e6ff30a fix(frontend): clear stream timeout on stop and fix pre-existing TS errors
Clear the stream timeout timer immediately when the user clicks stop,
preventing a brief window where the timeout could fire after the user
already cancelled the stream. Also fix pre-existing TypeScript errors
in admin rate-limit components (missing user_email on generated type)
and useChatSession (createSessionMutation arg mismatch).
2026-04-01 17:59:17 +02:00
Zamil Majdy
7db8bf161a style(frontend): remove eslint-disable by referencing rawMessages in effect body
Reference rawMessages.length in the stream timeout effect so the
exhaustive-deps rule is satisfied without an eslint suppressor comment.
2026-04-01 17:59:17 +02:00
Zamil Majdy
84650d0f4d fix(frontend): improve stream timeout toast description
Deduplicate "Connection lost" between title and description — the
description now tells the user what to do next.
2026-04-01 17:59:17 +02:00
Zamil Majdy
0467cb2e49 fix(frontend): add stream timeout to copilot chat
When an SSE stream dies silently (no disconnect event), the UI stays
stuck in "Reasoning..." indefinitely. Add a 60-second inactivity
timeout that auto-cancels the stream and shows an error toast,
prompting the user to retry.
2026-04-01 17:59:17 +02:00
2444 changed files with 822838 additions and 53148 deletions

View File

@@ -1,10 +0,0 @@
{
"permissions": {
"allowedTools": [
"Read", "Grep", "Glob",
"Bash(ls:*)", "Bash(cat:*)", "Bash(grep:*)", "Bash(find:*)",
"Bash(git status:*)", "Bash(git diff:*)", "Bash(git log:*)", "Bash(git worktree:*)",
"Bash(tmux:*)", "Bash(sleep:*)", "Bash(branchlet:*)"
]
}
}

View File

@@ -6,19 +6,11 @@ on:
paths:
- '.github/workflows/classic-autogpt-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/direct_benchmark/**'
- 'classic/forge/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
pull_request:
branches: [ master, dev, release-* ]
paths:
- '.github/workflows/classic-autogpt-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/direct_benchmark/**'
- 'classic/forge/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
concurrency:
group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -27,22 +19,47 @@ concurrency:
defaults:
run:
shell: bash
working-directory: classic
working-directory: classic/original_autogpt
jobs:
test:
permissions:
contents: read
timeout-minutes: 30
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
platform-os: [ubuntu, macos, macos-arm64, windows]
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
steps:
- name: Start MinIO service
# Quite slow on macOS (2~4 minutes to set up Docker)
# - name: Set up Docker (macOS)
# if: runner.os == 'macOS'
# uses: crazy-max/ghaction-setup-docker@v3
- name: Start MinIO service (Linux)
if: runner.os == 'Linux'
working-directory: '.'
run: |
docker pull minio/minio:edge-cicd
docker run -d -p 9000:9000 minio/minio:edge-cicd
- name: Start MinIO service (macOS)
if: runner.os == 'macOS'
working-directory: ${{ runner.temp }}
run: |
brew install minio/stable/minio
mkdir data
minio server ./data &
# No MinIO on Windows:
# - Windows doesn't support running Linux Docker containers
# - It doesn't seem possible to start background processes on Windows. They are
# killed after the step returns.
# See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
- name: Checkout repository
uses: actions/checkout@v4
with:
@@ -54,23 +71,41 @@ jobs:
git config --global user.name "Auto-GPT-Bot"
git config --global user.email "github-bot@agpt.co"
- name: Set up Python 3.12
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: ${{ matrix.python-version }}
- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Set up Python dependency cache
# On Windows, unpacking cached dependencies takes longer than just installing them
if: runner.os != 'Windows'
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}
- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -
- name: Install Poetry (Unix)
if: runner.os != 'Windows'
run: |
curl -sSL https://install.python-poetry.org | python3 -
if [ "${{ runner.os }}" = "macOS" ]; then
PATH="$HOME/.local/bin:$PATH"
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install Poetry (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
$env:PATH += ";$env:APPDATA\Python\Scripts"
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
- name: Install Python dependencies
run: poetry install
@@ -81,13 +116,12 @@ jobs:
--cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
--numprocesses=logical --durations=10 \
--junitxml=junit.xml -o junit_family=legacy \
original_autogpt/tests/unit original_autogpt/tests/integration
tests/unit tests/integration
env:
CI: true
PLAIN_OUTPUT: True
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
S3_ENDPOINT_URL: http://127.0.0.1:9000
S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
@@ -101,11 +135,11 @@ jobs:
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: autogpt-agent
flags: autogpt-agent,${{ runner.os }}
- name: Upload logs to artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: test-logs
path: classic/logs/
path: classic/original_autogpt/logs/

View File

@@ -148,7 +148,7 @@ jobs:
--entrypoint poetry ${{ env.IMAGE_NAME }} run \
pytest -v --cov=autogpt --cov-branch --cov-report term-missing \
--numprocesses=4 --durations=10 \
original_autogpt/tests/unit original_autogpt/tests/integration 2>&1 | tee test_output.txt
tests/unit tests/integration 2>&1 | tee test_output.txt
test_failure=${PIPESTATUS[0]}

View File

@@ -10,9 +10,10 @@ on:
- '.github/workflows/classic-autogpts-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/direct_benchmark/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- 'classic/benchmark/**'
- 'classic/run'
- 'classic/cli.py'
- 'classic/setup.py'
- '!**/*.md'
pull_request:
branches: [ master, dev, release-* ]
@@ -20,9 +21,10 @@ on:
- '.github/workflows/classic-autogpts-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/direct_benchmark/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- 'classic/benchmark/**'
- 'classic/run'
- 'classic/cli.py'
- 'classic/setup.py'
- '!**/*.md'
defaults:
@@ -33,9 +35,13 @@ defaults:
jobs:
serve-agent-protocol:
runs-on: ubuntu-latest
strategy:
matrix:
agent-name: [ original_autogpt ]
fail-fast: false
timeout-minutes: 20
env:
min-python-version: '3.12'
min-python-version: '3.10'
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -49,22 +55,22 @@ jobs:
python-version: ${{ env.min-python-version }}
- name: Install Poetry
working-directory: ./classic/${{ matrix.agent-name }}/
run: |
curl -sSL https://install.python-poetry.org | python -
- name: Install dependencies
run: poetry install
- name: Run smoke tests with direct-benchmark
- name: Run regression tests
run: |
poetry run direct-benchmark run \
--strategies one_shot \
--models claude \
--tests ReadFile,WriteFile \
--json
./run agent start ${{ matrix.agent-name }}
cd ${{ matrix.agent-name }}
poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
poetry run agbenchmark --test=WriteFile
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AGENT_NAME: ${{ matrix.agent-name }}
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
NONINTERACTIVE_MODE: "true"
CI: true
HELICONE_CACHE_ENABLED: false
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
TELEMETRY_ENVIRONMENT: autogpt-ci
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}

View File

@@ -1,24 +1,18 @@
name: Classic - Direct Benchmark CI
name: Classic - AGBenchmark CI
on:
push:
branches: [ master, dev, ci-test* ]
paths:
- 'classic/direct_benchmark/**'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/benchmark/**'
- '!classic/benchmark/reports/**'
- .github/workflows/classic-benchmark-ci.yml
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
pull_request:
branches: [ master, dev, release-* ]
paths:
- 'classic/direct_benchmark/**'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/benchmark/**'
- '!classic/benchmark/reports/**'
- .github/workflows/classic-benchmark-ci.yml
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
concurrency:
group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -29,16 +23,23 @@ defaults:
shell: bash
env:
min-python-version: '3.12'
min-python-version: '3.10'
jobs:
benchmark-tests:
runs-on: ubuntu-latest
test:
permissions:
contents: read
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
platform-os: [ubuntu, macos, macos-arm64, windows]
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
defaults:
run:
shell: bash
working-directory: classic
working-directory: classic/benchmark
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -46,88 +47,71 @@ jobs:
fetch-depth: 0
submodules: true
- name: Set up Python ${{ env.min-python-version }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.min-python-version }}
python-version: ${{ matrix.python-version }}
- name: Set up Python dependency cache
# On Windows, unpacking cached dependencies takes longer than just installing them
if: runner.os != 'Windows'
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
key: poetry-${{ runner.os }}-${{ hashFiles('classic/benchmark/poetry.lock') }}
- name: Install Poetry
- name: Install Poetry (Unix)
if: runner.os != 'Windows'
run: |
curl -sSL https://install.python-poetry.org | python3 -
- name: Install dependencies
if [ "${{ runner.os }}" = "macOS" ]; then
PATH="$HOME/.local/bin:$PATH"
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install Poetry (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
$env:PATH += ";$env:APPDATA\Python\Scripts"
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
- name: Install Python dependencies
run: poetry install
- name: Run basic benchmark tests
- name: Run pytest with coverage
run: |
echo "Testing ReadFile challenge with one_shot strategy..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--tests ReadFile \
--json
echo "Testing WriteFile challenge..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--tests WriteFile \
--json
poetry run pytest -vv \
--cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
--durations=10 \
--junitxml=junit.xml -o junit_family=legacy \
tests
env:
CI: true
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
NONINTERACTIVE_MODE: "true"
- name: Test category filtering
run: |
echo "Testing coding category..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--categories coding \
--tests ReadFile,WriteFile \
--json
env:
CI: true
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
NONINTERACTIVE_MODE: "true"
- name: Upload test results to Codecov
if: ${{ !cancelled() }} # Run even if tests fail
uses: codecov/test-results-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
- name: Test multiple strategies
run: |
echo "Testing multiple strategies..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot,plan_execute \
--models claude \
--tests ReadFile \
--parallel 2 \
--json
env:
CI: true
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
NONINTERACTIVE_MODE: "true"
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: agbenchmark,${{ runner.os }}
# Run regression tests on maintain challenges
regression-tests:
self-test-with-agent:
runs-on: ubuntu-latest
timeout-minutes: 45
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/dev'
defaults:
run:
shell: bash
working-directory: classic
strategy:
matrix:
agent-name: [forge]
fail-fast: false
timeout-minutes: 20
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -140,31 +124,53 @@ jobs:
with:
python-version: ${{ env.min-python-version }}
- name: Set up Python dependency cache
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
- name: Install dependencies
run: poetry install
curl -sSL https://install.python-poetry.org | python -
- name: Run regression tests
working-directory: classic
run: |
echo "Running regression tests (previously beaten challenges)..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--maintain \
--parallel 4 \
--json
./run agent start ${{ matrix.agent-name }}
cd ${{ matrix.agent-name }}
set +e # Ignore non-zero exit codes and continue execution
echo "Running the following command: poetry run agbenchmark --maintain --mock"
poetry run agbenchmark --maintain --mock
EXIT_CODE=$?
set -e # Stop ignoring non-zero exit codes
# Check if the exit code was 5, and if so, exit with 0 instead
if [ $EXIT_CODE -eq 5 ]; then
echo "regression_tests.json is empty."
fi
echo "Running the following command: poetry run agbenchmark --mock"
poetry run agbenchmark --mock
echo "Running the following command: poetry run agbenchmark --mock --category=data"
poetry run agbenchmark --mock --category=data
echo "Running the following command: poetry run agbenchmark --mock --category=coding"
poetry run agbenchmark --mock --category=coding
# echo "Running the following command: poetry run agbenchmark --test=WriteFile"
# poetry run agbenchmark --test=WriteFile
cd ../benchmark
poetry install
echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
export BUILD_SKILL_TREE=true
# poetry run agbenchmark --mock
# CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../classic/frontend/assets)') || echo "No diffs"
# if [ ! -z "$CHANGED" ]; then
# echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
# echo "$CHANGED"
# exit 1
# else
# echo "No unstaged changes."
# fi
env:
CI: true
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
NONINTERACTIVE_MODE: "true"
TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}

View File

@@ -6,15 +6,13 @@ on:
paths:
- '.github/workflows/classic-forge-ci.yml'
- 'classic/forge/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- '!classic/forge/tests/vcr_cassettes'
pull_request:
branches: [ master, dev, release-* ]
paths:
- '.github/workflows/classic-forge-ci.yml'
- 'classic/forge/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- '!classic/forge/tests/vcr_cassettes'
concurrency:
group: ${{ format('forge-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -23,60 +21,131 @@ concurrency:
defaults:
run:
shell: bash
working-directory: classic
working-directory: classic/forge
jobs:
test:
permissions:
contents: read
timeout-minutes: 30
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
platform-os: [ubuntu, macos, macos-arm64, windows]
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
steps:
- name: Start MinIO service
# Quite slow on macOS (2~4 minutes to set up Docker)
# - name: Set up Docker (macOS)
# if: runner.os == 'macOS'
# uses: crazy-max/ghaction-setup-docker@v3
- name: Start MinIO service (Linux)
if: runner.os == 'Linux'
working-directory: '.'
run: |
docker pull minio/minio:edge-cicd
docker run -d -p 9000:9000 minio/minio:edge-cicd
- name: Start MinIO service (macOS)
if: runner.os == 'macOS'
working-directory: ${{ runner.temp }}
run: |
brew install minio/stable/minio
mkdir data
minio server ./data &
# No MinIO on Windows:
# - Windows doesn't support running Linux Docker containers
# - It doesn't seem possible to start background processes on Windows. They are
# killed after the step returns.
# See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: true
- name: Set up Python 3.12
- name: Checkout cassettes
if: ${{ startsWith(github.event_name, 'pull_request') }}
env:
PR_BASE: ${{ github.event.pull_request.base.ref }}
PR_BRANCH: ${{ github.event.pull_request.head.ref }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
run: |
cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
cassette_base_branch="${PR_BASE}"
cd tests/vcr_cassettes
if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
cassette_base_branch="master"
fi
if git ls-remote --exit-code --heads origin $cassette_branch ; then
git fetch origin $cassette_branch
git fetch origin $cassette_base_branch
git checkout $cassette_branch
# Pick non-conflicting cassette updates from the base branch
git merge --no-commit --strategy-option=ours origin/$cassette_base_branch
echo "Using cassettes from mirror branch '$cassette_branch'," \
"synced to upstream branch '$cassette_base_branch'."
else
git checkout -b $cassette_branch
echo "Branch '$cassette_branch' does not exist in cassette submodule." \
"Using cassettes from '$cassette_base_branch'."
fi
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: ${{ matrix.python-version }}
- name: Set up Python dependency cache
# On Windows, unpacking cached dependencies takes longer than just installing them
if: runner.os != 'Windows'
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
key: poetry-${{ runner.os }}-${{ hashFiles('classic/forge/poetry.lock') }}
- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -
- name: Install Poetry (Unix)
if: runner.os != 'Windows'
run: |
curl -sSL https://install.python-poetry.org | python3 -
if [ "${{ runner.os }}" = "macOS" ]; then
PATH="$HOME/.local/bin:$PATH"
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install Poetry (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
$env:PATH += ";$env:APPDATA\Python\Scripts"
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
- name: Install Python dependencies
run: poetry install
- name: Install Playwright browsers
run: poetry run playwright install chromium
- name: Run pytest with coverage
run: |
poetry run pytest -vv \
--cov=forge --cov-branch --cov-report term-missing --cov-report xml \
--durations=10 \
--junitxml=junit.xml -o junit_family=legacy \
forge/forge forge/tests
forge
env:
CI: true
PLAIN_OUTPUT: True
# API keys - tests that need these will skip if not available
# Secrets are not available to fork PRs (GitHub security feature)
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
S3_ENDPOINT_URL: http://127.0.0.1:9000
S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
@@ -90,11 +159,85 @@ jobs:
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: forge
flags: forge,${{ runner.os }}
- id: setup_git_auth
name: Set up git token authentication
# Cassettes may be pushed even when tests fail
if: success() || failure()
run: |
config_key="http.${{ github.server_url }}/.extraheader"
if [ "${{ runner.os }}" = 'macOS' ]; then
base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64)
else
base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0)
fi
git config "$config_key" \
"Authorization: Basic $base64_pat"
cd tests/vcr_cassettes
git config "$config_key" \
"Authorization: Basic $base64_pat"
echo "config_key=$config_key" >> $GITHUB_OUTPUT
- id: push_cassettes
name: Push updated cassettes
# For pull requests, push updated cassettes even when tests fail
if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure()))
env:
PR_BRANCH: ${{ github.event.pull_request.head.ref }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
run: |
if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then
is_pull_request=true
cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
else
cassette_branch="${{ github.ref_name }}"
fi
cd tests/vcr_cassettes
# Commit & push changes to cassettes if any
if ! git diff --quiet; then
git add .
git commit -m "Auto-update cassettes"
git push origin HEAD:$cassette_branch
if [ ! $is_pull_request ]; then
cd ../..
git add tests/vcr_cassettes
git commit -m "Update cassette submodule"
git push origin HEAD:$cassette_branch
fi
echo "updated=true" >> $GITHUB_OUTPUT
else
echo "updated=false" >> $GITHUB_OUTPUT
echo "No cassette changes to commit"
fi
- name: Post Set up git token auth
if: steps.setup_git_auth.outcome == 'success'
run: |
git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
- name: Apply "behaviour change" label and comment on PR
if: ${{ startsWith(github.event_name, 'pull_request') }}
run: |
PR_NUMBER="${{ github.event.pull_request.number }}"
TOKEN="${{ secrets.PAT_REVIEW }}"
REPO="${{ github.repository }}"
if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then
echo "Adding label and comment..."
echo $TOKEN | gh auth login --with-token
gh issue edit $PR_NUMBER --add-label "behaviour change"
gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour on ${{ runner.os }}. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
fi
- name: Upload logs to artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: test-logs
path: classic/logs/
path: classic/forge/logs/

View File

@@ -0,0 +1,60 @@
name: Classic - Frontend CI/CD
on:
push:
branches:
- master
- dev
- 'ci-test*' # This will match any branch that starts with "ci-test"
paths:
- 'classic/frontend/**'
- '.github/workflows/classic-frontend-ci.yml'
pull_request:
paths:
- 'classic/frontend/**'
- '.github/workflows/classic-frontend-ci.yml'
jobs:
build:
permissions:
contents: write
pull-requests: write
runs-on: ubuntu-latest
env:
BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- name: Setup Flutter
uses: subosito/flutter-action@v2
with:
flutter-version: '3.13.2'
- name: Build Flutter to Web
run: |
cd classic/frontend
flutter build web --base-href /app/
# - name: Commit and Push to ${{ env.BUILD_BRANCH }}
# if: github.event_name == 'push'
# run: |
# git config --local user.email "action@github.com"
# git config --local user.name "GitHub Action"
# git add classic/frontend/build/web
# git checkout -B ${{ env.BUILD_BRANCH }}
# git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
# git push -f origin ${{ env.BUILD_BRANCH }}
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
if: github.event_name == 'push'
uses: peter-evans/create-pull-request@v8
with:
add-paths: classic/frontend/build/web
base: ${{ github.ref_name }}
branch: ${{ env.BUILD_BRANCH }}
delete-branch: true
title: "Update frontend build in `${{ github.ref_name }}`"
body: "This PR updates the frontend build based on commit ${{ github.sha }}."
commit-message: "Update frontend build based on commit ${{ github.sha }}"

View File

@@ -7,9 +7,7 @@ on:
- '.github/workflows/classic-python-checks-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/direct_benchmark/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- 'classic/benchmark/**'
- '**.py'
- '!classic/forge/tests/vcr_cassettes'
pull_request:
@@ -18,9 +16,7 @@ on:
- '.github/workflows/classic-python-checks-ci.yml'
- 'classic/original_autogpt/**'
- 'classic/forge/**'
- 'classic/direct_benchmark/**'
- 'classic/pyproject.toml'
- 'classic/poetry.lock'
- 'classic/benchmark/**'
- '**.py'
- '!classic/forge/tests/vcr_cassettes'
@@ -31,13 +27,44 @@ concurrency:
defaults:
run:
shell: bash
working-directory: classic
jobs:
get-changed-parts:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- id: changes-in
name: Determine affected subprojects
uses: dorny/paths-filter@v3
with:
filters: |
original_autogpt:
- classic/original_autogpt/autogpt/**
- classic/original_autogpt/tests/**
- classic/original_autogpt/poetry.lock
forge:
- classic/forge/forge/**
- classic/forge/tests/**
- classic/forge/poetry.lock
benchmark:
- classic/benchmark/agbenchmark/**
- classic/benchmark/tests/**
- classic/benchmark/poetry.lock
outputs:
changed-parts: ${{ steps.changes-in.outputs.changes }}
lint:
needs: get-changed-parts
runs-on: ubuntu-latest
env:
min-python-version: "3.12"
min-python-version: "3.10"
strategy:
matrix:
sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
fail-fast: false
steps:
- name: Checkout repository
@@ -54,31 +81,42 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -
# Install dependencies
- name: Install Python dependencies
run: poetry install
run: poetry -C classic/${{ matrix.sub-package }} install
# Lint
- name: Lint (isort)
run: poetry run isort --check .
working-directory: classic/${{ matrix.sub-package }}
- name: Lint (Black)
if: success() || failure()
run: poetry run black --check .
working-directory: classic/${{ matrix.sub-package }}
- name: Lint (Flake8)
if: success() || failure()
run: poetry run flake8 .
working-directory: classic/${{ matrix.sub-package }}
types:
needs: get-changed-parts
runs-on: ubuntu-latest
env:
min-python-version: "3.12"
min-python-version: "3.10"
strategy:
matrix:
sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
fail-fast: false
steps:
- name: Checkout repository
@@ -95,16 +133,19 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
- name: Install Poetry
run: curl -sSL https://install.python-poetry.org | python3 -
# Install dependencies
- name: Install Python dependencies
run: poetry install
run: poetry -C classic/${{ matrix.sub-package }} install
# Typecheck
- name: Typecheck
if: success() || failure()
run: poetry run pyright
working-directory: classic/${{ matrix.sub-package }}

View File

@@ -269,14 +269,12 @@ jobs:
DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
- name: Run pytest with coverage
- name: Run pytest
run: |
if [[ "${{ runner.debug }}" == "1" ]]; then
poetry run pytest -s -vv -o log_cli=true -o log_cli_level=DEBUG \
--cov=backend --cov-branch --cov-report term-missing --cov-report xml
poetry run pytest -s -vv -o log_cli=true -o log_cli_level=DEBUG
else
poetry run pytest -s -vv \
--cov=backend --cov-branch --cov-report term-missing --cov-report xml
poetry run pytest -s -vv
fi
env:
LOG_LEVEL: ${{ runner.debug && 'DEBUG' || 'INFO' }}
@@ -289,13 +287,11 @@ jobs:
REDIS_PORT: "6379"
ENCRYPTION_KEY: "dvziYgz0KSK8FENhju0ZYi8-fRTfAdlz6YLhdB_jhNw=" # DO NOT USE IN PRODUCTION!!
- name: Upload coverage reports to Codecov
if: ${{ !cancelled() }}
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: platform-backend
files: ./autogpt_platform/backend/coverage.xml
# - name: Upload coverage reports to Codecov
# uses: codecov/codecov-action@v4
# with:
# token: ${{ secrets.CODECOV_TOKEN }}
# flags: backend,${{ runner.os }}
env:
CI: true

View File

@@ -148,11 +148,3 @@ jobs:
- name: Run Integration Tests
run: pnpm test:unit
- name: Upload coverage reports to Codecov
if: ${{ !cancelled() }}
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
flags: platform-frontend
files: ./autogpt_platform/frontend/coverage/cobertura-coverage.xml

10
.gitignore vendored
View File

@@ -3,7 +3,6 @@
classic/original_autogpt/keys.py
classic/original_autogpt/*.json
auto_gpt_workspace/*
.autogpt/
*.mpeg
.env
# Root .env files
@@ -17,7 +16,6 @@ log-ingestion.txt
/logs
*.log
*.mp3
!autogpt_platform/frontend/public/notification.mp3
mem.sqlite3
venvAutoGPT
@@ -161,10 +159,6 @@ CURRENT_BULLETIN.md
# AgBenchmark
classic/benchmark/agbenchmark/reports/
classic/reports/
classic/direct_benchmark/reports/
classic/.benchmark_workspaces/
classic/direct_benchmark/.benchmark_workspaces/
# Nodejs
package-lock.json
@@ -183,13 +177,9 @@ autogpt_platform/backend/settings.py
*.ign.*
.test-contents
**/.claude/settings.local.json
.claude/settings.local.json
CLAUDE.local.md
/autogpt_platform/backend/logs
# Test database
test.db
.next
# Implementation plans (generated by AI agents)
plans/

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "classic/forge/tests/vcr_cassettes"]
path = classic/forge/tests/vcr_cassettes
url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes

View File

@@ -84,16 +84,51 @@ repos:
stages: [pre-commit, post-checkout]
- id: poetry-install
name: Check & Install dependencies - Classic
alias: poetry-install-classic
name: Check & Install dependencies - Classic - AutoGPT
alias: poetry-install-classic-autogpt
entry: >
bash -c '
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
else
git diff --cached --name-only
fi | grep -qE "^classic/poetry\.lock$" || exit 0;
poetry -C classic install
fi | grep -qE "^classic/(original_autogpt|forge)/poetry\.lock$" || exit 0;
poetry -C classic/original_autogpt install
'
# include forge source (since it's a path dependency)
always_run: true
language: system
pass_filenames: false
stages: [pre-commit, post-checkout]
- id: poetry-install
name: Check & Install dependencies - Classic - Forge
alias: poetry-install-classic-forge
entry: >
bash -c '
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
else
git diff --cached --name-only
fi | grep -qE "^classic/forge/poetry\.lock$" || exit 0;
poetry -C classic/forge install
'
always_run: true
language: system
pass_filenames: false
stages: [pre-commit, post-checkout]
- id: poetry-install
name: Check & Install dependencies - Classic - Benchmark
alias: poetry-install-classic-benchmark
entry: >
bash -c '
if [ -n "$PRE_COMMIT_FROM_REF" ]; then
git diff --name-only "$PRE_COMMIT_FROM_REF" "$PRE_COMMIT_TO_REF"
else
git diff --cached --name-only
fi | grep -qE "^classic/benchmark/poetry\.lock$" || exit 0;
poetry -C classic/benchmark install
'
always_run: true
language: system
@@ -188,10 +223,26 @@ repos:
language: system
- id: isort
name: Lint (isort) - Classic
alias: isort-classic
entry: bash -c 'cd classic && poetry run isort $(echo "$@" | sed "s|classic/||g")' --
files: ^classic/(original_autogpt|forge|direct_benchmark)/
name: Lint (isort) - Classic - AutoGPT
alias: isort-classic-autogpt
entry: poetry -P classic/original_autogpt run isort -p autogpt
files: ^classic/original_autogpt/
types: [file, python]
language: system
- id: isort
name: Lint (isort) - Classic - Forge
alias: isort-classic-forge
entry: poetry -P classic/forge run isort -p forge
files: ^classic/forge/
types: [file, python]
language: system
- id: isort
name: Lint (isort) - Classic - Benchmark
alias: isort-classic-benchmark
entry: poetry -P classic/benchmark run isort -p agbenchmark
files: ^classic/benchmark/
types: [file, python]
language: system
@@ -205,13 +256,26 @@ repos:
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
# Use consolidated flake8 config at classic/.flake8
# To have flake8 load the config of the individual subprojects, we have to call
# them separately.
hooks:
- id: flake8
name: Lint (Flake8) - Classic
alias: flake8-classic
files: ^classic/(original_autogpt|forge|direct_benchmark)/
args: [--config=classic/.flake8]
name: Lint (Flake8) - Classic - AutoGPT
alias: flake8-classic-autogpt
files: ^classic/original_autogpt/(autogpt|scripts|tests)/
args: [--config=classic/original_autogpt/.flake8]
- id: flake8
name: Lint (Flake8) - Classic - Forge
alias: flake8-classic-forge
files: ^classic/forge/(forge|tests)/
args: [--config=classic/forge/.flake8]
- id: flake8
name: Lint (Flake8) - Classic - Benchmark
alias: flake8-classic-benchmark
files: ^classic/benchmark/(agbenchmark|tests)/((?!reports).)*[/.]
args: [--config=classic/benchmark/.flake8]
- repo: local
hooks:
@@ -247,10 +311,29 @@ repos:
pass_filenames: false
- id: pyright
name: Typecheck - Classic
alias: pyright-classic
entry: poetry -C classic run pyright
files: ^classic/(original_autogpt|forge|direct_benchmark)/.*\.py$|^classic/poetry\.lock$
name: Typecheck - Classic - AutoGPT
alias: pyright-classic-autogpt
entry: poetry -C classic/original_autogpt run pyright
# include forge source (since it's a path dependency) but exclude *_test.py files:
files: ^(classic/original_autogpt/((autogpt|scripts|tests)/|poetry\.lock$)|classic/forge/(forge/.*(?<!_test)\.py|poetry\.lock)$)
types: [file]
language: system
pass_filenames: false
- id: pyright
name: Typecheck - Classic - Forge
alias: pyright-classic-forge
entry: poetry -C classic/forge run pyright
files: ^classic/forge/(forge/|poetry\.lock$)
types: [file]
language: system
pass_filenames: false
- id: pyright
name: Typecheck - Classic - Benchmark
alias: pyright-classic-benchmark
entry: poetry -C classic/benchmark run pyright
files: ^classic/benchmark/(agbenchmark/|tests/|poetry\.lock$)
types: [file]
language: system
pass_filenames: false
@@ -277,9 +360,26 @@ repos:
# pass_filenames: false
# - id: pytest
# name: Run tests - Classic (excl. slow tests)
# alias: pytest-classic
# entry: bash -c 'cd classic && poetry run pytest -m "not slow"'
# files: ^classic/(original_autogpt|forge|direct_benchmark)/
# name: Run tests - Classic - AutoGPT (excl. slow tests)
# alias: pytest-classic-autogpt
# entry: bash -c 'cd classic/original_autogpt && poetry run pytest --cov=autogpt -m "not slow" tests/unit tests/integration'
# # include forge source (since it's a path dependency) but exclude *_test.py files:
# files: ^(classic/original_autogpt/((autogpt|tests)/|poetry\.lock$)|classic/forge/(forge/.*(?<!_test)\.py|poetry\.lock)$)
# language: system
# pass_filenames: false
# - id: pytest
# name: Run tests - Classic - Forge (excl. slow tests)
# alias: pytest-classic-forge
# entry: bash -c 'cd classic/forge && poetry run pytest --cov=forge -m "not slow"'
# files: ^classic/forge/(forge/|tests/|poetry\.lock$)
# language: system
# pass_filenames: false
# - id: pytest
# name: Run tests - Classic - Benchmark
# alias: pytest-classic-benchmark
# entry: bash -c 'cd classic/benchmark && poetry run pytest --cov=benchmark'
# files: ^classic/benchmark/(agbenchmark/|tests/|poetry\.lock$)
# language: system
# pass_filenames: false

View File

@@ -130,7 +130,7 @@ These examples show just a glimpse of what you can achieve with AutoGPT! You can
All code and content within the `autogpt_platform` folder is licensed under the Polyform Shield License. This new project is our in-development platform for building, deploying and managing agents.</br>_[Read more about this effort](https://agpt.co/blog/introducing-the-autogpt-platform)_
🦉 **MIT License:**
All other portions of the AutoGPT repository (i.e., everything outside the `autogpt_platform` folder) are licensed under the MIT License. This includes the original stand-alone AutoGPT Agent, along with projects such as [Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge) and the [Direct Benchmark](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/direct_benchmark).</br>We also publish additional work under the MIT Licence in other repositories, such as [GravitasML](https://github.com/Significant-Gravitas/gravitasml) which is developed for and used in the AutoGPT Platform. See also our MIT Licenced [Code Ability](https://github.com/Significant-Gravitas/AutoGPT-Code-Ability) project.
All other portions of the AutoGPT repository (i.e., everything outside the `autogpt_platform` folder) are licensed under the MIT License. This includes the original stand-alone AutoGPT Agent, along with projects such as [Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge), [agbenchmark](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/benchmark) and the [AutoGPT Classic GUI](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/frontend).</br>We also publish additional work under the MIT License in other repositories, such as [GravitasML](https://github.com/Significant-Gravitas/gravitasml) which is developed for and used in the AutoGPT Platform. See also our MIT Licensed [Code Ability](https://github.com/Significant-Gravitas/AutoGPT-Code-Ability) project.
---
### Mission
@@ -150,7 +150,7 @@ Be part of the revolution! **AutoGPT** is here to stay, at the forefront of AI i
## 🤖 AutoGPT Classic
> Below is information about the classic version of AutoGPT.
**🛠️ [Build your own Agent - Forge](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge)**
**🛠️ [Build your own Agent - Quickstart](classic/FORGE-QUICKSTART.md)**
### 🏗️ Forge
@@ -161,26 +161,46 @@ This guide will walk you through the process of creating your own agent and usin
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/forge) about Forge
### 🎯 Direct Benchmark
### 🎯 Benchmark
**Measure your agent's performance!** The `direct_benchmark` harness tests agents directly without the agent protocol overhead. It supports multiple prompt strategies (one_shot, reflexion, plan_execute, tree_of_thoughts, etc.) and model configurations, with parallel execution and detailed reporting.
**Measure your agent's performance!** The `agbenchmark` can be used with any agent that supports the agent protocol, and the integration with the project's [CLI] makes it even easier to use with AutoGPT and forge-based agents. The benchmark offers a stringent testing environment. Our framework allows for autonomous, objective performance evaluations, ensuring your agents are primed for real-world action.
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/direct_benchmark) about the Benchmark
<!-- TODO: insert visual demonstrating the benchmark -->
📦 [`agbenchmark`](https://pypi.org/project/agbenchmark/) on Pypi
&ensp;|&ensp;
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/benchmark) about the Benchmark
### 💻 UI
**Makes agents easy to use!** The `frontend` gives you a user-friendly interface to control and monitor your agents. It connects to agents through the [agent protocol](#-agent-protocol), ensuring compatibility with many agents from both inside and outside of our ecosystem.
<!-- TODO: insert screenshot of front end -->
The frontend works out-of-the-box with all agents in the repo. Just use the [CLI] to run your agent of choice!
📘 [Learn More](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic/frontend) about the Frontend
### ⌨️ CLI
[CLI]: #-cli
AutoGPT Classic is run via Poetry from the `classic/` directory:
To make it as easy as possible to use all of the tools offered by the repository, a CLI is included at the root of the repo:
```shell
cd classic
poetry install
poetry run autogpt # Interactive CLI mode
poetry run serve --debug # Agent Protocol server
$ ./run
Usage: cli.py [OPTIONS] COMMAND [ARGS]...
Options:
--help Show this message and exit.
Commands:
agent Commands to create, start and stop agents
benchmark Commands to start the benchmark and list tests and categories
setup Installs dependencies needed for your system.
```
See the [classic README](https://github.com/Significant-Gravitas/AutoGPT/tree/master/classic) for full setup instructions.
Just clone the repo, install dependencies with `./run setup`, and you should be good to go!
## 🤔 Questions? Problems? Suggestions?

View File

@@ -550,8 +550,6 @@ async def reset_copilot_usage(
try:
# Verify the user is actually at or over their daily limit.
# (rate_limit_reset_cost intentionally omitted — this object is only
# used for limit checks, not returned to the client.)
usage_status = await get_usage_status(
user_id=user_id,
daily_token_limit=daily_limit,

View File

@@ -481,11 +481,6 @@ async def create_library_agent(
sensitive_action_safe_mode=sensitive_action_safe_mode,
).model_dump()
),
**(
{"Folder": {"connect": {"id": folder_id}}}
if folder_id and graph_entry is graph
else {}
),
},
},
include=library_agent_include(

View File

@@ -1,61 +0,0 @@
from unittest.mock import AsyncMock
import fastapi
import fastapi.testclient
import pytest
from backend.api.features.v1 import v1_router
# Minimal FastAPI app hosting only the v1 router; exercised via TestClient
# so no real server is started.
app = fastapi.FastAPI()
app.include_router(v1_router)
client = fastapi.testclient.TestClient(app)
@pytest.fixture(autouse=True)
def setup_app_auth(mock_jwt_user):
    """Install the mocked JWT payload dependency for every test, then restore."""
    from autogpt_libs.auth.jwt_utils import get_jwt_payload

    overrides = app.dependency_overrides
    overrides[get_jwt_payload] = mock_jwt_user["get_jwt_payload"]
    yield
    overrides.clear()
def test_onboarding_profile_success(mocker):
    """POSTing a complete profile extracts and persists business understanding."""
    extract_mock = mocker.patch(
        "backend.api.features.v1.extract_business_understanding",
        new_callable=AsyncMock,
    )
    upsert_mock = mocker.patch(
        "backend.api.features.v1.upsert_business_understanding",
        new_callable=AsyncMock,
    )
    from backend.data.understanding import BusinessUnderstandingInput

    # Simulate a successful LLM extraction result.
    extract_mock.return_value = BusinessUnderstandingInput.model_construct(
        user_name="John",
        user_role="Founder/CEO",
        pain_points=["Finding leads"],
        suggested_prompts={"Learn": ["How do I automate lead gen?"]},
    )
    upsert_mock.return_value = AsyncMock()

    payload = {
        "user_name": "John",
        "user_role": "Founder/CEO",
        "pain_points": ["Finding leads", "Email & outreach"],
    }
    resp = client.post("/onboarding/profile", json=payload)

    assert resp.status_code == 200
    extract_mock.assert_awaited_once()
    upsert_mock.assert_awaited_once()
def test_onboarding_profile_missing_fields():
    """Omitting required fields (user_role) must produce a 422 validation error."""
    resp = client.post("/onboarding/profile", json={"user_name": "John"})
    assert resp.status_code == 422

View File

@@ -63,17 +63,12 @@ from backend.data.onboarding import (
UserOnboardingUpdate,
complete_onboarding_step,
complete_re_run_agent,
format_onboarding_for_extraction,
get_recommended_agents,
get_user_onboarding,
onboarding_enabled,
reset_user_onboarding,
update_user_onboarding,
)
from backend.data.tally import extract_business_understanding
from backend.data.understanding import (
BusinessUnderstandingInput,
upsert_business_understanding,
)
from backend.data.user import (
get_or_create_user,
get_user_by_id,
@@ -287,33 +282,35 @@ async def get_onboarding_agents(
return await get_recommended_agents(user_id)
class OnboardingProfileRequest(pydantic.BaseModel):
    """Request body for onboarding profile submission."""

    # Display name supplied by the user (1-100 characters).
    user_name: str = pydantic.Field(min_length=1, max_length=100)
    # Self-described role, e.g. "Founder/CEO" (1-100 characters).
    user_role: str = pydantic.Field(min_length=1, max_length=100)
    # Optional pain points; max_length caps the list at 20 entries.
    pain_points: list[str] = pydantic.Field(default_factory=list, max_length=20)
class OnboardingStatusResponse(pydantic.BaseModel):
"""Response for onboarding completion check."""
"""Response for onboarding status check."""
is_completed: bool
is_onboarding_enabled: bool
is_chat_enabled: bool
@v1_router.get(
"/onboarding/completed",
summary="Check if onboarding is completed",
"/onboarding/enabled",
summary="Is onboarding enabled",
tags=["onboarding", "public"],
response_model=OnboardingStatusResponse,
dependencies=[Security(requires_user)],
)
async def is_onboarding_completed(
async def is_onboarding_enabled(
user_id: Annotated[str, Security(get_user_id)],
) -> OnboardingStatusResponse:
user_onboarding = await get_user_onboarding(user_id)
# Check if chat is enabled for user
is_chat_enabled = await is_feature_enabled(Flag.CHAT, user_id, False)
# If chat is enabled, skip legacy onboarding
if is_chat_enabled:
return OnboardingStatusResponse(
is_onboarding_enabled=False,
is_chat_enabled=True,
)
return OnboardingStatusResponse(
is_completed=OnboardingStep.VISIT_COPILOT in user_onboarding.completedSteps,
is_onboarding_enabled=await onboarding_enabled(),
is_chat_enabled=False,
)
@@ -328,38 +325,6 @@ async def reset_onboarding(user_id: Annotated[str, Security(get_user_id)]):
return await reset_user_onboarding(user_id)
@v1_router.post(
    "/onboarding/profile",
    summary="Submit onboarding profile",
    tags=["onboarding"],
    dependencies=[Security(requires_user)],
)
async def submit_onboarding_profile(
    data: OnboardingProfileRequest,
    user_id: Annotated[str, Security(get_user_id)],
):
    """Persist the user's onboarding profile as business understanding.

    Formats the submitted fields for LLM extraction; if extraction fails,
    falls back to an empty understanding object and backfills it with the
    user-supplied values so the submission is never lost.
    """
    formatted = format_onboarding_for_extraction(
        user_name=data.user_name,
        user_role=data.user_role,
        pain_points=data.pain_points,
    )
    try:
        understanding_input = await extract_business_understanding(formatted)
    except Exception:
        # Best-effort: LLM extraction failure must not block onboarding.
        understanding_input = BusinessUnderstandingInput.model_construct()
    # Ensure the direct fields are set even if LLM missed them
    understanding_input.user_name = data.user_name
    understanding_input.user_role = data.user_role
    if not understanding_input.pain_points:
        understanding_input.pain_points = data.pain_points
    await upsert_business_understanding(user_id, understanding_input)
    return {"status": "ok"}
########################################################
##################### Blocks ###########################
########################################################

View File

@@ -698,30 +698,13 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
if should_pause:
return
# Validate the input data (original or reviewer-modified) once.
# In dry-run mode, credential fields may contain sentinel None values
# that would fail JSON schema required checks. We still validate the
# non-credential fields so blocks that execute for real during dry-run
# (e.g. AgentExecutorBlock) get proper input validation.
is_dry_run = getattr(kwargs.get("execution_context"), "dry_run", False)
if is_dry_run:
cred_field_names = set(self.input_schema.get_credentials_fields().keys())
non_cred_data = {
k: v for k, v in input_data.items() if k not in cred_field_names
}
if error := self.input_schema.validate_data(non_cred_data):
raise BlockInputError(
message=f"Unable to execute block with invalid input data: {error}",
block_name=self.name,
block_id=self.id,
)
else:
if error := self.input_schema.validate_data(input_data):
raise BlockInputError(
message=f"Unable to execute block with invalid input data: {error}",
block_name=self.name,
block_id=self.id,
)
# Validate the input data (original or reviewer-modified) once
if error := self.input_schema.validate_data(input_data):
raise BlockInputError(
message=f"Unable to execute block with invalid input data: {error}",
block_name=self.name,
block_id=self.id,
)
# Use the validated input data
async for output_name, output_data in self.run(

View File

@@ -49,17 +49,11 @@ class AgentExecutorBlock(Block):
@classmethod
def get_missing_input(cls, data: BlockInput) -> set[str]:
required_fields = cls.get_input_schema(data).get("required", [])
# Check against the nested `inputs` dict, not the top-level node
# data — required fields like "topic" live inside data["inputs"],
# not at data["topic"].
provided = data.get("inputs", {})
return set(required_fields) - set(provided)
return set(required_fields) - set(data)
@classmethod
def get_mismatch_error(cls, data: BlockInput) -> str | None:
return validate_with_jsonschema(
cls.get_input_schema(data), data.get("inputs", {})
)
return validate_with_jsonschema(cls.get_input_schema(data), data)
class Output(BlockSchema):
# Use BlockSchema to avoid automatic error field that could clash with graph outputs
@@ -94,7 +88,6 @@ class AgentExecutorBlock(Block):
execution_context=execution_context.model_copy(
update={"parent_execution_id": graph_exec_id},
),
dry_run=execution_context.dry_run,
)
logger = execution_utils.LogMetadata(
@@ -156,19 +149,14 @@ class AgentExecutorBlock(Block):
ExecutionStatus.TERMINATED,
ExecutionStatus.FAILED,
]:
logger.info(
f"Execution {log_id} skipping event {event.event_type} status={event.status} "
f"node={getattr(event, 'node_exec_id', '?')}"
logger.debug(
f"Execution {log_id} received event {event.event_type} with status {event.status}"
)
continue
if event.event_type == ExecutionEventType.GRAPH_EXEC_UPDATE:
# If the graph execution is COMPLETED, TERMINATED, or FAILED,
# we can stop listening for further events.
logger.info(
f"Execution {log_id} graph completed with status {event.status}, "
f"yielded {len(yielded_node_exec_ids)} outputs"
)
self.merge_stats(
NodeExecutionStats(
extra_cost=event.stats.cost if event.stats else 0,

View File

@@ -2,8 +2,6 @@ import copy
from datetime import date, time
from typing import Any, Optional
from pydantic import AliasChoices, Field
from backend.blocks._base import (
Block,
BlockCategory,
@@ -469,8 +467,7 @@ class AgentFileInputBlock(AgentInputBlock):
class AgentDropdownInputBlock(AgentInputBlock):
"""
A specialized text input block that presents a dropdown selector
restricted to a fixed set of values.
A specialized text input block that relies on placeholder_values to present a dropdown.
"""
class Input(AgentInputBlock.Input):
@@ -480,23 +477,16 @@ class AgentDropdownInputBlock(AgentInputBlock):
advanced=False,
title="Default Value",
)
# Use Field() directly (not SchemaField) to pass validation_alias,
# which handles backward compat for legacy "placeholder_values" across
# all construction paths (model_construct, __init__, model_validate).
options: list = Field(
placeholder_values: list = SchemaField(
description="Possible values for the dropdown.",
default_factory=list,
advanced=False,
title="Dropdown Options",
description=(
"If provided, renders the input as a dropdown selector "
"restricted to these values. Leave empty for free-text input."
),
validation_alias=AliasChoices("options", "placeholder_values"),
json_schema_extra={"advanced": False, "secret": False},
)
def generate_schema(self):
schema = super().generate_schema()
if possible_values := self.options:
if possible_values := self.placeholder_values:
schema["enum"] = possible_values
return schema
@@ -514,13 +504,13 @@ class AgentDropdownInputBlock(AgentInputBlock):
{
"value": "Option A",
"name": "dropdown_1",
"options": ["Option A", "Option B", "Option C"],
"placeholder_values": ["Option A", "Option B", "Option C"],
"description": "Dropdown example 1",
},
{
"value": "Option C",
"name": "dropdown_2",
"options": ["Option A", "Option B", "Option C"],
"placeholder_values": ["Option A", "Option B", "Option C"],
"description": "Dropdown example 2",
},
],

View File

@@ -89,12 +89,6 @@ class MCPToolBlock(Block):
default={},
hidden=True,
)
tool_description: str = SchemaField(
description="Description of the selected MCP tool. "
"Populated automatically when a tool is selected.",
default="",
hidden=True,
)
tool_arguments: dict[str, Any] = SchemaField(
description="Arguments to pass to the selected MCP tool. "

View File

@@ -1,323 +0,0 @@
import asyncio
from typing import Any, Literal
from pydantic import SecretStr
from sqlalchemy.engine.url import URL
from sqlalchemy.exc import DBAPIError, OperationalError, ProgrammingError
from backend.blocks._base import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.blocks.sql_query_helpers import (
_DATABASE_TYPE_DEFAULT_PORT,
_DATABASE_TYPE_TO_DRIVER,
DatabaseType,
_execute_query,
_sanitize_error,
_validate_query_is_read_only,
_validate_single_statement,
)
from backend.data.model import (
CredentialsField,
CredentialsMetaInput,
SchemaField,
UserPasswordCredentials,
)
from backend.integrations.providers import ProviderName
from backend.util.request import resolve_and_check_blocked
# Mock credentials consumed by the block's self-test harness (test_credentials
# in SQLQueryBlock.__init__); never used against a real database.
TEST_CREDENTIALS = UserPasswordCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="database",
    username=SecretStr("test_user"),
    password=SecretStr("test_pass"),
    title="Mock Database credentials",
)
# Serialized credential metadata matching TEST_CREDENTIALS, in the shape the
# block's test_input expects for its `credentials` field.
TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    "title": TEST_CREDENTIALS.title,
}
# Aliases tying the generic user/password credential machinery to the
# "database" provider for this block.
DatabaseCredentials = UserPasswordCredentials
DatabaseCredentialsInput = CredentialsMetaInput[
    Literal[ProviderName.DATABASE],
    Literal["user_password"],
]
def DatabaseCredentialsField() -> DatabaseCredentialsInput:
    # Factory for the credentials input field shown on the block.
    return CredentialsField(
        description="Database username and password",
    )
class SQLQueryBlock(Block):
    """Execute a SQL query against PostgreSQL, MySQL, or MSSQL via SQLAlchemy.

    Read-only by default; validates the query is a single statement, blocks
    private/internal hosts (SSRF protection), and sanitizes error messages
    before yielding them so no credentials or infrastructure details leak.
    """

    class Input(BlockSchemaInput):
        # Input schema: connection parameters, the query itself, and safety limits.
        database_type: DatabaseType = SchemaField(
            default=DatabaseType.POSTGRES,
            description="Database engine",
            advanced=False,
        )
        host: SecretStr = SchemaField(
            description=(
                "Database hostname or IP address. "
                "Treated as a secret to avoid leaking infrastructure details. "
                "Private/internal IPs are blocked (SSRF protection)."
            ),
            placeholder="db.example.com",
            secret=True,
        )
        port: int | None = SchemaField(
            default=None,
            description=(
                "Database port (leave empty for default: "
                "PostgreSQL: 5432, MySQL: 3306, MSSQL: 1433)"
            ),
            ge=1,
            le=65535,
        )
        database: str = SchemaField(
            description="Name of the database to connect to",
            placeholder="my_database",
        )
        query: str = SchemaField(
            description="SQL query to execute",
            placeholder="SELECT * FROM analytics.daily_active_users LIMIT 10",
        )
        read_only: bool = SchemaField(
            default=True,
            description=(
                "When enabled (default), only SELECT queries are allowed "
                "and the database session is set to read-only mode. "
                "Disable to allow write operations (INSERT, UPDATE, DELETE, etc.)."
            ),
        )
        timeout: int = SchemaField(
            default=30,
            description="Query timeout in seconds (max 120)",
            ge=1,
            le=120,
        )
        max_rows: int = SchemaField(
            default=1000,
            description="Maximum number of rows to return (max 10000)",
            ge=1,
            le=10000,
        )
        credentials: DatabaseCredentialsInput = DatabaseCredentialsField()

    class Output(BlockSchemaOutput):
        # Output schema: result rows plus metadata, or a sanitized error string.
        results: list[dict[str, Any]] = SchemaField(
            description="Query results as a list of row dictionaries"
        )
        columns: list[str] = SchemaField(
            description="Column names from the query result"
        )
        row_count: int = SchemaField(description="Number of rows returned")
        truncated: bool = SchemaField(
            description=(
                "True when the result set was capped by max_rows, "
                "indicating additional rows exist in the database"
            )
        )
        affected_rows: int = SchemaField(
            description="Number of rows affected by a write query (INSERT/UPDATE/DELETE)"
        )
        error: str = SchemaField(description="Error message if the query failed")

    def __init__(self):
        # test_mock replaces the two I/O seams (execute_query /
        # check_host_allowed) so the self-test never touches the network.
        super().__init__(
            id="4dc35c0f-4fd8-465e-9616-5a216f1ba2bc",
            description=(
                "Execute a SQL query. Read-only by default for safety "
                "-- disable to allow write operations. "
                "Supports PostgreSQL, MySQL, and MSSQL via SQLAlchemy."
            ),
            categories={BlockCategory.DATA},
            input_schema=SQLQueryBlock.Input,
            output_schema=SQLQueryBlock.Output,
            test_input={
                "query": "SELECT 1 AS test_col",
                "database_type": DatabaseType.POSTGRES,
                "host": "localhost",
                "database": "test_db",
                "timeout": 30,
                "max_rows": 1000,
                "credentials": TEST_CREDENTIALS_INPUT,
            },
            test_credentials=TEST_CREDENTIALS,
            test_output=[
                ("results", [{"test_col": 1}]),
                ("columns", ["test_col"]),
                ("row_count", 1),
                ("truncated", False),
            ],
            test_mock={
                # -1 affected rows means "not a write"; see run() below.
                "execute_query": lambda *_args, **_kwargs: (
                    [{"test_col": 1}],
                    ["test_col"],
                    -1,
                    False,
                ),
                "check_host_allowed": lambda *_args, **_kwargs: ["127.0.0.1"],
            },
        )

    @staticmethod
    async def check_host_allowed(host: str) -> list[str]:
        """Validate that the given host is not a private/blocked address.
        Returns the list of resolved IP addresses so the caller can pin the
        connection to the validated IP (preventing DNS rebinding / TOCTOU).
        Raises ValueError or OSError if the host is blocked.
        Extracted as a method so it can be mocked during block tests.
        """
        return await resolve_and_check_blocked(host)

    @staticmethod
    def execute_query(
        connection_url: URL | str,
        query: str,
        timeout: int,
        max_rows: int,
        read_only: bool = True,
        database_type: DatabaseType = DatabaseType.POSTGRES,
    ) -> tuple[list[dict[str, Any]], list[str], int, bool]:
        """Execute a SQL query and return (rows, columns, affected_rows, truncated).
        Delegates to ``_execute_query`` in ``sql_query_helpers``.
        Extracted as a method so it can be mocked during block tests.
        """
        return _execute_query(
            connection_url=connection_url,
            query=query,
            timeout=timeout,
            max_rows=max_rows,
            read_only=read_only,
            database_type=database_type,
        )

    async def run(
        self,
        input_data: Input,
        *,
        credentials: DatabaseCredentials,
        **_kwargs: Any,
    ) -> BlockOutput:
        """Validate, connect, and execute the query, yielding outputs or a
        sanitized error. Errors are yielded (not raised) so downstream nodes
        can handle them."""
        # Validate query structure and read-only constraints.
        error = self._validate_query(input_data)
        if error:
            yield "error", error
            return
        # Validate host and resolve for SSRF protection.
        host, pinned_host, error = await self._resolve_host(input_data)
        if error:
            yield "error", error
            return
        # Build connection URL and execute.
        # Connect via the pinned, already-validated IP (not the hostname) to
        # avoid DNS-rebinding between validation and connection.
        port = input_data.port or _DATABASE_TYPE_DEFAULT_PORT[input_data.database_type]
        username = credentials.username.get_secret_value()
        connection_url = URL.create(
            drivername=_DATABASE_TYPE_TO_DRIVER[input_data.database_type],
            username=username,
            password=credentials.password.get_secret_value(),
            host=pinned_host,
            port=port,
            database=input_data.database,
        )
        conn_str = connection_url.render_as_string(hide_password=True)
        db_name = input_data.database

        def _sanitize(err: Exception) -> str:
            # Scrub credentials/host/port/db name from driver error text.
            return _sanitize_error(
                str(err).strip(),
                conn_str,
                host=pinned_host,
                original_host=host,
                username=username,
                port=port,
                database=db_name,
            )

        try:
            # execute_query is synchronous (SQLAlchemy); run it off the event loop.
            results, columns, affected, truncated = await asyncio.to_thread(
                self.execute_query,
                connection_url=connection_url,
                query=input_data.query,
                timeout=input_data.timeout,
                max_rows=input_data.max_rows,
                read_only=input_data.read_only,
                database_type=input_data.database_type,
            )
            yield "results", results
            yield "columns", columns
            yield "row_count", len(results)
            yield "truncated", truncated
            # affected < 0 signals "not a write query"; only emit for writes.
            if affected >= 0:
                yield "affected_rows", affected
        except OperationalError as e:
            yield (
                "error",
                self._classify_operational_error(
                    _sanitize(e),
                    input_data.timeout,
                ),
            )
        except ProgrammingError as e:
            yield "error", f"SQL error: {_sanitize(e)}"
        except DBAPIError as e:
            yield "error", f"Database error: {_sanitize(e)}"
        except ModuleNotFoundError:
            # Missing DBAPI driver is a deployment problem, not a user error.
            yield (
                "error",
                (
                    f"Database driver not available for "
                    f"{input_data.database_type.value}. "
                    f"Please contact the platform administrator."
                ),
            )

    @staticmethod
    def _validate_query(input_data: "SQLQueryBlock.Input") -> str | None:
        """Validate query structure and read-only constraints."""
        stmt_error, parsed_stmt = _validate_single_statement(input_data.query)
        if stmt_error:
            return stmt_error
        assert parsed_stmt is not None
        if input_data.read_only:
            return _validate_query_is_read_only(parsed_stmt)
        return None

    async def _resolve_host(
        self, input_data: "SQLQueryBlock.Input"
    ) -> tuple[str, str, str | None]:
        """Validate and resolve the database host. Returns (host, pinned_ip, error)."""
        host = input_data.host.get_secret_value().strip()
        if not host:
            return "", "", "Database host is required."
        if host.startswith("/"):
            # Leading "/" would be interpreted as a Unix socket path.
            return host, "", "Unix socket connections are not allowed."
        try:
            resolved_ips = await self.check_host_allowed(host)
        except (ValueError, OSError) as e:
            return host, "", f"Blocked host: {str(e).strip()}"
        return host, resolved_ips[0], None

    @staticmethod
    def _classify_operational_error(sanitized_msg: str, timeout: int) -> str:
        """Classify an already-sanitized OperationalError for user display."""
        lower = sanitized_msg.lower()
        if "timeout" in lower or "cancel" in lower:
            return f"Query timed out after {timeout}s."
        if "connect" in lower:
            return f"Failed to connect to database: {sanitized_msg}"
        return f"Database error: {sanitized_msg}"

File diff suppressed because it is too large Load Diff

View File

@@ -1,430 +0,0 @@
import re
from datetime import date, datetime, time
from decimal import Decimal
from enum import Enum
from typing import Any
import sqlparse
from sqlalchemy import create_engine, text
from sqlalchemy.engine.url import URL
class DatabaseType(str, Enum):
    """Supported database engines for the SQL query block."""

    POSTGRES = "postgres"
    MYSQL = "mysql"
    MSSQL = "mssql"


# Defense-in-depth: reject queries containing data-modifying keywords.
# These are checked against parsed SQL tokens (not raw text) so column names
# and string literals do not cause false positives.
_DISALLOWED_KEYWORDS = {
    "INSERT",
    "UPDATE",
    "DELETE",
    "DROP",
    "ALTER",
    "CREATE",
    "TRUNCATE",
    "GRANT",
    "REVOKE",
    "COPY",
    "EXECUTE",
    "CALL",
    "SET",
    "RESET",
    "DISCARD",
    "NOTIFY",
    "DO",
    # MySQL file exfiltration: LOAD DATA LOCAL INFILE reads server/client files
    "LOAD",
    # MySQL REPLACE is INSERT-or-UPDATE; data modification
    "REPLACE",
    # ANSI MERGE (UPSERT) modifies data
    "MERGE",
    # MSSQL BULK INSERT loads external files into tables
    "BULK",
    # MSSQL EXEC / EXEC sp_name runs stored procedures (arbitrary code)
    "EXEC",
}

# Map DatabaseType enum values to the expected SQLAlchemy driver prefix.
_DATABASE_TYPE_TO_DRIVER = {
    DatabaseType.POSTGRES: "postgresql",
    DatabaseType.MYSQL: "mysql+pymysql",
    DatabaseType.MSSQL: "mssql+pymssql",
}

# Connection timeout in seconds passed to the DBAPI driver (connect_timeout /
# login_timeout). This bounds how long the driver waits to establish a TCP
# connection to the database server. It is separate from the per-statement
# timeout configured via SET commands inside _configure_session().
_CONNECT_TIMEOUT_SECONDS = 10

# Default ports for each database type.
_DATABASE_TYPE_DEFAULT_PORT = {
    DatabaseType.POSTGRES: 5432,
    DatabaseType.MYSQL: 3306,
    DatabaseType.MSSQL: 1433,
}
def _sanitize_error(
error_msg: str,
connection_string: str,
*,
host: str = "",
original_host: str = "",
username: str = "",
port: int = 0,
database: str = "",
) -> str:
"""Remove connection string, credentials, and infrastructure details
from error messages so they are safe to expose to the LLM.
Scrubs:
- The full connection string
- URL-embedded credentials (``://user:pass@``)
- ``password=<value>`` key-value pairs
- The database hostname / IP used for the connection
- The original (pre-resolution) hostname provided by the user
- Any IPv4 addresses that appear in the message
- Any bracketed IPv6 addresses (e.g. ``[::1]``, ``[fe80::1%eth0]``)
- The database username
- The database port number
- The database name
"""
sanitized = error_msg.replace(connection_string, "<connection_string>")
sanitized = re.sub(r"password=[^\s&]+", "password=***", sanitized)
sanitized = re.sub(r"://[^@]+@", "://***:***@", sanitized)
# Replace the known host (may be an IP already) before the generic IP pass.
# Also replace the original (pre-DNS-resolution) hostname if it differs.
if original_host and original_host != host:
sanitized = sanitized.replace(original_host, "<host>")
if host:
sanitized = sanitized.replace(host, "<host>")
# Replace any remaining IPv4 addresses (e.g. resolved IPs the driver logs)
sanitized = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", "<ip>", sanitized)
# Replace bracketed IPv6 addresses (e.g. "[::1]", "[fe80::1%eth0]")
sanitized = re.sub(r"\[[0-9a-fA-F:]+(?:%[^\]]+)?\]", "<ip>", sanitized)
# Replace the database username (handles double-quoted, single-quoted,
# and unquoted formats across PostgreSQL, MySQL, and MSSQL error messages).
if username:
sanitized = re.sub(
r"""for user ["']?""" + re.escape(username) + r"""["']?""",
"for user <user>",
sanitized,
)
# Catch remaining bare occurrences in various quote styles:
# - PostgreSQL: "FATAL: role "myuser" does not exist"
# - MySQL: "Access denied for user 'myuser'@'host'"
# - MSSQL: "Login failed for user 'myuser'"
sanitized = sanitized.replace(f'"{username}"', "<user>")
sanitized = sanitized.replace(f"'{username}'", "<user>")
# Replace the port number (handles "port 5432" and ":5432" formats)
if port:
port_str = re.escape(str(port))
sanitized = re.sub(
r"(?:port |:)" + port_str + r"(?![0-9])",
lambda m: ("port " if m.group().startswith("p") else ":") + "<port>",
sanitized,
)
# Replace the database name to avoid leaking internal infrastructure names.
# Use word-boundary regex to prevent mangling when the database name is a
# common substring (e.g. "test", "data", "on").
if database:
sanitized = re.sub(r"\b" + re.escape(database) + r"\b", "<database>", sanitized)
return sanitized
def _extract_keyword_tokens(parsed: sqlparse.sql.Statement) -> list[str]:
    """Collect the upper-cased keyword tokens of a parsed SQL statement.

    Relies on sqlparse's token-type classification: only Keyword / DML /
    DDL / DCL tokens are gathered, so identifiers and string literals
    (which carry different token types) never appear in the result.
    """
    keyword_types = (
        sqlparse.tokens.Keyword,
        sqlparse.tokens.Keyword.DML,
        sqlparse.tokens.Keyword.DDL,
        sqlparse.tokens.Keyword.DCL,
    )
    keywords: list[str] = []
    for token in parsed.flatten():
        if token.ttype in keyword_types:
            keywords.append(token.normalized.upper())
    return keywords
def _has_disallowed_into(stmt: sqlparse.sql.Statement) -> bool:
    """Return ``True`` if the statement contains a disallowed ``INTO``.

    The only permitted form is MySQL's read-only
    ``SELECT ... INTO @variable`` (stores the result in a session-scoped
    user variable). Every other ``INTO`` is data-modifying or
    file-writing and is blocked:

    * ``SELECT ... INTO new_table`` (PostgreSQL / MSSQL creates a table)
    * ``SELECT ... INTO OUTFILE`` / ``INTO DUMPFILE`` (MySQL filesystem writes)
    * ``INSERT INTO ...`` (INSERT is already blocked; INTO is rejected
      here too for defense-in-depth)
    """
    tokens = list(stmt.flatten())
    total = len(tokens)
    for idx, token in enumerate(tokens):
        is_into = (
            token.ttype in (sqlparse.tokens.Keyword,)
            and token.normalized.upper() == "INTO"
        )
        if not is_into:
            continue
        # Advance to the first non-whitespace token after INTO.
        follow = idx + 1
        while follow < total and tokens[follow].ttype is sqlparse.tokens.Text.Whitespace:
            follow += 1
        if follow >= total:
            # Trailing INTO is malformed; reject it.
            return True
        nxt = tokens[follow]
        # MySQL user variable: a Name token starting with "@", or a bare
        # "@" Operator token followed by a Name.
        user_variable = (
            nxt.ttype is sqlparse.tokens.Name and nxt.value.startswith("@")
        ) or (nxt.ttype is sqlparse.tokens.Operator and nxt.value == "@")
        if not user_variable:
            # Table name, OUTFILE, DUMPFILE, etc. — disallowed.
            return True
    return False
def _validate_query_is_read_only(stmt: sqlparse.sql.Statement) -> str | None:
    """Check that an already-parsed statement is read-only.

    Takes the parsed statement from ``_validate_single_statement`` so the
    SQL is not parsed twice. Three checks are applied:

    1. The statement type must be SELECT (sqlparse reports WITH...SELECT
       as SELECT too).
    2. No disallowed keyword tokens (INSERT, UPDATE, DROP, ...).
    3. No disallowed INTO clause (MySQL ``INTO @variable`` is allowed).

    Returns an error message when the query is not read-only, otherwise
    ``None``.
    """
    if stmt.get_type() != "SELECT":
        return "Only SELECT queries are allowed."
    # Defense-in-depth: inspect every parsed keyword token.
    for keyword in _extract_keyword_tokens(stmt):
        # Multi-word tokens like "SET LOCAL" are matched on their head word.
        head = keyword.split()[0] if " " in keyword else keyword
        if head in _DISALLOWED_KEYWORDS:
            return f"Disallowed SQL keyword: {keyword}"
    # INTO needs contextual handling (MySQL @variables are legal).
    if _has_disallowed_into(stmt):
        return "Disallowed SQL keyword: INTO"
    return None
def _validate_single_statement(
    query: str,
) -> tuple[str | None, sqlparse.sql.Statement | None]:
    """Ensure the query holds exactly one non-empty SQL statement.

    Returns ``(error_message, parsed_statement)``. A non-``None`` error
    message means the query is invalid, in which case the statement is
    ``None``.
    """
    cleaned = query.strip().rstrip(";").strip()
    if not cleaned:
        return "Query is empty.", None

    def _is_substantive(statement: sqlparse.sql.Statement) -> bool:
        # Drop statements that are empty or consist solely of
        # whitespace and comments.
        if not statement.tokens or not str(statement).strip():
            return False
        return any(
            not (tok.is_whitespace or tok.ttype in sqlparse.tokens.Comment)
            for tok in statement.flatten()
        )

    statements = [s for s in sqlparse.parse(cleaned) if _is_substantive(s)]
    if not statements:
        return "Query is empty.", None
    # More than one statement means a possible semicolon-injection attempt.
    if len(statements) > 1:
        return "Only single statements are allowed.", None
    return None, statements[0]
def _serialize_value(value: Any) -> Any:
"""Convert database-specific types to JSON-serializable Python types."""
if isinstance(value, Decimal):
# NaN / Infinity are not valid JSON numbers; serialize as strings.
if value.is_nan() or value.is_infinite():
return str(value)
# Use int for whole numbers; use str for fractional to preserve exact
# precision (float would silently round high-precision analytics values).
if value == value.to_integral_value():
return int(value)
return str(value)
if isinstance(value, (datetime, date, time)):
return value.isoformat()
if isinstance(value, memoryview):
return bytes(value).hex()
if isinstance(value, bytes):
return value.hex()
return value
def _configure_session(
    conn: Any,
    dialect_name: str,
    timeout_ms: str,
    read_only: bool,
) -> None:
    """Apply session-level timeout and read-only settings per dialect.

    Timeout limitations by database:

    * **PostgreSQL** — ``statement_timeout`` reliably cancels any running
      statement (SELECT or DML) after the configured duration.
    * **MySQL** — ``MAX_EXECUTION_TIME`` applies only to read-only SELECT
      statements; DML/DDL are not bounded by it and rely on the server's
      ``wait_timeout`` / ``interactive_timeout``. MySQL has no
      session-level setting that reliably cancels long-running writes.
    * **MSSQL** — ``SET LOCK_TIMEOUT`` only bounds lock-wait time.
      CPU-bound queries without lock contention are NOT cancelled; MSSQL
      has no session-level ``statement_timeout`` equivalent (the closest
      is Resource Governor, which needs sysadmin configuration).

    MSSQL also lacks a session read-only mode; read-only enforcement
    there relies on SQL validation (_validate_query_is_read_only) plus
    the ROLLBACK performed after execution.

    Unrecognised dialect names (e.g. SQLite, which DatabaseType
    intentionally excludes) make this function a silent no-op.
    """
    commands: list[str] = []
    if dialect_name == "postgresql":
        commands.append("SET statement_timeout = " + timeout_ms)
        if read_only:
            commands.append("SET default_transaction_read_only = ON")
    elif dialect_name == "mysql":
        # MAX_EXECUTION_TIME only bounds SELECTs — see docstring.
        commands.append("SET SESSION MAX_EXECUTION_TIME = " + timeout_ms)
        if read_only:
            commands.append("SET SESSION TRANSACTION READ ONLY")
    elif dialect_name == "mssql":
        # Lock-wait limit only; CPU-bound queries are not cancelled.
        commands.append("SET LOCK_TIMEOUT " + timeout_ms)
    for command in commands:
        conn.execute(text(command))
def _run_in_transaction(
    conn: Any,
    dialect_name: str,
    query: str,
    max_rows: int,
    read_only: bool,
) -> tuple[list[dict[str, Any]], list[str], int, bool]:
    """Run *query* inside an explicit transaction and collect results.

    Returns ``(rows, columns, affected_rows, truncated)``. *truncated* is
    ``True`` when ``fetchmany`` returned exactly ``max_rows`` rows, i.e.
    additional rows may remain in the result set. On failure the
    transaction is rolled back (best-effort) and the exception re-raised;
    on success it is rolled back when *read_only*, committed otherwise.
    """
    # T-SQL spells it "BEGIN TRANSACTION"; the other dialects use "BEGIN".
    conn.execute(text("BEGIN TRANSACTION" if dialect_name == "mssql" else "BEGIN"))
    try:
        result = conn.execute(text(query))
        has_rows = result.returns_rows
        affected = -1 if has_rows else result.rowcount
        columns = list(result.keys()) if has_rows else []
        fetched = result.fetchmany(max_rows) if has_rows else []
        truncated = len(fetched) == max_rows
        rows = [
            {name: _serialize_value(cell) for name, cell in zip(columns, row)}
            for row in fetched
        ]
    except Exception:
        # Best-effort rollback; the original error is what matters.
        try:
            conn.execute(text("ROLLBACK"))
        except Exception:
            pass
        raise
    conn.execute(text("ROLLBACK" if read_only else "COMMIT"))
    return rows, columns, affected, truncated
def _execute_query(
    connection_url: URL | str,
    query: str,
    timeout: int,
    max_rows: int,
    read_only: bool = True,
    database_type: DatabaseType = DatabaseType.POSTGRES,
) -> tuple[list[dict[str, Any]], list[str], int, bool]:
    """Execute a SQL query, returning (rows, columns, affected_rows, truncated).

    Connects via SQLAlchemy to any supported database. SELECT results are
    capped at ``max_rows`` through DBAPI ``fetchmany``; ``truncated``
    signals when that cap was hit. For write queries ``affected_rows``
    carries the driver's rowcount. With ``read_only=True`` the session is
    put into read-only mode (where the dialect supports it) and the
    transaction is always rolled back. The engine is disposed on exit.
    """
    # pymssql names its connect-timeout argument "login_timeout";
    # PostgreSQL and MySQL drivers use "connect_timeout".
    if database_type == DatabaseType.MSSQL:
        timeout_key = "login_timeout"
    else:
        timeout_key = "connect_timeout"
    engine = create_engine(
        connection_url, connect_args={timeout_key: _CONNECT_TIMEOUT_SECONDS}
    )
    try:
        with engine.connect() as raw_conn:
            # AUTOCOMMIT makes the SET commands take effect immediately.
            conn = raw_conn.execution_options(isolation_level="AUTOCOMMIT")
            # Timeout in milliseconds. The value is Pydantic-validated
            # (ge=1, le=120) upstream; int() is defense-in-depth. SET
            # commands do not accept bind parameters in most databases,
            # so a safely-interpolated integer string is used instead.
            timeout_ms = str(int(timeout * 1000))
            dialect = engine.dialect.name
            _configure_session(conn, dialect, timeout_ms, read_only)
            return _run_in_transaction(conn, dialect, query, max_rows, read_only)
    finally:
        engine.dispose()

View File

@@ -300,27 +300,13 @@ def test_agent_input_block_ignores_legacy_placeholder_values():
def test_dropdown_input_block_produces_enum():
"""Verify AgentDropdownInputBlock.Input.generate_schema() produces enum
using the canonical 'options' field name."""
opts = ["Option A", "Option B"]
"""Verify AgentDropdownInputBlock.Input.generate_schema() produces enum."""
options = ["Option A", "Option B"]
instance = AgentDropdownInputBlock.Input.model_construct(
name="choice", value=None, options=opts
name="choice", value=None, placeholder_values=options
)
schema = instance.generate_schema()
assert schema.get("enum") == opts
def test_dropdown_input_block_legacy_placeholder_values_produces_enum():
"""Verify backward compat: passing legacy 'placeholder_values' to
AgentDropdownInputBlock still produces enum via model_construct remap."""
opts = ["Option A", "Option B"]
instance = AgentDropdownInputBlock.Input.model_construct(
name="choice", value=None, placeholder_values=opts
)
schema = instance.generate_schema()
assert (
schema.get("enum") == opts
), "Legacy placeholder_values should be remapped to options"
assert schema.get("enum") == options
def test_generate_schema_integration_legacy_placeholder_values():
@@ -343,11 +329,11 @@ def test_generate_schema_integration_legacy_placeholder_values():
def test_generate_schema_integration_dropdown_produces_enum():
"""Test the full Graph._generate_schema path with AgentDropdownInputBlock
— verifies enum IS produced for dropdown blocks using canonical field name."""
— verifies enum IS produced for dropdown blocks."""
dropdown_input_default = {
"name": "color",
"value": None,
"options": ["Red", "Green", "Blue"],
"placeholder_values": ["Red", "Green", "Blue"],
}
result = BaseGraph._generate_schema(
(AgentDropdownInputBlock.Input, dropdown_input_default),
@@ -358,36 +344,3 @@ def test_generate_schema_integration_dropdown_produces_enum():
"Green",
"Blue",
], "Graph schema should contain enum from AgentDropdownInputBlock"
def test_generate_schema_integration_dropdown_legacy_placeholder_values():
"""Test the full Graph._generate_schema path with AgentDropdownInputBlock
using legacy 'placeholder_values' — verifies backward compat produces enum."""
legacy_dropdown_input_default = {
"name": "color",
"value": None,
"placeholder_values": ["Red", "Green", "Blue"],
}
result = BaseGraph._generate_schema(
(AgentDropdownInputBlock.Input, legacy_dropdown_input_default),
)
color_props = result["properties"]["color"]
assert color_props.get("enum") == [
"Red",
"Green",
"Blue",
], "Legacy placeholder_values should still produce enum via model_construct remap"
def test_dropdown_input_block_init_legacy_placeholder_values():
"""Verify backward compat: constructing AgentDropdownInputBlock.Input via
model_validate with legacy 'placeholder_values' correctly maps to 'options'."""
opts = ["Option A", "Option B"]
instance = AgentDropdownInputBlock.Input.model_validate(
{"name": "choice", "value": None, "placeholder_values": opts}
)
assert (
instance.options == opts
), "Legacy placeholder_values should be remapped to options via model_validate"
schema = instance.generate_schema()
assert schema.get("enum") == opts

View File

@@ -20,10 +20,6 @@ class ChatConfig(BaseSettings):
default="openai/gpt-4o-mini",
description="Model to use for generating session titles (should be fast/cheap)",
)
simulation_model: str = Field(
default="google/gemini-2.5-flash",
description="Model for dry-run block simulation (should be fast/cheap with good JSON output)",
)
api_key: str | None = Field(default=None, description="OpenAI API key")
base_url: str | None = Field(
default=OPENROUTER_BASE_URL,

View File

@@ -149,8 +149,7 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
Allowed:
- Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``
or ``tool-outputs/...``.
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``.
The SDK nests tool-results under a conversation UUID directory;
the UUID segment is validated with ``_UUID_RE``.
"""
@@ -175,20 +174,17 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
# Defence-in-depth: ensure project_dir didn't escape the base.
if not project_dir.startswith(SDK_PROJECTS_DIR + os.sep):
return False
# Only allow: <encoded-cwd>/<uuid>/<tool-dir>/<file>
# Only allow: <encoded-cwd>/<uuid>/tool-results/<file>
# The SDK always creates a conversation UUID directory between
# the project dir and the tool directory.
# Accept both "tool-results" (SDK's persisted outputs) and
# "tool-outputs" (the model sometimes confuses workspace paths
# with filesystem paths and generates this variant).
# the project dir and tool-results/.
if resolved.startswith(project_dir + os.sep):
relative = resolved[len(project_dir) + 1 :]
parts = relative.split(os.sep)
# Require exactly: [<uuid>, "tool-results"|"tool-outputs", <file>, ...]
# Require exactly: [<uuid>, "tool-results", <file>, ...]
if (
len(parts) >= 3
and _UUID_RE.match(parts[0])
and parts[1] in ("tool-results", "tool-outputs")
and parts[1] == "tool-results"
):
return True

View File

@@ -134,21 +134,6 @@ def test_is_allowed_local_path_tool_results_with_uuid():
_current_project_dir.set("")
def test_is_allowed_local_path_tool_outputs_with_uuid():
"""Files under <encoded-cwd>/<uuid>/tool-outputs/ are also allowed."""
encoded = "test-encoded-dir"
conv_uuid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
path = os.path.join(
SDK_PROJECTS_DIR, encoded, conv_uuid, "tool-outputs", "output.json"
)
_current_project_dir.set(encoded)
try:
assert is_allowed_local_path(path, sdk_cwd=None)
finally:
_current_project_dir.set("")
def test_is_allowed_local_path_tool_results_without_uuid_rejected():
"""Direct <encoded-cwd>/tool-results/ (no UUID) is rejected."""
encoded = "test-encoded-dir"
@@ -174,7 +159,7 @@ def test_is_allowed_local_path_sibling_of_tool_results_is_rejected():
def test_is_allowed_local_path_valid_uuid_wrong_segment_name_rejected():
"""A valid UUID dir but non-'tool-results'/'tool-outputs' second segment is rejected."""
"""A valid UUID dir but non-'tool-results' second segment is rejected."""
encoded = "test-encoded-dir"
uuid_str = "12345678-1234-5678-9abc-def012345678"
path = os.path.join(

View File

@@ -59,16 +59,6 @@ _null_cache: TTLCache[tuple[str, str], bool] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
)
# GitHub user identity caches (keyed by user_id only, not provider tuple).
# Declared here so invalidate_user_provider_cache() can reference them.
_GH_IDENTITY_CACHE_TTL = 600.0 # 10 min — profile data rarely changes
_gh_identity_cache: TTLCache[str, dict[str, str]] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_GH_IDENTITY_CACHE_TTL
)
_gh_identity_null_cache: TTLCache[str, bool] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
)
def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
"""Remove the cached entry for *user_id*/*provider* from both caches.
@@ -76,19 +66,11 @@ def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
Call this after storing new credentials so that the next
``get_provider_token()`` call performs a fresh DB lookup instead of
serving a stale TTL-cached result.
For GitHub specifically, also clears the git-identity caches so that
``get_github_user_git_identity()`` re-fetches the user's profile on
the next call instead of serving stale identity data.
"""
key = (user_id, provider)
_token_cache.pop(key, None)
_null_cache.pop(key, None)
if provider == "github":
_gh_identity_cache.pop(user_id, None)
_gh_identity_null_cache.pop(user_id, None)
# Register this module's cache-bust function with the credentials manager so
# that any create/update/delete operation immediately evicts stale cache
@@ -141,7 +123,6 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
[c for c in creds_list if c.type == "oauth2"],
key=lambda c: 0 if "repo" in (cast(OAuth2Credentials, c).scopes or []) else 1,
)
refresh_failed = False
for creds in oauth2_creds:
if creds.type == "oauth2":
try:
@@ -160,7 +141,6 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
# Do NOT fall back to the stale token — it is likely expired
# or revoked. Returning None forces the caller to re-auth,
# preventing the LLM from receiving a non-functional token.
refresh_failed = True
continue
_token_cache[cache_key] = token
return token
@@ -172,12 +152,8 @@ async def get_provider_token(user_id: str, provider: str) -> str | None:
_token_cache[cache_key] = token
return token
# Only cache "not connected" when the user truly has no credentials for this
# provider. If we had OAuth credentials but refresh failed (e.g. transient
# network error, event-loop mismatch), do NOT cache the negative result —
# the next call should retry the refresh instead of being blocked for 60 s.
if not refresh_failed:
_null_cache[cache_key] = True
# No credentials found — cache to avoid repeated DB hits.
_null_cache[cache_key] = True
return None
@@ -195,76 +171,3 @@ async def get_integration_env_vars(user_id: str) -> dict[str, str]:
for var in var_names:
env[var] = token
return env
# ---------------------------------------------------------------------------
# GitHub user identity (for git committer env vars)
# ---------------------------------------------------------------------------
async def get_github_user_git_identity(user_id: str) -> dict[str, str] | None:
"""Fetch the GitHub user's name and email for git committer env vars.
Uses the ``/user`` GitHub API endpoint with the user's stored token.
Returns a dict with ``GIT_AUTHOR_NAME``, ``GIT_AUTHOR_EMAIL``,
``GIT_COMMITTER_NAME``, and ``GIT_COMMITTER_EMAIL`` if the user has a
connected GitHub account. Returns ``None`` otherwise.
Results are cached for 10 minutes; "not connected" results are cached for
60 s (same as null-token cache).
"""
if user_id in _gh_identity_null_cache:
return None
if cached := _gh_identity_cache.get(user_id):
return cached
token = await get_provider_token(user_id, "github")
if not token:
_gh_identity_null_cache[user_id] = True
return None
import aiohttp
try:
async with aiohttp.ClientSession() as session:
async with session.get(
"https://api.github.com/user",
headers={
"Authorization": f"token {token}",
"Accept": "application/vnd.github+json",
},
timeout=aiohttp.ClientTimeout(total=5),
) as resp:
if resp.status != 200:
logger.warning(
"[git-identity] GitHub /user returned %s for user %s",
resp.status,
user_id,
)
return None
data = await resp.json()
except Exception as exc:
logger.warning(
"[git-identity] Failed to fetch GitHub profile for user %s: %s",
user_id,
exc,
)
return None
name = data.get("name") or data.get("login") or "AutoGPT User"
# GitHub may return email=null if the user has set their email to private.
# Fall back to the noreply address GitHub generates for every account.
email = data.get("email")
if not email:
gh_id = data.get("id", "")
login = data.get("login", "user")
email = f"{gh_id}+{login}@users.noreply.github.com"
identity = {
"GIT_AUTHOR_NAME": name,
"GIT_AUTHOR_EMAIL": email,
"GIT_COMMITTER_NAME": name,
"GIT_COMMITTER_EMAIL": email,
}
_gh_identity_cache[user_id] = identity
return identity

View File

@@ -9,8 +9,6 @@ from backend.copilot.integration_creds import (
_NULL_CACHE_TTL,
_TOKEN_CACHE_TTL,
PROVIDER_ENV_VARS,
_gh_identity_cache,
_gh_identity_null_cache,
_null_cache,
_token_cache,
get_integration_env_vars,
@@ -51,13 +49,9 @@ def clear_caches():
"""Ensure clean caches before and after every test."""
_token_cache.clear()
_null_cache.clear()
_gh_identity_cache.clear()
_gh_identity_null_cache.clear()
yield
_token_cache.clear()
_null_cache.clear()
_gh_identity_cache.clear()
_gh_identity_null_cache.clear()
class TestInvalidateUserProviderCache:
@@ -83,34 +77,6 @@ class TestInvalidateUserProviderCache:
invalidate_user_provider_cache(_USER, _PROVIDER)
assert other_key in _token_cache
def test_clears_gh_identity_cache_for_github_provider(self):
"""When provider is 'github', identity caches must also be cleared."""
_gh_identity_cache[_USER] = {
"GIT_AUTHOR_NAME": "Old Name",
"GIT_AUTHOR_EMAIL": "old@example.com",
"GIT_COMMITTER_NAME": "Old Name",
"GIT_COMMITTER_EMAIL": "old@example.com",
}
invalidate_user_provider_cache(_USER, "github")
assert _USER not in _gh_identity_cache
def test_clears_gh_identity_null_cache_for_github_provider(self):
"""When provider is 'github', the identity null-cache must also be cleared."""
_gh_identity_null_cache[_USER] = True
invalidate_user_provider_cache(_USER, "github")
assert _USER not in _gh_identity_null_cache
def test_does_not_clear_gh_identity_cache_for_other_providers(self):
"""When provider is NOT 'github', identity caches must be left alone."""
_gh_identity_cache[_USER] = {
"GIT_AUTHOR_NAME": "Some Name",
"GIT_AUTHOR_EMAIL": "some@example.com",
"GIT_COMMITTER_NAME": "Some Name",
"GIT_COMMITTER_EMAIL": "some@example.com",
}
invalidate_user_provider_cache(_USER, "some-other-provider")
assert _USER in _gh_identity_cache
class TestGetProviderToken:
@pytest.mark.asyncio(loop_scope="session")
@@ -163,15 +129,8 @@ class TestGetProviderToken:
assert result == "oauth-tok"
@pytest.mark.asyncio(loop_scope="session")
async def test_oauth2_refresh_failure_returns_none_without_null_cache(self):
"""On refresh failure, return None but do NOT cache in null_cache.
The user has credentials — they just couldn't be refreshed right now
(e.g. transient network error or event-loop mismatch in the copilot
executor). Caching a negative result would block all credential
lookups for 60 s even though the creds exist and may refresh fine
on the next attempt.
"""
async def test_oauth2_refresh_failure_returns_none(self):
"""On refresh failure, return None instead of caching a stale token."""
oauth_creds = _make_oauth2_creds("stale-oauth-tok")
mock_manager = MagicMock()
mock_manager.store.get_creds_by_provider = AsyncMock(return_value=[oauth_creds])
@@ -182,8 +141,6 @@ class TestGetProviderToken:
# Stale tokens must NOT be returned — forces re-auth.
assert result is None
# Must NOT cache negative result when refresh failed — next call retries.
assert (_USER, _PROVIDER) not in _null_cache
@pytest.mark.asyncio(loop_scope="session")
async def test_no_credentials_caches_null_entry(self):
@@ -219,96 +176,6 @@ class TestGetProviderToken:
assert _NULL_CACHE_TTL < _TOKEN_CACHE_TTL
class TestThreadSafetyLocks:
"""Bug reproduction: shared AsyncRedisKeyedMutex across threads caused
'Future attached to a different loop' when copilot workers accessed
credentials from different event loops."""
@pytest.mark.asyncio(loop_scope="session")
async def test_store_locks_returns_per_thread_instance(self):
"""IntegrationCredentialsStore.locks() must return different instances
for different threads (via @thread_cached)."""
import asyncio
import concurrent.futures
from backend.integrations.credentials_store import IntegrationCredentialsStore
store = IntegrationCredentialsStore()
async def get_locks_id():
mock_redis = AsyncMock()
with patch(
"backend.integrations.credentials_store.get_redis_async",
return_value=mock_redis,
):
locks = await store.locks()
return id(locks)
# Get locks from main thread
main_id = await get_locks_id()
# Get locks from a worker thread
def run_in_thread():
loop = asyncio.new_event_loop()
try:
return loop.run_until_complete(get_locks_id())
finally:
loop.close()
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
worker_id = await asyncio.get_event_loop().run_in_executor(
pool, run_in_thread
)
assert main_id != worker_id, (
"Store.locks() returned the same instance across threads. "
"This would cause 'Future attached to a different loop' errors."
)
@pytest.mark.asyncio(loop_scope="session")
async def test_manager_delegates_to_store_locks(self):
"""IntegrationCredentialsManager.locks() should delegate to store."""
from backend.integrations.creds_manager import IntegrationCredentialsManager
manager = IntegrationCredentialsManager()
mock_redis = AsyncMock()
with patch(
"backend.integrations.credentials_store.get_redis_async",
return_value=mock_redis,
):
locks = await manager.locks()
# Should have gotten it from the store
assert locks is not None
class TestRefreshUnlockedPath:
"""Bug reproduction: copilot worker threads need lock-free refresh because
Redis-backed asyncio.Lock created on one event loop can't be used on another."""
@pytest.mark.asyncio(loop_scope="session")
async def test_refresh_if_needed_lock_false_skips_redis(self):
"""refresh_if_needed(lock=False) must not touch Redis locks at all."""
from backend.integrations.creds_manager import IntegrationCredentialsManager
manager = IntegrationCredentialsManager()
creds = _make_oauth2_creds()
mock_handler = MagicMock()
mock_handler.needs_refresh = MagicMock(return_value=False)
with patch(
"backend.integrations.creds_manager._get_provider_oauth_handler",
new_callable=AsyncMock,
return_value=mock_handler,
):
result = await manager.refresh_if_needed(_USER, creds, lock=False)
# Should return credentials without touching locks
assert result.id == creds.id
class TestGetIntegrationEnvVars:
@pytest.mark.asyncio(loop_scope="session")
async def test_injects_all_env_vars_for_provider(self):

View File

@@ -66,7 +66,6 @@ from pydantic import BaseModel, PrivateAttr
ToolName = Literal[
# Platform tools (must match keys in TOOL_REGISTRY)
"add_understanding",
"ask_question",
"bash_exec",
"browser_act",
"browser_navigate",
@@ -103,7 +102,6 @@ ToolName = Literal[
"web_fetch",
"write_workspace_file",
# SDK built-ins
"Agent",
"Edit",
"Glob",
"Grep",

View File

@@ -544,7 +544,6 @@ class TestApplyToolPermissions:
class TestSdkBuiltinToolNames:
def test_expected_builtins_present(self):
expected = {
"Agent",
"Read",
"Write",
"Edit",

View File

@@ -18,18 +18,6 @@ After `write_workspace_file`, embed the `download_url` in Markdown:
- Image: `![chart](workspace://file_id#image/png)`
- Video: `![recording](workspace://file_id#video/mp4)`
### Handling binary/image data in tool outputs — CRITICAL
When a tool output contains base64-encoded binary data (images, PDFs, etc.):
1. **NEVER** try to inline or render the base64 content in your response.
2. **Save** the data to workspace using `write_workspace_file` (pass the base64 data URI as content).
3. **Show** the result via the workspace download URL in Markdown: `![image](workspace://file_id#image/png)`.
### Passing large data between tools — CRITICAL
When tool outputs produce large text that you need to feed into another tool:
- **NEVER** copy-paste the full text into the next tool call argument.
- **Save** the output to a file (workspace or local), then use `@@agptfile:` references.
- This avoids token limits and ensures data integrity.
### File references — @@agptfile:
Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
@@ -150,11 +138,6 @@ parent autopilot handles orchestration.
# E2B-only notes — E2B has full internet access so gh CLI works there.
# Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
_E2B_TOOL_NOTES = """
### SDK tool-result files in E2B
When you `Read` an SDK tool-result file, it is automatically copied into the
sandbox so `bash_exec` can access it for further processing.
The exact sandbox path is shown in the `[Sandbox copy available at ...]` note.
### GitHub CLI (`gh`) and git
- If the user has connected their GitHub account, both `gh` and `git` are
pre-authenticated — use them directly without any manual login step.
@@ -220,22 +203,19 @@ def _build_storage_supplement(
- Files here **survive across sessions indefinitely**
### Moving files between storages
- **{file_move_name_1_to_2}**: `write_workspace_file(filename="output.json", source_path="/path/to/local/file")`
- **{file_move_name_2_to_1}**: `read_workspace_file(path="tool-outputs/data.json", save_to_path="{working_dir}/data.json")`
- **{file_move_name_1_to_2}**: Copy to persistent workspace
- **{file_move_name_2_to_1}**: Download for processing
### File persistence
Important files (code, configs, outputs) should be saved to workspace to ensure they persist.
### SDK tool-result files
When tool outputs are large, the SDK truncates them and saves the full output to
a local file under `~/.claude/projects/.../tool-results/` (or `tool-outputs/`).
To read these files, use `Read` — it reads from the host filesystem.
### Large tool outputs saved to workspace
When a tool output contains `<tool-output-truncated workspace_path="...">`, the
full output is in workspace storage (NOT on the local filesystem). To access it:
- Use `read_workspace_file(path="...", offset=..., length=50000)` for reading sections.
- To process in the sandbox, use `read_workspace_file(path="...", save_to_path="{working_dir}/file.json")` first, then use `bash_exec` on the local copy.
a local file under `~/.claude/projects/.../tool-results/`. To read these files,
always use `Read` (NOT `bash_exec`, NOT `read_workspace_file`).
These files are on the host filesystem — `bash_exec` runs in the sandbox and
CANNOT access them. `read_workspace_file` reads from cloud workspace storage,
where SDK tool-results are NOT stored.
{_SHARED_TOOL_NOTES}{extra_notes}"""

View File

@@ -6,23 +6,16 @@ from pathlib import Path
class TestAgentGenerationGuideContainsClarifySection:
"""The agent generation guide must include the clarification section."""
def test_guide_includes_clarify_section(self):
def test_guide_includes_clarify_before_building(self):
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
content = guide_path.read_text(encoding="utf-8")
assert "Before or During Building" in content
assert "Clarifying Before Building" in content
def test_guide_mentions_find_block_for_clarification(self):
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
content = guide_path.read_text(encoding="utf-8")
clarify_section = content.split("Before or During Building")[1].split(
# find_block must appear in the clarification section (before the workflow)
clarify_section = content.split("Clarifying Before Building")[1].split(
"### Workflow"
)[0]
assert "find_block" in clarify_section
def test_guide_mentions_ask_question_tool(self):
guide_path = Path(__file__).parent / "sdk" / "agent_generation_guide.md"
content = guide_path.read_text(encoding="utf-8")
clarify_section = content.split("Before or During Building")[1].split(
"### Workflow"
)[0]
assert "ask_question" in clarify_section

View File

@@ -161,9 +161,8 @@ async def reset_daily_usage(user_id: str, daily_token_limit: int = 0) -> bool:
daily_token_limit: The configured daily token limit. When positive,
the weekly counter is reduced by this amount.
Returns False if Redis is unavailable so the caller can handle
compensation (fail-closed for billed operations, unlike the read-only
rate-limit checks which fail-open).
Fails open: returns False if Redis is unavailable (consistent with
the fail-open design of this module).
"""
now = datetime.now(UTC)
try:

View File

@@ -70,10 +70,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", _make_config(daily_token_limit=0)),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(0, 12_500_000)),
),
):
with pytest.raises(HTTPException) as exc_info:
await reset_copilot_usage(user_id="user-1")
@@ -87,10 +83,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
@@ -120,10 +112,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
@@ -153,10 +141,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),
@@ -187,10 +171,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=3)),
):
with pytest.raises(HTTPException) as exc_info:
@@ -228,10 +208,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()) as mock_release,
@@ -252,10 +228,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", _make_config()),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=None)),
):
with pytest.raises(HTTPException) as exc_info:
@@ -273,10 +245,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),
@@ -307,10 +275,6 @@ class TestResetCopilotUsage:
with (
patch(f"{_MODULE}.config", cfg),
patch(f"{_MODULE}.settings", _mock_settings()),
patch(
f"{_MODULE}.get_global_rate_limits",
AsyncMock(return_value=(2_500_000, 12_500_000)),
),
patch(f"{_MODULE}.get_daily_reset_count", AsyncMock(return_value=0)),
patch(f"{_MODULE}.acquire_reset_lock", AsyncMock(return_value=True)),
patch(f"{_MODULE}.release_reset_lock", AsyncMock()),

View File

@@ -3,55 +3,40 @@
You can create, edit, and customize agents directly. You ARE the brain —
generate the agent JSON yourself using block schemas, then validate and save.
### Clarifying Before or During Building
### Clarifying Before Building
Use `ask_question` whenever the user's intent is ambiguous — whether
that's before starting or midway through the workflow. Common moments:
- **Before building**: output format, delivery channel, data source, or
trigger is unspecified.
- **During block discovery**: multiple blocks could fit and the user
should choose.
- **During JSON generation**: a wiring decision depends on user
preference.
Steps:
1. Call `find_block` (or another discovery tool) to learn what the
platform actually supports for the ambiguous dimension.
2. Call `ask_question` with a concrete question listing the discovered
Before starting the workflow below, check whether the user's goal is
**ambiguous** — missing the output format, delivery channel, data source,
or trigger. If so:
1. Call `find_block` with a query targeting the ambiguous dimension to
discover what the platform actually supports.
2. Ask the user **one concrete question** grounded in the discovered
options (e.g. "The platform supports Gmail, Slack, and Google Docs —
which should the agent use for delivery?").
3. **Wait for the user's answer** before continuing.
3. **Wait for the user's answer** before proceeding.
**Skip this** when the goal already specifies all dimensions (e.g.
"scrape prices from Amazon and email me daily").
### Workflow for Creating/Editing Agents
1. **If editing**: First narrow to the specific agent by UUID, then fetch its
graph: `find_library_agent(query="<agent_id>", include_graph=true)`. This
returns the full graph structure (nodes + links). **Never edit blindly** —
always inspect the current graph first so you know exactly what to change.
Avoid using `include_graph=true` with broad keyword searches, as fetching
multiple graphs at once is expensive and consumes LLM context budget.
2. **Discover blocks**: Call `find_block(query, include_schemas=true)` to
1. **Discover blocks**: Call `find_block(query, include_schemas=true)` to
search for relevant blocks. This returns block IDs, names, descriptions,
and full input/output schemas.
3. **Find library agents**: Call `find_library_agent` to discover reusable
2. **Find library agents**: Call `find_library_agent` to discover reusable
agents that can be composed as sub-agents via `AgentExecutorBlock`.
4. **Generate/modify JSON**: Build or modify the agent JSON using block schemas:
- Use block IDs from step 2 as `block_id` in nodes
3. **Generate JSON**: Build the agent JSON using block schemas:
- Use block IDs from step 1 as `block_id` in nodes
- Wire outputs to inputs using links
- Set design-time config in `input_default`
- Use `AgentInputBlock` for values the user provides at runtime
- When editing, apply targeted changes and preserve unchanged parts
5. **Write to workspace**: Save the JSON to a workspace file so the user
4. **Write to workspace**: Save the JSON to a workspace file so the user
can review it: `write_workspace_file(filename="agent.json", content=...)`
6. **Validate**: Call `validate_agent_graph` with the agent JSON to check
5. **Validate**: Call `validate_agent_graph` with the agent JSON to check
for errors
7. **Fix if needed**: Call `fix_agent_graph` to auto-fix common issues,
6. **Fix if needed**: Call `fix_agent_graph` to auto-fix common issues,
or fix manually based on the error descriptions. Iterate until valid.
8. **Save**: Call `create_agent` (new) or `edit_agent` (existing) with
7. **Save**: Call `create_agent` (new) or `edit_agent` (existing) with
the final `agent_json`
### Agent JSON Structure
@@ -104,8 +89,8 @@ These define the agent's interface — what it accepts and what it produces.
**AgentDropdownInputBlock** (ID: `655d6fdf-a334-421c-b733-520549c07cd1`):
- Specialized input block that presents a dropdown/select to the user
- Required `input_default` fields: `name` (str)
- Optional: `options` (list of dropdown values; when omitted/empty, input behaves as free-text), `title`, `description`, `value` (default selection)
- Required `input_default` fields: `name` (str), `placeholder_values` (list of options, must have at least one)
- Optional: `title`, `description`, `value` (default selection)
- Output: `result` — the user-selected value at runtime
- Use this instead of AgentInputBlock when the user should pick from a fixed set of options
@@ -260,17 +245,6 @@ real API calls, credentials, or credits:
3. **Iterate**: If the dry run reveals wiring issues or missing inputs, fix
the agent JSON and re-save before suggesting a real execution.
**Special block behaviour in dry-run mode:**
- **OrchestratorBlock** and **AgentExecutorBlock** execute for real so the
orchestrator can make LLM calls and agent executors can spawn child graphs.
Their downstream tool blocks and child-graph blocks are still simulated.
Note: real LLM inference calls are made (consuming API quota), even though
platform credits are not charged. Agent-mode iterations are capped at 1 in
dry-run to keep it fast.
- **MCPToolBlock** is simulated using the selected tool's name and JSON Schema
so the LLM can produce a realistic mock response without connecting to the
MCP server.
### Example: Simple AI Text Processor
A minimal agent with input, processing, and output:

View File

@@ -2,30 +2,14 @@
from __future__ import annotations
from collections.abc import AsyncIterator
from unittest.mock import patch
from uuid import uuid4
import pytest
import pytest_asyncio
from backend.util import json
@pytest_asyncio.fixture(scope="session", loop_scope="session", name="server")
async def _server_noop() -> None:
"""No-op server stub — SDK tests don't need the full backend."""
return None
@pytest_asyncio.fixture(
scope="session", loop_scope="session", autouse=True, name="graph_cleanup"
)
async def _graph_cleanup_noop() -> AsyncIterator[None]:
"""No-op graph cleanup stub."""
yield
@pytest.fixture()
def mock_chat_config():
"""Mock ChatConfig so compact_transcript tests skip real config lookup."""

View File

@@ -8,9 +8,6 @@ SDK-internal paths (``~/.claude/projects/…/tool-results/``) are handled
by the separate ``Read`` MCP tool registered in ``tool_adapter.py``.
"""
import asyncio
import base64
import hashlib
import itertools
import json
import logging
@@ -31,12 +28,6 @@ from backend.copilot.context import (
logger = logging.getLogger(__name__)
# Default number of lines returned by ``read_file`` when the caller does not
# specify a limit. Also used as the threshold in ``bridge_to_sandbox`` to
# decide whether the model is requesting the full file (and thus whether the
# bridge copy is worthwhile).
_DEFAULT_READ_LIMIT = 2000
async def _check_sandbox_symlink_escape(
sandbox: Any,
@@ -98,7 +89,7 @@ def _get_sandbox_and_path(
return sandbox, remote
async def _sandbox_write(sandbox: Any, path: str, content: str | bytes) -> None:
async def _sandbox_write(sandbox: Any, path: str, content: str) -> None:
"""Write *content* to *path* inside the sandbox.
The E2B filesystem API (``sandbox.files.write``) and the command API
@@ -111,14 +102,11 @@ async def _sandbox_write(sandbox: Any, path: str, content: str | bytes) -> None:
To work around this, writes targeting ``/tmp`` are performed via
``tee`` through the command API, which runs as the sandbox ``user``
and can therefore always overwrite user-owned files.
*content* may be ``str`` (text) or ``bytes`` (binary). Both paths
are handled correctly: text is encoded to bytes for the base64 shell
pipe, and raw bytes are passed through without any encoding.
"""
if path == "/tmp" or path.startswith("/tmp/"):
raw = content.encode() if isinstance(content, str) else content
encoded = base64.b64encode(raw).decode()
import base64 as _b64
encoded = _b64.b64encode(content.encode()).decode()
result = await sandbox.commands.run(
f"echo {shlex.quote(encoded)} | base64 -d > {shlex.quote(path)}",
cwd=E2B_WORKDIR,
@@ -140,25 +128,14 @@ async def _handle_read_file(args: dict[str, Any]) -> dict[str, Any]:
"""Read lines from a sandbox file, falling back to the local host for SDK-internal paths."""
file_path: str = args.get("file_path", "")
offset: int = max(0, int(args.get("offset", 0)))
limit: int = max(1, int(args.get("limit", _DEFAULT_READ_LIMIT)))
limit: int = max(1, int(args.get("limit", 2000)))
if not file_path:
return _mcp("file_path is required", error=True)
# SDK-internal paths (tool-results/tool-outputs, ephemeral working dir)
# stay on the host. When E2B is active, also copy the file into the
# sandbox so bash_exec can access it for further processing.
# SDK-internal paths (tool-results, ephemeral working dir) stay on the host.
if _is_allowed_local(file_path):
result = _read_local(file_path, offset, limit)
if not result.get("isError"):
sandbox = _get_sandbox()
if sandbox is not None:
annotation = await bridge_and_annotate(
sandbox, file_path, offset, limit
)
if annotation:
result["content"][0]["text"] += annotation
return result
return _read_local(file_path, offset, limit)
result = _get_sandbox_and_path(file_path)
if isinstance(result, dict):
@@ -325,103 +302,6 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:
return _mcp(output if output else "No matches found.")
# Bridging: copy SDK-internal files into E2B sandbox
# Files larger than this are written to /home/user/ via sandbox.files.write()
# instead of /tmp/ via shell base64, to avoid shell argument length limits
# and E2B command timeouts. Base64 expands content by ~33%, so keep this
# well under the typical Linux ARG_MAX (128 KB).
_BRIDGE_SHELL_MAX_BYTES = 32 * 1024 # 32 KB
# Files larger than this are skipped entirely to avoid excessive transfer times.
_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024 # 50 MB
async def bridge_to_sandbox(
sandbox: Any, file_path: str, offset: int, limit: int
) -> str | None:
"""Best-effort copy of a host-side SDK file into the E2B sandbox.
When the model reads an SDK-internal file (e.g. tool-results), it often
wants to process the data with bash. Copying the file into the sandbox
under a stable name lets ``bash_exec`` access it without extra steps.
Only copies when offset=0 and limit is large enough to indicate the model
wants the full file. Errors are logged but never propagated.
Returns the sandbox path on success, or ``None`` on skip/failure.
Size handling:
- <= 32 KB: written to ``/tmp/<hash>-<basename>`` via shell base64
(``_sandbox_write``). Kept small to stay within ARG_MAX.
- 32 KB - 50 MB: written to ``/home/user/<hash>-<basename>`` via
``sandbox.files.write()`` to avoid shell argument length limits.
- > 50 MB: skipped entirely with a warning.
The sandbox filename is prefixed with a short hash of the full source
path to avoid collisions when different source files share the same
basename (e.g. multiple ``result.json`` files).
"""
if offset != 0 or limit < _DEFAULT_READ_LIMIT:
return None
try:
expanded = os.path.realpath(os.path.expanduser(file_path))
basename = os.path.basename(expanded)
source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
unique_name = f"{source_id}-{basename}"
file_size = os.path.getsize(expanded)
if file_size > _BRIDGE_SKIP_BYTES:
logger.warning(
"[E2B] Skipping bridge for large file (%d bytes): %s",
file_size,
basename,
)
return None
def _read_bytes() -> bytes:
with open(expanded, "rb") as fh:
return fh.read()
raw_content = await asyncio.to_thread(_read_bytes)
try:
text_content: str | None = raw_content.decode("utf-8")
except UnicodeDecodeError:
text_content = None
data: str | bytes = text_content if text_content is not None else raw_content
if file_size <= _BRIDGE_SHELL_MAX_BYTES:
sandbox_path = f"/tmp/{unique_name}"
await _sandbox_write(sandbox, sandbox_path, data)
else:
sandbox_path = f"/home/user/{unique_name}"
await sandbox.files.write(sandbox_path, data)
logger.info(
"[E2B] Bridged SDK file to sandbox: %s -> %s", basename, sandbox_path
)
return sandbox_path
except Exception:
logger.warning(
"[E2B] Failed to bridge SDK file to sandbox: %s",
file_path,
exc_info=True,
)
return None
async def bridge_and_annotate(
sandbox: Any, file_path: str, offset: int, limit: int
) -> str | None:
"""Bridge a host file to the sandbox and return a newline-prefixed annotation.
Combines ``bridge_to_sandbox`` with the standard annotation suffix so
callers don't need to duplicate the pattern. Returns a string like
``"\\n[Sandbox copy available at /tmp/abc-file.txt]"`` on success, or
``None`` if bridging was skipped or failed.
"""
sandbox_path = await bridge_to_sandbox(sandbox, file_path, offset, limit)
if sandbox_path is None:
return None
return f"\n[Sandbox copy available at {sandbox_path}]"
# Local read (for SDK-internal paths)

View File

@@ -3,7 +3,6 @@
Pure unit tests with no external dependencies (no E2B, no sandbox).
"""
import hashlib
import os
import shutil
from types import SimpleNamespace
@@ -14,26 +13,12 @@ import pytest
from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir
from .e2b_file_tools import (
_BRIDGE_SHELL_MAX_BYTES,
_BRIDGE_SKIP_BYTES,
_DEFAULT_READ_LIMIT,
_check_sandbox_symlink_escape,
_read_local,
_sandbox_write,
bridge_and_annotate,
bridge_to_sandbox,
resolve_sandbox_path,
)
def _expected_bridge_path(file_path: str, prefix: str = "/tmp") -> str:
"""Compute the expected sandbox path for a bridged file."""
expanded = os.path.realpath(os.path.expanduser(file_path))
basename = os.path.basename(expanded)
source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
return f"{prefix}/{source_id}-{basename}"
# ---------------------------------------------------------------------------
# resolve_sandbox_path — sandbox path normalisation & boundary enforcement
# ---------------------------------------------------------------------------
@@ -106,9 +91,9 @@ class TestResolveSandboxPath:
# ---------------------------------------------------------------------------
# _read_local — host filesystem reads with allowlist enforcement
#
# In E2B mode, _read_local only allows tool-results/tool-outputs paths
# (via is_allowed_local_path without sdk_cwd). Regular files live on
# the sandbox, not the host.
# In E2B mode, _read_local only allows tool-results paths (via
# is_allowed_local_path without sdk_cwd). Regular files live on the
# sandbox, not the host.
# ---------------------------------------------------------------------------
@@ -134,7 +119,7 @@ class TestReadLocal:
)
token = _current_project_dir.set(encoded)
try:
result = _read_local(filepath, offset=0, limit=_DEFAULT_READ_LIMIT)
result = _read_local(filepath, offset=0, limit=2000)
assert result["isError"] is False
assert "line 1" in result["content"][0]["text"]
assert "line 2" in result["content"][0]["text"]
@@ -142,25 +127,6 @@ class TestReadLocal:
_current_project_dir.reset(token)
os.unlink(filepath)
def test_read_tool_outputs_file(self):
"""Reading a tool-outputs file should also succeed."""
encoded = "-tmp-copilot-e2b-test-read-outputs"
tool_outputs_dir = os.path.join(
SDK_PROJECTS_DIR, encoded, self._CONV_UUID, "tool-outputs"
)
os.makedirs(tool_outputs_dir, exist_ok=True)
filepath = os.path.join(tool_outputs_dir, "sdk-abc123.json")
with open(filepath, "w") as f:
f.write('{"data": "test"}\n')
token = _current_project_dir.set(encoded)
try:
result = _read_local(filepath, offset=0, limit=_DEFAULT_READ_LIMIT)
assert result["isError"] is False
assert "test" in result["content"][0]["text"]
finally:
_current_project_dir.reset(token)
shutil.rmtree(os.path.join(SDK_PROJECTS_DIR, encoded), ignore_errors=True)
def test_read_disallowed_path_blocked(self):
"""Reading /etc/passwd should be blocked by the allowlist."""
result = _read_local("/etc/passwd", offset=0, limit=10)
@@ -369,199 +335,3 @@ class TestSandboxWrite:
encoded_in_cmd = call_args.split("echo ")[1].split(" |")[0].strip("'")
decoded = base64.b64decode(encoded_in_cmd).decode()
assert decoded == content
# ---------------------------------------------------------------------------
# bridge_to_sandbox — copy SDK-internal files into E2B sandbox
# ---------------------------------------------------------------------------
def _make_bridge_sandbox() -> SimpleNamespace:
"""Build a sandbox mock suitable for bridge_to_sandbox tests."""
run_result = SimpleNamespace(stdout="", stderr="", exit_code=0)
commands = SimpleNamespace(run=AsyncMock(return_value=run_result))
files = SimpleNamespace(write=AsyncMock())
return SimpleNamespace(commands=commands, files=files)
class TestBridgeToSandbox:
@pytest.mark.asyncio
async def test_happy_path_small_file(self, tmp_path):
"""A small file is bridged to /tmp/<hash>-<basename> via _sandbox_write."""
f = tmp_path / "result.json"
f.write_text('{"ok": true}')
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
expected = _expected_bridge_path(str(f))
assert result == expected
sandbox.commands.run.assert_called_once()
cmd = sandbox.commands.run.call_args[0][0]
assert "result.json" in cmd
sandbox.files.write.assert_not_called()
@pytest.mark.asyncio
async def test_skip_when_offset_nonzero(self, tmp_path):
"""Bridging is skipped when offset != 0 (partial read)."""
f = tmp_path / "data.txt"
f.write_text("content")
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=10, limit=_DEFAULT_READ_LIMIT
)
assert result is None
sandbox.commands.run.assert_not_called()
sandbox.files.write.assert_not_called()
@pytest.mark.asyncio
async def test_skip_when_limit_too_small(self, tmp_path):
"""Bridging is skipped when limit < _DEFAULT_READ_LIMIT (partial read)."""
f = tmp_path / "data.txt"
f.write_text("content")
sandbox = _make_bridge_sandbox()
await bridge_to_sandbox(sandbox, str(f), offset=0, limit=100)
sandbox.commands.run.assert_not_called()
sandbox.files.write.assert_not_called()
@pytest.mark.asyncio
async def test_nonexistent_file_does_not_raise(self, tmp_path):
"""Bridging a non-existent file logs but does not propagate errors."""
sandbox = _make_bridge_sandbox()
await bridge_to_sandbox(
sandbox, str(tmp_path / "ghost.txt"), offset=0, limit=_DEFAULT_READ_LIMIT
)
sandbox.commands.run.assert_not_called()
sandbox.files.write.assert_not_called()
@pytest.mark.asyncio
async def test_sandbox_write_failure_returns_none(self, tmp_path):
"""If sandbox write fails, returns None (best-effort)."""
f = tmp_path / "data.txt"
f.write_text("content")
sandbox = _make_bridge_sandbox()
sandbox.commands.run.side_effect = RuntimeError("E2B timeout")
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
assert result is None
@pytest.mark.asyncio
async def test_large_file_uses_files_api(self, tmp_path):
"""Files > 32 KB but <= 50 MB are written to /home/user/ via files.write."""
f = tmp_path / "big.json"
f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
expected = _expected_bridge_path(str(f), prefix="/home/user")
assert result == expected
sandbox.files.write.assert_called_once()
call_args = sandbox.files.write.call_args[0]
assert call_args[0] == expected
sandbox.commands.run.assert_not_called()
@pytest.mark.asyncio
async def test_small_binary_file_preserves_bytes(self, tmp_path):
"""A small binary file is bridged to /tmp via base64 without corruption."""
binary_data = bytes(range(256))
f = tmp_path / "image.png"
f.write_bytes(binary_data)
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
expected = _expected_bridge_path(str(f))
assert result == expected
sandbox.commands.run.assert_called_once()
cmd = sandbox.commands.run.call_args[0][0]
assert "base64" in cmd
sandbox.files.write.assert_not_called()
@pytest.mark.asyncio
async def test_large_binary_file_writes_raw_bytes(self, tmp_path):
"""A large binary file is bridged to /home/user/ as raw bytes."""
binary_data = bytes(range(256)) * 200
f = tmp_path / "photo.jpg"
f.write_bytes(binary_data)
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
expected = _expected_bridge_path(str(f), prefix="/home/user")
assert result == expected
sandbox.files.write.assert_called_once()
call_args = sandbox.files.write.call_args[0]
assert call_args[0] == expected
assert call_args[1] == binary_data
sandbox.commands.run.assert_not_called()
@pytest.mark.asyncio
async def test_very_large_file_skipped(self, tmp_path):
"""Files > 50 MB are skipped entirely."""
f = tmp_path / "huge.bin"
# Create a sparse file to avoid actually writing 50 MB
with open(f, "wb") as fh:
fh.seek(_BRIDGE_SKIP_BYTES + 1)
fh.write(b"\0")
sandbox = _make_bridge_sandbox()
result = await bridge_to_sandbox(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
assert result is None
sandbox.commands.run.assert_not_called()
sandbox.files.write.assert_not_called()
# ---------------------------------------------------------------------------
# bridge_and_annotate — shared helper wrapping bridge_to_sandbox + annotation
# ---------------------------------------------------------------------------
class TestBridgeAndAnnotate:
@pytest.mark.asyncio
async def test_returns_annotation_on_success(self, tmp_path):
"""On success, returns a newline-prefixed annotation with the sandbox path."""
f = tmp_path / "data.json"
f.write_text('{"ok": true}')
sandbox = _make_bridge_sandbox()
annotation = await bridge_and_annotate(
sandbox, str(f), offset=0, limit=_DEFAULT_READ_LIMIT
)
expected_path = _expected_bridge_path(str(f))
assert annotation == f"\n[Sandbox copy available at {expected_path}]"
@pytest.mark.asyncio
async def test_returns_none_when_skipped(self, tmp_path):
"""When bridging is skipped (e.g. offset != 0), returns None."""
f = tmp_path / "data.json"
f.write_text("content")
sandbox = _make_bridge_sandbox()
annotation = await bridge_and_annotate(
sandbox, str(f), offset=10, limit=_DEFAULT_READ_LIMIT
)
assert annotation is None

View File

@@ -20,7 +20,6 @@ config = ChatConfig()
def build_sdk_env(
session_id: str | None = None,
user_id: str | None = None,
sdk_cwd: str | None = None,
) -> dict[str, str]:
"""Build env vars for the SDK CLI subprocess.
@@ -30,35 +29,25 @@ def build_sdk_env(
``ANTHROPIC_API_KEY`` from the parent environment.
3. **OpenRouter** (default) — overrides base URL and auth token to
route through the proxy, with Langfuse trace headers.
When *sdk_cwd* is provided, ``CLAUDE_CODE_TMPDIR`` is set so that
the CLI writes temp/sub-agent output inside the per-session workspace
directory rather than an inaccessible system temp path.
"""
# --- Mode 1: Claude Code subscription auth ---
if config.use_claude_code_subscription:
validate_subscription()
env: dict[str, str] = {
return {
"ANTHROPIC_API_KEY": "",
"ANTHROPIC_AUTH_TOKEN": "",
"ANTHROPIC_BASE_URL": "",
}
if sdk_cwd:
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
return env
# --- Mode 2: Direct Anthropic (no proxy hop) ---
if not config.openrouter_active:
env = {}
if sdk_cwd:
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
return env
return {}
# --- Mode 3: OpenRouter proxy ---
base = (config.base_url or "").rstrip("/")
if base.endswith("/v1"):
base = base[:-3]
env = {
env: dict[str, str] = {
"ANTHROPIC_BASE_URL": base,
"ANTHROPIC_AUTH_TOKEN": config.api_key or "",
"ANTHROPIC_API_KEY": "", # force CLI to use AUTH_TOKEN
@@ -76,7 +65,4 @@ def build_sdk_env(
if parts:
env["ANTHROPIC_CUSTOM_HEADERS"] = "\n".join(parts)
if sdk_cwd:
env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
return env

View File

@@ -240,54 +240,3 @@ class TestBuildSdkEnvModePriority:
"ANTHROPIC_AUTH_TOKEN": "",
"ANTHROPIC_BASE_URL": "",
}
# ---------------------------------------------------------------------------
# CLAUDE_CODE_TMPDIR integration
# ---------------------------------------------------------------------------
class TestClaudeCodeTmpdir:
"""Verify build_sdk_env() sets CLAUDE_CODE_TMPDIR from *sdk_cwd*."""
def test_tmpdir_set_when_sdk_cwd_is_truthy(self):
"""CLAUDE_CODE_TMPDIR is set to sdk_cwd when sdk_cwd is truthy."""
cfg = _make_config(use_openrouter=False)
with patch("backend.copilot.sdk.env.config", cfg):
from backend.copilot.sdk.env import build_sdk_env
result = build_sdk_env(sdk_cwd="/tmp/copilot-workspace")
assert result["CLAUDE_CODE_TMPDIR"] == "/tmp/copilot-workspace"
def test_tmpdir_not_set_when_sdk_cwd_is_none(self):
"""CLAUDE_CODE_TMPDIR is NOT in the env when sdk_cwd is None."""
cfg = _make_config(use_openrouter=False)
with patch("backend.copilot.sdk.env.config", cfg):
from backend.copilot.sdk.env import build_sdk_env
result = build_sdk_env(sdk_cwd=None)
assert "CLAUDE_CODE_TMPDIR" not in result
def test_tmpdir_not_set_when_sdk_cwd_is_empty_string(self):
"""CLAUDE_CODE_TMPDIR is NOT in the env when sdk_cwd is empty string."""
cfg = _make_config(use_openrouter=False)
with patch("backend.copilot.sdk.env.config", cfg):
from backend.copilot.sdk.env import build_sdk_env
result = build_sdk_env(sdk_cwd="")
assert "CLAUDE_CODE_TMPDIR" not in result
@patch("backend.copilot.sdk.env.validate_subscription")
def test_tmpdir_set_in_subscription_mode(self, mock_validate):
"""CLAUDE_CODE_TMPDIR is set even in subscription mode."""
cfg = _make_config(use_claude_code_subscription=True)
with patch("backend.copilot.sdk.env.config", cfg):
from backend.copilot.sdk.env import build_sdk_env
result = build_sdk_env(sdk_cwd="/tmp/sub-workspace")
assert result["CLAUDE_CODE_TMPDIR"] == "/tmp/sub-workspace"
assert result["ANTHROPIC_API_KEY"] == ""

View File

@@ -29,7 +29,6 @@ from backend.copilot.response_model import (
StreamToolOutputAvailable,
)
from .compaction import compaction_events
from .response_adapter import SDKResponseAdapter
from .tool_adapter import MCP_TOOL_PREFIX
from .tool_adapter import _pending_tool_outputs as _pto
@@ -690,102 +689,3 @@ def test_already_resolved_tool_skipped_in_user_message():
assert (
len(output_events) == 0
), "Already-resolved tool should not emit duplicate output"
# -- _end_text_if_open before compaction -------------------------------------
def test_end_text_if_open_emits_text_end_before_finish_step():
"""StreamTextEnd must be emitted before StreamFinishStep during compaction.
When ``emit_end_if_ready`` fires compaction events while a text block is
still open, ``_end_text_if_open`` must close it first. If StreamFinishStep
arrives before StreamTextEnd, the Vercel AI SDK clears ``activeTextParts``
and raises "Received text-end for missing text part".
"""
adapter = _adapter()
# Open a text block by processing an AssistantMessage with text
msg = AssistantMessage(content=[TextBlock(text="partial response")], model="test")
adapter.convert_message(msg)
assert adapter.has_started_text
assert not adapter.has_ended_text
# Simulate what service.py does before yielding compaction events
pre_close: list[StreamBaseResponse] = []
adapter._end_text_if_open(pre_close)
combined = pre_close + list(compaction_events("Compacted transcript"))
text_end_idx = next(
(i for i, e in enumerate(combined) if isinstance(e, StreamTextEnd)), None
)
finish_step_idx = next(
(i for i, e in enumerate(combined) if isinstance(e, StreamFinishStep)), None
)
assert text_end_idx is not None, "StreamTextEnd must be present"
assert finish_step_idx is not None, "StreamFinishStep must be present"
assert text_end_idx < finish_step_idx, (
f"StreamTextEnd (idx={text_end_idx}) must precede "
f"StreamFinishStep (idx={finish_step_idx}) — otherwise the Vercel AI SDK "
"clears activeTextParts before text-end arrives"
)
def test_step_open_must_reset_after_compaction_finish_step():
"""Adapter step_open must be reset when compaction emits StreamFinishStep.
Compaction events bypass the adapter, so service.py must explicitly clear
step_open after yielding a StreamFinishStep from compaction. Without this,
the next AssistantMessage skips StreamStartStep because the adapter still
thinks a step is open.
"""
adapter = _adapter()
# Open a step + text block via an AssistantMessage
msg = AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
adapter.convert_message(msg)
assert adapter.step_open is True
# Simulate what service.py does: close text, then check compaction events
pre_close: list[StreamBaseResponse] = []
adapter._end_text_if_open(pre_close)
events = list(compaction_events("Compacted transcript"))
if any(isinstance(ev, StreamFinishStep) for ev in events):
adapter.step_open = False
assert (
adapter.step_open is False
), "step_open must be False after compaction emits StreamFinishStep"
# Next AssistantMessage must open a new step
msg2 = AssistantMessage(content=[TextBlock(text="continued")], model="test")
results = adapter.convert_message(msg2)
assert any(
isinstance(r, StreamStartStep) for r in results
), "A new StreamStartStep must be emitted after compaction closed the step"
def test_end_text_if_open_no_op_when_no_text_open():
"""_end_text_if_open emits nothing when no text block is open."""
adapter = _adapter()
results: list[StreamBaseResponse] = []
adapter._end_text_if_open(results)
assert results == []
def test_end_text_if_open_no_op_after_text_already_ended():
"""_end_text_if_open emits nothing when the text block is already closed."""
adapter = _adapter()
msg = AssistantMessage(content=[TextBlock(text="hello")], model="test")
adapter.convert_message(msg)
# Close it once
first: list[StreamBaseResponse] = []
adapter._end_text_if_open(first)
assert len(first) == 1
assert isinstance(first[0], StreamTextEnd)
# Second call must be a no-op
second: list[StreamBaseResponse] = []
adapter._end_text_if_open(second)
assert second == []

View File

@@ -1010,7 +1010,7 @@ def _make_sdk_patches(
(f"{_SVC}.create_security_hooks", dict(return_value=MagicMock())),
(f"{_SVC}.get_copilot_tool_names", dict(return_value=[])),
(f"{_SVC}.get_sdk_disallowed_tools", dict(return_value=[])),
(f"{_SVC}.build_sdk_env", dict(return_value={})),
(f"{_SVC}.build_sdk_env", dict(return_value=None)),
(f"{_SVC}._resolve_sdk_model", dict(return_value=None)),
(f"{_SVC}.set_execution_context", {}),
(
@@ -1487,188 +1487,3 @@ class TestStreamChatCompletionRetryIntegration:
errors = [e for e in events if isinstance(e, StreamError)]
assert not errors, f"Unexpected StreamError: {errors}"
assert any(isinstance(e, StreamStart) for e in events)
@pytest.mark.asyncio
async def test_result_message_success_subtype_prompt_too_long_triggers_compaction(
self,
):
"""CLI returns ResultMessage(subtype="success") with result="Prompt is too long".
The SDK internally compacts but the transcript is still too long. It
returns subtype="success" (process completed) with result="Prompt is
too long" (the actual rejection message). The retry loop must detect
this as a context-length error and trigger compaction — the subtype
"success" must not fool it into treating this as a real response.
"""
import contextlib
from claude_agent_sdk import ResultMessage
from backend.copilot.response_model import StreamError, StreamStart
from backend.copilot.sdk.service import stream_chat_completion_sdk
session = self._make_session()
success_result = self._make_result_message()
attempt_count = [0]
error_result = ResultMessage(
subtype="success",
result="Prompt is too long",
duration_ms=100,
duration_api_ms=0,
is_error=False,
num_turns=1,
session_id="test-session-id",
)
def _client_factory(*args, **kwargs):
attempt_count[0] += 1
async def _receive_error():
yield error_result
async def _receive_success():
yield success_result
client = MagicMock()
client._transport = MagicMock()
client._transport.write = AsyncMock()
client.query = AsyncMock()
if attempt_count[0] == 1:
client.receive_response = _receive_error
else:
client.receive_response = _receive_success
cm = AsyncMock()
cm.__aenter__.return_value = client
cm.__aexit__.return_value = None
return cm
original_transcript = _build_transcript(
[("user", "prior question"), ("assistant", "prior answer")]
)
compacted_transcript = _build_transcript(
[("user", "[summary]"), ("assistant", "summary reply")]
)
patches = _make_sdk_patches(
session,
original_transcript=original_transcript,
compacted_transcript=compacted_transcript,
client_side_effect=_client_factory,
)
events = []
with contextlib.ExitStack() as stack:
for target, kwargs in patches:
stack.enter_context(patch(target, **kwargs))
async for event in stream_chat_completion_sdk(
session_id="test-session-id",
message="hello",
is_user_message=True,
user_id="test-user",
session=session,
):
events.append(event)
assert attempt_count[0] == 2, (
f"Expected 2 SDK attempts (subtype='success' with 'Prompt is too long' "
f"result should trigger compaction retry), got {attempt_count[0]}"
)
errors = [e for e in events if isinstance(e, StreamError)]
assert not errors, f"Unexpected StreamError: {errors}"
assert any(isinstance(e, StreamStart) for e in events)
@pytest.mark.asyncio
async def test_assistant_message_error_content_prompt_too_long_triggers_compaction(
self,
):
"""AssistantMessage.error="invalid_request" with content "Prompt is too long".
The SDK returns error type "invalid_request" but puts the actual
rejection message ("Prompt is too long") in the content blocks.
The retry loop must detect this via content inspection (sdk_error
being set confirms it's an error message, not user content).
"""
import contextlib
from claude_agent_sdk import AssistantMessage, ResultMessage, TextBlock
from backend.copilot.response_model import StreamError, StreamStart
from backend.copilot.sdk.service import stream_chat_completion_sdk
session = self._make_session()
success_result = self._make_result_message()
attempt_count = [0]
def _client_factory(*args, **kwargs):
attempt_count[0] += 1
async def _receive_error():
# SDK returns invalid_request with "Prompt is too long" in content.
# ResultMessage.result is a non-PTL value ("done") to isolate
# the AssistantMessage content detection path exclusively.
yield AssistantMessage(
content=[TextBlock(text="Prompt is too long")],
model="<synthetic>",
error="invalid_request",
)
yield ResultMessage(
subtype="success",
result="done",
duration_ms=100,
duration_api_ms=0,
is_error=False,
num_turns=1,
session_id="test-session-id",
)
async def _receive_success():
yield success_result
client = MagicMock()
client._transport = MagicMock()
client._transport.write = AsyncMock()
client.query = AsyncMock()
if attempt_count[0] == 1:
client.receive_response = _receive_error
else:
client.receive_response = _receive_success
cm = AsyncMock()
cm.__aenter__.return_value = client
cm.__aexit__.return_value = None
return cm
original_transcript = _build_transcript(
[("user", "prior question"), ("assistant", "prior answer")]
)
compacted_transcript = _build_transcript(
[("user", "[summary]"), ("assistant", "summary reply")]
)
patches = _make_sdk_patches(
session,
original_transcript=original_transcript,
compacted_transcript=compacted_transcript,
client_side_effect=_client_factory,
)
events = []
with contextlib.ExitStack() as stack:
for target, kwargs in patches:
stack.enter_context(patch(target, **kwargs))
async for event in stream_chat_completion_sdk(
session_id="test-session-id",
message="hello",
is_user_message=True,
user_id="test-user",
session=session,
):
events.append(event)
assert attempt_count[0] == 2, (
f"Expected 2 SDK attempts (AssistantMessage error content 'Prompt is "
f"too long' should trigger compaction retry), got {attempt_count[0]}"
)
errors = [e for e in events if isinstance(e, StreamError)]
assert not errors, f"Unexpected StreamError: {errors}"
assert any(isinstance(e, StreamStart) for e in events)

View File

@@ -22,38 +22,6 @@ from .tool_adapter import (
logger = logging.getLogger(__name__)
# The SDK CLI uses "Task" in older versions and "Agent" in v2.x+.
# Shared across all sessions — used by security hooks for sub-agent detection.
_SUBAGENT_TOOLS: frozenset[str] = frozenset({"Task", "Agent"})
# Unicode ranges stripped by _sanitize():
# - BiDi overrides (U+202A-U+202E, U+2066-U+2069) can trick reviewers
# into misreading code/logs.
# - Zero-width characters (U+200B-U+200F, U+FEFF) can hide content.
_BIDI_AND_ZW_CHARS = set(
chr(c)
for r in (range(0x202A, 0x202F), range(0x2066, 0x206A), range(0x200B, 0x2010))
for c in r
) | {"\ufeff"}
def _sanitize(value: str, max_len: int = 200) -> str:
"""Strip control characters and truncate for safe logging.
Removes C0 (U+0000-U+001F), DEL (U+007F), C1 (U+0080-U+009F),
Unicode BiDi overrides, and zero-width characters to prevent
log injection and visual spoofing.
"""
cleaned = "".join(
c
for c in value
if c >= " "
and c != "\x7f"
and not ("\x80" <= c <= "\x9f")
and c not in _BIDI_AND_ZW_CHARS
)
return cleaned[:max_len]
def _deny(reason: str) -> dict[str, Any]:
"""Return a hook denial response."""
@@ -168,13 +136,11 @@ def create_security_hooks(
- PostToolUse: Log successful tool executions
- PostToolUseFailure: Log and handle failed tool executions
- PreCompact: Log context compaction events (SDK handles compaction automatically)
- SubagentStart: Log sub-agent lifecycle start
- SubagentStop: Log sub-agent lifecycle end
Args:
user_id: Current user ID for isolation validation
sdk_cwd: SDK working directory for workspace-scoped tool validation
max_subtasks: Maximum concurrent sub-agent spawns allowed per session
max_subtasks: Maximum concurrent Task (sub-agent) spawns allowed per session
on_compact: Callback invoked when SDK starts compacting context.
Receives the transcript_path from the hook input.
@@ -185,19 +151,9 @@ def create_security_hooks(
from claude_agent_sdk import HookMatcher
from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
# Per-session tracking for sub-agent concurrency.
# Per-session tracking for Task sub-agent concurrency.
# Set of tool_use_ids that consumed a slot — len() is the active count.
#
# LIMITATION: For background (async) agents the SDK returns the
# Agent/Task tool immediately with {isAsync: true}, which triggers
# PostToolUse and releases the slot while the agent is still running.
# SubagentStop fires later when the background process finishes but
# does not currently hold a slot. This means the concurrency limit
# only gates *launches*, not true concurrent execution. To fix this
# we would need to track background agent_ids separately and release
# in SubagentStop, but the SDK does not guarantee SubagentStop fires
# for every background agent (e.g. on session abort).
subagent_tool_use_ids: set[str] = set()
task_tool_use_ids: set[str] = set()
async def pre_tool_use_hook(
input_data: HookInput,
@@ -209,22 +165,29 @@ def create_security_hooks(
tool_name = cast(str, input_data.get("tool_name", ""))
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
# Rate-limit sub-agent spawns per session.
# The SDK CLI renamed "Task" → "Agent" in v2.x; handle both.
if tool_name in _SUBAGENT_TOOLS:
# Background agents are allowed — the SDK returns immediately
# with {isAsync: true} and the model polls via TaskOutput.
# Still count them against the concurrency limit.
if len(subagent_tool_use_ids) >= max_subtasks:
# Rate-limit Task (sub-agent) spawns per session
if tool_name == "Task":
# Block background task execution first — denied calls
# should not consume a subtask slot.
if tool_input.get("run_in_background"):
logger.info(f"[SDK] Blocked background Task, user={user_id}")
return cast(
SyncHookJSONOutput,
_deny(
"Background task execution is not supported. "
"Run tasks in the foreground instead "
"(remove the run_in_background parameter)."
),
)
if len(task_tool_use_ids) >= max_subtasks:
logger.warning(
f"[SDK] Sub-agent limit reached ({max_subtasks}), "
f"user={user_id}"
f"[SDK] Task limit reached ({max_subtasks}), user={user_id}"
)
return cast(
SyncHookJSONOutput,
_deny(
f"Maximum {max_subtasks} concurrent sub-agents. "
"Wait for running sub-agents to finish, "
f"Maximum {max_subtasks} concurrent sub-tasks. "
"Wait for running sub-tasks to finish, "
"or continue in the main conversation."
),
)
@@ -245,20 +208,20 @@ def create_security_hooks(
if result:
return cast(SyncHookJSONOutput, result)
# Reserve the sub-agent slot only after all validations pass
if tool_name in _SUBAGENT_TOOLS and tool_use_id is not None:
subagent_tool_use_ids.add(tool_use_id)
# Reserve the Task slot only after all validations pass
if tool_name == "Task" and tool_use_id is not None:
task_tool_use_ids.add(tool_use_id)
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
return cast(SyncHookJSONOutput, {})
def _release_subagent_slot(tool_name: str, tool_use_id: str | None) -> None:
"""Release a sub-agent concurrency slot if one was reserved."""
if tool_name in _SUBAGENT_TOOLS and tool_use_id in subagent_tool_use_ids:
subagent_tool_use_ids.discard(tool_use_id)
def _release_task_slot(tool_name: str, tool_use_id: str | None) -> None:
"""Release a Task concurrency slot if one was reserved."""
if tool_name == "Task" and tool_use_id in task_tool_use_ids:
task_tool_use_ids.discard(tool_use_id)
logger.info(
"[SDK] Sub-agent slot released, active=%d/%d, user=%s",
len(subagent_tool_use_ids),
"[SDK] Task slot released, active=%d/%d, user=%s",
len(task_tool_use_ids),
max_subtasks,
user_id,
)
@@ -278,14 +241,13 @@ def create_security_hooks(
_ = context
tool_name = cast(str, input_data.get("tool_name", ""))
_release_subagent_slot(tool_name, tool_use_id)
_release_task_slot(tool_name, tool_use_id)
is_builtin = not tool_name.startswith(MCP_TOOL_PREFIX)
safe_tool_use_id = _sanitize(str(tool_use_id or ""), max_len=12)
logger.info(
"[SDK] PostToolUse: %s (builtin=%s, tool_use_id=%s)",
tool_name,
is_builtin,
safe_tool_use_id,
(tool_use_id or "")[:12],
)
# Stash output for SDK built-in tools so the response adapter can
@@ -294,7 +256,7 @@ def create_security_hooks(
if is_builtin:
tool_response = input_data.get("tool_response")
if tool_response is not None:
resp_preview = _sanitize(str(tool_response), max_len=100)
resp_preview = str(tool_response)[:100]
logger.info(
"[SDK] Stashing builtin output for %s (%d chars): %s...",
tool_name,
@@ -318,17 +280,13 @@ def create_security_hooks(
"""Log failed tool executions for debugging."""
_ = context
tool_name = cast(str, input_data.get("tool_name", ""))
error = _sanitize(str(input_data.get("error", "Unknown error")))
safe_tool_use_id = _sanitize(str(tool_use_id or ""))
error = input_data.get("error", "Unknown error")
logger.warning(
"[SDK] Tool failed: %s, error=%s, user=%s, tool_use_id=%s",
tool_name,
error,
user_id,
safe_tool_use_id,
f"[SDK] Tool failed: {tool_name}, error={error}, "
f"user={user_id}, tool_use_id={tool_use_id}"
)
_release_subagent_slot(tool_name, tool_use_id)
_release_task_slot(tool_name, tool_use_id)
return cast(SyncHookJSONOutput, {})
@@ -343,14 +301,16 @@ def create_security_hooks(
This hook provides visibility into when compaction happens.
"""
_ = context, tool_use_id
trigger = _sanitize(str(input_data.get("trigger", "auto")), max_len=50)
trigger = input_data.get("trigger", "auto")
# Sanitize untrusted input: strip control chars for logging AND
# for the value passed downstream. read_compacted_entries()
# validates against _projects_base() as defence-in-depth, but
# sanitizing here prevents log injection and rejects obviously
# malformed paths early.
transcript_path = _sanitize(
str(input_data.get("transcript_path", "")), max_len=500
transcript_path = (
str(input_data.get("transcript_path", ""))
.replace("\n", "")
.replace("\r", "")
)
logger.info(
"[SDK] Context compaction triggered: %s, user=%s, transcript_path=%s",
@@ -362,44 +322,6 @@ def create_security_hooks(
on_compact(transcript_path)
return cast(SyncHookJSONOutput, {})
async def subagent_start_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Log when a sub-agent starts execution."""
_ = context, tool_use_id
agent_id = _sanitize(str(input_data.get("agent_id", "?")))
agent_type = _sanitize(str(input_data.get("agent_type", "?")))
logger.info(
"[SDK] SubagentStart: agent_id=%s, type=%s, user=%s",
agent_id,
agent_type,
user_id,
)
return cast(SyncHookJSONOutput, {})
async def subagent_stop_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Log when a sub-agent stops."""
_ = context, tool_use_id
agent_id = _sanitize(str(input_data.get("agent_id", "?")))
agent_type = _sanitize(str(input_data.get("agent_type", "?")))
transcript = _sanitize(
str(input_data.get("agent_transcript_path", "")), max_len=500
)
logger.info(
"[SDK] SubagentStop: agent_id=%s, type=%s, user=%s, transcript=%s",
agent_id,
agent_type,
user_id,
transcript,
)
return cast(SyncHookJSONOutput, {})
hooks: dict[str, Any] = {
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
@@ -407,8 +329,6 @@ def create_security_hooks(
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
],
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
"SubagentStart": [HookMatcher(matcher="*", hooks=[subagent_start_hook])],
"SubagentStop": [HookMatcher(matcher="*", hooks=[subagent_stop_hook])],
}
return hooks

View File

@@ -5,7 +5,6 @@ They validate that the security hooks correctly block unauthorized paths,
tool access, and dangerous input patterns.
"""
import logging
import os
import pytest
@@ -137,20 +136,8 @@ def test_read_tool_results_allowed():
_current_project_dir.reset(token)
def test_read_tool_outputs_allowed():
"""tool-outputs/ paths should be allowed, same as tool-results/."""
home = os.path.expanduser("~")
path = f"{home}/.claude/projects/-tmp-copilot-abc123/a1b2c3d4-e5f6-7890-abcd-ef1234567890/tool-outputs/12345.txt"
token = _current_project_dir.set("-tmp-copilot-abc123")
try:
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
assert result == {}
finally:
_current_project_dir.reset(token)
def test_read_claude_projects_settings_json_denied():
"""SDK-internal artifacts like settings.json are NOT accessible — only tool-results/tool-outputs is."""
"""SDK-internal artifacts like settings.json are NOT accessible — only tool-results/ is."""
home = os.path.expanduser("~")
path = f"{home}/.claude/projects/-tmp-copilot-abc123/settings.json"
token = _current_project_dir.set("-tmp-copilot-abc123")
@@ -246,15 +233,16 @@ def _hooks():
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_task_background_allowed(_hooks):
"""Task with run_in_background=true is allowed (SDK handles async lifecycle)."""
async def test_task_background_blocked(_hooks):
"""Task with run_in_background=true must be denied."""
pre, _, _ = _hooks
result = await pre(
{"tool_name": "Task", "tool_input": {"run_in_background": True, "prompt": "x"}},
tool_use_id="tu-bg-1",
tool_use_id=None,
context={},
)
assert not _is_denied(result)
assert _is_denied(result)
assert "foreground" in _reason(result).lower()
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@@ -368,303 +356,3 @@ async def test_task_slot_released_on_failure(_hooks):
context={},
)
assert not _is_denied(result)
# ---------------------------------------------------------------------------
# "Agent" tool name (SDK v2.x+ renamed "Task" → "Agent")
# ---------------------------------------------------------------------------
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_agent_background_allowed(_hooks):
"""Agent with run_in_background=true is allowed (SDK handles async lifecycle)."""
pre, _, _ = _hooks
result = await pre(
{
"tool_name": "Agent",
"tool_input": {"run_in_background": True, "prompt": "x"},
},
tool_use_id="tu-agent-bg-1",
context={},
)
assert not _is_denied(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_agent_foreground_allowed(_hooks):
"""Agent without run_in_background should be allowed."""
pre, _, _ = _hooks
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "do stuff"}},
tool_use_id="tu-agent-1",
context={},
)
assert not _is_denied(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_background_agent_counts_against_limit(_hooks):
"""Background agents still consume concurrency slots."""
pre, _, _ = _hooks
# Two background agents fill the limit
for i in range(2):
result = await pre(
{
"tool_name": "Agent",
"tool_input": {"run_in_background": True, "prompt": "bg"},
},
tool_use_id=f"tu-bglimit-{i}",
context={},
)
assert not _is_denied(result)
# Third (background or foreground) should be denied
result = await pre(
{
"tool_name": "Agent",
"tool_input": {"run_in_background": True, "prompt": "over"},
},
tool_use_id="tu-bglimit-2",
context={},
)
assert _is_denied(result)
assert "Maximum" in _reason(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_agent_limit_enforced(_hooks):
"""Agent spawns beyond max_subtasks should be denied."""
pre, _, _ = _hooks
# First two should pass
for i in range(2):
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
tool_use_id=f"tu-agent-limit-{i}",
context={},
)
assert not _is_denied(result)
# Third should be denied (limit=2)
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "over limit"}},
tool_use_id="tu-agent-limit-2",
context={},
)
assert _is_denied(result)
assert "Maximum" in _reason(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_agent_slot_released_on_completion(_hooks):
"""Completing an Agent should free a slot so new Agents can be spawned."""
pre, post, _ = _hooks
# Fill both slots
for i in range(2):
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
tool_use_id=f"tu-agent-comp-{i}",
context={},
)
assert not _is_denied(result)
# Third should be denied — at capacity
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
tool_use_id="tu-agent-comp-2",
context={},
)
assert _is_denied(result)
# Complete first agent — frees a slot
await post(
{"tool_name": "Agent", "tool_input": {}},
tool_use_id="tu-agent-comp-0",
context={},
)
# Now a new Agent should be allowed
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "after release"}},
tool_use_id="tu-agent-comp-3",
context={},
)
assert not _is_denied(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_agent_slot_released_on_failure(_hooks):
"""A failed Agent should also free its concurrency slot."""
pre, _, post_failure = _hooks
# Fill both slots
for i in range(2):
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
tool_use_id=f"tu-agent-fail-{i}",
context={},
)
assert not _is_denied(result)
# At capacity
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
tool_use_id="tu-agent-fail-2",
context={},
)
assert _is_denied(result)
# Fail first agent — should free a slot
await post_failure(
{"tool_name": "Agent", "tool_input": {}, "error": "something broke"},
tool_use_id="tu-agent-fail-0",
context={},
)
# New Agent should be allowed
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "after failure"}},
tool_use_id="tu-agent-fail-3",
context={},
)
assert not _is_denied(result)
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_mixed_task_agent_share_slots(_hooks):
"""Task and Agent share the same concurrency pool."""
pre, post, _ = _hooks
# Fill one slot with Task, one with Agent
result = await pre(
{"tool_name": "Task", "tool_input": {"prompt": "ok"}},
tool_use_id="tu-mix-task",
context={},
)
assert not _is_denied(result)
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "ok"}},
tool_use_id="tu-mix-agent",
context={},
)
assert not _is_denied(result)
# Third (either name) should be denied
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "over"}},
tool_use_id="tu-mix-over",
context={},
)
assert _is_denied(result)
# Release the Task slot
await post(
{"tool_name": "Task", "tool_input": {}},
tool_use_id="tu-mix-task",
context={},
)
# Now an Agent should be allowed
result = await pre(
{"tool_name": "Agent", "tool_input": {"prompt": "after task release"}},
tool_use_id="tu-mix-new",
context={},
)
assert not _is_denied(result)
# ---------------------------------------------------------------------------
# SubagentStart / SubagentStop hooks
# ---------------------------------------------------------------------------
@pytest.fixture()
def _subagent_hooks():
"""Create hooks and return (subagent_start, subagent_stop) handlers."""
hooks = create_security_hooks(user_id="u1", sdk_cwd=SDK_CWD, max_subtasks=2)
start = hooks["SubagentStart"][0].hooks[0]
stop = hooks["SubagentStop"][0].hooks[0]
return start, stop
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_subagent_start_hook_returns_empty(_subagent_hooks):
"""SubagentStart hook should return an empty dict (logging only)."""
start, _ = _subagent_hooks
result = await start(
{"agent_id": "sa-123", "agent_type": "research"},
tool_use_id=None,
context={},
)
assert result == {}
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_subagent_stop_hook_returns_empty(_subagent_hooks):
"""SubagentStop hook should return an empty dict (logging only)."""
_, stop = _subagent_hooks
result = await stop(
{
"agent_id": "sa-123",
"agent_type": "research",
"agent_transcript_path": "/tmp/transcript.txt",
},
tool_use_id=None,
context={},
)
assert result == {}
@pytest.mark.skipif(not _sdk_available(), reason="claude_agent_sdk not installed")
@pytest.mark.asyncio
async def test_subagent_hooks_sanitize_inputs(_subagent_hooks, caplog):
"""SubagentStart/Stop should sanitize control chars from inputs."""
start, stop = _subagent_hooks
# Inject control characters (C0, DEL, C1, BiDi overrides, zero-width)
# — hook should not raise AND logs must be clean
with caplog.at_level(logging.DEBUG, logger="backend.copilot.sdk.security_hooks"):
result = await start(
{
"agent_id": "sa\n-injected\r\x00\x7f",
"agent_type": "safe\x80_type\x9f\ttab",
},
tool_use_id=None,
context={},
)
assert result == {}
# Control chars must be stripped from the logged values
for record in caplog.records:
assert "\x00" not in record.message
assert "\r" not in record.message
assert "\n" not in record.message
assert "\x7f" not in record.message
assert "\x80" not in record.message
assert "\x9f" not in record.message
assert "safe_type" in caplog.text
caplog.clear()
with caplog.at_level(logging.DEBUG, logger="backend.copilot.sdk.security_hooks"):
result = await stop(
{
"agent_id": "sa\n-injected\x7f",
"agent_type": "type\r\x80\x9f",
"agent_transcript_path": "/tmp/\x00malicious\npath\u202a\u200b",
},
tool_use_id=None,
context={},
)
assert result == {}
for record in caplog.records:
assert "\x00" not in record.message
assert "\r" not in record.message
assert "\n" not in record.message
assert "\x7f" not in record.message
assert "\u202a" not in record.message
assert "\u200b" not in record.message
assert "/tmp/maliciouspath" in caplog.text

View File

@@ -1310,16 +1310,10 @@ async def _run_stream_attempt(
# AssistantMessage.error (not as a Python exception).
# Re-raise so the outer retry loop can compact the
# transcript and retry with reduced context.
# Check both error_text and error_preview: sdk_error
# being set confirms this is an error message (not user
# content), so checking content is safe. The actual
# error description (e.g. "Prompt is too long") may be
# in the content, not the error type field
# (e.g. error="invalid_request", content="Prompt is
# too long").
if _is_prompt_too_long(Exception(error_text)) or _is_prompt_too_long(
Exception(error_preview)
):
# Only check error_text (the error field), not the
# content preview — content may contain arbitrary text
# that false-positives the pattern match.
if _is_prompt_too_long(Exception(error_text)):
logger.warning(
"%s Prompt-too-long detected via AssistantMessage "
"error — raising for retry",
@@ -1420,16 +1414,13 @@ async def _run_stream_attempt(
ctx.log_prefix,
sdk_msg.result or "(no error message provided)",
)
# Check for prompt-too-long regardless of subtype — the
# SDK may return subtype="success" with result="Prompt is
# too long" when the CLI rejects the prompt before calling
# the API (cost_usd=0, no tokens consumed). If we only
# check the "error" subtype path, the stream appears to
# complete normally, the synthetic error text is stored
# in the transcript, and the session grows without bound.
if _is_prompt_too_long(RuntimeError(sdk_msg.result or "")):
raise RuntimeError("Prompt is too long")
# If the CLI itself rejected the prompt as too long
# (pre-API check, duration_api_ms=0), re-raise as an
# exception so the retry loop can trigger compaction.
# Without this, the ResultMessage is silently consumed
# and the retry/compaction mechanism is never invoked.
if _is_prompt_too_long(RuntimeError(sdk_msg.result or "")):
raise RuntimeError("Prompt is too long")
# Capture token usage from ResultMessage.
# Anthropic reports cached tokens separately:
@@ -1462,23 +1453,6 @@ async def _run_stream_attempt(
# Emit compaction end if SDK finished compacting.
# Sync TranscriptBuilder with the CLI's active context.
compact_result = await ctx.compaction.emit_end_if_ready(ctx.session)
if compact_result.events:
# Compaction events end with StreamFinishStep, which maps to
# Vercel AI SDK's "finish-step" — that clears activeTextParts.
# Close any open text block BEFORE the compaction events so
# the text-end arrives before finish-step, preventing
# "text-end for missing text part" errors on the frontend.
pre_close: list[StreamBaseResponse] = []
state.adapter._end_text_if_open(pre_close)
# Compaction events bypass the adapter, so sync step state
# when a StreamFinishStep is present — otherwise the adapter
# will skip StreamStartStep on the next AssistantMessage.
if any(
isinstance(ev, StreamFinishStep) for ev in compact_result.events
):
state.adapter.step_open = False
for r in pre_close:
yield r
for ev in compact_result.events:
yield ev
entries_replaced = False
@@ -1884,10 +1858,7 @@ async def stream_chat_completion_sdk(
)
# Fail fast when no API credentials are available at all.
# sdk_cwd routes the CLI's temp dir into the per-session workspace
# so sub-agent output files land inside sdk_cwd (see build_sdk_env).
sdk_env = build_sdk_env(session_id=session_id, user_id=user_id, sdk_cwd=sdk_cwd)
sdk_env = build_sdk_env(session_id=session_id, user_id=user_id)
if not config.api_key and not config.use_claude_code_subscription:
raise RuntimeError(
"No API key configured. Set OPEN_ROUTER_API_KEY, "

View File

@@ -38,7 +38,7 @@ from backend.copilot.tools import TOOL_REGISTRY
from backend.copilot.tools.base import BaseTool
from backend.util.truncate import truncate
from .e2b_file_tools import E2B_FILE_TOOL_NAMES, E2B_FILE_TOOLS, bridge_and_annotate
from .e2b_file_tools import E2B_FILE_TOOL_NAMES, E2B_FILE_TOOLS
if TYPE_CHECKING:
from e2b import AsyncSandbox
@@ -387,16 +387,7 @@ async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
selected = list(itertools.islice(f, offset, offset + limit))
# Cleanup happens in _cleanup_sdk_tool_results after session ends;
# don't delete here — the SDK may read in multiple chunks.
#
# When E2B is active, also copy the file into the sandbox so
# bash_exec can process it (the model often uses Read then bash).
text = "".join(selected)
sandbox = _current_sandbox.get(None)
if sandbox is not None:
annotation = await bridge_and_annotate(sandbox, resolved, offset, limit)
if annotation:
text += annotation
return _mcp_ok(text)
return _mcp_ok("".join(selected))
except FileNotFoundError:
return _mcp_err(f"File not found: {file_path}")
except Exception as e:
@@ -590,14 +581,13 @@ def create_copilot_mcp_server(*, use_e2b: bool = False):
# Security hooks validate that file paths stay within sdk_cwd.
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
# which provides kernel-level network isolation via unshare --net.
# Task/Agent allows spawning sub-agents (rate-limited by security hooks).
# The CLI renamed "Task" → "Agent" in v2.x; both are listed for compat.
# Task allows spawning sub-agents (rate-limited by security hooks).
# WebSearch uses Brave Search via Anthropic's API — safe, no SSRF risk.
# TodoWrite manages the task checklist shown in the UI — no security concern.
# In E2B mode, all five are disabled — MCP equivalents provide direct sandbox
# access. read_file also handles local tool-results and ephemeral reads.
_SDK_BUILTIN_FILE_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep"]
_SDK_BUILTIN_ALWAYS = ["Task", "Agent", "WebSearch", "TodoWrite"]
_SDK_BUILTIN_ALWAYS = ["Task", "WebSearch", "TodoWrite"]
_SDK_BUILTIN_TOOLS = [*_SDK_BUILTIN_FILE_TOOLS, *_SDK_BUILTIN_ALWAYS]
# SDK built-in tools that must be explicitly blocked.

View File

@@ -619,95 +619,3 @@ class TestSDKDisallowedTools:
def test_webfetch_tool_is_disallowed(self):
"""WebFetch is disallowed due to SSRF risk."""
assert "WebFetch" in SDK_DISALLOWED_TOOLS
# ---------------------------------------------------------------------------
# _read_file_handler — bridge_and_annotate integration
# ---------------------------------------------------------------------------
class TestReadFileHandlerBridge:
    """Verify that _read_file_handler calls bridge_and_annotate when a sandbox is active."""

    @pytest.fixture(autouse=True)
    def _init_context(self):
        # Autouse fixture: every test in this class runs with a minimal
        # execution context so _read_file_handler has an sdk_cwd to check
        # paths against. No sandbox is set here — tests opt in via the
        # _current_sandbox ContextVar themselves.
        set_execution_context(
            user_id="test",
            session=None,  # type: ignore[arg-type]
            sandbox=None,
            sdk_cwd="/tmp/copilot-bridge-test",
        )

    @pytest.mark.asyncio
    async def test_bridge_called_when_sandbox_active(self, tmp_path, monkeypatch):
        """When a sandbox is set, bridge_and_annotate is called and its annotation appended."""
        from backend.copilot.context import _current_sandbox
        from .tool_adapter import _read_file_handler

        test_file = tmp_path / "tool-results" / "data.json"
        test_file.parent.mkdir(parents=True, exist_ok=True)
        test_file.write_text('{"ok": true}\n')
        # Bypass the local-path allow-list so the tmp_path file is readable.
        monkeypatch.setattr(
            "backend.copilot.sdk.tool_adapter.is_allowed_local_path",
            lambda path, cwd: True,
        )
        fake_sandbox = object()
        # Activate the fake sandbox via its ContextVar; reset in finally so
        # it cannot leak into other tests sharing the event loop.
        token = _current_sandbox.set(fake_sandbox)  # type: ignore[arg-type]
        try:
            bridge_calls: list[tuple] = []

            async def fake_bridge_and_annotate(sandbox, file_path, offset, limit):
                # Record the call args so we can assert the sandbox identity.
                bridge_calls.append((sandbox, file_path, offset, limit))
                return "\n[Sandbox copy available at /tmp/abc-data.json]"

            monkeypatch.setattr(
                "backend.copilot.sdk.tool_adapter.bridge_and_annotate",
                fake_bridge_and_annotate,
            )
            result = await _read_file_handler(
                {"file_path": str(test_file), "offset": 0, "limit": 2000}
            )
            assert result["isError"] is False
            assert len(bridge_calls) == 1
            # The active sandbox instance itself must be forwarded.
            assert bridge_calls[0][0] is fake_sandbox
            # The bridge's annotation must be appended to the returned text.
            assert "/tmp/abc-data.json" in result["content"][0]["text"]
        finally:
            _current_sandbox.reset(token)

    @pytest.mark.asyncio
    async def test_bridge_not_called_without_sandbox(self, tmp_path, monkeypatch):
        """When no sandbox is set, bridge_and_annotate is not called."""
        from .tool_adapter import _read_file_handler

        test_file = tmp_path / "tool-results" / "data.json"
        test_file.parent.mkdir(parents=True, exist_ok=True)
        test_file.write_text('{"ok": true}\n')
        monkeypatch.setattr(
            "backend.copilot.sdk.tool_adapter.is_allowed_local_path",
            lambda path, cwd: True,
        )
        bridge_calls: list[tuple] = []

        async def fake_bridge_and_annotate(sandbox, file_path, offset, limit):
            bridge_calls.append((sandbox, file_path, offset, limit))
            return "\n[Sandbox copy available at /tmp/abc-data.json]"

        monkeypatch.setattr(
            "backend.copilot.sdk.tool_adapter.bridge_and_annotate",
            fake_bridge_and_annotate,
        )
        result = await _read_file_handler(
            {"file_path": str(test_file), "offset": 0, "limit": 2000}
        )
        assert result["isError"] is False
        # No sandbox in context → the bridge is never invoked and the text
        # contains no sandbox annotation.
        assert len(bridge_calls) == 0
        assert "Sandbox copy" not in result["content"][0]["text"]

View File

@@ -10,7 +10,6 @@ from backend.copilot.tracking import track_tool_called
from .add_understanding import AddUnderstandingTool
from .agent_browser import BrowserActTool, BrowserNavigateTool, BrowserScreenshotTool
from .agent_output import AgentOutputTool
from .ask_question import AskQuestionTool
from .base import BaseTool
from .bash_exec import BashExecTool
from .connect_integration import ConnectIntegrationTool
@@ -56,7 +55,6 @@ logger = logging.getLogger(__name__)
# Single source of truth for all tools
TOOL_REGISTRY: dict[str, BaseTool] = {
"add_understanding": AddUnderstandingTool(),
"ask_question": AskQuestionTool(),
"create_agent": CreateAgentTool(),
"customize_agent": CustomizeAgentTool(),
"edit_agent": EditAgentTool(),

View File

@@ -2,7 +2,6 @@
from __future__ import annotations
import asyncio
import logging
from typing import TYPE_CHECKING, Literal
@@ -10,7 +9,7 @@ if TYPE_CHECKING:
from backend.api.features.library.model import LibraryAgent
from backend.api.features.store.model import StoreAgent, StoreAgentDetails
from backend.data.db_accessors import graph_db, library_db, store_db
from backend.data.db_accessors import library_db, store_db
from backend.util.exceptions import DatabaseError, NotFoundError
from .models import (
@@ -35,13 +34,12 @@ async def search_agents(
source: SearchSource,
session_id: str | None = None,
user_id: str | None = None,
include_graph: bool = False,
) -> ToolResponseBase:
"""Search for agents in marketplace or user library."""
if source == "marketplace":
return await _search_marketplace(query, session_id)
else:
return await _search_library(query, session_id, user_id, include_graph)
return await _search_library(query, session_id, user_id)
async def _search_marketplace(query: str, session_id: str | None) -> ToolResponseBase:
@@ -107,10 +105,7 @@ async def _search_marketplace(query: str, session_id: str | None) -> ToolRespons
async def _search_library(
query: str,
session_id: str | None,
user_id: str | None,
include_graph: bool = False,
query: str, session_id: str | None, user_id: str | None
) -> ToolResponseBase:
"""Search user's library agents, with direct UUID lookup fallback."""
if not user_id:
@@ -154,10 +149,6 @@ async def _search_library(
session_id=session_id,
)
truncation_notice: str | None = None
if include_graph and agents:
truncation_notice = await _enrich_agents_with_graph(agents, user_id)
if not agents:
if not query:
return NoResultsResponse(
@@ -191,17 +182,13 @@ async def _search_library(
else:
title = f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} in your library for '{query}'"
message = (
"Found agents in the user's library. You can provide a link to view "
"an agent at: /library/agents/{agent_id}. Use agent_output to get "
"execution results, or run_agent to execute. Let the user know we can "
"create a custom agent for them based on their needs."
)
if truncation_notice:
message = f"{message}\n\nNote: {truncation_notice}"
return AgentsFoundResponse(
message=message,
message=(
"Found agents in the user's library. You can provide a link to view "
"an agent at: /library/agents/{agent_id}. Use agent_output to get "
"execution results, or run_agent to execute. Let the user know we can "
"create a custom agent for them based on their needs."
),
title=title,
agents=agents,
count=len(agents),
@@ -209,81 +196,6 @@ async def _search_library(
)
_MAX_GRAPH_FETCHES = 10
_GRAPH_FETCH_TIMEOUT = 15 # seconds
async def _enrich_agents_with_graph(
    agents: list[AgentInfo], user_id: str
) -> str | None:
    """Attach the full Graph (nodes + links) to each agent, mutating in place.

    At most ``_MAX_GRAPH_FETCHES`` of the agents that carry a ``graph_id``
    are enriched; their graphs are fetched concurrently under a shared
    ``_GRAPH_FETCH_TIMEOUT``. Graphs are requested with ``for_export=True``
    so credentials, API keys, and other secrets in ``input_default`` are
    stripped before the data reaches the LLM context.

    Returns:
        A truncation notice string when some eligible agents were skipped,
        otherwise ``None``.
    """
    eligible = [agent for agent in agents if agent.graph_id]
    to_fetch = eligible[:_MAX_GRAPH_FETCHES]
    if not to_fetch:
        return None

    db = graph_db()

    async def _load(agent: AgentInfo) -> None:
        # Each fetch is isolated: a single failure logs a warning instead of
        # poisoning the whole batch.
        graph_id = agent.graph_id
        if not graph_id:
            return
        try:
            graph = await db.get_graph(
                graph_id,
                version=agent.graph_version,
                user_id=user_id,
                for_export=True,
            )
            if graph is None:
                logger.warning("Graph not found for agent %s", graph_id)
            agent.graph = graph
        except Exception as e:
            logger.warning("Failed to fetch graph for agent %s: %s", graph_id, e)

    try:
        # One overall deadline for the whole concurrent batch; agents whose
        # fetch already completed keep their graph even if the batch times out.
        await asyncio.wait_for(
            asyncio.gather(*(_load(agent) for agent in to_fetch)),
            timeout=_GRAPH_FETCH_TIMEOUT,
        )
    except asyncio.TimeoutError:
        logger.warning(
            "include_graph: timed out after %ds fetching graphs", _GRAPH_FETCH_TIMEOUT
        )

    overflow = len(eligible) - len(to_fetch)
    if overflow > 0:
        logger.warning(
            "include_graph: fetched graphs for %d/%d agents "
            "(_MAX_GRAPH_FETCHES=%d, %d skipped)",
            len(to_fetch),
            len(eligible),
            _MAX_GRAPH_FETCHES,
            overflow,
        )
        return (
            f"Graph data included for {len(to_fetch)} of "
            f"{len(eligible)} eligible agents (limit: {_MAX_GRAPH_FETCHES}). "
            f"To fetch graphs for remaining agents, narrow your search to a "
            f"specific agent by UUID."
        )
    return None
def _marketplace_agent_to_info(agent: StoreAgent | StoreAgentDetails) -> AgentInfo:
"""Convert a marketplace agent (StoreAgent or StoreAgentDetails) to an AgentInfo."""
return AgentInfo(

View File

@@ -1,12 +1,11 @@
"""Tests for agent search direct lookup functionality."""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from .agent_search import _enrich_agents_with_graph, search_agents
from .models import AgentInfo, AgentsFoundResponse, NoResultsResponse
from .agent_search import search_agents
from .models import AgentsFoundResponse, NoResultsResponse
_TEST_USER_ID = "test-user-agent-search"
@@ -134,10 +133,10 @@ class TestMarketplaceSlugLookup:
class TestLibraryUUIDLookup:
"""Tests for UUID direct lookup in library search."""
@staticmethod
def _make_mock_library_agent(
agent_id: str = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
) -> MagicMock:
@pytest.mark.asyncio(loop_scope="session")
async def test_uuid_lookup_found_by_graph_id(self):
"""UUID query matching a graph_id returns the agent directly."""
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = MagicMock()
mock_agent.id = "lib-agent-id"
mock_agent.name = "My Library Agent"
@@ -151,13 +150,6 @@ class TestLibraryUUIDLookup:
mock_agent.graph_version = 1
mock_agent.input_schema = {}
mock_agent.output_schema = {}
return mock_agent
@pytest.mark.asyncio(loop_scope="session")
async def test_uuid_lookup_found_by_graph_id(self):
"""UUID query matching a graph_id returns the agent directly."""
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = self._make_mock_library_agent(agent_id)
mock_lib_db = MagicMock()
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
@@ -176,427 +168,3 @@ class TestLibraryUUIDLookup:
assert isinstance(response, AgentsFoundResponse)
assert response.count == 1
assert response.agents[0].name == "My Library Agent"
@pytest.mark.asyncio(loop_scope="session")
async def test_include_graph_fetches_graph(self):
"""include_graph=True attaches BaseGraph to agent results."""
from backend.data.graph import BaseGraph
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = self._make_mock_library_agent(agent_id)
mock_lib_db = MagicMock()
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
fake_graph = BaseGraph(id=agent_id, name="My Library Agent", description="test")
mock_graph_db = MagicMock()
mock_graph_db.get_graph = AsyncMock(return_value=fake_graph)
with (
patch(
"backend.copilot.tools.agent_search.library_db",
return_value=mock_lib_db,
),
patch(
"backend.copilot.tools.agent_search.graph_db",
return_value=mock_graph_db,
),
):
response = await search_agents(
query=agent_id,
source="library",
session_id="s",
user_id=_TEST_USER_ID,
include_graph=True,
)
assert isinstance(response, AgentsFoundResponse)
assert response.agents[0].graph is not None
assert response.agents[0].graph.id == agent_id
mock_graph_db.get_graph.assert_awaited_once_with(
agent_id,
version=1,
user_id=_TEST_USER_ID,
for_export=True,
)
@pytest.mark.asyncio(loop_scope="session")
async def test_include_graph_false_skips_fetch(self):
"""include_graph=False (default) does not fetch graph data."""
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = self._make_mock_library_agent(agent_id)
mock_lib_db = MagicMock()
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
mock_graph_db = MagicMock()
mock_graph_db.get_graph = AsyncMock()
with (
patch(
"backend.copilot.tools.agent_search.library_db",
return_value=mock_lib_db,
),
patch(
"backend.copilot.tools.agent_search.graph_db",
return_value=mock_graph_db,
),
):
response = await search_agents(
query=agent_id,
source="library",
session_id="s",
user_id=_TEST_USER_ID,
include_graph=False,
)
assert isinstance(response, AgentsFoundResponse)
assert response.agents[0].graph is None
mock_graph_db.get_graph.assert_not_awaited()
@pytest.mark.asyncio(loop_scope="session")
async def test_include_graph_handles_fetch_failure(self):
"""include_graph=True still returns agents when graph fetch fails."""
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = self._make_mock_library_agent(agent_id)
mock_lib_db = MagicMock()
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
mock_graph_db = MagicMock()
mock_graph_db.get_graph = AsyncMock(side_effect=Exception("DB down"))
with (
patch(
"backend.copilot.tools.agent_search.library_db",
return_value=mock_lib_db,
),
patch(
"backend.copilot.tools.agent_search.graph_db",
return_value=mock_graph_db,
),
):
response = await search_agents(
query=agent_id,
source="library",
session_id="s",
user_id=_TEST_USER_ID,
include_graph=True,
)
assert isinstance(response, AgentsFoundResponse)
assert response.agents[0].graph is None
@pytest.mark.asyncio(loop_scope="session")
async def test_include_graph_handles_none_return(self):
"""include_graph=True handles get_graph returning None."""
agent_id = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d"
mock_agent = self._make_mock_library_agent(agent_id)
mock_lib_db = MagicMock()
mock_lib_db.get_library_agent_by_graph_id = AsyncMock(return_value=mock_agent)
mock_graph_db = MagicMock()
mock_graph_db.get_graph = AsyncMock(return_value=None)
with (
patch(
"backend.copilot.tools.agent_search.library_db",
return_value=mock_lib_db,
),
patch(
"backend.copilot.tools.agent_search.graph_db",
return_value=mock_graph_db,
),
):
response = await search_agents(
query=agent_id,
source="library",
session_id="s",
user_id=_TEST_USER_ID,
include_graph=True,
)
assert isinstance(response, AgentsFoundResponse)
assert response.agents[0].graph is None
class TestEnrichAgentsWithGraph:
    """Tests for _enrich_agents_with_graph edge cases."""

    @staticmethod
    def _make_mock_library_agent(
        agent_id: str = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
        graph_id: str | None = "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
    ) -> MagicMock:
        # Build a MagicMock carrying every attribute the library-agent →
        # AgentInfo conversion reads. graph_id=None simulates an agent that
        # is not eligible for graph enrichment.
        mock_agent = MagicMock()
        mock_agent.id = f"lib-{agent_id[:8]}"
        mock_agent.name = f"Agent {agent_id[:8]}"
        mock_agent.description = "A library agent"
        mock_agent.creator_name = "testuser"
        mock_agent.status.value = "HEALTHY"
        mock_agent.can_access_graph = True
        mock_agent.has_external_trigger = False
        mock_agent.new_output = False
        mock_agent.graph_id = graph_id
        mock_agent.graph_version = 1
        mock_agent.input_schema = {}
        mock_agent.output_schema = {}
        return mock_agent

    @pytest.mark.asyncio(loop_scope="session")
    async def test_truncation_surfaces_in_response(self):
        """When >_MAX_GRAPH_FETCHES agents have graphs, the response contains a truncation notice."""
        from backend.copilot.tools.agent_search import _MAX_GRAPH_FETCHES
        from backend.data.graph import BaseGraph

        # More eligible agents than the fetch cap, to force truncation.
        agent_count = _MAX_GRAPH_FETCHES + 5
        mock_agents = []
        for i in range(agent_count):
            uid = f"a1b2c3d4-e5f6-4a7b-8c9d-{i:012d}"
            mock_agents.append(self._make_mock_library_agent(uid, uid))
        mock_lib_db = MagicMock()
        mock_search_results = MagicMock()
        mock_search_results.agents = mock_agents
        mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
        fake_graph = BaseGraph(id="x", name="g", description="d")
        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
        with (
            patch(
                "backend.copilot.tools.agent_search.library_db",
                return_value=mock_lib_db,
            ),
            patch(
                "backend.copilot.tools.agent_search.graph_db",
                return_value=mock_gdb,
            ),
        ):
            response = await search_agents(
                query="",
                source="library",
                session_id="s",
                user_id=_TEST_USER_ID,
                include_graph=True,
            )
        assert isinstance(response, AgentsFoundResponse)
        # Exactly the capped number of fetches happened...
        assert mock_gdb.get_graph.await_count == _MAX_GRAPH_FETCHES
        enriched = [a for a in response.agents if a.graph is not None]
        assert len(enriched) == _MAX_GRAPH_FETCHES
        # ...and the truncation notice is surfaced in the response message.
        assert "Graph data included for" in response.message
        assert str(_MAX_GRAPH_FETCHES) in response.message

    @pytest.mark.asyncio(loop_scope="session")
    async def test_mixed_graph_id_presence(self):
        """Agents without graph_id are skipped during enrichment."""
        from backend.data.graph import BaseGraph

        agent_with = self._make_mock_library_agent(
            "aaaa0000-0000-0000-0000-000000000001",
            "aaaa0000-0000-0000-0000-000000000001",
        )
        agent_without = self._make_mock_library_agent(
            "bbbb0000-0000-0000-0000-000000000002",
            graph_id=None,
        )
        mock_lib_db = MagicMock()
        mock_search_results = MagicMock()
        mock_search_results.agents = [agent_with, agent_without]
        mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
        fake_graph = BaseGraph(
            id="aaaa0000-0000-0000-0000-000000000001", name="g", description="d"
        )
        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
        with (
            patch(
                "backend.copilot.tools.agent_search.library_db",
                return_value=mock_lib_db,
            ),
            patch(
                "backend.copilot.tools.agent_search.graph_db",
                return_value=mock_gdb,
            ),
        ):
            response = await search_agents(
                query="",
                source="library",
                session_id="s",
                user_id=_TEST_USER_ID,
                include_graph=True,
            )
        assert isinstance(response, AgentsFoundResponse)
        assert len(response.agents) == 2
        assert response.agents[0].graph is not None
        assert response.agents[1].graph is None
        # Only the agent that carries a graph_id triggers a fetch.
        mock_gdb.get_graph.assert_awaited_once()

    @pytest.mark.asyncio(loop_scope="session")
    async def test_partial_failure_across_multiple_agents(self):
        """When some graph fetches fail, successful ones still have graphs attached."""
        from backend.data.graph import BaseGraph

        id_ok = "aaaa0000-0000-0000-0000-000000000001"
        id_fail = "bbbb0000-0000-0000-0000-000000000002"
        agent_ok = self._make_mock_library_agent(id_ok, id_ok)
        agent_fail = self._make_mock_library_agent(id_fail, id_fail)
        mock_lib_db = MagicMock()
        mock_search_results = MagicMock()
        mock_search_results.agents = [agent_ok, agent_fail]
        mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
        fake_graph = BaseGraph(id=id_ok, name="g", description="d")

        # Fail only for the second agent to exercise per-agent isolation.
        async def _side_effect(graph_id, **kwargs):
            if graph_id == id_fail:
                raise Exception("DB error")
            return fake_graph

        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(side_effect=_side_effect)
        with (
            patch(
                "backend.copilot.tools.agent_search.library_db",
                return_value=mock_lib_db,
            ),
            patch(
                "backend.copilot.tools.agent_search.graph_db",
                return_value=mock_gdb,
            ),
        ):
            response = await search_agents(
                query="",
                source="library",
                session_id="s",
                user_id=_TEST_USER_ID,
                include_graph=True,
            )
        assert isinstance(response, AgentsFoundResponse)
        assert response.agents[0].graph is not None
        assert response.agents[0].graph.id == id_ok
        assert response.agents[1].graph is None

    @pytest.mark.asyncio(loop_scope="session")
    async def test_keyword_search_with_include_graph(self):
        """include_graph works via keyword search (non-UUID path)."""
        from backend.data.graph import BaseGraph

        agent_id = "cccc0000-0000-0000-0000-000000000003"
        mock_agent = self._make_mock_library_agent(agent_id, agent_id)
        mock_lib_db = MagicMock()
        mock_search_results = MagicMock()
        mock_search_results.agents = [mock_agent]
        mock_lib_db.list_library_agents = AsyncMock(return_value=mock_search_results)
        fake_graph = BaseGraph(id=agent_id, name="g", description="d")
        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
        with (
            patch(
                "backend.copilot.tools.agent_search.library_db",
                return_value=mock_lib_db,
            ),
            patch(
                "backend.copilot.tools.agent_search.graph_db",
                return_value=mock_gdb,
            ),
        ):
            # Non-UUID query ("email") goes through keyword search rather
            # than the direct UUID lookup path.
            response = await search_agents(
                query="email",
                source="library",
                session_id="s",
                user_id=_TEST_USER_ID,
                include_graph=True,
            )
        assert isinstance(response, AgentsFoundResponse)
        assert response.agents[0].graph is not None
        assert response.agents[0].graph.id == agent_id
        mock_gdb.get_graph.assert_awaited_once()

    @pytest.mark.asyncio(loop_scope="session")
    async def test_timeout_preserves_successful_fetches(self):
        """On timeout, agents that already fetched their graph keep the result."""
        fast_agent = AgentInfo(
            id="a1",
            name="Fast",
            description="d",
            source="library",
            graph_id="fast-graph",
        )
        slow_agent = AgentInfo(
            id="a2",
            name="Slow",
            description="d",
            source="library",
            graph_id="slow-graph",
        )
        fake_graph = MagicMock()
        fake_graph.id = "graph-1"

        # "fast-graph" resolves immediately; "slow-graph" hangs far past the
        # patched 0.1s timeout so the batch deadline fires.
        async def mock_get_graph(
            graph_id, *, version=None, user_id=None, for_export=False
        ):
            if graph_id == "fast-graph":
                return fake_graph
            await asyncio.sleep(999)
            return MagicMock()

        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(side_effect=mock_get_graph)
        with (
            patch("backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb),
            patch("backend.copilot.tools.agent_search._GRAPH_FETCH_TIMEOUT", 0.1),
        ):
            await _enrich_agents_with_graph([fast_agent, slow_agent], _TEST_USER_ID)
        assert fast_agent.graph is fake_graph
        assert slow_agent.graph is None

    @pytest.mark.asyncio(loop_scope="session")
    async def test_enrich_success(self):
        """All agents get their graphs when no timeout occurs."""
        agent = AgentInfo(
            id="a1", name="Test", description="d", source="library", graph_id="g1"
        )
        fake_graph = MagicMock()
        fake_graph.id = "graph-1"
        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock(return_value=fake_graph)
        with patch(
            "backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb
        ):
            result = await _enrich_agents_with_graph([agent], _TEST_USER_ID)
        assert agent.graph is fake_graph
        # No truncation → no notice string returned.
        assert result is None

    @pytest.mark.asyncio(loop_scope="session")
    async def test_enrich_skips_agents_without_graph_id(self):
        """Agents without graph_id are not fetched."""
        agent_no_id = AgentInfo(
            id="a1", name="Test", description="d", source="library", graph_id=None
        )
        mock_gdb = MagicMock()
        mock_gdb.get_graph = AsyncMock()
        with patch(
            "backend.copilot.tools.agent_search.graph_db", return_value=mock_gdb
        ):
            result = await _enrich_agents_with_graph([agent_no_id], _TEST_USER_ID)
        mock_gdb.get_graph.assert_not_called()
        assert result is None

View File

@@ -1,93 +0,0 @@
"""AskQuestionTool - Ask the user a clarifying question before proceeding."""
from typing import Any
from backend.copilot.model import ChatSession
from .base import BaseTool
from .models import ClarificationNeededResponse, ClarifyingQuestion, ToolResponseBase
class AskQuestionTool(BaseTool):
    """Pose a clarifying question to the user and pause for their answer.

    Intended for ambiguous requests: ground the question in real platform
    options first (via find_block or other discovery tools), then call this
    tool with a concrete question that lists those options.
    """

    @property
    def name(self) -> str:
        return "ask_question"

    @property
    def description(self) -> str:
        return (
            "Ask the user a clarifying question. Use when the request is "
            "ambiguous and you need to confirm intent, choose between options, "
            "or gather missing details before proceeding."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        # JSON-schema parameter spec exposed to the LLM; only "question"
        # is mandatory.
        return {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": (
                        "The concrete question to ask the user. Should list "
                        "real options when applicable."
                    ),
                },
                "options": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": (
                        "Options for the user to choose from "
                        "(e.g. ['Email', 'Slack', 'Google Docs'])."
                    ),
                },
                "keyword": {
                    "type": "string",
                    "description": "Short label identifying what the question is about.",
                },
            },
            "required": ["question"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        del user_id  # unused; required by BaseTool contract

        raw_question = kwargs.get("question")
        if not isinstance(raw_question, str) or not raw_question.strip():
            raise ValueError("ask_question requires a non-empty 'question' string")
        question = raw_question.strip()

        # The LLM may send "options" as something other than a list; in that
        # case treat it as absent rather than crashing. Falsy entries are
        # dropped, the rest coerced to str.
        option_input = kwargs.get("options", [])
        options: list[str] = (
            [str(item) for item in option_input if item]
            if isinstance(option_input, list)
            else []
        )

        keyword_input = kwargs.get("keyword", "")
        keyword: str = str(keyword_input) if keyword_input else ""

        return ClarificationNeededResponse(
            message=question,
            session_id=session.session_id if session else None,
            questions=[
                ClarifyingQuestion(
                    question=question,
                    keyword=keyword,
                    example=", ".join(options) if options else None,
                )
            ],
        )

View File

@@ -1,99 +0,0 @@
"""Tests for AskQuestionTool."""
import pytest
from backend.copilot.model import ChatSession
from backend.copilot.tools.ask_question import AskQuestionTool
from backend.copilot.tools.models import ClarificationNeededResponse
@pytest.fixture()
def tool() -> AskQuestionTool:
return AskQuestionTool()
@pytest.fixture()
def session() -> ChatSession:
return ChatSession.new(user_id="test-user", dry_run=False)
@pytest.mark.asyncio
async def test_execute_with_options(tool: AskQuestionTool, session: ChatSession):
result = await tool._execute(
user_id=None,
session=session,
question="Which channel?",
options=["Email", "Slack", "Google Docs"],
keyword="channel",
)
assert isinstance(result, ClarificationNeededResponse)
assert result.message == "Which channel?"
assert result.session_id == session.session_id
assert len(result.questions) == 1
q = result.questions[0]
assert q.question == "Which channel?"
assert q.keyword == "channel"
assert q.example == "Email, Slack, Google Docs"
@pytest.mark.asyncio
async def test_execute_without_options(tool: AskQuestionTool, session: ChatSession):
result = await tool._execute(
user_id=None,
session=session,
question="What format do you want?",
)
assert isinstance(result, ClarificationNeededResponse)
assert result.message == "What format do you want?"
assert len(result.questions) == 1
q = result.questions[0]
assert q.question == "What format do you want?"
assert q.keyword == ""
assert q.example is None
@pytest.mark.asyncio
async def test_execute_with_keyword_only(tool: AskQuestionTool, session: ChatSession):
result = await tool._execute(
user_id=None,
session=session,
question="How often should it run?",
keyword="trigger",
)
assert isinstance(result, ClarificationNeededResponse)
q = result.questions[0]
assert q.keyword == "trigger"
assert q.example is None
@pytest.mark.asyncio
async def test_execute_rejects_empty_question(
tool: AskQuestionTool, session: ChatSession
):
with pytest.raises(ValueError, match="non-empty"):
await tool._execute(user_id=None, session=session, question="")
with pytest.raises(ValueError, match="non-empty"):
await tool._execute(user_id=None, session=session, question=" ")
@pytest.mark.asyncio
async def test_execute_coerces_invalid_options(
tool: AskQuestionTool, session: ChatSession
):
"""LLM may send options as a string instead of a list; should not crash."""
result = await tool._execute(
user_id=None,
session=session,
question="Pick one",
options="not-a-list", # type: ignore[arg-type]
)
assert isinstance(result, ClarificationNeededResponse)
q = result.questions[0]
assert q.example is None

View File

@@ -91,16 +91,10 @@ async def _persist_and_summarize(
f"\nFull output ({total:,} chars) saved to workspace. "
f"Use read_workspace_file("
f'path="{file_path}", offset=<char_offset>, length=50000) '
f"to read any section. "
f"To process the file in the sandbox/working dir, use "
f"read_workspace_file("
f'path="{file_path}", save_to_path="<working_dir>/{tool_call_id}.json") '
f"first, then use bash_exec to work with the local copy."
f"to read any section."
)
# Use workspace:// prefix so the model doesn't confuse the workspace path
# with a local filesystem path (e.g. ~/.claude/projects/.../tool-outputs/).
return (
f'<tool-output-truncated total_chars={total} workspace_path="{file_path}">\n'
f'<tool-output-truncated total_chars={total} path="{file_path}">\n'
f"{preview}\n"
f"{retrieval}\n"
f"</tool-output-truncated>"

View File

@@ -67,7 +67,7 @@ class TestPersistAndSummarize:
assert "<tool-output-truncated" in result
assert "</tool-output-truncated>" in result
assert "total_chars=200000" in result
assert 'workspace_path="tool-outputs/tc-123.json"' in result
assert 'path="tool-outputs/tc-123.json"' in result
assert "read_workspace_file" in result
# Middle-out sentinel from truncate()
assert "omitted" in result

View File

@@ -22,10 +22,7 @@ from e2b import AsyncSandbox
from e2b.exceptions import TimeoutException
from backend.copilot.context import E2B_WORKDIR, get_current_sandbox
from backend.copilot.integration_creds import (
get_github_user_git_identity,
get_integration_env_vars,
)
from backend.copilot.integration_creds import get_integration_env_vars
from backend.copilot.model import ChatSession
from .base import BaseTool
@@ -162,12 +159,6 @@ class BashExecTool(BaseTool):
secret_values = [v for v in integration_env.values() if v]
envs.update(integration_env)
# Set git author/committer identity from the user's GitHub profile
# so commits made in the sandbox are attributed correctly.
git_identity = await get_github_user_git_identity(user_id)
if git_identity:
envs.update(git_identity)
try:
result = await sandbox.commands.run(
f"bash -c {shlex.quote(command)}",

View File

@@ -38,10 +38,7 @@ class TestBashExecE2BTokenInjection:
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value=env_vars),
) as mock_get_env, patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
):
) as mock_get_env:
result = await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
@@ -56,66 +53,6 @@ class TestBashExecE2BTokenInjection:
assert call_kwargs["envs"]["GITHUB_TOKEN"] == "gh-secret"
assert isinstance(result, BashExecResponse)
@pytest.mark.asyncio(loop_scope="session")
async def test_git_identity_set_from_github_profile(self):
"""When user has a connected GitHub account, git env vars are set from their profile."""
tool = _make_tool()
session = make_session(user_id=_USER)
sandbox = _make_sandbox(stdout="ok")
identity = {
"GIT_AUTHOR_NAME": "Test User",
"GIT_AUTHOR_EMAIL": "test@example.com",
"GIT_COMMITTER_NAME": "Test User",
"GIT_COMMITTER_EMAIL": "test@example.com",
}
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={}),
), patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=identity),
):
await tool._execute_on_e2b(
sandbox=sandbox,
command="git commit -m test",
timeout=10,
session_id=session.session_id,
user_id=_USER,
)
call_kwargs = sandbox.commands.run.call_args[1]
assert call_kwargs["envs"]["GIT_AUTHOR_NAME"] == "Test User"
assert call_kwargs["envs"]["GIT_AUTHOR_EMAIL"] == "test@example.com"
assert call_kwargs["envs"]["GIT_COMMITTER_NAME"] == "Test User"
assert call_kwargs["envs"]["GIT_COMMITTER_EMAIL"] == "test@example.com"
@pytest.mark.asyncio(loop_scope="session")
async def test_no_git_identity_when_github_not_connected(self):
"""When user has no GitHub account, git identity env vars are absent."""
tool = _make_tool()
session = make_session(user_id=_USER)
sandbox = _make_sandbox(stdout="ok")
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={}),
), patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
):
await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
timeout=10,
session_id=session.session_id,
user_id=_USER,
)
call_kwargs = sandbox.commands.run.call_args[1]
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
assert "GIT_COMMITTER_EMAIL" not in call_kwargs["envs"]
@pytest.mark.asyncio(loop_scope="session")
async def test_no_token_injection_when_user_id_is_none(self):
"""When user_id is None, get_integration_env_vars must NOT be called."""
@@ -126,10 +63,7 @@ class TestBashExecE2BTokenInjection:
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={"GH_TOKEN": "should-not-appear"}),
) as mock_get_env, patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
) as mock_get_identity:
) as mock_get_env:
result = await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
@@ -139,8 +73,6 @@ class TestBashExecE2BTokenInjection:
)
mock_get_env.assert_not_called()
mock_get_identity.assert_not_called()
call_kwargs = sandbox.commands.run.call_args[1]
assert "GH_TOKEN" not in call_kwargs["envs"]
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
assert isinstance(result, BashExecResponse)

View File

@@ -20,8 +20,7 @@ class FindLibraryAgentTool(BaseTool):
def description(self) -> str:
return (
"Search user's library agents. Returns graph_id, schemas for sub-agent composition. "
"Omit query to list all. Set include_graph=true to also fetch the full "
"graph structure (nodes + links) for debugging or editing."
"Omit query to list all."
)
@property
@@ -33,15 +32,6 @@ class FindLibraryAgentTool(BaseTool):
"type": "string",
"description": "Search by name/description. Omit to list all.",
},
"include_graph": {
"type": "boolean",
"description": (
"When true, includes the full graph structure "
"(nodes + links) for each found agent. "
"Use when you need to inspect, debug, or edit an agent."
),
"default": False,
},
},
"required": [],
}
@@ -55,7 +45,6 @@ class FindLibraryAgentTool(BaseTool):
user_id: str | None,
session: ChatSession,
query: str = "",
include_graph: bool = False,
**kwargs,
) -> ToolResponseBase:
return await search_agents(
@@ -63,5 +52,4 @@ class FindLibraryAgentTool(BaseTool):
source="library",
session_id=session.session_id,
user_id=user_id,
include_graph=include_graph,
)

View File

@@ -6,7 +6,6 @@ from typing import Any, Literal
from pydantic import BaseModel, Field
from backend.data.graph import BaseGraph
from backend.data.model import CredentialsMetaInput
@@ -123,10 +122,6 @@ class AgentInfo(BaseModel):
default=None,
description="Input schema for the agent, including field names, types, and defaults",
)
graph: BaseGraph | None = Field(
default=None,
description="Full graph structure (nodes + links) when include_graph is requested",
)
class AgentsFoundResponse(ToolResponseBase):

View File

@@ -10,11 +10,7 @@ import backend.copilot.tools.run_block as run_block_module
from backend.copilot.tools.helpers import execute_block
from backend.copilot.tools.models import BlockOutputResponse, ErrorResponse
from backend.copilot.tools.run_block import RunBlockTool
from backend.executor.simulator import (
build_simulation_prompt,
prepare_dry_run,
simulate_block,
)
from backend.executor.simulator import build_simulation_prompt, simulate_block
# ---------------------------------------------------------------------------
# Helpers
@@ -79,8 +75,7 @@ def make_openai_response(
async def test_simulate_block_basic():
"""simulate_block returns correct (output_name, output_data) tuples.
Empty error pins should be omitted (not yielded) — only pins with
meaningful values are forwarded.
Empty "error" pins are dropped at source — only non-empty errors are yielded.
"""
mock_block = make_mock_block()
mock_client = AsyncMock()
@@ -90,14 +85,13 @@ async def test_simulate_block_basic():
with patch(
"backend.executor.simulator.get_openai_client", return_value=mock_client
) as mock_get_client:
):
outputs = []
async for name, data in simulate_block(mock_block, {"query": "test"}):
outputs.append((name, data))
mock_get_client.assert_called_once_with(prefer_openrouter=True)
assert ("result", "simulated output") in outputs
# Empty error pin should NOT be yielded the simulator omits empty values
# Empty error pin is dropped at the simulator level
assert ("error", "") not in outputs
@@ -153,7 +147,7 @@ async def test_simulate_block_all_retries_exhausted():
@pytest.mark.asyncio
async def test_simulate_block_missing_output_pins():
"""LLM response missing some output pins; they are omitted (not yielded)."""
"""LLM response missing some output pins; verify non-error pins filled with None."""
mock_block = make_mock_block(
output_props={
"result": {"type": "string"},
@@ -175,9 +169,30 @@ async def test_simulate_block_missing_output_pins():
outputs[name] = data
assert outputs["result"] == "hello"
# Missing pins are omitted — only pins with meaningful values are yielded
assert "count" not in outputs
assert "error" not in outputs
assert outputs["count"] is None # missing pin filled with None
assert "error" not in outputs # missing error pin is omitted entirely
@pytest.mark.asyncio
async def test_simulate_block_keeps_nonempty_error():
    """simulate_block keeps non-empty error pins (simulated logical errors)."""
    mock_block = make_mock_block()
    mock_client = AsyncMock()
    # Canned LLM reply: an empty "result" plus a genuine error message.
    mock_client.chat.completions.create = AsyncMock(
        return_value=make_openai_response(
            '{"result": "", "error": "API rate limit exceeded"}'
        )
    )
    with patch(
        "backend.executor.simulator.get_openai_client", return_value=mock_client
    ):
        outputs = []
        async for name, data in simulate_block(mock_block, {"query": "test"}):
            outputs.append((name, data))
    # Empty *result* values are still yielded — only empty "error" pins are
    # dropped by the simulator; a non-empty error must survive.
    assert ("result", "") in outputs
    assert ("error", "API rate limit exceeded") in outputs
@pytest.mark.asyncio
@@ -213,19 +228,17 @@ async def test_simulate_block_truncates_long_inputs():
assert len(parsed["text"]) < 25000
def test_build_simulation_prompt_lists_available_output_pins():
"""The prompt should list available output pins (excluding error) so the LLM
knows which keys it MUST include. Error is excluded because the prompt
tells the LLM to omit it unless simulating a logical failure."""
def test_build_simulation_prompt_excludes_error_from_must_include():
"""The 'MUST include' prompt line should NOT list 'error' — the prompt
already instructs the LLM to OMIT error unless simulating a logical error.
Including it in 'MUST include' would be contradictory."""
block = make_mock_block() # default output_props has "result" and "error"
system_prompt, _ = build_simulation_prompt(block, {"query": "test"})
available_line = [
line for line in system_prompt.splitlines() if "Available output pins" in line
must_include_line = [
line for line in system_prompt.splitlines() if "MUST include" in line
][0]
assert '"result"' in available_line
# "error" is intentionally excluded from the required output pins list
# since the prompt instructs the LLM to omit it unless simulating errors
assert '"error"' not in available_line
assert '"result"' in must_include_line
assert '"error"' not in must_include_line
# ---------------------------------------------------------------------------
@@ -480,146 +493,3 @@ async def test_execute_block_dry_run_simulator_error_returns_error_response():
assert isinstance(response, ErrorResponse)
assert "[SIMULATOR ERROR" in response.message
# ---------------------------------------------------------------------------
# prepare_dry_run tests
# ---------------------------------------------------------------------------
def test_prepare_dry_run_orchestrator_block():
    """prepare_dry_run caps iterations and overrides model to simulation model."""
    from backend.blocks.orchestrator import OrchestratorBlock

    block = OrchestratorBlock()
    input_data = {"prompt": "hello", "model": "gpt-4o", "agent_mode_max_iterations": 10}
    # Pretend the platform OpenRouter key is configured so prepare_dry_run
    # takes the real-execution path instead of returning None (LLM simulation).
    with patch(
        "backend.executor.simulator._get_platform_openrouter_key",
        return_value="sk-or-test-key",
    ):
        result = prepare_dry_run(block, input_data)
    assert result is not None
    # Model is overridden to the simulation model (not the user's model).
    assert result["model"] != "gpt-4o"
    assert result["agent_mode_max_iterations"] == 1
    # The platform key is smuggled to the executor via a sentinel field.
    assert result["_dry_run_api_key"] == "sk-or-test-key"
    # Original input_data should not be mutated.
    assert input_data["model"] == "gpt-4o"
def test_prepare_dry_run_agent_executor_block():
    """prepare_dry_run returns a copy of input_data for AgentExecutorBlock.

    AgentExecutorBlock must execute for real during dry-run so it can spawn
    a child graph execution (whose blocks are then simulated). Its Output
    schema has no properties, so LLM simulation would yield zero outputs.
    """
    from backend.blocks.agent import AgentExecutorBlock

    block = AgentExecutorBlock()
    input_data = {
        "user_id": "u1",
        "graph_id": "g1",
        "graph_version": 1,
        "inputs": {"text": "hello"},
        "input_schema": {},
        "output_schema": {},
    }
    result = prepare_dry_run(block, input_data)
    assert result is not None
    # Input data is returned as-is (no model swap needed).
    assert result["user_id"] == "u1"
    assert result["graph_id"] == "g1"
    # Original input_data should not be mutated — a fresh dict is returned.
    assert result is not input_data
def test_prepare_dry_run_regular_block_returns_none():
    """A plain (non-special) block gets no dry-run override: the LLM simulator is used."""
    ordinary_block = make_mock_block()
    override = prepare_dry_run(ordinary_block, {"query": "test"})
    assert override is None
# ---------------------------------------------------------------------------
# Input/output block passthrough tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_simulate_agent_input_block_passthrough():
    """AgentInputBlock should pass through the value directly, no LLM call."""
    from backend.blocks.io import AgentInputBlock

    block = AgentInputBlock()
    outputs = []
    # Note: no LLM client is patched in — a passthrough block must not need one.
    async for name, data in simulate_block(
        block, {"value": "hello world", "name": "q"}
    ):
        outputs.append((name, data))
    # Exactly one pin, carrying the input value verbatim.
    assert outputs == [("result", "hello world")]
@pytest.mark.asyncio
async def test_simulate_agent_dropdown_input_block_passthrough():
    """AgentDropdownInputBlock (subclass of AgentInputBlock) should pass through."""
    from backend.blocks.io import AgentDropdownInputBlock

    block = AgentDropdownInputBlock()
    outputs = []
    async for name, data in simulate_block(
        block,
        {
            "value": "Option B",
            "name": "sev",
            "options": ["Option A", "Option B"],
        },
    ):
        outputs.append((name, data))
    # The selected value is forwarded verbatim; the options list itself
    # does not appear in the outputs.
    assert outputs == [("result", "Option B")]
@pytest.mark.asyncio
async def test_simulate_agent_input_block_none_value_falls_back_to_name():
    """AgentInputBlock with value=None falls back to the input name.

    This keeps downstream nodes fed with *something* deterministic during
    dry-run even when the user supplied no value.
    """
    from backend.blocks.io import AgentInputBlock

    block = AgentInputBlock()
    outputs = []
    async for name, data in simulate_block(block, {"value": None, "name": "q"}):
        outputs.append((name, data))
    # When value is None, the simulator falls back to the "name" field
    assert outputs == [("result", "q")]
@pytest.mark.asyncio
async def test_simulate_agent_output_block_passthrough():
    """AgentOutputBlock should pass through value as output."""
    from backend.blocks.io import AgentOutputBlock

    block = AgentOutputBlock()
    outputs = []
    async for name, data in simulate_block(
        block, {"value": "result text", "name": "out1"}
    ):
        outputs.append((name, data))
    # Output blocks yield both the forwarded value and the output's name pin.
    assert ("output", "result text") in outputs
    assert ("name", "out1") in outputs
@pytest.mark.asyncio
async def test_simulate_agent_output_block_no_name():
    """AgentOutputBlock without name in input should still yield output."""
    from backend.blocks.io import AgentOutputBlock

    block = AgentOutputBlock()
    outputs = []
    async for name, data in simulate_block(block, {"value": 42}):
        outputs.append((name, data))
    # Only the "output" pin is yielded — no "name" pin when none was provided.
    assert outputs == [("output", 42)]

View File

@@ -436,28 +436,6 @@ async def get_recommended_agents(user_id: str) -> list[StoreAgentDetails]:
return [StoreAgentDetails.from_db(agent) for agent in recommended_agents]
def format_onboarding_for_extraction(
    user_name: str,
    user_role: str,
    pain_points: list[str],
) -> str:
    """Format onboarding wizard answers as Q&A text for LLM extraction.

    Collapses internal whitespace in every answer, drops pain points that
    are blank after normalization, and renders three "Q: ...\\nA: ..." pairs
    separated by blank lines.
    """

    def _clean(text: str) -> str:
        # str.split() with no args trims and collapses all whitespace runs.
        return " ".join(text.split())

    # Normalize each pain point and drop any that normalize to the empty string.
    combined_points = ", ".join(p for p in map(_clean, pain_points) if p)

    qa_pairs = (
        ("What is your name?", _clean(user_name)),
        ("What best describes your role?", _clean(user_role)),
        ("What tasks are eating your time?", combined_points),
    )
    return "\n\n".join(f"Q: {question}\nA: {answer}" for question, answer in qa_pairs)
@cached(maxsize=1, ttl_seconds=300) # Cache for 5 minutes since this rarely changes
async def onboarding_enabled() -> bool:
"""

View File

@@ -1,27 +0,0 @@
from backend.data.onboarding import format_onboarding_for_extraction
def test_format_onboarding_for_extraction_basic():
    """Happy path: every wizard answer appears in the formatted Q&A text."""
    text = format_onboarding_for_extraction(
        user_name="John",
        user_role="Founder/CEO",
        pain_points=["Finding leads", "Email & outreach"],
    )
    expected_fragments = [
        "Q: What is your name?",
        "A: John",
        "Q: What best describes your role?",
        "A: Founder/CEO",
        "Q: What tasks are eating your time?",
        "Finding leads",
        "Email & outreach",
    ]
    for fragment in expected_fragments:
        assert fragment in text
def test_format_onboarding_for_extraction_with_other():
    """Multiple pain points are joined with ', ' on a single answer line."""
    text = format_onboarding_for_extraction(
        user_name="Jane",
        user_role="Data Scientist",
        pain_points=["Research", "Building dashboards"],
    )
    for fragment in ("A: Jane", "A: Data Scientist", "Research, Building dashboards"):
        assert fragment in text

View File

@@ -81,7 +81,7 @@ from backend.util.settings import Settings
from .activity_status_generator import generate_activity_status_for_execution
from .automod.manager import automod_manager
from .cluster_lock import ClusterLock
from .simulator import get_dry_run_credentials, prepare_dry_run, simulate_block
from .simulator import simulate_block
from .utils import (
GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS,
GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
@@ -279,21 +279,6 @@ async def execute_node(
"nodes_to_skip": nodes_to_skip or set(),
}
# For special blocks in dry-run, prepare_dry_run returns a (possibly
# modified) copy of input_data so the block executes for real. For all
# other blocks it returns None -> use LLM simulator.
# OrchestratorBlock uses the platform's simulation model + OpenRouter key
# so no user credentials are needed.
_dry_run_input: dict[str, Any] | None = None
if execution_context.dry_run:
_dry_run_input = prepare_dry_run(node_block, input_data)
if _dry_run_input is not None:
input_data = _dry_run_input
# Check for dry-run platform credentials (OrchestratorBlock uses the
# platform's OpenRouter key instead of user credentials).
_dry_run_creds = get_dry_run_credentials(input_data) if _dry_run_input else None
# Last-minute fetch credentials + acquire a system-wide read-write lock to prevent
# changes during execution. ⚠️ This means a set of credentials can only be used by
# one (running) block at a time; simultaneous execution of blocks using same
@@ -303,12 +288,6 @@ async def execute_node(
# Handle regular credentials fields
for field_name, input_type in input_model.get_credentials_fields().items():
# Dry-run platform credentials bypass the credential store
if _dry_run_creds is not None:
input_data[field_name] = None
extra_exec_kwargs[field_name] = _dry_run_creds
continue
field_value = input_data.get(field_name)
if not field_value or (
isinstance(field_value, dict) and not field_value.get("id")
@@ -396,7 +375,7 @@ async def execute_node(
scope.set_tag(f"execution_context.{k}", v)
try:
if execution_context.dry_run and _dry_run_input is None:
if execution_context.dry_run:
block_iter = simulate_block(node_block, input_data)
else:
block_iter = node_block.execute(input_data, **extra_exec_kwargs)

View File

@@ -2,65 +2,58 @@
LLM-powered block simulator for dry-run execution.
When dry_run=True, instead of calling the real block, this module
role-plays the block's execution using an LLM. For most blocks no real
API calls or side effects occur.
Special cases (no LLM simulation needed):
- OrchestratorBlock executes for real with the platform's simulation model
(iterations capped to 1). Uses the platform OpenRouter key so no user
credentials are required. Falls back to LLM simulation if the platform
key is unavailable.
- AgentExecutorBlock executes for real so it can spawn child graph executions
(whose blocks are then simulated). No credentials needed.
- AgentInputBlock (and all subclasses) and AgentOutputBlock are pure
passthrough -- they forward their input values directly.
- MCPToolBlock is simulated via the generic LLM prompt (with run() source code).
OrchestratorBlock and AgentExecutorBlock are handled in manager.py via
``prepare_dry_run``.
The LLM simulation is grounded by:
role-plays the block's execution using an LLM. No real API calls,
no side effects. The LLM is grounded by:
- Block name and description
- Input/output schemas (from block.input_schema.jsonschema() / output_schema.jsonschema())
- The block's run() source code (via inspect.getsource)
- The actual input values
Inspired by https://github.com/Significant-Gravitas/agent-simulator
"""
import inspect
import json
import logging
from collections.abc import AsyncGenerator
from typing import Any
from backend.blocks.agent import AgentExecutorBlock
from backend.blocks.io import AgentInputBlock, AgentOutputBlock
from backend.blocks.orchestrator import OrchestratorBlock
from backend.util.clients import get_openai_client
logger = logging.getLogger(__name__)
# Default simulator model — Gemini 2.5 Flash via OpenRouter (fast, cheap, good at
# JSON generation). Configurable via ChatConfig.simulation_model
# (CHAT_SIMULATION_MODEL env var).
_DEFAULT_SIMULATOR_MODEL = "google/gemini-2.5-flash"
# Use the same fast/cheap model the copilot uses for non-primary tasks.
# Overridable via ChatConfig.title_model if ChatConfig is available.
def _simulator_model() -> str:
try:
from backend.copilot.config import ChatConfig # noqa: PLC0415
return ChatConfig().simulation_model or _DEFAULT_SIMULATOR_MODEL
model = ChatConfig().title_model
except Exception:
return _DEFAULT_SIMULATOR_MODEL
model = "openai/gpt-4o-mini"
# get_openai_client() may return a direct OpenAI client (not OpenRouter).
# Direct OpenAI expects bare model names ("gpt-4o-mini"), not the
# OpenRouter-prefixed form ("openai/gpt-4o-mini"). Strip the prefix when
# the internal OpenAI key is configured (i.e. not going through OpenRouter).
try:
from backend.util.settings import Settings # noqa: PLC0415
secrets = Settings().secrets
# get_openai_client() uses the direct OpenAI client whenever
# openai_internal_api_key is set, regardless of open_router_api_key.
# Strip the provider prefix (e.g. "openai/gpt-4o-mini" → "gpt-4o-mini")
# so the model name is valid for the direct OpenAI API.
if secrets.openai_internal_api_key and "/" in model:
model = model.split("/", 1)[1]
except Exception:
pass
return model
_TEMPERATURE = 0.2
_MAX_JSON_RETRIES = 5
_MAX_INPUT_VALUE_CHARS = 20000
_COMMON_CRED_KEYS = frozenset({"credentials", "api_key", "token", "secret"})
def _truncate_value(value: Any) -> Any:
@@ -95,31 +88,73 @@ def _describe_schema_pins(schema: dict[str, Any]) -> str:
return "\n".join(lines) if lines else "(no output pins defined)"
# ---------------------------------------------------------------------------
# Shared LLM call helper
# ---------------------------------------------------------------------------
def build_simulation_prompt(block: Any, input_data: dict[str, Any]) -> tuple[str, str]:
"""Build (system_prompt, user_prompt) for block simulation."""
input_schema = block.input_schema.jsonschema()
output_schema = block.output_schema.jsonschema()
input_pins = _describe_schema_pins(input_schema)
output_pins = _describe_schema_pins(output_schema)
output_properties = list(output_schema.get("properties", {}).keys())
# Build a separate list for the "MUST include" instruction that excludes
# "error" — the prompt already tells the LLM to OMIT the error pin unless
# simulating a logical error. Including it in "MUST include" is contradictory.
required_output_properties = [k for k in output_properties if k != "error"]
block_name = getattr(block, "name", type(block).__name__)
block_description = getattr(block, "description", "No description available.")
system_prompt = f"""You are simulating the execution of a software block called "{block_name}".
## Block Description
{block_description}
## Input Schema
{input_pins}
## Output Schema (what you must return)
{output_pins}
Your task: given the current inputs, produce realistic simulated outputs for this block.
Rules:
- Respond with a single JSON object whose keys are EXACTLY the output pin names listed above.
- Assume all credentials and authentication are present and valid. Never simulate authentication failures.
- Make the simulated outputs realistic and consistent with the inputs.
- If there is an "error" pin, OMIT it entirely unless you are simulating a logical error. Only include the "error" pin when there is a genuine error message to report.
- Do not include any extra keys beyond the output pins.
Output pin names you MUST include: {json.dumps(required_output_properties)}
"""
safe_inputs = _truncate_input_values(input_data)
user_prompt = f"## Current Inputs\n{json.dumps(safe_inputs, indent=2)}"
return system_prompt, user_prompt
async def _call_llm_for_simulation(
system_prompt: str,
user_prompt: str,
*,
label: str = "simulate",
) -> dict[str, Any]:
"""Send a simulation prompt to the LLM and return the parsed JSON dict.
async def simulate_block(
block: Any,
input_data: dict[str, Any],
) -> AsyncGenerator[tuple[str, Any], None]:
"""Simulate block execution using an LLM.
Handles client acquisition, retries on invalid JSON, and logging.
Raises:
RuntimeError: If no LLM client is available.
ValueError: If all retry attempts are exhausted.
Yields (output_name, output_data) tuples matching the Block.execute() interface.
On unrecoverable failure, yields a single ("error", "[SIMULATOR ERROR ...") tuple.
"""
client = get_openai_client(prefer_openrouter=True)
client = get_openai_client()
if client is None:
raise RuntimeError(
yield (
"error",
"[SIMULATOR ERROR — NOT A BLOCK FAILURE] No LLM client available "
"(missing OpenAI/OpenRouter API key)."
"(missing OpenAI/OpenRouter API key).",
)
return
output_schema = block.output_schema.jsonschema()
output_properties: dict[str, Any] = output_schema.get("properties", {})
system_prompt, user_prompt = build_simulation_prompt(block, input_data)
model = _simulator_model()
last_error: Exception | None = None
@@ -141,366 +176,60 @@ async def _call_llm_for_simulation(
if not isinstance(parsed, dict):
raise ValueError(f"LLM returned non-object JSON: {raw[:200]}")
# Fill missing output pins with defaults.
# Skip empty "error" pins — an empty string means "no error" and
# would only confuse downstream consumers (LLM, frontend).
result: dict[str, Any] = {}
for pin_name in output_properties:
if pin_name in parsed:
value = parsed[pin_name]
# Drop empty/blank error pins: they carry no information.
# Uses strip() intentionally so whitespace-only strings
# (e.g. " ", "\n") are also treated as empty.
if (
pin_name == "error"
and isinstance(value, str)
and not value.strip()
):
continue
result[pin_name] = value
elif pin_name != "error":
# Only fill non-error missing pins with None
result[pin_name] = None
logger.debug(
"simulate(%s): attempt=%d tokens=%s/%s",
label,
"simulate_block: block=%s attempt=%d tokens=%s/%s",
getattr(block, "name", "?"),
attempt + 1,
getattr(getattr(response, "usage", None), "prompt_tokens", "?"),
getattr(getattr(response, "usage", None), "completion_tokens", "?"),
)
return parsed
for pin_name, pin_value in result.items():
yield pin_name, pin_value
return
except (json.JSONDecodeError, ValueError) as e:
last_error = e
logger.warning(
"simulate(%s): JSON parse error on attempt %d/%d: %s",
label,
"simulate_block: JSON parse error on attempt %d/%d: %s",
attempt + 1,
_MAX_JSON_RETRIES,
e,
)
except Exception as e:
last_error = e
logger.error("simulate(%s): LLM call failed: %s", label, e, exc_info=True)
logger.error("simulate_block: LLM call failed: %s", e, exc_info=True)
break
msg = (
f"[SIMULATOR ERROR — NOT A BLOCK FAILURE] Failed after {_MAX_JSON_RETRIES} "
f"attempts: {last_error}"
)
logger.error(
"simulate(%s): all retries exhausted; last_error=%s", label, last_error
"simulate_block: all %d retries exhausted for block=%s; last_error=%s",
_MAX_JSON_RETRIES,
getattr(block, "name", "?"),
last_error,
)
yield (
"error",
f"[SIMULATOR ERROR — NOT A BLOCK FAILURE] Failed after {_MAX_JSON_RETRIES} "
f"attempts: {last_error}",
)
raise ValueError(msg)
# ---------------------------------------------------------------------------
# Prompt builders
# ---------------------------------------------------------------------------
def build_simulation_prompt(block: Any, input_data: dict[str, Any]) -> tuple[str, str]:
"""Build (system_prompt, user_prompt) for block simulation."""
input_schema = block.input_schema.jsonschema()
output_schema = block.output_schema.jsonschema()
input_pins = _describe_schema_pins(input_schema)
output_pins = _describe_schema_pins(output_schema)
output_properties = list(output_schema.get("properties", {}).keys())
# Build a separate list for the "MUST include" instruction that excludes
# "error" — the prompt already tells the LLM to OMIT the error pin unless
# simulating a logical error. Including it in "MUST include" is contradictory.
required_output_properties = [k for k in output_properties if k != "error"]
block_name = getattr(block, "name", type(block).__name__)
block_description = getattr(block, "description", "No description available.")
# Include the block's run() source code so the LLM knows exactly how
# inputs are transformed to outputs. Truncate to avoid blowing up the
# prompt for very large blocks.
try:
run_source = inspect.getsource(block.run)
if len(run_source) > _MAX_INPUT_VALUE_CHARS:
run_source = run_source[:_MAX_INPUT_VALUE_CHARS] + "\n# ... [TRUNCATED]"
except (TypeError, OSError):
run_source = ""
implementation_section = ""
if run_source:
implementation_section = (
"\n## Block Implementation (run function source code)\n"
"```python\n"
f"{run_source}\n"
"```\n"
)
system_prompt = f"""You are simulating the execution of a software block called "{block_name}".
## Block Description
{block_description}
## Input Schema
{input_pins}
## Output Schema (what you must return)
{output_pins}
{implementation_section}
Your task: given the current inputs, produce realistic simulated outputs for this block.
{"Study the block's run() source code above to understand exactly how inputs are transformed to outputs." if run_source else "Use the block description and schemas to infer realistic outputs."}
Rules:
- Respond with a single JSON object.
- Only include output pins that have meaningful values. Omit pins with no relevant output.
- Assume all credentials and API keys are present and valid. Do not simulate auth failures.
- Generate REALISTIC, useful outputs: real-looking URLs, plausible text, valid data structures.
- Never return empty strings, null, or "N/A" for pins that should have content.
- You MAY simulate logical errors (e.g., invalid input format, unsupported operation) when the inputs warrant it — use the "error" pin for these. But do NOT simulate auth/credential errors.
- Do not include extra keys beyond the defined output pins.
Available output pins: {json.dumps(required_output_properties)}
"""
# Strip credentials from input so the LLM doesn't see null/empty creds
# and incorrectly simulate auth failures. Use the block's schema to
# detect credential fields when available, falling back to common names.
try:
cred_fields = set(block.input_schema.get_credentials_fields())
except (AttributeError, TypeError):
cred_fields = set()
exclude_keys = cred_fields | _COMMON_CRED_KEYS
safe_inputs = {
k: v
for k, v in _truncate_input_values(input_data).items()
if k not in exclude_keys
}
user_prompt = f"## Current Inputs\n{json.dumps(safe_inputs, indent=2)}"
return system_prompt, user_prompt
# ---------------------------------------------------------------------------
# Public simulation functions
# ---------------------------------------------------------------------------
def _get_platform_openrouter_key() -> str | None:
"""Return the platform's OpenRouter API key, or None if unavailable."""
try:
from backend.util.settings import Settings # noqa: PLC0415
key = Settings().secrets.open_router_api_key
return key if key else None
except Exception:
return None
def prepare_dry_run(block: Any, input_data: dict[str, Any]) -> dict[str, Any] | None:
    """Prepare *input_data* for a dry-run execution of *block*.

    Returns a **modified copy** of *input_data* for blocks that should execute
    for real with cheap settings, or ``None`` when the block should be
    LLM-simulated instead.

    - **OrchestratorBlock** executes for real with the platform's simulation
      model (iterations capped to 1). Uses the platform OpenRouter key so no
      user credentials are needed. Falls back to LLM simulation if the
      platform key is unavailable.
    - **AgentExecutorBlock** executes for real so it can spawn a child graph
      execution. The child graph inherits ``dry_run=True`` and its blocks
      are simulated. No credentials are needed.
    """
    if isinstance(block, OrchestratorBlock):
        platform_key = _get_platform_openrouter_key()
        if not platform_key:
            # No platform key -> cannot run the orchestrator cheaply;
            # signal the caller to LLM-simulate instead.
            logger.info(
                "Dry-run: no platform OpenRouter key, "
                "falling back to LLM simulation for OrchestratorBlock"
            )
            return None
        # Cap iterations to a single pass, but preserve an explicit 0
        # (0 is the "unlimited/disabled" sentinel and must stay 0).
        requested = input_data.get("agent_mode_max_iterations", 0)
        capped = 0 if requested == 0 else 1
        # Keep the original credentials dict in input_data so the block's
        # JSON schema validation passes (validate_data strips None values,
        # making the field absent and failing the "required" check).
        # The actual credentials are injected via extra_exec_kwargs in
        # manager.py using _dry_run_api_key.
        return {
            **input_data,
            "agent_mode_max_iterations": capped,
            "model": _simulator_model(),
            "_dry_run_api_key": platform_key,
        }

    if isinstance(block, AgentExecutorBlock):
        # Executes for real to spawn the child graph; hand back a shallow
        # copy so callers can mutate freely without touching the original.
        return dict(input_data)

    # Every other block type is LLM-simulated.
    return None
def get_dry_run_credentials(
input_data: dict[str, Any],
) -> Any | None:
"""Build an ``APIKeyCredentials`` for dry-run OrchestratorBlock execution.
Returns credentials using the platform's OpenRouter key (injected by
``prepare_dry_run``), or ``None`` if not a dry-run override.
"""
api_key = input_data.pop("_dry_run_api_key", None)
if not api_key:
return None
try:
from backend.blocks.llm import APIKeyCredentials # noqa: PLC0415
from backend.integrations.providers import ProviderName # noqa: PLC0415
return APIKeyCredentials(
id="dry-run-platform",
provider=ProviderName.OPEN_ROUTER,
api_key=api_key,
title="Dry-run simulation",
expires_at=None,
)
except Exception:
logger.warning("Failed to create dry-run credentials", exc_info=True)
return None
def _default_for_input_result(result_schema: dict[str, Any], name: str | None) -> Any:
"""Return a type-appropriate sample value for an AgentInputBlock's result pin.
Typed subclasses (AgentNumberInputBlock, AgentDateInputBlock, etc.)
declare a specific type/format on their ``result`` output. When dry-run
has no user-supplied value, this generates a fallback that matches the
expected type so downstream validation doesn't fail with a plain string.
"""
pin_type = result_schema.get("type", "string")
fmt = result_schema.get("format")
if pin_type == "integer":
return 0
if pin_type == "number":
return 0.0
if pin_type == "boolean":
return False
if pin_type == "array":
return []
if pin_type == "object":
return {}
if fmt == "date":
from datetime import date as _date # noqa: PLC0415
return _date.today().isoformat()
if fmt == "time":
return "00:00:00"
# Default: use the block's name as a sample string.
return name or "sample input"
async def simulate_block(
    block: Any,
    input_data: dict[str, Any],
) -> AsyncGenerator[tuple[str, Any], None]:
    """Simulate block execution using an LLM.

    All block types (including MCPToolBlock) use the same generic LLM prompt
    which includes the block's run() source code for accurate simulation.

    Note: callers should check ``prepare_dry_run(block, input_data)`` first.
    OrchestratorBlock and AgentExecutorBlock execute for real in dry-run mode
    (see manager.py).

    Args:
        block: The block instance being dry-run.
        input_data: The node's resolved input values.

    Yields:
        (output_name, output_data) tuples matching the Block.execute() interface.
        On unrecoverable failure, yields a single ("error", "[SIMULATOR ERROR ...") tuple.
    """
    # Input/output blocks are pure passthrough -- they just forward their
    # input values. No LLM simulation needed.
    if isinstance(block, AgentInputBlock):
        value = input_data.get("value")
        if value is None:
            # Dry-run with no user input: use first dropdown option or name,
            # then coerce to a type-appropriate fallback so typed subclasses
            # (e.g. AgentNumberInputBlock → int, AgentDateInputBlock → date)
            # don't fail validation with a plain string.
            placeholder = input_data.get("options") or input_data.get(
                "placeholder_values"
            )
            if placeholder and isinstance(placeholder, list) and placeholder:
                value = placeholder[0]
            else:
                result_schema = (
                    block.output_schema.jsonschema()
                    .get("properties", {})
                    .get("result", {})
                )
                # NOTE: if "name" is present but explicitly None, dict.get
                # returns None (not the sentinel) — downstream handles that.
                value = _default_for_input_result(
                    result_schema, input_data.get("name", "sample input")
                )
        yield "result", value
        return
    if isinstance(block, AgentOutputBlock):
        # Mirror AgentOutputBlock.run(): if a format string is provided,
        # apply Jinja2 formatting and yield only "output"; otherwise yield
        # both "output" (raw value) and "name".
        fmt = input_data.get("format", "")
        value = input_data.get("value")
        name = input_data.get("name", "")
        if fmt:
            try:
                from backend.util.text import TextFormatter  # noqa: PLC0415

                escape_html = input_data.get("escape_html", False)
                formatter = TextFormatter(autoescape=escape_html)
                # Format context maps the output's name to its raw value.
                formatted = await formatter.format_string(fmt, {name: value})
                yield "output", formatted
            except Exception as e:
                # Formatting errors are surfaced in-band on "output" rather
                # than raised, matching the real block's behavior.
                yield "output", f"Error: {e}, {value}"
        else:
            yield "output", value
            if name:
                yield "name", name
        return
    # Generic path: ask the LLM to fabricate realistic outputs for this block.
    output_schema = block.output_schema.jsonschema()
    output_properties: dict[str, Any] = output_schema.get("properties", {})
    system_prompt, user_prompt = build_simulation_prompt(block, input_data)
    label = getattr(block, "name", "?")
    try:
        parsed = await _call_llm_for_simulation(system_prompt, user_prompt, label=label)
        # Track which pins were yielded so we can fill in missing required
        # ones afterwards — downstream nodes connected to unyielded pins
        # would otherwise stall in INCOMPLETE state.
        yielded_pins: set[str] = set()
        # Yield pins present in the LLM response with meaningful values.
        # We skip None and empty strings but preserve valid falsy values
        # like False, 0, and [].
        for pin_name in output_properties:
            if pin_name not in parsed:
                continue
            value = parsed[pin_name]
            if value is None or value == "":
                continue
            yield pin_name, value
            yielded_pins.add(pin_name)
        # For any required output pins the LLM omitted (excluding "error"),
        # yield a type-appropriate default so downstream nodes still fire.
        required_pins = set(output_schema.get("required", []))
        for pin_name in required_pins - yielded_pins - {"error"}:
            pin_schema = output_properties.get(pin_name, {})
            default = _default_for_schema(pin_schema)
            logger.debug(
                "simulate(%s): filling missing required pin %r with default %r",
                label,
                pin_name,
                default,
            )
            yield pin_name, default
    except (RuntimeError, ValueError) as e:
        # Unrecoverable simulation failure (e.g. no LLM client, bad JSON):
        # report it on the conventional "error" pin.
        yield "error", str(e)
def _default_for_schema(pin_schema: dict[str, Any]) -> Any:
"""Return a sensible default value for a JSON schema type."""
pin_type = pin_schema.get("type", "string")
if pin_type == "string":
return ""
if pin_type == "integer":
return 0
if pin_type == "number":
return 0.0
if pin_type == "boolean":
return False
if pin_type == "array":
return []
if pin_type == "object":
return {}
return ""

View File

@@ -1,475 +0,0 @@
"""Tests for the LLM-powered block simulator (dry-run execution).
Covers:
- Prompt building (credential stripping, realistic-output instructions)
- Input/output block passthrough
- prepare_dry_run routing
- simulate_block output-pin filling
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from backend.executor.simulator import (
_truncate_input_values,
_truncate_value,
build_simulation_prompt,
prepare_dry_run,
simulate_block,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_block(
*,
name: str = "TestBlock",
description: str = "A test block.",
input_schema: dict[str, Any] | None = None,
output_schema: dict[str, Any] | None = None,
) -> Any:
"""Create a minimal mock block for testing."""
block = MagicMock()
block.name = name
block.description = description
block.input_schema.jsonschema.return_value = input_schema or {
"properties": {"query": {"type": "string"}},
"required": ["query"],
}
block.output_schema.jsonschema.return_value = output_schema or {
"properties": {
"result": {"type": "string"},
"error": {"type": "string"},
},
"required": ["result"],
}
return block
# ---------------------------------------------------------------------------
# Truncation
# ---------------------------------------------------------------------------
class TestTruncation:
def test_short_string_unchanged(self) -> None:
assert _truncate_value("hello") == "hello"
def test_long_string_truncated(self) -> None:
long_str = "x" * 30000
result = _truncate_value(long_str)
assert result.endswith("... [TRUNCATED]")
assert len(result) < 25000
def test_nested_dict_truncation(self) -> None:
data = {"key": "y" * 30000}
result = _truncate_input_values(data)
assert result["key"].endswith("... [TRUNCATED]")
# ---------------------------------------------------------------------------
# Prompt building
# ---------------------------------------------------------------------------
class TestBuildSimulationPrompt:
def test_system_prompt_contains_block_name(self) -> None:
block = _make_block(name="WebSearchBlock")
system, _user = build_simulation_prompt(block, {"query": "test"})
assert "WebSearchBlock" in system
def test_system_prompt_contains_realistic_instruction(self) -> None:
block = _make_block()
system, _ = build_simulation_prompt(block, {})
assert "REALISTIC" in system
assert "Never return empty strings" in system
def test_system_prompt_contains_no_auth_failure_instruction(self) -> None:
block = _make_block()
system, _ = build_simulation_prompt(block, {})
assert "Do not simulate auth failures" in system
def test_credentials_stripped_from_user_prompt(self) -> None:
block = _make_block()
_, user = build_simulation_prompt(
block,
{
"query": "test",
"credentials": {"api_key": "sk-secret"},
"api_key": "sk-secret",
"token": "tok-secret",
"secret": "shh",
"normal_field": "visible",
},
)
assert "sk-secret" not in user
assert "tok-secret" not in user
assert "shh" not in user
assert "visible" in user
def test_error_pin_always_empty_instruction(self) -> None:
block = _make_block()
system, _ = build_simulation_prompt(block, {})
assert "error" in system.lower()
assert "empty string" in system.lower()
def test_output_pin_names_in_prompt(self) -> None:
block = _make_block(
output_schema={
"properties": {
"url": {"type": "string"},
"status_code": {"type": "integer"},
},
}
)
system, _ = build_simulation_prompt(block, {})
assert "url" in system
assert "status_code" in system
# ---------------------------------------------------------------------------
# prepare_dry_run routing
# ---------------------------------------------------------------------------
class TestPrepareDryRun:
def test_orchestrator_uses_simulation_model(self) -> None:
"""OrchestratorBlock should use the simulation model and cap iterations."""
from unittest.mock import patch
from backend.blocks.orchestrator import OrchestratorBlock
block = OrchestratorBlock()
with patch(
"backend.executor.simulator._get_platform_openrouter_key",
return_value="sk-or-test-key",
):
result = prepare_dry_run(
block,
{"agent_mode_max_iterations": 10, "model": "gpt-4o", "other": "val"},
)
assert result is not None
assert result["agent_mode_max_iterations"] == 1
assert result["other"] == "val"
assert result["model"] != "gpt-4o" # overridden to simulation model
# credentials left as-is so block schema validation passes —
# actual creds injected via extra_exec_kwargs in manager.py
assert "credentials" not in result
assert result["_dry_run_api_key"] == "sk-or-test-key"
def test_orchestrator_zero_stays_zero(self) -> None:
from unittest.mock import patch
from backend.blocks.orchestrator import OrchestratorBlock
block = OrchestratorBlock()
with patch(
"backend.executor.simulator._get_platform_openrouter_key",
return_value="sk-or-test-key",
):
result = prepare_dry_run(block, {"agent_mode_max_iterations": 0})
assert result is not None
assert result["agent_mode_max_iterations"] == 0
def test_orchestrator_falls_back_without_key(self) -> None:
"""Without platform OpenRouter key, OrchestratorBlock falls back
to LLM simulation (returns None)."""
from unittest.mock import patch
from backend.blocks.orchestrator import OrchestratorBlock
block = OrchestratorBlock()
with patch(
"backend.executor.simulator._get_platform_openrouter_key",
return_value=None,
):
result = prepare_dry_run(block, {"agent_mode_max_iterations": 5})
assert result is None
def test_agent_executor_block_passthrough(self) -> None:
from backend.blocks.agent import AgentExecutorBlock
block = AgentExecutorBlock()
result = prepare_dry_run(block, {"graph_id": "abc"})
assert result is not None
assert result["graph_id"] == "abc"
def test_agent_executor_block_returns_identical_copy(self) -> None:
"""AgentExecutorBlock must execute for real during dry-run so it can
spawn a child graph execution. ``prepare_dry_run`` returns a shallow
copy of input_data with no modifications -- every key/value must be
identical, but the returned dict must be a *different* object so
callers can mutate it without affecting the original."""
from backend.blocks.agent import AgentExecutorBlock
block = AgentExecutorBlock()
input_data = {
"user_id": "user-42",
"graph_id": "graph-99",
"graph_version": 3,
"inputs": {"text": "hello"},
"input_schema": {"props": "a"},
"output_schema": {"props": "b"},
}
result = prepare_dry_run(block, input_data)
assert result is not None
# Must be a different object (copy, not alias)
assert result is not input_data
# Every key/value must be identical -- no modifications
assert result == input_data
# Mutating the copy must not affect the original
result["extra"] = "added"
assert "extra" not in input_data
def test_regular_block_returns_none(self) -> None:
block = _make_block()
result = prepare_dry_run(block, {"query": "test"})
assert result is None
# ---------------------------------------------------------------------------
# simulate_block input/output passthrough
# ---------------------------------------------------------------------------
class TestSimulateBlockPassthrough:
@pytest.mark.asyncio
async def test_input_block_passthrough_with_value(self) -> None:
from backend.blocks.io import AgentInputBlock
block = AgentInputBlock()
outputs = []
async for name, data in simulate_block(block, {"value": "hello world"}):
outputs.append((name, data))
assert outputs == [("result", "hello world")]
@pytest.mark.asyncio
async def test_input_block_passthrough_without_value_uses_name(self) -> None:
from backend.blocks.io import AgentInputBlock
block = AgentInputBlock()
outputs = []
async for name, data in simulate_block(block, {"name": "user_query"}):
outputs.append((name, data))
assert outputs == [("result", "user_query")]
@pytest.mark.asyncio
async def test_input_block_passthrough_uses_placeholder(self) -> None:
from backend.blocks.io import AgentInputBlock
block = AgentInputBlock()
outputs = []
async for name, data in simulate_block(
block, {"options": ["option1", "option2"]}
):
outputs.append((name, data))
assert outputs == [("result", "option1")]
@pytest.mark.asyncio
async def test_output_block_passthrough_no_format(self) -> None:
from backend.blocks.io import AgentOutputBlock
block = AgentOutputBlock()
outputs = []
async for name, data in simulate_block(
block, {"value": "result data", "name": "output_name"}
):
outputs.append((name, data))
assert ("output", "result data") in outputs
assert ("name", "output_name") in outputs
@pytest.mark.asyncio
async def test_output_block_with_format_applies_jinja2(self) -> None:
"""When a format string is provided, AgentOutputBlock simulation should
apply Jinja2 formatting and yield only 'output' (no 'name' pin)."""
from backend.blocks.io import AgentOutputBlock
block = AgentOutputBlock()
outputs = []
async for name, data in simulate_block(
block,
{
"value": "Hello, World!",
"name": "output_1",
"format": "{{ output_1 }}!!",
},
):
outputs.append((name, data))
assert len(outputs) == 1
assert outputs[0] == ("output", "Hello, World!!!")
@pytest.mark.asyncio
async def test_output_block_with_format_no_name_pin(self) -> None:
"""When format is provided, the 'name' pin must NOT be yielded."""
from backend.blocks.io import AgentOutputBlock
block = AgentOutputBlock()
output_names = []
async for name, data in simulate_block(
block,
{
"value": "42",
"name": "output_2",
"format": "{{ output_2 }}",
},
):
output_names.append(name)
assert "name" not in output_names
@pytest.mark.asyncio
async def test_input_block_no_value_no_name_empty_options(self) -> None:
"""AgentInputBlock with value=None, name=None, and empty
options list must not crash.
When the ``name`` key is present but explicitly ``None``,
``dict.get("name", "sample input")`` returns ``None`` (the key
exists), so the fallback sentinel is *not* used. The test verifies
the code does not raise and yields a single result."""
from backend.blocks.io import AgentInputBlock
block = AgentInputBlock()
outputs = []
async for name, data in simulate_block(
block, {"value": None, "name": None, "options": []}
):
outputs.append((name, data))
# Does not crash; yields exactly one output
assert len(outputs) == 1
assert outputs[0][0] == "result"
@pytest.mark.asyncio
async def test_input_block_missing_all_fields_uses_sentinel(self) -> None:
"""AgentInputBlock with no value, name, or placeholders at all should
fall back to the ``"sample input"`` sentinel."""
from backend.blocks.io import AgentInputBlock
block = AgentInputBlock()
outputs = []
async for name, data in simulate_block(block, {}):
outputs.append((name, data))
assert outputs == [("result", "sample input")]
@pytest.mark.asyncio
async def test_generic_block_zero_outputs_handled(self) -> None:
"""When the LLM returns a valid JSON object but none of the output pins
have meaningful values, ``simulate_block`` should still yield defaults
for required output pins so downstream nodes don't stall."""
block = _make_block()
with patch(
"backend.executor.simulator._call_llm_for_simulation",
new_callable=AsyncMock,
# All output pin values are None or empty -- nothing to yield
return_value={"result": None, "error": ""},
):
outputs = []
async for name, data in simulate_block(block, {"query": "test"}):
outputs.append((name, data))
# "result" is required, so a default empty string is yielded
assert outputs == [("result", "")]
@pytest.mark.asyncio
async def test_generic_block_calls_llm(self) -> None:
"""Generic blocks should call _call_llm_for_simulation."""
block = _make_block()
with patch(
"backend.executor.simulator._call_llm_for_simulation",
new_callable=AsyncMock,
return_value={"result": "simulated result", "error": ""},
) as mock_llm:
outputs = []
async for name, data in simulate_block(block, {"query": "test"}):
outputs.append((name, data))
mock_llm.assert_called_once()
assert ("result", "simulated result") in outputs
# Empty error pin is omitted — not yielded
assert ("error", "") not in outputs
@pytest.mark.asyncio
async def test_generic_block_omits_missing_pins(self) -> None:
"""Missing output pins are omitted (not yielded)."""
block = _make_block()
with patch(
"backend.executor.simulator._call_llm_for_simulation",
new_callable=AsyncMock,
return_value={"result": "data"}, # missing "error" pin
):
outputs: dict[str, Any] = {}
async for name, data in simulate_block(block, {"query": "test"}):
outputs[name] = data
assert outputs["result"] == "data"
# Missing pins are omitted — only meaningful values are yielded
assert "error" not in outputs
@pytest.mark.asyncio
async def test_generic_block_preserves_falsy_values(self) -> None:
"""Valid falsy values like False, 0, and [] must be yielded, not dropped."""
block = _make_block(
output_schema={
"properties": {
"flag": {"type": "boolean"},
"count": {"type": "integer"},
"items": {"type": "array"},
},
"required": ["flag", "count", "items"],
}
)
with patch(
"backend.executor.simulator._call_llm_for_simulation",
new_callable=AsyncMock,
return_value={"flag": False, "count": 0, "items": []},
):
outputs: dict[str, Any] = {}
async for name, data in simulate_block(block, {"query": "test"}):
outputs[name] = data
assert outputs["flag"] is False
assert outputs["count"] == 0
assert outputs["items"] == []
@pytest.mark.asyncio
async def test_llm_failure_yields_error(self) -> None:
"""When LLM fails, should yield an error tuple."""
block = _make_block()
with patch(
"backend.executor.simulator._call_llm_for_simulation",
new_callable=AsyncMock,
side_effect=RuntimeError("No client"),
):
outputs = []
async for name, data in simulate_block(block, {"query": "test"}):
outputs.append((name, data))
assert len(outputs) == 1
assert outputs[0][0] == "error"
assert "No client" in outputs[0][1]

View File

@@ -923,11 +923,6 @@ async def add_graph_execution(
execution_context.parent_execution_id if execution_context else None
)
# When execution_context is provided (e.g. from AgentExecutorBlock),
# inherit dry_run so child-graph validation skips credential checks.
if execution_context and execution_context.dry_run:
dry_run = True
# Create new execution
graph, starting_nodes_input, compiled_nodes_input_masks, nodes_to_skip = (
await validate_and_construct_node_execution_input(

View File

@@ -19,7 +19,6 @@ from backend.data.model import (
UserPasswordCredentials,
)
from backend.data.redis_client import get_redis_async
from backend.util.cache import thread_cached
from backend.util.settings import Settings
settings = Settings()
@@ -305,12 +304,15 @@ def is_system_provider(provider: str) -> bool:
class IntegrationCredentialsStore:
@thread_cached
def __init__(self):
self._locks = None
async def locks(self) -> AsyncRedisKeyedMutex:
# Per-thread: copilot executor runs worker threads with separate event
# loops; AsyncRedisKeyedMutex's internal asyncio.Lock is bound to the
# loop it was created on.
return AsyncRedisKeyedMutex(await get_redis_async())
if self._locks:
return self._locks
self._locks = AsyncRedisKeyedMutex(await get_redis_async())
return self._locks
@property
def db_manager(self):

View File

@@ -8,6 +8,7 @@ from autogpt_libs.utils.synchronize import AsyncRedisKeyedMutex
from redis.asyncio.lock import Lock as AsyncRedisLock
from backend.data.model import Credentials, OAuth2Credentials
from backend.data.redis_client import get_redis_async
from backend.integrations.credentials_store import (
IntegrationCredentialsStore,
provider_matches,
@@ -105,13 +106,14 @@ class IntegrationCredentialsManager:
def __init__(self):
self.store = IntegrationCredentialsStore()
self._locks = None
async def locks(self) -> AsyncRedisKeyedMutex:
# Delegate to store's @thread_cached locks. Manager uses these for
# fine-grained per-credential locking (refresh, acquire); the store
# uses its own for coarse per-user integrations locking. Same mutex
# type, different key spaces — no collision.
return await self.store.locks()
if self._locks:
return self._locks
self._locks = AsyncRedisKeyedMutex(await get_redis_async())
return self._locks
async def create(self, user_id: str, credentials: Credentials) -> None:
result = await self.store.add_creds(user_id, credentials)
@@ -186,74 +188,35 @@ class IntegrationCredentialsManager:
async def refresh_if_needed(
self, user_id: str, credentials: OAuth2Credentials, lock: bool = True
) -> OAuth2Credentials:
# When lock=False, skip ALL Redis locking (both the outer "refresh" scope
# lock and the inner credential lock). This is used by the copilot's
# integration_creds module which runs across multiple threads with separate
# event loops; acquiring a Redis lock whose asyncio.Lock() was created on
# a different loop raises "Future attached to a different loop".
if lock:
return await self._refresh_locked(user_id, credentials)
return await self._refresh_unlocked(user_id, credentials)
async def _get_oauth_handler(
self, credentials: OAuth2Credentials
) -> "BaseOAuthHandler":
"""Resolve the appropriate OAuth handler for the given credentials."""
if provider_matches(credentials.provider, ProviderName.MCP.value):
return create_mcp_oauth_handler(credentials)
return await _get_provider_oauth_handler(credentials.provider)
async def _refresh_locked(
self, user_id: str, credentials: OAuth2Credentials
) -> OAuth2Credentials:
async with self._locked(user_id, credentials.id, "refresh"):
oauth_handler = await self._get_oauth_handler(credentials)
if provider_matches(credentials.provider, ProviderName.MCP.value):
oauth_handler = create_mcp_oauth_handler(credentials)
else:
oauth_handler = await _get_provider_oauth_handler(credentials.provider)
if oauth_handler.needs_refresh(credentials):
logger.debug(
"Refreshing '%s' credentials #%s",
credentials.provider,
credentials.id,
f"Refreshing '{credentials.provider}' credentials #{credentials.id}"
)
# Wait until the credentials are no longer in use anywhere
_lock = await self._acquire_lock(user_id, credentials.id)
try:
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
await self.store.update_creds(user_id, fresh_credentials)
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
credentials = fresh_credentials
finally:
if (await _lock.locked()) and (await _lock.owned()):
try:
await _lock.release()
except Exception:
logger.warning(
"Failed to release OAuth refresh lock",
exc_info=True,
)
return credentials
_lock = None
if lock:
# Wait until the credentials are no longer in use anywhere
_lock = await self._acquire_lock(user_id, credentials.id)
async def _refresh_unlocked(
self, user_id: str, credentials: OAuth2Credentials
) -> OAuth2Credentials:
"""Best-effort token refresh without any Redis locking.
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
await self.store.update_creds(user_id, fresh_credentials)
# Notify listeners so the refreshed token is picked up immediately.
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
if _lock and (await _lock.locked()) and (await _lock.owned()):
try:
await _lock.release()
except Exception:
logger.warning(
"Failed to release OAuth refresh lock",
exc_info=True,
)
Safe for use from multi-threaded contexts (e.g. copilot workers) where
each thread has its own event loop and sharing Redis-backed asyncio locks
is not possible. Concurrent refreshes are tolerated: the last writer
wins, and stale tokens are overwritten.
"""
oauth_handler = await self._get_oauth_handler(credentials)
if oauth_handler.needs_refresh(credentials):
logger.debug(
"Refreshing '%s' credentials #%s (lock-free)",
credentials.provider,
credentials.id,
)
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
await self.store.update_creds(user_id, fresh_credentials)
_invoke_creds_changed_hook(user_id, fresh_credentials.provider)
credentials = fresh_credentials
credentials = fresh_credentials
return credentials
async def update(self, user_id: str, updated: Credentials) -> None:
@@ -301,6 +264,7 @@ class IntegrationCredentialsManager:
async def release_all_locks(self):
"""Call this on process termination to ensure all locks are released"""
await (await self.locks()).release_all_locks()
await (await self.store.locks()).release_all_locks()

View File

@@ -15,7 +15,6 @@ class ProviderName(str, Enum):
ANTHROPIC = "anthropic"
APOLLO = "apollo"
COMPASS = "compass"
DATABASE = "database"
DISCORD = "discord"
D_ID = "d_id"
E2B = "e2b"

View File

@@ -163,31 +163,23 @@ async def get_async_supabase() -> "AClient":
@cached(ttl_seconds=3600)
def get_openai_client(*, prefer_openrouter: bool = False) -> "AsyncOpenAI | None":
def get_openai_client() -> "AsyncOpenAI | None":
"""
Get a process-cached async OpenAI client.
Get a process-cached async OpenAI client for embeddings.
By default prefers openai_internal_api_key (direct OpenAI) and falls back
to open_router_api_key via OpenRouter.
When ``prefer_openrouter=True``, returns an OpenRouter client or None —
does **not** fall back to direct OpenAI (which can't route non-OpenAI
models like ``google/gemini-2.5-flash``).
Prefers openai_internal_api_key (direct OpenAI). Falls back to
open_router_api_key via OpenRouter's OpenAI-compatible endpoint.
Returns None if neither key is configured.
"""
from openai import AsyncOpenAI
openai_key = settings.secrets.openai_internal_api_key
openrouter_key = settings.secrets.open_router_api_key
if prefer_openrouter:
if openrouter_key:
return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
return None
else:
if openai_key:
return AsyncOpenAI(api_key=openai_key)
if openrouter_key:
return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
if settings.secrets.openai_internal_api_key:
return AsyncOpenAI(api_key=settings.secrets.openai_internal_api_key)
if settings.secrets.open_router_api_key:
return AsyncOpenAI(
api_key=settings.secrets.open_router_api_key,
base_url=OPENROUTER_BASE_URL,
)
return None

View File

@@ -1,69 +0,0 @@
"""Tests for get_openai_client prefer_openrouter parameter."""
from unittest.mock import MagicMock, patch
import pytest
from backend.util.clients import get_openai_client
@pytest.fixture(autouse=True)
def _clear_client_cache():
"""Clear the @cached singleton between tests."""
get_openai_client.cache_clear()
yield
get_openai_client.cache_clear()
def _mock_secrets(*, openai_key: str = "", openrouter_key: str = "") -> MagicMock:
secrets = MagicMock()
secrets.openai_internal_api_key = openai_key
secrets.open_router_api_key = openrouter_key
return secrets
class TestGetOpenaiClientDefault:
def test_prefers_openai_key(self):
secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
client = get_openai_client()
assert client is not None
assert client.api_key == "sk-openai"
assert "openrouter" not in str(client.base_url or "")
def test_falls_back_to_openrouter(self):
secrets = _mock_secrets(openrouter_key="sk-or")
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
client = get_openai_client()
assert client is not None
assert client.api_key == "sk-or"
def test_returns_none_when_no_keys(self):
secrets = _mock_secrets()
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
assert get_openai_client() is None
class TestGetOpenaiClientPreferOpenrouter:
def test_returns_openrouter_client(self):
secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
client = get_openai_client(prefer_openrouter=True)
assert client is not None
assert client.api_key == "sk-or"
def test_returns_none_without_openrouter_key(self):
secrets = _mock_secrets(openai_key="sk-openai")
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
assert get_openai_client(prefer_openrouter=True) is None
def test_returns_none_when_no_keys(self):
secrets = _mock_secrets()
with patch("backend.util.clients.settings") as mock_settings:
mock_settings.secrets = secrets
assert get_openai_client(prefer_openrouter=True) is None

View File

@@ -22,6 +22,7 @@ function generateTestGraph(name = null) {
input_default: {
name: "Load Test Input",
description: "Test input for load testing",
placeholder_values: {},
},
input_nodes: [],
output_nodes: ["output_node"],
@@ -58,7 +59,11 @@ function generateExecutionInputs() {
"Load Test Input": {
name: "Load Test Input",
description: "Test input for load testing",
value: `Test execution at ${new Date().toISOString()}`,
placeholder_values: {
test_data: `Test execution at ${new Date().toISOString()}`,
test_parameter: Math.random().toString(36).substr(2, 9),
numeric_value: Math.floor(Math.random() * 1000),
},
},
};
}

View File

@@ -974,128 +974,6 @@ files = [
]
markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
[[package]]
name = "coverage"
version = "7.13.5"
description = "Code coverage measurement for Python"
optional = false
python-versions = ">=3.10"
groups = ["dev"]
files = [
{file = "coverage-7.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5"},
{file = "coverage-7.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf"},
{file = "coverage-7.13.5-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8"},
{file = "coverage-7.13.5-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4"},
{file = "coverage-7.13.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d"},
{file = "coverage-7.13.5-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930"},
{file = "coverage-7.13.5-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d"},
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40"},
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878"},
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400"},
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0"},
{file = "coverage-7.13.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0"},
{file = "coverage-7.13.5-cp310-cp310-win32.whl", hash = "sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58"},
{file = "coverage-7.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e"},
{file = "coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d"},
{file = "coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587"},
{file = "coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642"},
{file = "coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b"},
{file = "coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686"},
{file = "coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743"},
{file = "coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75"},
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209"},
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a"},
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e"},
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd"},
{file = "coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8"},
{file = "coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf"},
{file = "coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9"},
{file = "coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028"},
{file = "coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01"},
{file = "coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422"},
{file = "coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f"},
{file = "coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5"},
{file = "coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376"},
{file = "coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256"},
{file = "coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c"},
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5"},
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09"},
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9"},
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf"},
{file = "coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c"},
{file = "coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf"},
{file = "coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810"},
{file = "coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de"},
{file = "coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1"},
{file = "coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3"},
{file = "coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26"},
{file = "coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3"},
{file = "coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b"},
{file = "coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a"},
{file = "coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969"},
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161"},
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15"},
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1"},
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6"},
{file = "coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17"},
{file = "coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85"},
{file = "coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b"},
{file = "coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664"},
{file = "coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d"},
{file = "coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0"},
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806"},
{file = "coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3"},
{file = "coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9"},
{file = "coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd"},
{file = "coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606"},
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e"},
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0"},
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87"},
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479"},
{file = "coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2"},
{file = "coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a"},
{file = "coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819"},
{file = "coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911"},
{file = "coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f"},
{file = "coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e"},
{file = "coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a"},
{file = "coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510"},
{file = "coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247"},
{file = "coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6"},
{file = "coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0"},
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882"},
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740"},
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16"},
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0"},
{file = "coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0"},
{file = "coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc"},
{file = "coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633"},
{file = "coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8"},
{file = "coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b"},
{file = "coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c"},
{file = "coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9"},
{file = "coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29"},
{file = "coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607"},
{file = "coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90"},
{file = "coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3"},
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab"},
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562"},
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2"},
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea"},
{file = "coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a"},
{file = "coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215"},
{file = "coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43"},
{file = "coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45"},
{file = "coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61"},
{file = "coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179"},
]
[package.dependencies]
tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
[package.extras]
toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
[[package]]
name = "crashtest"
version = "0.4.1"
@@ -5842,75 +5720,6 @@ dev = ["coverage[toml] (==7.10.7)", "cryptography (>=3.4.0)", "pre-commit", "pyt
docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage[toml] (==7.10.7)", "pytest (>=8.4.2,<9.0.0)"]
[[package]]
name = "pymssql"
version = "2.3.13"
description = "DB-API interface to Microsoft SQL Server for Python. (new Cython-based version)"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "pymssql-2.3.13-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:476f6f06b2ae5dfbfa0b169a6ecdd0d9ddfedb07f2d6dc97d2dd630ff2d6789a"},
{file = "pymssql-2.3.13-cp310-cp310-macosx_15_0_x86_64.whl", hash = "sha256:17942dc9474693ab2229a8a6013e5b9cb1312a5251207552141bb85fcce8c131"},
{file = "pymssql-2.3.13-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d87237500def5f743a52e415cd369d632907212154fcc7b4e13f264b4e30021"},
{file = "pymssql-2.3.13-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:612ac062027d2118879f11a5986e9d9d82d07ca3545bb98c93200b68826ea687"},
{file = "pymssql-2.3.13-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f1897c1b767cc143e77d285123ae5fd4fa7379a1bfec5c515d38826caf084eb6"},
{file = "pymssql-2.3.13-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:48631c7b9fd14a1bd5675c521b6082590bf700b7961c65638d237817b3fde735"},
{file = "pymssql-2.3.13-cp310-cp310-win_amd64.whl", hash = "sha256:79c759db6e991eeae473b000c2e0a7fb8da799b2da469fe5a10d30916315e0b5"},
{file = "pymssql-2.3.13-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:152be40c0d7f5e4b1323f7728b0a01f3ee0082190cfbadf84b2c2e930d57e00e"},
{file = "pymssql-2.3.13-cp311-cp311-macosx_15_0_x86_64.whl", hash = "sha256:d94da3a55545c5b6926cb4d1c6469396f0ae32ad5d6932c513f7a0bf569b4799"},
{file = "pymssql-2.3.13-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51e42c5defc3667f0803c7ade85db0e6f24b9a1c5a18fcdfa2d09c36bff9b065"},
{file = "pymssql-2.3.13-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4aa18944a121f996178e26cadc598abdbf73759f03dc3cd74263fdab1b28cd96"},
{file = "pymssql-2.3.13-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:910404e0ec85c4cc7c633ec3df9b04a35f23bb74a844dd377a387026ae635e3a"},
{file = "pymssql-2.3.13-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4b834c34e7600369eee7bc877948b53eb0fe6f3689f0888d005ae47dd53c0a66"},
{file = "pymssql-2.3.13-cp311-cp311-win_amd64.whl", hash = "sha256:5c2e55b6513f9c5a2f58543233ed40baaa7f91c79e64a5f961ea3fc57a700b80"},
{file = "pymssql-2.3.13-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cf4f32b4a05b66f02cb7d55a0f3bcb0574a6f8cf0bee4bea6f7b104038364733"},
{file = "pymssql-2.3.13-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:2b056eb175955f7fb715b60dc1c0c624969f4d24dbdcf804b41ab1e640a2b131"},
{file = "pymssql-2.3.13-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:319810b89aa64b99d9c5c01518752c813938df230496fa2c4c6dda0603f04c4c"},
{file = "pymssql-2.3.13-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0ea72641cb0f8bce7ad8565dbdbda4a7437aa58bce045f2a3a788d71af2e4be"},
{file = "pymssql-2.3.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1493f63d213607f708a5722aa230776ada726ccdb94097fab090a1717a2534e0"},
{file = "pymssql-2.3.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb3275985c23479e952d6462ae6c8b2b6993ab6b99a92805a9c17942cf3d5b3d"},
{file = "pymssql-2.3.13-cp312-cp312-win_amd64.whl", hash = "sha256:a930adda87bdd8351a5637cf73d6491936f34e525a5e513068a6eac742f69cdb"},
{file = "pymssql-2.3.13-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:30918bb044242865c01838909777ef5e0f1b9ecd7f5882346aefa57f4414b29c"},
{file = "pymssql-2.3.13-cp313-cp313-macosx_15_0_x86_64.whl", hash = "sha256:1c6d0b2d7961f159a07e4f0d8cc81f70ceab83f5e7fd1e832a2d069e1d67ee4e"},
{file = "pymssql-2.3.13-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16c5957a3c9e51a03276bfd76a22431e2bc4c565e2e95f2cbb3559312edda230"},
{file = "pymssql-2.3.13-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0fddd24efe9d18bbf174fab7c6745b0927773718387f5517cf8082241f721a68"},
{file = "pymssql-2.3.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:123c55ee41bc7a82c76db12e2eb189b50d0d7a11222b4f8789206d1cda3b33b9"},
{file = "pymssql-2.3.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e053b443e842f9e1698fcb2b23a4bff1ff3d410894d880064e754ad823d541e5"},
{file = "pymssql-2.3.13-cp313-cp313-win_amd64.whl", hash = "sha256:5c045c0f1977a679cc30d5acd9da3f8aeb2dc6e744895b26444b4a2f20dad9a0"},
{file = "pymssql-2.3.13-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:fc5482969c813b0a45ce51c41844ae5bfa8044ad5ef8b4820ef6de7d4545b7f2"},
{file = "pymssql-2.3.13-cp314-cp314-macosx_15_0_x86_64.whl", hash = "sha256:ff5be7ab1d643dbce2ee3424d2ef9ae8e4146cf75bd20946bc7a6108e3ad1e47"},
{file = "pymssql-2.3.13-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8d66ce0a249d2e3b57369048d71e1f00d08dfb90a758d134da0250ae7bc739c1"},
{file = "pymssql-2.3.13-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d663c908414a6a032f04d17628138b1782af916afc0df9fefac4751fa394c3ac"},
{file = "pymssql-2.3.13-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aa5e07eff7e6e8bd4ba22c30e4cb8dd073e138cd272090603609a15cc5dbc75b"},
{file = "pymssql-2.3.13-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:db77da1a3fc9b5b5c5400639d79d7658ba7ad620957100c5b025be608b562193"},
{file = "pymssql-2.3.13-cp314-cp314-win_amd64.whl", hash = "sha256:7d7037d2b5b907acc7906d0479924db2935a70c720450c41339146a4ada2b93d"},
{file = "pymssql-2.3.13-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:b0af51904764811da0bfe4b057b1d72dee11a399ce9ed5770875162772740c8a"},
{file = "pymssql-2.3.13-cp39-cp39-macosx_15_0_x86_64.whl", hash = "sha256:0a7e6431925572bc75fb47929ae8ca5b0aac26abfe8b98d4c08daf117b5657f1"},
{file = "pymssql-2.3.13-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9b1d5aef2b5f47a7f9d9733caee4d66772681e8f798a0f5e4739a8bdab408c"},
{file = "pymssql-2.3.13-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c690f1869dadbf4201b7f51317fceff6e5d8f5175cec6a4a813e06b0dca2d6ed"},
{file = "pymssql-2.3.13-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e7c31f192da9d30f0e03ad99e548120a8740a675302e2f04fa8c929f7cbee771"},
{file = "pymssql-2.3.13-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f5d995a80996235ed32102a93067ce6a7143cce3bfd4e5042bf600020fc08456"},
{file = "pymssql-2.3.13-cp39-cp39-win_amd64.whl", hash = "sha256:6a6c0783d97f57133573a03aad3017917dbdf7831a65e0d84ccf2a85e183ca66"},
{file = "pymssql-2.3.13.tar.gz", hash = "sha256:2137e904b1a65546be4ccb96730a391fcd5a85aab8a0632721feb5d7e39cfbce"},
]
[[package]]
name = "pymysql"
version = "1.1.2"
description = "Pure Python MySQL Driver"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9"},
{file = "pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03"},
]
[package.extras]
ed25519 = ["PyNaCl (>=1.4.0)"]
rsa = ["cryptography"]
[[package]]
name = "pyparsing"
version = "3.3.2"
@@ -6110,26 +5919,6 @@ typing-extensions = {version = ">=4.12", markers = "python_version < \"3.13\""}
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
[[package]]
name = "pytest-cov"
version = "7.1.0"
description = "Pytest plugin for measuring coverage."
optional = false
python-versions = ">=3.9"
groups = ["dev"]
files = [
{file = "pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678"},
{file = "pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2"},
]
[package.dependencies]
coverage = {version = ">=7.10.6", extras = ["toml"]}
pluggy = ">=1.2"
pytest = ">=7"
[package.extras]
testing = ["process-tests", "pytest-xdist", "virtualenv"]
[[package]]
name = "pytest-mock"
version = "3.15.1"
@@ -7220,22 +7009,6 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
pymysql = ["pymysql"]
sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sqlparse"
version = "0.5.5"
description = "A non-validating SQL parser."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba"},
{file = "sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e"},
]
[package.extras]
dev = ["build"]
doc = ["sphinx"]
[[package]]
name = "sse-starlette"
version = "3.2.0"
@@ -8857,4 +8630,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "8dd9db689a2dd57fc3cccea02e596a522f334f6b5ed18e92252555f61835d71d"
content-hash = "1dd10577184ebff0d10997f4c6ba49484de79b7fa090946e8e5ce5c5bac3cdeb"

View File

@@ -94,10 +94,7 @@ posthog = "^7.6.0"
fpdf2 = "^2.8.6"
langsmith = "^0.7.7"
openpyxl = "^3.1.5"
pymssql = "^2.3.2"
pymysql = "^1.1.1"
pyarrow = "^23.0.0"
sqlparse = "^0.5.5"
[tool.poetry.group.dev.dependencies]
aiohappyeyeballs = "^2.6.1"
@@ -108,7 +105,6 @@ isort = "^5.13.2"
poethepoet = "^0.41.0"
pre-commit = "^4.4.0"
pyright = "^1.1.407"
pytest-cov = "^7.1.0"
pytest-mock = "^3.15.1"
pytest-watcher = "^0.6.3"
requests = "^2.32.5"

View File

@@ -15,7 +15,7 @@
"types": "tsc --noEmit",
"test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test",
"test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test --ui",
"test:unit": "vitest run --coverage",
"test:unit": "vitest run",
"test:unit:watch": "vitest",
"test:no-build": "playwright test",
"gentests": "playwright codegen http://localhost:3000",
@@ -122,7 +122,6 @@
"tailwind-merge": "2.6.0",
"tailwind-scrollbar": "3.1.0",
"tailwindcss-animate": "1.0.7",
"twemoji": "14.0.2",
"use-stick-to-bottom": "1.1.2",
"uuid": "11.1.0",
"vaul": "1.1.2",
@@ -151,7 +150,6 @@
"@types/react-modal": "3.16.3",
"@types/react-window": "2.0.0",
"@vitejs/plugin-react": "5.1.2",
"@vitest/coverage-v8": "4.0.17",
"axe-playwright": "2.2.2",
"chromatic": "13.3.3",
"concurrently": "9.2.1",

View File

@@ -288,9 +288,6 @@ importers:
tailwindcss-animate:
specifier: 1.0.7
version: 1.0.7(tailwindcss@3.4.17)
twemoji:
specifier: 14.0.2
version: 14.0.2
use-stick-to-bottom:
specifier: 1.1.2
version: 1.1.2(react@18.3.1)
@@ -370,9 +367,6 @@ importers:
'@vitejs/plugin-react':
specifier: 5.1.2
version: 5.1.2(vite@7.3.1(@types/node@24.10.0)(jiti@2.6.1)(terser@5.44.1)(yaml@2.8.2))
'@vitest/coverage-v8':
specifier: 4.0.17
version: 4.0.17(vitest@4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2))
axe-playwright:
specifier: 2.2.2
version: 2.2.2(playwright@1.56.1)
@@ -635,11 +629,6 @@ packages:
engines: {node: '>=6.0.0'}
hasBin: true
'@babel/parser@7.29.2':
resolution: {integrity: sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==}
engines: {node: '>=6.0.0'}
hasBin: true
'@babel/plugin-bugfix-firefox-class-in-computed-class-key@7.28.5':
resolution: {integrity: sha512-87GDMS3tsmMSi/3bWOte1UblL+YUTFMV8SZPZ2eSEL17s74Cw/l63rR6NmGVKMYW2GYi85nE+/d6Hw5N0bEk2Q==}
engines: {node: '>=6.9.0'}
@@ -1109,14 +1098,6 @@ packages:
resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==}
engines: {node: '>=6.9.0'}
'@babel/types@7.29.0':
resolution: {integrity: sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==}
engines: {node: '>=6.9.0'}
'@bcoe/v8-coverage@1.0.2':
resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==}
engines: {node: '>=18'}
'@braintree/sanitize-url@7.1.2':
resolution: {integrity: sha512-jigsZK+sMF/cuiB7sERuo9V7N9jx+dhmHHnQyDSVdpZwVutaBu7WvNYqMDLSgFgfB30n452TP3vjDAvFC973mA==}
@@ -3936,15 +3917,6 @@ packages:
peerDependencies:
vite: ^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0
'@vitest/coverage-v8@4.0.17':
resolution: {integrity: sha512-/6zU2FLGg0jsd+ePZcwHRy3+WpNTBBhDY56P4JTRqUN/Dp6CvOEa9HrikcQ4KfV2b2kAHUFB4dl1SuocWXSFEw==}
peerDependencies:
'@vitest/browser': 4.0.17
vitest: 4.0.17
peerDependenciesMeta:
'@vitest/browser':
optional: true
'@vitest/expect@3.2.4':
resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==}
@@ -4257,9 +4229,6 @@ packages:
resolution: {integrity: sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg==}
engines: {node: '>=4'}
ast-v8-to-istanbul@0.3.12:
resolution: {integrity: sha512-BRRC8VRZY2R4Z4lFIL35MwNXmwVqBityvOIwETtsCSwvjl0IdgFsy9NhdaA6j74nUdtJJlIypeRhpDam19Wq3g==}
astring@1.9.0:
resolution: {integrity: sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg==}
hasBin: true
@@ -5501,10 +5470,6 @@ packages:
resolution: {integrity: sha512-VWSRii4t0AFm6ixFFmLLx1t7wS1gh+ckoa84aOeapGum0h+EZd1EhEumSB+ZdDLnEPuucsVB9oB7cxJHap6Afg==}
engines: {node: '>=14.14'}
fs-extra@8.1.0:
resolution: {integrity: sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==}
engines: {node: '>=6 <7 || >=8'}
fs-monkey@1.1.0:
resolution: {integrity: sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==}
@@ -5744,9 +5709,6 @@ packages:
html-entities@2.6.0:
resolution: {integrity: sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==}
html-escaper@2.0.2:
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
html-minifier-terser@6.1.0:
resolution: {integrity: sha512-YXxSlJBZTP7RS3tWnQw74ooKa6L9b9i9QYXY21eUEvhZ3u9XLfv6OnFsQq6RxkhHygsaUMvYsZRV5rU/OVNZxw==}
engines: {node: '>=12'}
@@ -6042,18 +6004,6 @@ packages:
isexe@2.0.0:
resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
istanbul-lib-coverage@3.2.2:
resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==}
engines: {node: '>=8'}
istanbul-lib-report@3.0.1:
resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==}
engines: {node: '>=10'}
istanbul-reports@3.2.0:
resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==}
engines: {node: '>=8'}
iterator.prototype@1.1.5:
resolution: {integrity: sha512-H0dkQoCa3b2VEeKQBOxFph+JAbcrQdE7KC0UkqwpLmv2EC4P41QXP+rqo9wYodACiG5/WM5s9oDApTU8utwj9g==}
engines: {node: '>= 0.4'}
@@ -6094,9 +6044,6 @@ packages:
react:
optional: true
js-tokens@10.0.0:
resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==}
js-tokens@4.0.0:
resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
@@ -6156,12 +6103,6 @@ packages:
jsonc-parser@2.2.1:
resolution: {integrity: sha512-o6/yDBYccGvTz1+QFevz6l6OBZ2+fMVu2JZ9CIhzsYRX4mjaK5IyX9eldUdCmga16zlgQxyrj5pt9kzuj2C02w==}
jsonfile@4.0.0:
resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==}
jsonfile@5.0.0:
resolution: {integrity: sha512-NQRZ5CRo74MhMMC3/3r5g2k4fjodJ/wh8MxjFbCViWKFjxrnudWSY5vomh+23ZaXzAS7J3fBZIR2dV6WbmfM0w==}
jsonfile@6.2.0:
resolution: {integrity: sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==}
@@ -6358,17 +6299,10 @@ packages:
resolution: {integrity: sha512-ISQTe55T2ao7XtlAStud6qwYPZjE4GK1S/BeVPus4jrq6JuOnQ00YKQC581RWhR122W7msZV263KzVeLoqidyQ==}
engines: {node: '>=12'}
magicast@0.5.2:
resolution: {integrity: sha512-E3ZJh4J3S9KfwdjZhe2afj6R9lGIN5Pher1pF39UGrXRqq/VDaGVIGN13BjHd2u8B61hArAGOnso7nBOouW3TQ==}
make-dir@3.1.0:
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
engines: {node: '>=8'}
make-dir@4.0.0:
resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
engines: {node: '>=10'}
markdown-it@14.1.0:
resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==}
hasBin: true
@@ -8231,12 +8165,6 @@ packages:
tty-browserify@0.0.1:
resolution: {integrity: sha512-C3TaO7K81YvjCgQH9Q1S3R3P3BtN3RIM8n+OvX4il1K1zgE8ZhI0op7kClgkxtutIE8hQrcrHBXvIheqKUUCxw==}
twemoji-parser@14.0.0:
resolution: {integrity: sha512-9DUOTGLOWs0pFWnh1p6NF+C3CkQ96PWmEFwhOVmT3WbecRC+68AIqpsnJXygfkFcp4aXbOp8Dwbhh/HQgvoRxA==}
twemoji@14.0.2:
resolution: {integrity: sha512-BzOoXIe1QVdmsUmZ54xbEH+8AgtOKUiG53zO5vVP2iUu6h5u9lN15NcuS6te4OY96qx0H7JK9vjjl9WQbkTRuA==}
type-check@0.4.0:
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
engines: {node: '>= 0.8.0'}
@@ -8361,10 +8289,6 @@ packages:
unist-util-visit@5.0.0:
resolution: {integrity: sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==}
universalify@0.1.2:
resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==}
engines: {node: '>= 4.0.0'}
universalify@2.0.1:
resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
engines: {node: '>= 10.0.0'}
@@ -9074,10 +8998,6 @@ snapshots:
dependencies:
'@babel/types': 7.28.5
'@babel/parser@7.29.2':
dependencies:
'@babel/types': 7.29.0
'@babel/plugin-bugfix-firefox-class-in-computed-class-key@7.28.5(@babel/core@7.28.5)':
dependencies:
'@babel/core': 7.28.5
@@ -9679,13 +9599,6 @@ snapshots:
'@babel/helper-string-parser': 7.27.1
'@babel/helper-validator-identifier': 7.28.5
'@babel/types@7.29.0':
dependencies:
'@babel/helper-string-parser': 7.27.1
'@babel/helper-validator-identifier': 7.28.5
'@bcoe/v8-coverage@1.0.2': {}
'@braintree/sanitize-url@7.1.2': {}
'@chevrotain/cst-dts-gen@11.0.3':
@@ -12715,20 +12628,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@vitest/coverage-v8@4.0.17(vitest@4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2))':
dependencies:
'@bcoe/v8-coverage': 1.0.2
'@vitest/utils': 4.0.17
ast-v8-to-istanbul: 0.3.12
istanbul-lib-coverage: 3.2.2
istanbul-lib-report: 3.0.1
istanbul-reports: 3.2.0
magicast: 0.5.2
obug: 2.1.1
std-env: 3.10.0
tinyrainbow: 3.0.3
vitest: 4.0.17(@opentelemetry/api@1.9.0)(@types/node@24.10.0)(happy-dom@20.3.4)(jiti@2.6.1)(jsdom@27.4.0)(msw@2.11.6(@types/node@24.10.0)(typescript@5.9.3))(terser@5.44.1)(yaml@2.8.2)
'@vitest/expect@3.2.4':
dependencies:
'@types/chai': 5.2.3
@@ -13120,12 +13019,6 @@ snapshots:
dependencies:
tslib: 2.8.1
ast-v8-to-istanbul@0.3.12:
dependencies:
'@jridgewell/trace-mapping': 0.3.31
estree-walker: 3.0.3
js-tokens: 10.0.0
astring@1.9.0: {}
async-function@1.0.0: {}
@@ -14221,8 +14114,8 @@ snapshots:
'@typescript-eslint/parser': 8.52.0(eslint@8.57.1)(typescript@5.9.3)
eslint: 8.57.1
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1)
eslint-plugin-react: 7.37.5(eslint@8.57.1)
eslint-plugin-react-hooks: 5.2.0(eslint@8.57.1)
@@ -14241,7 +14134,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1):
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1):
dependencies:
'@nolyfill/is-core-module': 1.0.39
debug: 4.4.3
@@ -14252,22 +14145,22 @@ snapshots:
tinyglobby: 0.2.15
unrs-resolver: 1.11.1
optionalDependencies:
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1):
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
dependencies:
debug: 3.2.7
optionalDependencies:
'@typescript-eslint/parser': 8.52.0(eslint@8.57.1)(typescript@5.9.3)
eslint: 8.57.1
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1)
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1)
transitivePeerDependencies:
- supports-color
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1):
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
dependencies:
'@rtsao/scc': 1.1.0
array-includes: 3.1.9
@@ -14278,7 +14171,7 @@ snapshots:
doctrine: 2.1.0
eslint: 8.57.1
eslint-import-resolver-node: 0.3.9
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1)
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.52.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
hasown: 2.0.2
is-core-module: 2.16.1
is-glob: 4.0.3
@@ -14618,12 +14511,6 @@ snapshots:
jsonfile: 6.2.0
universalify: 2.0.1
fs-extra@8.1.0:
dependencies:
graceful-fs: 4.2.11
jsonfile: 4.0.0
universalify: 0.1.2
fs-monkey@1.1.0: {}
fs.realpath@1.0.0: {}
@@ -14961,8 +14848,6 @@ snapshots:
html-entities@2.6.0: {}
html-escaper@2.0.2: {}
html-minifier-terser@6.1.0:
dependencies:
camel-case: 4.1.2
@@ -15250,19 +15135,6 @@ snapshots:
isexe@2.0.0: {}
istanbul-lib-coverage@3.2.2: {}
istanbul-lib-report@3.0.1:
dependencies:
istanbul-lib-coverage: 3.2.2
make-dir: 4.0.0
supports-color: 7.2.0
istanbul-reports@3.2.0:
dependencies:
html-escaper: 2.0.2
istanbul-lib-report: 3.0.1
iterator.prototype@1.1.5:
dependencies:
define-data-property: 1.1.4
@@ -15297,8 +15169,6 @@ snapshots:
'@types/react': 18.3.17
react: 18.3.1
js-tokens@10.0.0: {}
js-tokens@4.0.0: {}
js-yaml@4.1.0:
@@ -15362,16 +15232,6 @@ snapshots:
jsonc-parser@2.2.1: {}
jsonfile@4.0.0:
optionalDependencies:
graceful-fs: 4.2.11
jsonfile@5.0.0:
dependencies:
universalify: 0.1.2
optionalDependencies:
graceful-fs: 4.2.11
jsonfile@6.2.0:
dependencies:
universalify: 2.0.1
@@ -15560,20 +15420,10 @@ snapshots:
dependencies:
'@jridgewell/sourcemap-codec': 1.5.5
magicast@0.5.2:
dependencies:
'@babel/parser': 7.29.2
'@babel/types': 7.29.0
source-map-js: 1.2.1
make-dir@3.1.0:
dependencies:
semver: 6.3.1
make-dir@4.0.0:
dependencies:
semver: 7.7.3
markdown-it@14.1.0:
dependencies:
argparse: 2.0.1
@@ -17932,15 +17782,6 @@ snapshots:
tty-browserify@0.0.1: {}
twemoji-parser@14.0.0: {}
twemoji@14.0.2:
dependencies:
fs-extra: 8.1.0
jsonfile: 5.0.0
twemoji-parser: 14.0.0
universalify: 0.1.2
type-check@0.4.0:
dependencies:
prelude-ls: 1.2.1
@@ -18078,8 +17919,6 @@ snapshots:
unist-util-is: 6.0.1
unist-util-visit-parents: 6.0.2
universalify@0.1.2: {}
universalify@2.0.1: {}
unplugin@1.0.1:

View File

@@ -0,0 +1,33 @@
"use client";
import { OnboardingText } from "../components/OnboardingText";
import OnboardingButton from "../components/OnboardingButton";
import Image from "next/image";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
/**
 * Onboarding step 1: static welcome screen.
 * Shows the AutoGPT logo and intro copy, then links to step 2
 * (reason selection). No state is read or written here.
 */
export default function Page() {
  // Marks this page as step 1 of the onboarding flow — progress tracking
  // lives in the onboarding provider. NOTE(review): assumed from the
  // provider's usage on later steps; confirm against onboarding-provider.
  useOnboarding(1);
  return (
    <>
      <Image
        src="/gpt_dark_RGB.svg"
        alt="GPT Dark Logo"
        className="-mb-2"
        width={300}
        height={300}
      />
      <OnboardingText className="mb-3" variant="header" center>
        Welcome to AutoGPT
      </OnboardingText>
      <OnboardingText className="mb-12" center>
        Think of AutoGPT as your digital teammate, working intelligently to
        <br />
        complete tasks based on your directions. Let&apos;s learn a bit about
        you to
        <br />
        tailor your experience.
      </OnboardingText>
      {/* Advances to step 2 of the flow. */}
      <OnboardingButton href="/onboarding/2-reason">Continue</OnboardingButton>
    </>
  );
}

View File

@@ -0,0 +1,69 @@
"use client";
import OnboardingButton from "../components/OnboardingButton";
import {
OnboardingFooter,
OnboardingHeader,
OnboardingStep,
} from "../components/OnboardingStep";
import { OnboardingText } from "../components/OnboardingText";
import OnboardingList from "../components/OnboardingList";
import { isEmptyOrWhitespace } from "@/lib/utils";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
// Selectable "main reason for using AutoGPT" options rendered by
// OnboardingList on step 2. `id` is the value persisted to onboarding
// state as `usageReason`; `label`/`text` are display-only.
const reasons = [
  {
    label: "Content & Marketing",
    text: "Content creation, social media management, blogging, creative writing",
    id: "content_marketing",
  },
  {
    label: "Business & Workflow Automation",
    text: "Operations, task management, productivity",
    id: "business_workflow_automation",
  },
  {
    label: "Data & Research",
    text: "Data analysis, insights, research, financial operation",
    id: "data_research",
  },
  {
    label: "AI & Innovation",
    text: "AI experimentation, automation testing, advanced AI applications",
    id: "ai_innovation",
  },
  {
    label: "Personal productivity",
    text: "Automating daily tasks, organizing information, personal workflows",
    id: "personal_productivity",
  },
];
/**
 * Onboarding step 2: usage-reason selection.
 * Persists the chosen reason id to onboarding state as `usageReason`;
 * "Next" stays disabled until a reason is selected.
 */
export default function Page() {
  const { state, updateState } = useOnboarding(2, "WELCOME");

  // Require a non-blank selection before allowing navigation to step 3.
  const nextDisabled = isEmptyOrWhitespace(state?.usageReason);

  return (
    <OnboardingStep>
      <OnboardingHeader backHref={"/onboarding/1-welcome"}>
        <OnboardingText className="mt-4" variant="header" center>
          What&apos;s your main reason for using AutoGPT?
        </OnboardingText>
        <OnboardingText className="mt-1" center>
          Select the option that best matches your needs
        </OnboardingText>
      </OnboardingHeader>
      <OnboardingList
        elements={reasons}
        selectedId={state?.usageReason}
        onSelect={(reason) => updateState({ usageReason: reason })}
      />
      <OnboardingFooter>
        <OnboardingButton href="/onboarding/3-services" disabled={nextDisabled}>
          Next
        </OnboardingButton>
      </OnboardingFooter>
    </OnboardingStep>
  );
}

View File

@@ -0,0 +1,171 @@
"use client";
import OnboardingButton from "../components/OnboardingButton";
import {
OnboardingStep,
OnboardingHeader,
OnboardingFooter,
} from "../components/OnboardingStep";
import { OnboardingText } from "../components/OnboardingText";
import { OnboardingGrid } from "../components/OnboardingGrid";
import { useCallback } from "react";
import OnboardingInput from "../components/OnboardingInput";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
// Integration options shown on onboarding step 3. `name` doubles as the
// value stored in onboarding state's `integrations` array; `icon` paths
// are served from public/integrations/.
const services = [
  {
    name: "D-ID",
    text: "Generate AI-powered avatars and videos for dynamic content creation.",
    icon: "/integrations/d-id.png",
  },
  {
    name: "Discord",
    text: "A chat platform for communities and teams, supporting text, voice, and video.",
    icon: "/integrations/discord.png",
  },
  {
    name: "GitHub",
    text: "AutoGPT can track issues, manage repos, and automate workflows with GitHub.",
    icon: "/integrations/github.png",
  },
  {
    name: "Google Workspace",
    text: "Automate emails, calendar events, and document management in AutoGPT with Google Workspace.",
    icon: "/integrations/google.png",
  },
  {
    name: "Google Maps",
    text: "Fetch locations, directions, and real-time geodata for navigation.",
    icon: "/integrations/maps.png",
  },
  {
    name: "HubSpot",
    text: "Manage customer relationships, automate marketing, and track sales.",
    icon: "/integrations/hubspot.png",
  },
  {
    name: "Linear",
    text: "Streamline project management and issue tracking with a modern workflow.",
    icon: "/integrations/linear.png",
  },
  {
    name: "Medium",
    text: "Publish and explore insightful content with a powerful writing platform.",
    icon: "/integrations/medium.png",
  },
  {
    name: "Mem0",
    text: "AI-powered memory assistant for smarter data organization and recall.",
    icon: "/integrations/mem0.png",
  },
  {
    name: "Notion",
    text: "Organize work, notes, and databases in an all-in-one workspace.",
    icon: "/integrations/notion.png",
  },
  {
    name: "NVIDIA",
    text: "Accelerate AI, graphics, and computing with cutting-edge technology.",
    icon: "/integrations/nvidia.jpg",
  },
  {
    name: "OpenWeatherMap",
    text: "Access real-time weather data and forecasts worldwide.",
    icon: "/integrations/openweathermap.png",
  },
  {
    name: "Pinecone",
    text: "Store and search vector data for AI-driven applications.",
    icon: "/integrations/pinecone.png",
  },
  {
    name: "Reddit",
    text: "Explore trending discussions and engage with online communities.",
    icon: "/integrations/reddit.png",
  },
  {
    name: "Slant3D",
    text: "Automate and optimize 3D printing workflows with AI.",
    icon: "/integrations/slant3d.jpeg",
  },
  {
    name: "SMTP",
    text: "Send and manage emails with secure and reliable delivery.",
    icon: "/integrations/smtp.png",
  },
  {
    name: "Todoist",
    text: "Organize tasks and projects with a simple, intuitive to-do list.",
    icon: "/integrations/todoist.png",
  },
  {
    name: "Twitter (X)",
    text: "Stay connected and share updates on the world's biggest conversation platform.",
    icon: "/integrations/x.png",
  },
  {
    name: "Unreal Speech",
    text: "Generate natural-sounding AI voices for speech applications.",
    icon: "/integrations/unreal-speech.png",
  },
];
/**
 * Onboarding step 3: integration selection.
 * Toggles service names in/out of onboarding state's `integrations` array
 * and captures free-text suggestions in `otherIntegrations`. Selection is
 * optional, so "Next" is never disabled.
 */
export default function Page() {
  const { state, updateState } = useOnboarding(3, "USAGE_REASON");

  // Toggle a single integration in the selected set.
  const switchIntegration = useCallback(
    (name: string) => {
      if (!state) {
        return;
      }
      const alreadySelected = state.integrations.includes(name);
      const integrations = alreadySelected
        ? state.integrations.filter((candidate) => candidate !== name)
        : [...state.integrations, name];
      updateState({ integrations });
    },
    [state, updateState],
  );

  return (
    <OnboardingStep>
      <OnboardingHeader backHref={"/onboarding/2-reason"}>
        <OnboardingText className="mt-4" variant="header" center>
          What platforms or services would you like AutoGPT to work with?
        </OnboardingText>
        <OnboardingText className="mt-1" center>
          You can select more than one option
        </OnboardingText>
      </OnboardingHeader>
      <div className="w-fit">
        <OnboardingText className="my-4" variant="subheader">
          Available integrations
        </OnboardingText>
        <OnboardingGrid
          elements={services}
          selected={state?.integrations}
          onSelect={switchIntegration}
        />
        <OnboardingText className="mt-12" variant="subheader">
          Help us grow our integrations
        </OnboardingText>
        <OnboardingText className="my-4">
          Let us know which partnerships you&apos;d like to see next
        </OnboardingText>
        <OnboardingInput
          className="mb-4"
          placeholder="Others (please specify)"
          value={state?.otherIntegrations || ""}
          onChange={(otherIntegrations) => updateState({ otherIntegrations })}
        />
      </div>
      <OnboardingFooter>
        <OnboardingButton className="mb-2" href="/onboarding/4-agent">
          Next
        </OnboardingButton>
      </OnboardingFooter>
    </OnboardingStep>
  );
}

View File

@@ -0,0 +1,104 @@
"use client";
import { isEmptyOrWhitespace } from "@/lib/utils";
import { useRouter } from "next/navigation";
import { useEffect, useState } from "react";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
import OnboardingAgentCard from "../components/OnboardingAgentCard";
import OnboardingButton from "../components/OnboardingButton";
import {
OnboardingFooter,
OnboardingHeader,
OnboardingStep,
} from "../components/OnboardingStep";
import { OnboardingText } from "../components/OnboardingText";
import { getV1RecommendedOnboardingAgents } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
import { resolveResponse } from "@/app/api/helpers";
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
/**
 * Onboarding step 4: pick one of two recommended agents.
 * Fetches recommendations once on mount; if fewer than two are available,
 * the choice is skipped entirely — onboarding is completed and the user is
 * sent home. The chosen agent's store listing version id is persisted to
 * onboarding state for step 5.
 */
export default function Page() {
  const { state, updateState, completeStep } = useOnboarding(4, "INTEGRATIONS");
  const [agents, setAgents] = useState<StoreAgentDetails[]>([]);
  const router = useRouter();

  useEffect(() => {
    resolveResponse(getV1RecommendedOnboardingAgents()).then((agents) => {
      if (agents.length < 2) {
        // Not enough recommendations to offer a choice — skip this step.
        completeStep("CONGRATS");
        router.replace("/");
        // Fixed: bail out instead of also populating state for a page we
        // are navigating away from.
        return;
      }
      setAgents(agents);
    });
    // Intentionally fetch once on mount; identity changes of
    // completeStep/router should not retrigger the request.
  }, []);

  useEffect(() => {
    // Deselect agent if it's not in the list of agents
    if (
      state?.selectedStoreListingVersionId &&
      agents.length > 0 &&
      !agents.some(
        (agent) =>
          agent.store_listing_version_id ===
          state.selectedStoreListingVersionId,
      )
    ) {
      updateState({
        selectedStoreListingVersionId: null,
        agentInput: {},
      });
    }
  }, [state?.selectedStoreListingVersionId, updateState, agents]);

  // Select a recommended agent, clearing input captured for any previously
  // selected agent. Fixes two defects in the original inline handlers:
  // (1) clicking a card before agents load crashed on
  //     `undefined.store_listing_version_id` — now guarded;
  // (2) only the first card reset `agentInput`, so stale inputs could be
  //     carried over when switching to the second agent.
  function selectAgent(agent: StoreAgentDetails | undefined) {
    if (!agent) return;
    updateState({
      selectedStoreListingVersionId: agent.store_listing_version_id,
      agentInput: {},
    });
  }

  return (
    <OnboardingStep>
      <OnboardingHeader backHref={"/onboarding/3-services"}>
        <OnboardingText className="mt-4" variant="header" center>
          Choose an agent
        </OnboardingText>
        <OnboardingText className="mt-1" center>
          We think these agents are a good match for you based on your answers
        </OnboardingText>
      </OnboardingHeader>
      <div className="my-12 flex items-center justify-between gap-5">
        <OnboardingAgentCard
          agent={agents[0]}
          selected={
            agents[0] !== undefined
              ? state?.selectedStoreListingVersionId ==
                agents[0]?.store_listing_version_id
              : false
          }
          onClick={() => selectAgent(agents[0])}
        />
        <OnboardingAgentCard
          agent={agents[1]}
          selected={
            agents[1] !== undefined
              ? state?.selectedStoreListingVersionId ==
                agents[1]?.store_listing_version_id
              : false
          }
          onClick={() => selectAgent(agents[1])}
        />
      </div>
      <OnboardingFooter>
        <OnboardingButton
          href="/onboarding/5-run"
          disabled={isEmptyOrWhitespace(state?.selectedStoreListingVersionId)}
        >
          Next
        </OnboardingButton>
      </OnboardingFooter>
    </OnboardingStep>
  );
}

View File

@@ -0,0 +1,62 @@
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
import { CredentialsInput } from "@/components/contextual/CredentialsInput/CredentialsInput";
import { useState } from "react";
import { getSchemaDefaultCredentials } from "../../helpers";
import { areAllCredentialsSet, getCredentialFields } from "./helpers";
type Credential = CredentialsMetaInput | undefined;
type Credentials = Record<string, Credential>;

type Props = {
  agent: GraphModel | null;
  siblingInputs?: Record<string, any>;
  onCredentialsChange: (
    credentials: Record<string, CredentialsMetaInput>,
  ) => void;
  onValidationChange: (isValid: boolean) => void;
  onLoadingChange: (isLoading: boolean) => void;
};

/**
 * Renders one credential picker per credential field declared by the agent
 * graph, reporting selections, validity, and loading state to the parent.
 * Renders nothing when the agent declares no credential inputs.
 */
export function AgentOnboardingCredentials(props: Props) {
  const [inputCredentials, setInputCredentials] = useState<Credentials>({});

  const fields = getCredentialFields(props.agent);
  const required = Object.keys(fields || {}).length > 0;

  // No credential fields — nothing to show.
  if (!required) return null;

  function handleSelectCredentials(key: string, value: Credential) {
    const next = { ...inputCredentials, [key]: value };
    setInputCredentials(next);

    // The parent only wants entries that are actually set.
    const selectedOnly: Record<string, CredentialsMetaInput> = {};
    Object.entries(next).forEach(([name, credential]) => {
      if (credential) selectedOnly[name] = credential;
    });
    props.onCredentialsChange(selectedOnly);

    props.onValidationChange(!required || areAllCredentialsSet(fields, next));
  }

  return (
    <>
      {Object.entries(fields).map(([key, inputSubSchema]) => (
        <div key={key} className="mt-4">
          <CredentialsInput
            schema={inputSubSchema}
            selectedCredentials={
              inputCredentials[key] ??
              getSchemaDefaultCredentials(inputSubSchema)
            }
            onSelectCredentials={(value) => handleSelectCredentials(key, value)}
            siblingInputs={props.siblingInputs}
            onLoaded={(loaded) => props.onLoadingChange(!loaded)}
          />
        </div>
      ))}
    </>
  );
}

View File

@@ -0,0 +1,32 @@
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
import { BlockIOCredentialsSubSchema } from "@/lib/autogpt-server-api/types";
/**
 * Extract the per-key credential sub-schemas from an agent graph's
 * credentials input schema. Returns an empty record when the agent is
 * missing or its schema declares no properties.
 */
export function getCredentialFields(
  agent: GraphModel | null,
): AgentCredentialsFields {
  if (!agent) return {};
  const schema = agent.credentials_input_schema;
  const hasCredentialInputs =
    !!schema &&
    typeof schema === "object" &&
    "properties" in schema &&
    !!schema.properties;
  if (!hasCredentialInputs) return {};
  return agent.credentials_input_schema.properties as AgentCredentialsFields;
}

// Map of credential field key -> its credential sub-schema.
export type AgentCredentialsFields = Record<
  string,
  BlockIOCredentialsSubSchema
>;

/**
 * True when every credential field has a selected (truthy) credential.
 * Vacuously true when there are no fields.
 */
export function areAllCredentialsSet(
  fields: AgentCredentialsFields,
  inputs: Record<string, CredentialsMetaInput | undefined>,
) {
  for (const key of Object.keys(fields || {})) {
    if (!inputs[key]) return false;
  }
  return true;
}

View File

@@ -0,0 +1,45 @@
import { cn } from "@/lib/utils";
import { OnboardingText } from "../../components/OnboardingText";
type RunAgentHintProps = {
  handleNewRun: () => void;
};

/**
 * Hint panel for onboarding step 5: explains what a "run" is and shows a
 * large "New run" call-to-action that invokes `handleNewRun` when clicked.
 */
export function RunAgentHint({ handleNewRun }: RunAgentHintProps) {
  // Hoisted so the JSX below stays readable.
  const newRunButtonClasses = cn(
    "mt-16 flex h-[68px] w-[330px] items-center justify-center rounded-xl border-2 border-violet-700 bg-neutral-50",
    "cursor-pointer transition-all duration-200 ease-in-out hover:bg-violet-50",
  );

  return (
    <div className="ml-[104px] w-[481px] pl-5">
      <div className="flex flex-col">
        <OnboardingText variant="header">Run your first agent</OnboardingText>
        <span className="mt-9 text-base font-normal leading-normal text-zinc-600">
          A &apos;run&apos; is when your agent starts working on a task
        </span>
        <span className="mt-4 text-base font-normal leading-normal text-zinc-600">
          Click on <b>New Run</b> below to try it out
        </span>
        <div onClick={handleNewRun} className={newRunButtonClasses}>
          {/* Inline "plus" glyph drawn as two crossed lines. */}
          <svg
            width="38"
            height="38"
            viewBox="0 0 32 32"
            xmlns="http://www.w3.org/2000/svg"
          >
            <g stroke="#6d28d9" strokeWidth="1.2" strokeLinecap="round">
              <line x1="16" y1="8" x2="16" y2="24" />
              <line x1="8" y1="16" x2="24" y2="16" />
            </g>
          </svg>
          <span className="ml-3 font-sans text-[19px] font-medium leading-normal text-violet-700">
            New run
          </span>
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,52 @@
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
import StarRating from "../../components/StarRating";
import SmartImage from "@/components/__legacy__/SmartImage";
type Props = {
  storeAgent: StoreAgentDetails | null;
};

/**
 * Fixed-position summary card for the agent chosen on step 4: cover image,
 * name, creator, run count, and rating. Shows a pulsing placeholder while
 * the agent details are still loading (storeAgent is null).
 */
export function SelectedAgentCard(props: Props) {
  const agent = props.storeAgent;

  return (
    <div className="fixed left-1/4 top-1/2 w-[481px] -translate-x-1/2 -translate-y-1/2">
      <div className="h-[156px] w-[481px] rounded-xl bg-white px-6 pb-5 pt-4">
        <span className="font-sans text-xs font-medium tracking-wide text-zinc-500">
          SELECTED AGENT
        </span>
        {agent ? (
          <div className="mt-4 flex h-20 rounded-lg bg-violet-50 p-3">
            {/* Left image */}
            <SmartImage
              src={agent.agent_image[0]}
              alt="Agent cover"
              className="w-[350px] rounded-lg"
            />
            {/* Right content */}
            <div className="ml-3 flex flex-1 flex-col">
              <div className="mb-2 flex flex-col items-start">
                <span className="data-sentry-unmask w-[292px] truncate font-sans text-[14px] font-medium leading-tight text-zinc-800">
                  {agent.agent_name}
                </span>
                <span className="data-sentry-unmask font-norma w-[292px] truncate font-sans text-xs text-zinc-600">
                  by {agent.creator}
                </span>
              </div>
              <div className="flex w-[292px] items-center justify-between">
                <span className="truncate font-sans text-xs font-normal leading-tight text-zinc-600">
                  {agent.runs.toLocaleString("en-US")} runs
                </span>
                <StarRating
                  className="font-sans text-xs font-normal leading-tight text-zinc-600"
                  starSize={12}
                  rating={agent.rating || 0}
                />
              </div>
            </div>
          </div>
        ) : (
          <div className="mt-4 flex h-20 animate-pulse rounded-lg bg-gray-300 p-2" />
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,57 @@
import type {
BlockIOCredentialsSubSchema,
CredentialsMetaInput,
} from "@/lib/autogpt-server-api/types";
import type { InputValues } from "./types";
import { GraphModel } from "@/app/api/__generated__/models/graphModel";
/**
 * Build the initial input map for the agent run form.
 * For each key in the agent's input schema: an existing non-null value wins,
 * otherwise the schema's `default`, otherwise an empty string.
 */
export function computeInitialAgentInputs(
  agent: GraphModel | null,
  existingInputs?: InputValues | null,
): InputValues {
  const properties = agent?.input_schema?.properties || {};
  const result: InputValues = {};

  for (const [key, subSchema] of Object.entries(properties)) {
    // Equivalent to the explicit `key in existingInputs` check: a missing
    // key yields undefined, which fails the != null test.
    const existing = existingInputs?.[key];
    if (existing != null) {
      result[key] = existing;
      continue;
    }
    const schemaDefault = (subSchema as unknown as { default?: string | number })
      .default;
    result[key] = schemaDefault ?? "";
  }

  return result;
}
type IsRunDisabledParams = {
  agent: GraphModel | null;
  isRunning: boolean;
  agentInputs: InputValues | null | undefined;
};

/**
 * Whether the "Run agent" button should be disabled: true when any input is
 * blank (after trimming), the agent graph hasn't loaded, or a run is already
 * in progress.
 */
export function isRunDisabled({
  agent,
  isRunning,
  agentInputs,
}: IsRunDisabledParams) {
  const hasBlankInput = Object.values(agentInputs || {}).some(
    (value) => String(value).trim() === "",
  );
  return hasBlankInput || !agent || isRunning;
}
// Read the schema-level default credential selection, if any.
// NOTE(review): `schema.default` is loosely typed upstream; the cast assumes
// the backend serializes defaults as CredentialsMetaInput — confirm against
// the schema generator.
export function getSchemaDefaultCredentials(
  schema: BlockIOCredentialsSubSchema,
): CredentialsMetaInput | undefined {
  return schema.default as CredentialsMetaInput | undefined;
}

View File

@@ -0,0 +1,124 @@
"use client";
import {
Card,
CardContent,
CardHeader,
CardTitle,
} from "@/components/__legacy__/ui/card";
import { RunAgentInputs } from "@/components/contextual/RunAgentInputs/RunAgentInputs";
import { ErrorCard } from "@/components/molecules/ErrorCard/ErrorCard";
import { CircleNotchIcon } from "@phosphor-icons/react/dist/ssr";
import { Play } from "lucide-react";
import OnboardingButton from "../components/OnboardingButton";
import { OnboardingHeader, OnboardingStep } from "../components/OnboardingStep";
import { OnboardingText } from "../components/OnboardingText";
import { AgentOnboardingCredentials } from "./components/AgentOnboardingCredentials/AgentOnboardingCredentials";
import { RunAgentHint } from "./components/RunAgentHint";
import { SelectedAgentCard } from "./components/SelectedAgentCard";
import { isRunDisabled } from "./helpers";
import type { InputValues } from "./types";
import { useOnboardingRunStep } from "./useOnboardingRunStep";
export default function Page() {
const {
ready,
error,
showInput,
agentGraph,
onboarding,
storeAgent,
runningAgent,
handleSetAgentInput,
handleRunAgent,
handleNewRun,
handleCredentialsChange,
handleCredentialsValidationChange,
handleCredentialsLoadingChange,
} = useOnboardingRunStep();
if (error) {
return <ErrorCard responseError={error} />;
}
if (!ready) {
return (
<div className="flex flex-col gap-8">
<CircleNotchIcon className="size-10 animate-spin" />
</div>
);
}
return (
<OnboardingStep dotted>
<OnboardingHeader backHref={"/onboarding/4-agent"} transparent />
<div className="flex min-h-[80vh] items-center justify-center">
<SelectedAgentCard storeAgent={storeAgent} />
<div className="w-[481px]" />
{!showInput ? (
<RunAgentHint handleNewRun={handleNewRun} />
) : (
<div className="ml-[104px] w-[481px] pl-5">
<div className="flex flex-col">
<OnboardingText variant="header">
Provide details for your agent
</OnboardingText>
<span className="mt-9 text-base font-normal leading-normal text-zinc-600">
Give your agent the details it needs to workjust enter <br />
the key information and get started.
</span>
<span className="mt-4 text-base font-normal leading-normal text-zinc-600">
When you&apos;re done, click <b>Run Agent</b>.
</span>
<Card className="agpt-box mt-4">
<CardHeader>
<CardTitle className="font-poppins text-lg">Input</CardTitle>
</CardHeader>
<CardContent className="flex flex-col gap-4">
{Object.entries(
agentGraph?.input_schema.properties || {},
).map(([key, inputSubSchema]) => (
<RunAgentInputs
key={key}
schema={inputSubSchema}
value={onboarding.state?.agentInput?.[key]}
placeholder={inputSubSchema.description}
onChange={(value) => handleSetAgentInput(key, value)}
/>
))}
<AgentOnboardingCredentials
agent={agentGraph}
siblingInputs={
(onboarding.state?.agentInput as Record<string, any>) ||
undefined
}
onCredentialsChange={handleCredentialsChange}
onValidationChange={handleCredentialsValidationChange}
onLoadingChange={handleCredentialsLoadingChange}
/>
</CardContent>
</Card>
<OnboardingButton
variant="violet"
className="mt-8 w-[136px]"
loading={runningAgent}
disabled={isRunDisabled({
agent: agentGraph,
isRunning: runningAgent,
agentInputs:
(onboarding.state?.agentInput as unknown as InputValues) ||
null,
})}
onClick={handleRunAgent}
icon={<Play className="mr-2" size={18} />}
>
Run agent
</OnboardingButton>
</div>
</div>
)}
</div>
</OnboardingStep>
);
}

View File

@@ -0,0 +1,2 @@
// Primitive value a run-input form field can hold.
export type InputPrimitive = string | number;
// Map of input-schema key -> value entered by the user.
export type InputValues = Record<string, InputPrimitive>;

View File

@@ -0,0 +1,157 @@
import { useToast } from "@/components/molecules/Toast/use-toast";
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
import { useOnboarding } from "@/providers/onboarding/onboarding-provider";
import { useRouter } from "next/navigation";
import { useEffect, useState } from "react";
import { computeInitialAgentInputs } from "./helpers";
import { InputValues } from "./types";
import { okData, resolveResponse } from "@/app/api/helpers";
import { postV2AddMarketplaceAgent } from "@/app/api/__generated__/endpoints/library/library";
import {
useGetV2GetAgentByVersion,
useGetV2GetAgentGraph,
} from "@/app/api/__generated__/endpoints/store/store";
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
import { GraphID } from "@/lib/autogpt-server-api";
/**
 * State + actions for the onboarding "run your first agent" step.
 *
 * Loads the store listing and graph metadata for the agent the user selected
 * earlier in onboarding, seeds the agent-input form, and exposes handlers to
 * edit inputs/credentials and to execute the agent. On a successful run it
 * stores the execution id in onboarding state and navigates to the congrats
 * screen.
 */
export function useOnboardingRunStep() {
  const onboarding = useOnboarding(undefined, "AGENT_CHOICE");
  // Whether the input form is revealed (after "new run" is clicked).
  const [showInput, setShowInput] = useState(false);
  // True while handleRunAgent is in flight; also disables the Run button.
  const [runningAgent, setRunningAgent] = useState(false);
  // Credentials chosen in AgentOnboardingCredentials, keyed by input name.
  const [inputCredentials, setInputCredentials] = useState<
    Record<string, CredentialsMetaInput>
  >({});
  const [credentialsValid, setCredentialsValid] = useState(true);
  const [credentialsLoaded, setCredentialsLoaded] = useState(false);
  const { toast } = useToast();
  const router = useRouter();
  const api = useBackendAPI();
  // Empty string disables both queries below via `enabled`.
  const currentAgentVersion =
    onboarding.state?.selectedStoreListingVersionId ?? "";
  // Store listing details (needed for store_listing_version_id when adding
  // the agent to the user's library).
  const {
    data: storeAgent,
    error: storeAgentQueryError,
    isSuccess: storeAgentQueryIsSuccess,
  } = useGetV2GetAgentByVersion(currentAgentVersion, {
    query: {
      enabled: !!currentAgentVersion,
      select: okData,
    },
  });
  // Graph metadata (input schema used to render and seed the input form).
  const {
    data: agentGraphMeta,
    error: agentGraphQueryError,
    isSuccess: agentGraphQueryIsSuccess,
  } = useGetV2GetAgentGraph(currentAgentVersion, {
    query: {
      enabled: !!currentAgentVersion,
      select: okData,
    },
  });
  // Mark this screen as onboarding step 5 once on mount.
  useEffect(() => {
    onboarding.setStep(5);
  }, []);
  // Seed agentInput with defaults from the graph's input schema, merged with
  // any values already stored in onboarding state. Runs when the graph
  // metadata arrives (deliberately not keyed on onboarding.state to avoid
  // re-seeding on every input edit).
  useEffect(() => {
    if (agentGraphMeta && onboarding.state) {
      const initialAgentInputs = computeInitialAgentInputs(
        agentGraphMeta,
        (onboarding.state.agentInput as unknown as InputValues) || null,
      );
      onboarding.updateState({ agentInput: initialAgentInputs });
    }
  }, [agentGraphMeta]);
  // Reveal the input form and advance onboarding progress.
  function handleNewRun() {
    if (!onboarding.state) return;
    setShowInput(true);
    onboarding.setStep(6);
    onboarding.completeStep("AGENT_NEW_RUN");
  }
  // Persist a single input field edit into onboarding state.
  function handleSetAgentInput(key: string, value: string) {
    if (!onboarding.state) return;
    onboarding.updateState({
      agentInput: {
        ...onboarding.state.agentInput,
        [key]: value,
      },
    });
  }
  // Add the agent to the user's library, execute it with the collected
  // inputs/credentials, then navigate to the congrats screen. On failure,
  // shows a toast and re-enables the Run button.
  async function handleRunAgent() {
    if (!agentGraphMeta || !storeAgent || !onboarding.state) {
      toast({
        title: "Error getting agent",
        description:
          "Either the agent is not available or there was an error getting it.",
        variant: "destructive",
      });
      return;
    }
    setRunningAgent(true);
    try {
      const libraryAgent = await resolveResponse(
        postV2AddMarketplaceAgent({
          store_listing_version_id: storeAgent?.store_listing_version_id || "",
          source: "onboarding",
        }),
      );
      const { id: runID } = await api.executeGraph(
        libraryAgent.graph_id as GraphID,
        libraryAgent.graph_version,
        onboarding.state.agentInput || {},
        inputCredentials,
        "onboarding",
      );
      onboarding.updateState({ onboardingAgentExecutionId: runID });
      router.push("/onboarding/6-congrats");
    } catch (error) {
      console.error("Error running agent:", error);
      toast({
        title: "Error running agent",
        description:
          "There was an error running your agent. Please try again or try choosing a different agent if it still fails.",
        variant: "destructive",
      });
      setRunningAgent(false);
      // Note: runningAgent is intentionally left true on success so the
      // button stays disabled during the route transition.
    }
  }
  return {
    ready: agentGraphQueryIsSuccess && storeAgentQueryIsSuccess,
    error: agentGraphQueryError || storeAgentQueryError,
    agentGraph: agentGraphMeta || null,
    onboarding,
    showInput,
    storeAgent: storeAgent || null,
    runningAgent,
    credentialsValid,
    credentialsLoaded,
    handleSetAgentInput,
    handleRunAgent,
    handleNewRun,
    handleCredentialsChange: setInputCredentials,
    handleCredentialsValidationChange: setCredentialsValid,
    // NOTE(review): inverts the flag — onLoadingChange(true) marks credentials
    // as NOT loaded. Presumably the callback receives "isLoading"; confirm
    // against AgentOnboardingCredentials' contract.
    handleCredentialsLoadingChange: (v: boolean) => setCredentialsLoaded(!v),
  };
}

View File

@@ -0,0 +1,127 @@
"use client";
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
import { cn } from "@/lib/utils";
import { useRouter } from "next/navigation";
import { useEffect, useRef, useState } from "react";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
import { resolveResponse } from "@/app/api/helpers";
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
import { postV2AddMarketplaceAgent } from "@/app/api/__generated__/endpoints/library/library";
import { Confetti } from "@/components/molecules/Confetti/Confetti";
import type { ConfettiRef } from "@/components/molecules/Confetti/Confetti";
/**
 * Final onboarding "Congrats" screen.
 *
 * Fires a short (1.5s) confetti side-cannon burst, fades in the headline and
 * subtext, then after 3s marks the CONGRATS step complete and redirects:
 * to the library page of the agent selected during onboarding (adding it to
 * the user's library first), or to /library as a fallback on any failure.
 */
export default function Page() {
  const { completeStep } = useOnboarding(7, "AGENT_INPUT");
  const router = useRouter();
  const api = useBackendAPI();
  const [showText, setShowText] = useState(false);
  const [showSubtext, setShowSubtext] = useState(false);
  const confettiRef = useRef<ConfettiRef>(null);
  useEffect(() => {
    // Fire side cannons for a celebratory effect
    const duration = 1500;
    const end = Date.now() + duration;
    // Track the pending animation frame so the loop can be cancelled on
    // unmount. Previously the rAF loop kept running until `end` even after
    // the component unmounted.
    let rafId: number | null = null;
    function frame() {
      // Left cannon
      confettiRef.current?.fire({
        particleCount: 4,
        angle: 60,
        spread: 70,
        origin: { x: 0, y: 0.6 },
        shapes: ["square"],
        scalar: 0.8,
        gravity: 0.6,
        decay: 0.93,
      });
      // Right cannon
      confettiRef.current?.fire({
        particleCount: 4,
        angle: 120,
        spread: 70,
        origin: { x: 1, y: 0.6 },
        shapes: ["square"],
        scalar: 0.8,
        gravity: 0.6,
        decay: 0.93,
      });
      if (Date.now() < end) {
        rafId = requestAnimationFrame(frame);
      }
    }
    frame();
    // Staggered fade-ins for the headline and subtext.
    const timer0 = setTimeout(() => setShowText(true), 100);
    const timer1 = setTimeout(() => setShowSubtext(true), 500);
    // After the celebration, finish onboarding and redirect.
    const timer2 = setTimeout(async () => {
      completeStep("CONGRATS");
      try {
        const onboarding = await resolveResponse(getV1OnboardingState());
        if (onboarding?.selectedStoreListingVersionId) {
          try {
            const libraryAgent = await resolveResponse(
              postV2AddMarketplaceAgent({
                store_listing_version_id:
                  onboarding.selectedStoreListingVersionId,
                source: "onboarding",
              }),
            );
            router.replace(`/library/agents/${libraryAgent.id}`);
          } catch (error) {
            console.error("Failed to add agent to library:", error);
            router.replace("/library");
          }
        } else {
          router.replace("/library");
        }
      } catch (error) {
        console.error("Failed to get onboarding data:", error);
        router.replace("/library");
      }
    }, 3000);
    return () => {
      // Stop the confetti loop and all pending timers on unmount.
      if (rafId !== null) {
        cancelAnimationFrame(rafId);
      }
      clearTimeout(timer0);
      clearTimeout(timer1);
      clearTimeout(timer2);
    };
  }, [completeStep, router, api]);
  return (
    <div className="flex h-screen w-screen flex-col items-center justify-center bg-violet-100">
      <Confetti ref={confettiRef} manualstart />
      <div
        className={cn(
          "z-10 -mb-16 text-9xl duration-500",
          showText ? "opacity-100" : "opacity-0",
        )}
      >
        🎉
      </div>
      <h1
        className={cn(
          "font-poppins text-9xl font-medium tracking-tighter text-violet-700 duration-500",
          showText ? "opacity-100" : "opacity-0",
        )}
      >
        Congrats!
      </h1>
      <p
        className={cn(
          "mb-16 mt-4 font-poppins text-2xl font-medium text-violet-800 transition-opacity duration-500",
          showSubtext ? "opacity-100" : "opacity-0",
        )}
      >
        You earned 3$ for running your first agent
      </p>
    </div>
  );
}

View File

@@ -0,0 +1,105 @@
import { cn } from "@/lib/utils";
import StarRating from "./StarRating";
import SmartImage from "@/components/__legacy__/SmartImage";
import { StoreAgentDetails } from "@/app/api/__generated__/models/storeAgentDetails";
type OnboardingAgentCardProps = {
  agent?: StoreAgentDetails;
  selected?: boolean;
  onClick: () => void;
};

/**
 * Marketplace agent card for the onboarding agent-selection step.
 *
 * Renders a pulsing skeleton while `agent` is still loading; once loaded,
 * shows the cover image with the creator's avatar overlaid, agent name,
 * creator, description, run count and star rating. A violet ring highlights
 * the card when `selected` is true.
 */
export default function OnboardingAgentCard({
  agent,
  selected,
  onClick,
}: OnboardingAgentCardProps) {
  // Loading skeleton with the same footprint as the real card.
  if (!agent) {
    return (
      <div
        className={cn(
          "relative animate-pulse",
          "h-[394px] w-[368px] rounded-[20px] border border-transparent bg-zinc-200",
        )}
      />
    );
  }
  return (
    <div
      onClick={onClick}
      className={cn(
        "relative cursor-pointer transition-all duration-200 ease-in-out",
        "h-[394px] w-[368px] rounded-[20px] border border-transparent bg-white",
        selected ? "bg-[#F5F3FF80]" : "hover:border-zinc-400",
      )}
    >
      {/* Cover image with the creator's avatar overlaid bottom-left */}
      <div className="relative">
        <SmartImage
          src={agent.agent_image?.[0]}
          alt="Agent cover"
          className="m-2 h-[196px] w-[350px] rounded-[16px]"
        />
        <div className="absolute bottom-2 left-4">
          <SmartImage
            src={agent.creator_avatar}
            alt="Profile picture"
            className="h-[50px] w-[50px] rounded-full border border-white"
          />
        </div>
      </div>
      {/* Name (2 lines), creator (1 line), description (3 lines), with the
          run count + rating pinned to the bottom of the card */}
      <div className="flex h-[180px] flex-col justify-between px-4 pb-3">
        <div>
          <p className="data-sentry-unmask text-md line-clamp-2 max-h-[50px] font-sans text-base font-medium leading-normal text-zinc-800">
            {agent.agent_name}
          </p>
          <p className="data-sentry-unmask truncate text-sm font-normal leading-normal text-zinc-600">
            by {agent.creator}
          </p>
          <p
            className={cn(
              "mt-2 line-clamp-3 text-sm leading-5",
              selected ? "text-zinc-500" : "text-zinc-400",
            )}
          >
            {agent.description}
          </p>
        </div>
        <div className="flex w-full items-center justify-between">
          <span className="mt-1 font-sans text-sm font-medium text-zinc-800">
            {agent.runs?.toLocaleString("en-US")} runs
          </span>
          <StarRating rating={agent.rating} />
        </div>
      </div>
      {/* Selection highlight ring */}
      <div
        className={cn(
          "pointer-events-none absolute inset-0 rounded-[20px] border-2 transition-all duration-200 ease-in-out",
          selected ? "border-violet-700" : "border-transparent",
        )}
      />
    </div>
  );
}

View File

@@ -0,0 +1,20 @@
import { ChevronLeft } from "lucide-react";
import Link from "next/link";
interface OnboardingBackButtonProps {
href: string;
}
export default function OnboardingBackButton({
href,
}: OnboardingBackButtonProps) {
return (
<Link
className="flex items-center gap-2 font-sans text-base font-medium text-zinc-700 transition-colors duration-200 hover:text-zinc-800"
href={href}
>
<ChevronLeft size={24} className="-mr-1" />
<span>Back</span>
</Link>
);
}

View File

@@ -0,0 +1,76 @@
import { useCallback, useMemo, useState } from "react";
import { LoadingSpinner } from "@/components/__legacy__/ui/loading";
import { cn } from "@/lib/utils";
import Link from "next/link";
const variants = {
  default: "bg-zinc-700 hover:bg-zinc-800",
  violet: "bg-violet-600 hover:bg-violet-700",
};

type OnboardingButtonProps = {
  className?: string;
  variant?: keyof typeof variants;
  children?: React.ReactNode;
  loading?: boolean;
  disabled?: boolean;
  onClick?: () => void;
  href?: string;
  icon?: React.ReactNode;
};

/**
 * Pill-shaped onboarding CTA. Renders a <Link> when `href` is given and the
 * button is enabled, otherwise a plain <button>. While loading — either via
 * the controlled `loading` prop or an internal flag set on click — a spinner
 * replaces the icon.
 */
export default function OnboardingButton({
  className,
  variant = "default",
  children,
  loading,
  disabled,
  onClick,
  href,
  icon,
}: OnboardingButtonProps) {
  const [selfLoading, setSelfLoading] = useState(false);
  // A controlled `loading` prop wins; otherwise use the internal flag.
  const showSpinner = loading !== undefined ? loading : selfLoading;
  const classes = useMemo(
    () =>
      cn(
        "font-sans text-white text-sm font-medium",
        "inline-flex justify-center items-center",
        "h-12 min-w-[100px] rounded-full py-3 px-5",
        "transition-colors duration-200",
        className,
        disabled ? "bg-zinc-300 cursor-not-allowed" : variants[variant],
      ),
    [disabled, variant, className],
  );
  const handleClick = useCallback(() => {
    setSelfLoading(true);
    onClick?.();
  }, [onClick]);
  // Spinner takes the icon's place while loading.
  const leading = showSpinner ? (
    <LoadingSpinner className="mr-2 size-5" />
  ) : (
    icon
  );
  if (href && !disabled) {
    return (
      <Link href={href} onClick={handleClick} className={classes}>
        {leading}
        {children}
      </Link>
    );
  }
  return (
    <button onClick={handleClick} disabled={disabled} className={classes}>
      {leading}
      {children}
    </button>
  );
}

View File

@@ -0,0 +1,86 @@
import { cn } from "@/lib/utils";
import SmartImage from "@/components/__legacy__/SmartImage";
type OnboardingGridElementProps = {
  name: string;
  text: string;
  icon: string;
  selected: boolean;
  onClick: () => void;
};

/** One selectable tile in the onboarding grid: logo, name and short blurb. */
function OnboardingGridElement(props: OnboardingGridElementProps) {
  const { name, text, icon, selected, onClick } = props;
  return (
    <button
      onClick={onClick}
      className={cn(
        "relative flex h-[236px] w-[200px] flex-col items-start gap-2 rounded-xl border border-transparent bg-white p-[15px] font-sans",
        "transition-all duration-200 ease-in-out",
        selected ? "bg-[#F5F3FF80]" : "hover:border-zinc-400",
      )}
    >
      <SmartImage
        src={icon}
        alt={`Logo of ${name}`}
        imageContain
        className="h-12 w-12 rounded-lg"
      />
      <span className="text-md mt-4 w-full text-left font-medium leading-normal text-[#121212]">
        {name}
      </span>
      <span className="w-full text-left text-[11.5px] font-normal leading-5 text-zinc-500">
        {text}
      </span>
      {/* Selection highlight ring */}
      <div
        className={cn(
          "pointer-events-none absolute inset-0 rounded-xl border-2 transition-all duration-200 ease-in-out",
          selected ? "border-violet-700" : "border-transparent",
        )}
      />
    </button>
  );
}
type OnboardingGridProps = {
  className?: string;
  elements: Array<{
    name: string;
    text: string;
    icon: string;
  }>;
  selected?: string[];
  onSelect: (name: string) => void;
};

/** Responsive multi-select grid of OnboardingGridElement tiles, keyed by name. */
export function OnboardingGrid({
  className,
  elements,
  selected,
  onSelect,
}: OnboardingGridProps) {
  const tiles = elements.map(({ name, text, icon }) => (
    <OnboardingGridElement
      key={name}
      name={name}
      text={text}
      icon={icon}
      selected={Boolean(selected?.includes(name))}
      onClick={() => onSelect(name)}
    />
  ));
  return (
    <div
      className={cn(
        className,
        "grid grid-cols-1 gap-3 sm:grid-cols-2 lg:grid-cols-4",
      )}
    >
      {tiles}
    </div>
  );
}

View File

@@ -0,0 +1,29 @@
import { cn } from "@/lib/utils";
interface OnboardingInputProps {
className?: string;
placeholder: string;
value: string;
onChange: (value: string) => void;
}
export default function OnboardingInput({
className,
placeholder,
value,
onChange,
}: OnboardingInputProps) {
return (
<input
className={cn(
className,
"font-poppin relative h-[50px] w-[512px] rounded-[25px] border border-transparent bg-white px-4 text-sm font-normal leading-normal text-zinc-900",
"transition-all duration-200 ease-in-out placeholder:text-zinc-400",
"focus:border-transparent focus:bg-[#F5F3FF80] focus:outline-none focus:ring-2 focus:ring-violet-700",
)}
placeholder={placeholder}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
);
}

View File

@@ -0,0 +1,135 @@
import { cn } from "@/lib/utils";
import { Check } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
type OnboardingListElementProps = {
  // Small heading shown above the option text.
  label: string;
  // Option text; for a `custom` element this seeds the free-text input.
  text: string;
  selected?: boolean;
  // When true, the element renders an editable input instead of static text.
  custom?: boolean;
  // Called with the option's content: the fixed text for normal options, or
  // the current free-text value for a custom option.
  onClick: (content: string) => void;
};
/**
 * One row in an onboarding single-select list. Normal rows show a label,
 * static text and a check mark when selected; a `custom` row turns into a
 * free-text input while selected, forwarding each keystroke via onClick.
 */
export function OnboardingListElement({
  label,
  text,
  selected,
  custom,
  onClick,
}: OnboardingListElementProps) {
  const inputRef = useRef<HTMLInputElement>(null);
  // NOTE(review): captures the initial `text` prop only — later changes to
  // `text` do not update `content`. Presumably intentional for the custom
  // row, but confirm if a parent ever rewrites `text` after mount.
  const [content, setContent] = useState(text);
  // Auto-focus the free-text input when the custom row becomes selected.
  useEffect(() => {
    if (selected && custom && inputRef.current) {
      inputRef.current.focus();
    }
  }, [selected, custom]);
  // Propagate every keystroke of the custom text to the parent immediately.
  const setCustomText = (e: React.ChangeEvent<HTMLInputElement>) => {
    setContent(e.target.value);
    onClick(e.target.value);
  };
  return (
    // NOTE(review): the custom branch nests an <input> inside this <button>,
    // which is invalid HTML (interactive content inside a button) and means
    // typing also bubbles clicks to the button handler — verify intended.
    <button
      onClick={() => onClick(content)}
      className={cn(
        "relative flex h-[78px] w-[530px] items-center rounded-xl border border-transparent px-5 py-4 transition-all duration-200 ease-in-out",
        selected ? "bg-[#F5F3FF80]" : "bg-white hover:border-zinc-400",
      )}
    >
      <div className="flex w-full flex-col items-start gap-1">
        <span className="text-sm font-medium text-zinc-700">{label}</span>
        {custom && selected ? (
          <input
            ref={inputRef}
            className={cn(
              selected ? "text-zinc-600" : "text-zinc-400",
              "font-poppin w-full border-0 bg-[#F5F3FF80] text-sm focus:outline-none",
            )}
            placeholder="Please specify"
            value={content}
            onChange={setCustomText}
          />
        ) : (
          <span
            className={cn(
              selected ? "text-zinc-600" : "text-zinc-400",
              "text-sm",
            )}
          >
            {custom ? "Please specify" : text}
          </span>
        )}
      </div>
      {/* Check mark for non-custom rows; transparent until selected */}
      {!custom && (
        <div className="absolute right-4">
          <Check
            size={24}
            className={cn(
              "transition-all duration-200 ease-in-out",
              selected ? "text-violet-700" : "text-transparent",
            )}
          />
        </div>
      )}
      {/* Selection highlight ring */}
      <div
        className={cn(
          "pointer-events-none absolute inset-0 rounded-xl border-2 transition-all duration-200 ease-in-out",
          selected ? "border-violet-700" : "border-transparent",
        )}
      />
    </button>
  );
}
type OnboardingListProps = {
  className?: string;
  elements: Array<{
    label: string;
    text: string;
    id: string;
  }>;
  // Currently selected element id, or free text entered in the "Other" row.
  selectedId?: string | null;
  onSelect: (id: string) => void;
};

/**
 * Vertical single-select list of predefined options plus a free-text "Other"
 * row. A selection that matches no predefined id is treated as custom text
 * and shown in the "Other" row; its text is forwarded to onSelect as the id.
 */
function OnboardingList({
  className,
  elements,
  selectedId,
  onSelect,
}: OnboardingListProps) {
  // A selection is "custom" when it matches no predefined element id.
  // Fix: use `!= null` so an *undefined* selectedId (prop omitted) is not
  // mis-detected as a custom selection — the original `!== null` check made
  // the "Other" row appear selected with an undefined value. The plain
  // computation also replaces a useCallback that was invoked immediately.
  const customSelected =
    selectedId != null && !elements.some((element) => element.id === selectedId);
  return (
    <div className={cn(className, "flex flex-col gap-2")}>
      {elements.map((element) => (
        <OnboardingListElement
          key={element.id}
          label={element.label}
          text={element.text}
          selected={element.id === selectedId}
          onClick={() => onSelect(element.id)}
        />
      ))}
      {/* Free-text "Other" option; its text doubles as the selection id. */}
      <OnboardingListElement
        label="Other"
        text={customSelected ? (selectedId ?? "") : ""}
        selected={customSelected}
        custom
        onClick={(c) => {
          onSelect(c);
        }}
      />
    </div>
  );
}

export default OnboardingList;

View File

@@ -0,0 +1,45 @@
import { useState, useEffect, useRef } from "react";
interface OnboardingProgressProps {
  // Number of background dots is totalSteps + 1.
  totalSteps: number;
  // 1-based step; the indicator sits over dot index toStep - 1.
  toStep: number;
}
/**
 * Row of progress dots with a sliding pill indicating the current step.
 * The indicator snaps into place on initial mount and animates on every
 * subsequent step change.
 */
export default function OnboardingProgress({
  totalSteps,
  toStep,
}: OnboardingProgressProps) {
  const [animatedStep, setAnimatedStep] = useState(toStep - 1);
  const isInitialMount = useRef(true);
  useEffect(() => {
    if (isInitialMount.current) {
      // On initial mount, just set the position without animation
      isInitialMount.current = false;
      return;
    }
    // After initial mount, animate position changes
    setAnimatedStep(toStep - 1);
  }, [toStep]);
  return (
    <div className="relative flex items-center justify-center gap-3">
      {/* Background circles */}
      {Array.from({ length: totalSteps + 1 }).map((_, index) => (
        <div key={index} className="h-2 w-2 rounded-full bg-zinc-400" />
      ))}
      {/* Animated progress indicator */}
      {/* NOTE(review): mutating the ref does not trigger a rerender, so the
          transition class only appears after the first post-mount state
          update (the next toStep change) — which is exactly when it's needed. */}
      <div
        className={`absolute left-0 h-2 w-7 rounded-full bg-zinc-400 ${
          !isInitialMount.current
            ? "transition-all duration-300 ease-in-out"
            : ""
        }`}
        style={{
          // Each dot is 8px wide with a 12px gap -> 20px stride per step.
          transform: `translateX(${animatedStep * 20}px)`,
        }}
      />
    </div>
  );
}

View File

@@ -0,0 +1,66 @@
"use client";
import { ReactNode } from "react";
import OnboardingBackButton from "./OnboardingBackButton";
import { cn } from "@/lib/utils";
import OnboardingProgress from "./OnboardingProgress";
import { useOnboarding } from "../../../../providers/onboarding/onboarding-provider";
export function OnboardingStep({
dotted,
children,
}: {
dotted?: boolean;
children: ReactNode;
}) {
return (
<div className="relative flex min-h-screen w-full flex-col">
{dotted && (
<div className="absolute left-1/2 h-full w-1/2 bg-white bg-[radial-gradient(#e5e7eb77_1px,transparent_1px)] [background-size:10px_10px]"></div>
)}
<div className="z-10 flex flex-col items-center">{children}</div>
</div>
);
}
interface OnboardingHeaderProps {
  backHref: string;
  transparent?: boolean;
  children?: ReactNode;
}

/**
 * Sticky top bar for onboarding screens: back button plus step-progress dots,
 * with optional extra content below and a gradient fade when not transparent.
 */
export function OnboardingHeader(props: OnboardingHeaderProps) {
  const { backHref, transparent, children } = props;
  const { step } = useOnboarding();
  const barBackground = transparent ? "bg-transparent" : "bg-gray-100";
  return (
    <div className="sticky top-0 z-10 w-full">
      <div className={cn(barBackground, "pb-5")}>
        <div className="flex w-full items-center justify-between px-5 py-4">
          <OnboardingBackButton href={backHref} />
          <OnboardingProgress totalSteps={5} toStep={(step || 1) - 1} />
        </div>
        {children}
      </div>
      {transparent ? null : (
        <div className="h-4 w-full bg-gradient-to-b from-gray-100 via-gray-100/50 to-transparent" />
      )}
    </div>
  );
}
export function OnboardingFooter({ children }: { children?: ReactNode }) {
return (
<div className="sticky bottom-0 z-10 w-full">
<div className="h-4 w-full bg-gradient-to-t from-gray-100 via-gray-100/50 to-transparent" />
<div className="flex justify-center bg-gray-100">
<div className="px-5 py-5">{children}</div>
</div>
</div>
);
}

Some files were not shown because too many files have changed in this diff Show More