refactor(frontend): streamline NodeDataViewer component and execution results handling

### Changes - Removed unused `NodeExecutionResult` type and `executionResults` prop from `NodeDataViewerProps`. - Simplified the logic for resolving execution results by directly using the `useNodeStore` hook. - Updated the component to ensure consistent handling of data types and improved readability. ### Impact - Enhances code clarity and maintainability by reducing unnecessary complexity in the component. - Ensures that the latest execution results are effectively utilized in the data viewer. ### Testing - Verified that the component functions correctly with the updated logic and maintains expected behavior.
Merge branch 'dev' into abhi/show-all-execution-node
2026-01-25 06:58:21 -05:00 · 2026-01-25 12:25:49 +05:30 · 2026-01-25 12:17:28 +05:30 · 2026-01-25 12:17:12 +05:30 · 2026-01-25 12:03:22 +05:30 · 2026-01-25 11:54:05 +05:30
2654 changed files with 832670 additions and 38898 deletions
--- a/.github/workflows/classic-autogpt-ci.yml
+++ b/.github/workflows/classic-autogpt-ci.yml
@@ -6,15 +6,11 @@ on:
    paths:
      - '.github/workflows/classic-autogpt-ci.yml'
      - 'classic/original_autogpt/**'
-      - 'classic/direct_benchmark/**'
-      - 'classic/forge/**'
  pull_request:
    branches: [ master, dev, release-* ]
    paths:
      - '.github/workflows/classic-autogpt-ci.yml'
      - 'classic/original_autogpt/**'
-      - 'classic/direct_benchmark/**'
-      - 'classic/forge/**'

 concurrency:
  group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -23,22 +19,47 @@ concurrency:
 defaults:
  run:
    shell: bash
-    working-directory: classic
+    working-directory: classic/original_autogpt

 jobs:
  test:
    permissions:
      contents: read
    timeout-minutes: 30
-    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10"]
+        platform-os: [ubuntu, macos, macos-arm64, windows]
+    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}

    steps:
-      - name: Start MinIO service
+      # Quite slow on macOS (2~4 minutes to set up Docker)
+      # - name: Set up Docker (macOS)
+      #   if: runner.os == 'macOS'
+      #   uses: crazy-max/ghaction-setup-docker@v3
+
+      - name: Start MinIO service (Linux)
+        if: runner.os == 'Linux'
        working-directory: '.'
        run: |
          docker pull minio/minio:edge-cicd
          docker run -d -p 9000:9000 minio/minio:edge-cicd

+      - name: Start MinIO service (macOS)
+        if: runner.os == 'macOS'
+        working-directory: ${{ runner.temp }}
+        run: |
+          brew install minio/stable/minio
+          mkdir data
+          minio server ./data &
+
+      # No MinIO on Windows:
+      # - Windows doesn't support running Linux Docker containers
+      # - It doesn't seem possible to start background processes on Windows. They are
+      #   killed after the step returns.
+      #   See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
+
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
@@ -50,23 +71,41 @@ jobs:
          git config --global user.name "Auto-GPT-Bot"
          git config --global user.email "github-bot@agpt.co"

-      - name: Set up Python 3.12
+      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
-          python-version: "3.12"
+          python-version: ${{ matrix.python-version }}

      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Set up Python dependency cache
+        # On Windows, unpacking cached dependencies takes longer than just installing them
+        if: runner.os != 'Windows'
        uses: actions/cache@v4
        with:
-          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
+          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}

-      - name: Install Poetry
-        run: curl -sSL https://install.python-poetry.org | python3 -
+      - name: Install Poetry (Unix)
+        if: runner.os != 'Windows'
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+
+          if [ "${{ runner.os }}" = "macOS" ]; then
+            PATH="$HOME/.local/bin:$PATH"
+            echo "$HOME/.local/bin" >> $GITHUB_PATH
+          fi
+
+      - name: Install Poetry (Windows)
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
+
+          $env:PATH += ";$env:APPDATA\Python\Scripts"
+          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH

      - name: Install Python dependencies
        run: poetry install
@@ -77,12 +116,12 @@ jobs:
            --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
            --numprocesses=logical --durations=10 \
            --junitxml=junit.xml -o junit_family=legacy \
-            original_autogpt/tests/unit original_autogpt/tests/integration
+            tests/unit tests/integration
        env:
          CI: true
          PLAIN_OUTPUT: True
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          S3_ENDPOINT_URL: http://127.0.0.1:9000
+          S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
          AWS_ACCESS_KEY_ID: minioadmin
          AWS_SECRET_ACCESS_KEY: minioadmin

@@ -96,11 +135,11 @@ jobs:
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-          flags: autogpt-agent
+          flags: autogpt-agent,${{ runner.os }}

      - name: Upload logs to artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
-          path: classic/logs/
+          path: classic/original_autogpt/logs/
--- a/.github/workflows/classic-autogpts-ci.yml
+++ b/.github/workflows/classic-autogpts-ci.yml
@@ -11,6 +11,9 @@ on:
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
      - 'classic/benchmark/**'
+      - 'classic/run'
+      - 'classic/cli.py'
+      - 'classic/setup.py'
      - '!**/*.md'
  pull_request:
    branches: [ master, dev, release-* ]
@@ -19,6 +22,9 @@ on:
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
      - 'classic/benchmark/**'
+      - 'classic/run'
+      - 'classic/cli.py'
+      - 'classic/setup.py'
      - '!**/*.md'

 defaults:
@@ -29,9 +35,13 @@ defaults:
 jobs:
  serve-agent-protocol:
    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        agent-name: [ original_autogpt ]
+      fail-fast: false
    timeout-minutes: 20
    env:
-      min-python-version: '3.12'
+      min-python-version: '3.10'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -45,22 +55,22 @@ jobs:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
+        working-directory: ./classic/${{ matrix.agent-name }}/
        run: |
          curl -sSL https://install.python-poetry.org | python -

-      - name: Install dependencies
-        run: poetry install
-
-      - name: Run smoke tests with direct-benchmark
+      - name: Run regression tests
        run: |
-          poetry run direct-benchmark run \
-            --strategies one_shot \
-            --models claude \
-            --tests ReadFile,WriteFile \
-            --json
+          ./run agent start ${{ matrix.agent-name }}
+          cd ${{ matrix.agent-name }}
+          poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
+          poetry run agbenchmark --test=WriteFile
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AGENT_NAME: ${{ matrix.agent-name }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
-          NONINTERACTIVE_MODE: "true"
-          CI: true
+          HELICONE_CACHE_ENABLED: false
+          HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
+          REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
+          TELEMETRY_ENVIRONMENT: autogpt-ci
+          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
--- a/.github/workflows/classic-benchmark-ci.yml
+++ b/.github/workflows/classic-benchmark-ci.yml
@@ -1,21 +1,17 @@
-name: Classic - Direct Benchmark CI
+name: Classic - AGBenchmark CI

 on:
  push:
    branches: [ master, dev, ci-test* ]
    paths:
-      - 'classic/direct_benchmark/**'
-      - 'classic/benchmark/agbenchmark/challenges/**'
-      - 'classic/original_autogpt/**'
-      - 'classic/forge/**'
+      - 'classic/benchmark/**'
+      - '!classic/benchmark/reports/**'
      - .github/workflows/classic-benchmark-ci.yml
  pull_request:
    branches: [ master, dev, release-* ]
    paths:
-      - 'classic/direct_benchmark/**'
-      - 'classic/benchmark/agbenchmark/challenges/**'
-      - 'classic/original_autogpt/**'
-      - 'classic/forge/**'
+      - 'classic/benchmark/**'
+      - '!classic/benchmark/reports/**'
      - .github/workflows/classic-benchmark-ci.yml

 concurrency:
@@ -27,16 +23,23 @@ defaults:
    shell: bash

 env:
-  min-python-version: '3.12'
+  min-python-version: '3.10'

 jobs:
-  benchmark-tests:
-    runs-on: ubuntu-latest
+  test:
+    permissions:
+      contents: read
    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10"]
+        platform-os: [ubuntu, macos, macos-arm64, windows]
+    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
    defaults:
      run:
        shell: bash
-        working-directory: classic
+        working-directory: classic/benchmark
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -44,84 +47,71 @@ jobs:
          fetch-depth: 0
          submodules: true

-      - name: Set up Python ${{ env.min-python-version }}
+      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
-          python-version: ${{ env.min-python-version }}
+          python-version: ${{ matrix.python-version }}

      - name: Set up Python dependency cache
+        # On Windows, unpacking cached dependencies takes longer than just installing them
+        if: runner.os != 'Windows'
        uses: actions/cache@v4
        with:
-          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
+          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/benchmark/poetry.lock') }}

-      - name: Install Poetry
+      - name: Install Poetry (Unix)
+        if: runner.os != 'Windows'
        run: |
          curl -sSL https://install.python-poetry.org | python3 -

-      - name: Install dependencies
+          if [ "${{ runner.os }}" = "macOS" ]; then
+            PATH="$HOME/.local/bin:$PATH"
+            echo "$HOME/.local/bin" >> $GITHUB_PATH
+          fi
+
+      - name: Install Poetry (Windows)
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
+
+          $env:PATH += ";$env:APPDATA\Python\Scripts"
+          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
+
+      - name: Install Python dependencies
        run: poetry install

-      - name: Run basic benchmark tests
+      - name: Run pytest with coverage
        run: |
-          echo "Testing ReadFile challenge with one_shot strategy..."
-          poetry run direct-benchmark run \
-            --strategies one_shot \
-            --models claude \
-            --tests ReadFile \
-            --json
-
-          echo "Testing WriteFile challenge..."
-          poetry run direct-benchmark run \
-            --strategies one_shot \
-            --models claude \
-            --tests WriteFile \
-            --json
+          poetry run pytest -vv \
+            --cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
+            --durations=10 \
+            --junitxml=junit.xml -o junit_family=legacy \
+            tests
        env:
          CI: true
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          NONINTERACTIVE_MODE: "true"

-      - name: Test category filtering
-        run: |
-          echo "Testing coding category..."
-          poetry run direct-benchmark run \
-            --strategies one_shot \
-            --models claude \
-            --categories coding \
-            --tests ReadFile,WriteFile \
-            --json
-        env:
-          CI: true
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          NONINTERACTIVE_MODE: "true"
+      - name: Upload test results to Codecov
+        if: ${{ !cancelled() }}  # Run even if tests fail
+        uses: codecov/test-results-action@v1
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}

-      - name: Test multiple strategies
-        run: |
-          echo "Testing multiple strategies..."
-          poetry run direct-benchmark run \
-            --strategies one_shot,plan_execute \
-            --models claude \
-            --tests ReadFile \
-            --parallel 2 \
-            --json
-        env:
-          CI: true
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          NONINTERACTIVE_MODE: "true"
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          flags: agbenchmark,${{ runner.os }}

-  # Run regression tests on maintain challenges
-  regression-tests:
+  self-test-with-agent:
    runs-on: ubuntu-latest
-    timeout-minutes: 45
-    if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/dev'
-    defaults:
-      run:
-        shell: bash
-        working-directory: classic
+    strategy:
+      matrix:
+        agent-name: [forge]
+      fail-fast: false
+    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -136,22 +126,51 @@ jobs:

      - name: Install Poetry
        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-
-      - name: Install dependencies
-        run: poetry install
+          curl -sSL https://install.python-poetry.org | python -

      - name: Run regression tests
+        working-directory: classic
        run: |
-          echo "Running regression tests (previously beaten challenges)..."
-          poetry run direct-benchmark run \
-            --strategies one_shot \
-            --models claude \
-            --maintain \
-            --parallel 4 \
-            --json
+          ./run agent start ${{ matrix.agent-name }}
+          cd ${{ matrix.agent-name }}
+
+          set +e # Ignore non-zero exit codes and continue execution
+          echo "Running the following command: poetry run agbenchmark --maintain --mock"
+          poetry run agbenchmark --maintain --mock
+          EXIT_CODE=$?
+          set -e  # Stop ignoring non-zero exit codes
+          # Check if the exit code was 5, and if so, exit with 0 instead
+          if [ $EXIT_CODE -eq 5 ]; then
+            echo "regression_tests.json is empty."
+          fi
+
+          echo "Running the following command: poetry run agbenchmark --mock"
+          poetry run agbenchmark --mock
+
+          echo "Running the following command: poetry run agbenchmark --mock --category=data"
+          poetry run agbenchmark --mock --category=data
+
+          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
+          poetry run agbenchmark --mock --category=coding
+
+          # echo "Running the following command: poetry run agbenchmark --test=WriteFile"
+          # poetry run agbenchmark --test=WriteFile
+          cd ../benchmark
+          poetry install
+          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
+          export BUILD_SKILL_TREE=true
+
+          # poetry run agbenchmark --mock
+
+          # CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../classic/frontend/assets)') || echo "No diffs"
+          # if [ ! -z "$CHANGED" ]; then
+          #   echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
+          #   echo "$CHANGED"
+          #   exit 1
+          # else
+          #   echo "No unstaged changes."
+          # fi
        env:
-          CI: true
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          NONINTERACTIVE_MODE: "true"
+          TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
+          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
--- a/.github/workflows/classic-forge-ci.yml
+++ b/.github/workflows/classic-forge-ci.yml
@@ -6,11 +6,13 @@ on:
    paths:
      - '.github/workflows/classic-forge-ci.yml'
      - 'classic/forge/**'
+      - '!classic/forge/tests/vcr_cassettes'
  pull_request:
    branches: [ master, dev, release-* ]
    paths:
      - '.github/workflows/classic-forge-ci.yml'
      - 'classic/forge/**'
+      - '!classic/forge/tests/vcr_cassettes'

 concurrency:
  group: ${{ format('forge-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -19,38 +21,115 @@ concurrency:
 defaults:
  run:
    shell: bash
-    working-directory: classic
+    working-directory: classic/forge

 jobs:
  test:
    permissions:
      contents: read
    timeout-minutes: 30
-    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10"]
+        platform-os: [ubuntu, macos, macos-arm64, windows]
+    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}

    steps:
-      - name: Start MinIO service
+      # Quite slow on macOS (2~4 minutes to set up Docker)
+      # - name: Set up Docker (macOS)
+      #   if: runner.os == 'macOS'
+      #   uses: crazy-max/ghaction-setup-docker@v3
+
+      - name: Start MinIO service (Linux)
+        if: runner.os == 'Linux'
        working-directory: '.'
        run: |
          docker pull minio/minio:edge-cicd
          docker run -d -p 9000:9000 minio/minio:edge-cicd

+      - name: Start MinIO service (macOS)
+        if: runner.os == 'macOS'
+        working-directory: ${{ runner.temp }}
+        run: |
+          brew install minio/stable/minio
+          mkdir data
+          minio server ./data &
+
+      # No MinIO on Windows:
+      # - Windows doesn't support running Linux Docker containers
+      # - It doesn't seem possible to start background processes on Windows. They are
+      #   killed after the step returns.
+      #   See: https://github.com/actions/runner/issues/598#issuecomment-2011890429
+
      - name: Checkout repository
        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: true

-      - name: Set up Python 3.12
+      - name: Checkout cassettes
+        if: ${{ startsWith(github.event_name, 'pull_request') }}
+        env:
+          PR_BASE: ${{ github.event.pull_request.base.ref }}
+          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+        run: |
+          cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
+          cassette_base_branch="${PR_BASE}"
+          cd tests/vcr_cassettes
+
+          if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
+            cassette_base_branch="master"
+          fi
+
+          if git ls-remote --exit-code --heads origin $cassette_branch ; then
+            git fetch origin $cassette_branch
+            git fetch origin $cassette_base_branch
+
+            git checkout $cassette_branch
+
+            # Pick non-conflicting cassette updates from the base branch
+            git merge --no-commit --strategy-option=ours origin/$cassette_base_branch
+            echo "Using cassettes from mirror branch '$cassette_branch'," \
+              "synced to upstream branch '$cassette_base_branch'."
+          else
+            git checkout -b $cassette_branch
+            echo "Branch '$cassette_branch' does not exist in cassette submodule." \
+              "Using cassettes from '$cassette_base_branch'."
+          fi
+
+      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
-          python-version: "3.12"
+          python-version: ${{ matrix.python-version }}

      - name: Set up Python dependency cache
+        # On Windows, unpacking cached dependencies takes longer than just installing them
+        if: runner.os != 'Windows'
        uses: actions/cache@v4
        with:
-          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}
+          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/forge/poetry.lock') }}

-      - name: Install Poetry
-        run: curl -sSL https://install.python-poetry.org | python3 -
+      - name: Install Poetry (Unix)
+        if: runner.os != 'Windows'
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+
+          if [ "${{ runner.os }}" = "macOS" ]; then
+            PATH="$HOME/.local/bin:$PATH"
+            echo "$HOME/.local/bin" >> $GITHUB_PATH
+          fi
+
+      - name: Install Poetry (Windows)
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
+
+          $env:PATH += ";$env:APPDATA\Python\Scripts"
+          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH

      - name: Install Python dependencies
        run: poetry install
@@ -61,15 +140,12 @@ jobs:
            --cov=forge --cov-branch --cov-report term-missing --cov-report xml \
            --durations=10 \
            --junitxml=junit.xml -o junit_family=legacy \
-            forge/forge forge/tests
+            forge
        env:
          CI: true
          PLAIN_OUTPUT: True
-          # API keys - tests that need these will skip if not available
-          # Secrets are not available to fork PRs (GitHub security feature)
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          S3_ENDPOINT_URL: http://127.0.0.1:9000
+          S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
          AWS_ACCESS_KEY_ID: minioadmin
          AWS_SECRET_ACCESS_KEY: minioadmin

@@ -83,11 +159,85 @@ jobs:
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-          flags: forge
+          flags: forge,${{ runner.os }}
+
+      - id: setup_git_auth
+        name: Set up git token authentication
+        # Cassettes may be pushed even when tests fail
+        if: success() || failure()
+        run: |
+          config_key="http.${{ github.server_url }}/.extraheader"
+          if [ "${{ runner.os }}" = 'macOS' ]; then
+            base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64)
+          else
+            base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0)
+          fi
+
+          git config "$config_key" \
+            "Authorization: Basic $base64_pat"
+
+          cd tests/vcr_cassettes
+          git config "$config_key" \
+            "Authorization: Basic $base64_pat"
+
+          echo "config_key=$config_key" >> $GITHUB_OUTPUT
+
+      - id: push_cassettes
+        name: Push updated cassettes
+        # For pull requests, push updated cassettes even when tests fail
+        if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure()))
+        env:
+          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
+          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
+        run: |
+          if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then
+            is_pull_request=true
+            cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
+          else
+            cassette_branch="${{ github.ref_name }}"
+          fi
+
+          cd tests/vcr_cassettes
+          # Commit & push changes to cassettes if any
+          if ! git diff --quiet; then
+            git add .
+            git commit -m "Auto-update cassettes"
+            git push origin HEAD:$cassette_branch
+            if [ ! $is_pull_request ]; then
+              cd ../..
+              git add tests/vcr_cassettes
+              git commit -m "Update cassette submodule"
+              git push origin HEAD:$cassette_branch
+            fi
+            echo "updated=true" >> $GITHUB_OUTPUT
+          else
+            echo "updated=false" >> $GITHUB_OUTPUT
+            echo "No cassette changes to commit"
+          fi
+
+      - name: Post Set up git token auth
+        if: steps.setup_git_auth.outcome == 'success'
+        run: |
+          git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
+          git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
+
+      - name: Apply "behaviour change" label and comment on PR
+        if: ${{ startsWith(github.event_name, 'pull_request') }}
+        run: |
+          PR_NUMBER="${{ github.event.pull_request.number }}"
+          TOKEN="${{ secrets.PAT_REVIEW }}"
+          REPO="${{ github.repository }}"
+
+          if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then
+            echo "Adding label and comment..."
+            echo $TOKEN | gh auth login --with-token
+            gh issue edit $PR_NUMBER --add-label "behaviour change"
+            gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour on ${{ runner.os }}. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
+          fi

      - name: Upload logs to artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
-          path: classic/logs/
+          path: classic/forge/logs/
--- a/.github/workflows/classic-frontend-ci.yml
+++ b/.github/workflows/classic-frontend-ci.yml
@@ -0,0 +1,60 @@
+name: Classic - Frontend CI/CD
+
+on:
+  push:
+    branches:
+      - master
+      - dev
+      - 'ci-test*' # This will match any branch that starts with "ci-test"
+    paths:
+      - 'classic/frontend/**'
+      - '.github/workflows/classic-frontend-ci.yml'
+  pull_request:
+    paths:
+      - 'classic/frontend/**'
+      - '.github/workflows/classic-frontend-ci.yml'
+
+jobs:
+  build:
+    permissions:
+      contents: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    env:
+      BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}
+
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Setup Flutter
+        uses: subosito/flutter-action@v2
+        with:
+          flutter-version: '3.13.2'
+
+      - name: Build Flutter to Web
+        run: |
+          cd classic/frontend
+          flutter build web --base-href /app/
+
+      # - name: Commit and Push to ${{ env.BUILD_BRANCH }}
+      #   if: github.event_name == 'push'
+      #   run: |
+      #     git config --local user.email "action@github.com"
+      #     git config --local user.name "GitHub Action"
+      #     git add classic/frontend/build/web
+      #     git checkout -B ${{ env.BUILD_BRANCH }}
+      #     git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
+      #     git push -f origin ${{ env.BUILD_BRANCH }}
+
+      - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
+        if: github.event_name == 'push'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          add-paths: classic/frontend/build/web
+          base: ${{ github.ref_name }}
+          branch: ${{ env.BUILD_BRANCH }}
+          delete-branch: true
+          title: "Update frontend build in `${{ github.ref_name }}`"
+          body: "This PR updates the frontend build based on commit ${{ github.sha }}."
+          commit-message: "Update frontend build based on commit ${{ github.sha }}"
--- a/.github/workflows/classic-python-checks.yml
+++ b/.github/workflows/classic-python-checks.yml
@@ -7,9 +7,7 @@ on:
      - '.github/workflows/classic-python-checks-ci.yml'
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
-      - 'classic/direct_benchmark/**'
-      - 'classic/pyproject.toml'
-      - 'classic/poetry.lock'
+      - 'classic/benchmark/**'
      - '**.py'
      - '!classic/forge/tests/vcr_cassettes'
  pull_request:
@@ -18,9 +16,7 @@ on:
      - '.github/workflows/classic-python-checks-ci.yml'
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
-      - 'classic/direct_benchmark/**'
-      - 'classic/pyproject.toml'
-      - 'classic/poetry.lock'
+      - 'classic/benchmark/**'
      - '**.py'
      - '!classic/forge/tests/vcr_cassettes'

@@ -31,13 +27,44 @@ concurrency:
 defaults:
  run:
    shell: bash
-    working-directory: classic

 jobs:
+  get-changed-parts:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - id: changes-in
+        name: Determine affected subprojects
+        uses: dorny/paths-filter@v3
+        with:
+          filters: |
+            original_autogpt:
+              - classic/original_autogpt/autogpt/**
+              - classic/original_autogpt/tests/**
+              - classic/original_autogpt/poetry.lock
+            forge:
+              - classic/forge/forge/**
+              - classic/forge/tests/**
+              - classic/forge/poetry.lock
+            benchmark:
+              - classic/benchmark/agbenchmark/**
+              - classic/benchmark/tests/**
+              - classic/benchmark/poetry.lock
+    outputs:
+      changed-parts: ${{ steps.changes-in.outputs.changes }}
+
  lint:
+    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
-      min-python-version: "3.12"
+      min-python-version: "3.10"
+
+    strategy:
+      matrix:
+        sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
+      fail-fast: false

    steps:
      - name: Checkout repository
@@ -54,31 +81,42 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
+          key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -

+      # Install dependencies
+
      - name: Install Python dependencies
-        run: poetry install
+        run: poetry -C classic/${{ matrix.sub-package }} install

      # Lint

      - name: Lint (isort)
        run: poetry run isort --check .
+        working-directory: classic/${{ matrix.sub-package }}

      - name: Lint (Black)
        if: success() || failure()
        run: poetry run black --check .
+        working-directory: classic/${{ matrix.sub-package }}

      - name: Lint (Flake8)
        if: success() || failure()
        run: poetry run flake8 .
+        working-directory: classic/${{ matrix.sub-package }}

  types:
+    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
-      min-python-version: "3.12"
+      min-python-version: "3.10"
+
+    strategy:
+      matrix:
+        sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
+      fail-fast: false

    steps:
      - name: Checkout repository
@@ -95,16 +133,19 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}
+          key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -

+      # Install dependencies
+
      - name: Install Python dependencies
-        run: poetry install
+        run: poetry -C classic/${{ matrix.sub-package }} install

      # Typecheck

      - name: Typecheck
        if: success() || failure()
        run: poetry run pyright
+        working-directory: classic/${{ matrix.sub-package }}
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -128,7 +128,7 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          exitOnceUploaded: true

-  test:
+  e2e_test:
    runs-on: big-boi
    needs: setup
    strategy:
@@ -258,3 +258,39 @@ jobs:
      - name: Print Final Docker Compose logs
        if: always()
        run: docker compose -f ../docker-compose.yml logs
+
+  integration_test:
+    runs-on: ubuntu-latest
+    needs: setup
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "22.18.0"
+
+      - name: Enable corepack
+        run: corepack enable
+
+      - name: Restore dependencies cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.pnpm-store
+          key: ${{ needs.setup.outputs.cache-key }}
+          restore-keys: |
+            ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
+            ${{ runner.os }}-pnpm-
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Generate API client
+        run: pnpm generate:api
+
+      - name: Run Integration Tests
+        run: pnpm test:unit
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,6 @@
 classic/original_autogpt/keys.py
 classic/original_autogpt/*.json
 auto_gpt_workspace/*
-.autogpt/
 *.mpeg
 .env
 # Root .env files
@@ -160,10 +159,6 @@ CURRENT_BULLETIN.md

 # AgBenchmark
 classic/benchmark/agbenchmark/reports/
-classic/reports/
-classic/direct_benchmark/reports/
-classic/.benchmark_workspaces/
-classic/direct_benchmark/.benchmark_workspaces/

 # Nodejs
 package-lock.json
@@ -182,8 +177,5 @@ autogpt_platform/backend/settings.py

 *.ign.*
 .test-contents
-**/.claude/settings.local.json
+.claude/settings.local.json
 /autogpt_platform/backend/logs
-
-# Test database
-test.db
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "classic/forge/tests/vcr_cassettes"]
+	path = classic/forge/tests/vcr_cassettes
+	url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,10 +43,29 @@ repos:
        pass_filenames: false

      - id: poetry-install
-        name: Check & Install dependencies - Classic
-        alias: poetry-install-classic
-        entry: poetry -C classic install
-        files: ^classic/poetry\.lock$
+        name: Check & Install dependencies - Classic - AutoGPT
+        alias: poetry-install-classic-autogpt
+        entry: poetry -C classic/original_autogpt install
+        # include forge source (since it's a path dependency)
+        files: ^classic/(original_autogpt|forge)/poetry\.lock$
+        types: [file]
+        language: system
+        pass_filenames: false
+
+      - id: poetry-install
+        name: Check & Install dependencies - Classic - Forge
+        alias: poetry-install-classic-forge
+        entry: poetry -C classic/forge install
+        files: ^classic/forge/poetry\.lock$
+        types: [file]
+        language: system
+        pass_filenames: false
+
+      - id: poetry-install
+        name: Check & Install dependencies - Classic - Benchmark
+        alias: poetry-install-classic-benchmark
+        entry: poetry -C classic/benchmark install
+        files: ^classic/benchmark/poetry\.lock$
        types: [file]
        language: system
        pass_filenames: false
@@ -97,10 +116,26 @@ repos:
        language: system

      - id: isort
-        name: Lint (isort) - Classic
-        alias: isort-classic
-        entry: bash -c 'cd classic && poetry run isort $(echo "$@" | sed "s|classic/||g")' --
-        files: ^classic/(original_autogpt|forge|direct_benchmark)/
+        name: Lint (isort) - Classic - AutoGPT
+        alias: isort-classic-autogpt
+        entry: poetry -P classic/original_autogpt run isort -p autogpt
+        files: ^classic/original_autogpt/
+        types: [file, python]
+        language: system
+
+      - id: isort
+        name: Lint (isort) - Classic - Forge
+        alias: isort-classic-forge
+        entry: poetry -P classic/forge run isort -p forge
+        files: ^classic/forge/
+        types: [file, python]
+        language: system
+
+      - id: isort
+        name: Lint (isort) - Classic - Benchmark
+        alias: isort-classic-benchmark
+        entry: poetry -P classic/benchmark run isort -p agbenchmark
+        files: ^classic/benchmark/
        types: [file, python]
        language: system

@@ -114,13 +149,26 @@ repos:

  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
-    # Use consolidated flake8 config at classic/.flake8
+    # To have flake8 load the config of the individual subprojects, we have to call
+    # them separately.
    hooks:
      - id: flake8
-        name: Lint (Flake8) - Classic
-        alias: flake8-classic
-        files: ^classic/(original_autogpt|forge|direct_benchmark)/
-        args: [--config=classic/.flake8]
+        name: Lint (Flake8) - Classic - AutoGPT
+        alias: flake8-classic-autogpt
+        files: ^classic/original_autogpt/(autogpt|scripts|tests)/
+        args: [--config=classic/original_autogpt/.flake8]
+
+      - id: flake8
+        name: Lint (Flake8) - Classic - Forge
+        alias: flake8-classic-forge
+        files: ^classic/forge/(forge|tests)/
+        args: [--config=classic/forge/.flake8]
+
+      - id: flake8
+        name: Lint (Flake8) - Classic - Benchmark
+        alias: flake8-classic-benchmark
+        files: ^classic/benchmark/(agbenchmark|tests)/((?!reports).)*[/.]
+        args: [--config=classic/benchmark/.flake8]

  - repo: local
    hooks:
@@ -156,10 +204,29 @@ repos:
        pass_filenames: false

      - id: pyright
-        name: Typecheck - Classic
-        alias: pyright-classic
-        entry: poetry -C classic run pyright
-        files: ^classic/(original_autogpt|forge|direct_benchmark)/.*\.py$|^classic/poetry\.lock$
+        name: Typecheck - Classic - AutoGPT
+        alias: pyright-classic-autogpt
+        entry: poetry -C classic/original_autogpt run pyright
+        # include forge source (since it's a path dependency) but exclude *_test.py files:
+        files: ^(classic/original_autogpt/((autogpt|scripts|tests)/|poetry\.lock$)|classic/forge/(forge/.*(?<!_test)\.py|poetry\.lock)$)
+        types: [file]
+        language: system
+        pass_filenames: false
+
+      - id: pyright
+        name: Typecheck - Classic - Forge
+        alias: pyright-classic-forge
+        entry: poetry -C classic/forge run pyright
+        files: ^classic/forge/(forge/|poetry\.lock$)
+        types: [file]
+        language: system
+        pass_filenames: false
+
+      - id: pyright
+        name: Typecheck - Classic - Benchmark
+        alias: pyright-classic-benchmark
+        entry: poetry -C classic/benchmark run pyright
+        files: ^classic/benchmark/(agbenchmark/|tests/|poetry\.lock$)
        types: [file]
        language: system
        pass_filenames: false
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -16,6 +16,32 @@ See `docs/content/platform/getting-started.md` for setup instructions.
 - Format Python code with `poetry run format`.
 - Format frontend code using `pnpm format`.

+
+## Frontend guidelines:
+
+See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
+
+1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx`
+   - Add `usePageName.ts` hook for logic
+   - Put sub-components in local `components/` folder
+2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts`
+   - Use design system components from `src/components/` (atoms, molecules, organisms)
+   - Never use `src/components/__legacy__/*`
+3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/`
+   - Regenerate with `pnpm generate:api`
+   - Pattern: `use{Method}{Version}{OperationName}`
+4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
+5. **Testing**: Add Storybook stories for new components, Playwright for E2E
+6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
+- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
+- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
+- Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible
+- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
+- Use function declarations for components, arrow functions only for callbacks
+- No barrel files or `index.ts` re-exports
+- Do not use `useCallback` or `useMemo` unless strictly needed
+- Avoid comments at all times unless the code is very complex
+
 ## Testing

 - Backend: `poetry run test` (runs pytest with a docker based postgres + prisma).
--- a/autogpt_platform/CLAUDE.md
+++ b/autogpt_platform/CLAUDE.md
@@ -201,7 +201,7 @@ If you get any pushback or hit complex block conditions check the new_blocks gui
 3. Write tests alongside the route file
 4. Run `poetry run test` to verify

-**Frontend feature development:**
+### Frontend guidelines:

 See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:

@@ -217,6 +217,14 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
 4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
 5. **Testing**: Add Storybook stories for new components, Playwright for E2E
 6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
+- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
+- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
+- Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible
+- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
+- Use function declarations for components, arrow functions only for callbacks
+- No barrel files or `index.ts` re-exports
+- Do not use `useCallback` or `useMemo` unless strictly needed
+- Avoid comments at all times unless the code is very complex

 ### Security Implementation

--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -290,6 +290,11 @@ async def _cache_session(session: ChatSession) -> None:
    await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())


+async def cache_chat_session(session: ChatSession) -> None:
+    """Cache a chat session without persisting to the database."""
+    await _cache_session(session)
+
+
 async def _get_session_from_db(session_id: str) -> ChatSession | None:
    """Get a chat session from the database."""
    prisma_session = await chat_db.get_chat_session(session_id)
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -172,12 +172,12 @@ async def get_session(
        user_id: The optional authenticated user ID, or None for anonymous access.

    Returns:
-        SessionDetailResponse: Details for the requested session; raises NotFoundError if not found.
+        SessionDetailResponse: Details for the requested session, or None if not found.

    """
    session = await get_chat_session(session_id, user_id)
    if not session:
-        raise NotFoundError(f"Session {session_id} not found")
+        raise NotFoundError(f"Session {session_id} not found.")

    messages = [message.model_dump() for message in session.messages]
    logger.info(
@@ -222,6 +222,8 @@ async def stream_chat_post(
    session = await _validate_and_get_session(session_id, user_id)

    async def event_generator() -> AsyncGenerator[str, None]:
+        chunk_count = 0
+        first_chunk_type: str | None = None
        async for chunk in chat_service.stream_chat_completion(
            session_id,
            request.message,
@@ -230,7 +232,26 @@ async def stream_chat_post(
            session=session,  # Pass pre-fetched session to avoid double-fetch
            context=request.context,
        ):
+            if chunk_count < 3:
+                logger.info(
+                    "Chat stream chunk",
+                    extra={
+                        "session_id": session_id,
+                        "chunk_type": str(chunk.type),
+                    },
+                )
+            if not first_chunk_type:
+                first_chunk_type = str(chunk.type)
+            chunk_count += 1
            yield chunk.to_sse()
+        logger.info(
+            "Chat stream completed",
+            extra={
+                "session_id": session_id,
+                "chunk_count": chunk_count,
+                "first_chunk_type": first_chunk_type,
+            },
+        )
        # AI SDK protocol termination
        yield "data: [DONE]\n\n"

@@ -275,6 +296,8 @@ async def stream_chat_get(
    session = await _validate_and_get_session(session_id, user_id)

    async def event_generator() -> AsyncGenerator[str, None]:
+        chunk_count = 0
+        first_chunk_type: str | None = None
        async for chunk in chat_service.stream_chat_completion(
            session_id,
            message,
@@ -282,7 +305,26 @@ async def stream_chat_get(
            user_id=user_id,
            session=session,  # Pass pre-fetched session to avoid double-fetch
        ):
+            if chunk_count < 3:
+                logger.info(
+                    "Chat stream chunk",
+                    extra={
+                        "session_id": session_id,
+                        "chunk_type": str(chunk.type),
+                    },
+                )
+            if not first_chunk_type:
+                first_chunk_type = str(chunk.type)
+            chunk_count += 1
            yield chunk.to_sse()
+        logger.info(
+            "Chat stream completed",
+            extra={
+                "session_id": session_id,
+                "chunk_count": chunk_count,
+                "first_chunk_type": first_chunk_type,
+            },
+        )
        # AI SDK protocol termination
        yield "data: [DONE]\n\n"

--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -1,12 +1,20 @@
 import asyncio
 import logging
+import time
+from asyncio import CancelledError
 from collections.abc import AsyncGenerator
 from typing import Any

 import orjson
 from langfuse import get_client, propagate_attributes
 from langfuse.openai import openai  # type: ignore
-from openai import APIConnectionError, APIError, APIStatusError, RateLimitError
+from openai import (
+    APIConnectionError,
+    APIError,
+    APIStatusError,
+    PermissionDeniedError,
+    RateLimitError,
+)
 from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam

 from backend.data.understanding import (
@@ -21,6 +29,7 @@ from .model import (
    ChatMessage,
    ChatSession,
    Usage,
+    cache_chat_session,
    get_chat_session,
    update_session_title,
    upsert_chat_session,
@@ -296,6 +305,10 @@ async def stream_chat_completion(
                content="",
            )
            accumulated_tool_calls: list[dict[str, Any]] = []
+            has_saved_assistant_message = False
+            has_appended_streaming_message = False
+            last_cache_time = 0.0
+            last_cache_content_len = 0

            # Wrap main logic in try/finally to ensure Langfuse observations are always ended
            has_yielded_end = False
@@ -332,6 +345,23 @@ async def stream_chat_completion(
                        assert assistant_response.content is not None
                        assistant_response.content += delta
                        has_received_text = True
+                        if not has_appended_streaming_message:
+                            session.messages.append(assistant_response)
+                            has_appended_streaming_message = True
+                        current_time = time.monotonic()
+                        content_len = len(assistant_response.content)
+                        if (
+                            current_time - last_cache_time >= 1.0
+                            and content_len > last_cache_content_len
+                        ):
+                            try:
+                                await cache_chat_session(session)
+                            except Exception as e:
+                                logger.warning(
+                                    f"Failed to cache partial session {session.session_id}: {e}"
+                                )
+                            last_cache_time = current_time
+                            last_cache_content_len = content_len
                        yield chunk
                    elif isinstance(chunk, StreamTextEnd):
                        # Emit text-end after text completes
@@ -390,10 +420,42 @@ async def stream_chat_completion(
                            if has_received_text and not text_streaming_ended:
                                yield StreamTextEnd(id=text_block_id)
                                text_streaming_ended = True
+
+                            # Save assistant message before yielding finish to ensure it's persisted
+                            # even if client disconnects immediately after receiving StreamFinish
+                            if not has_saved_assistant_message:
+                                messages_to_save_early: list[ChatMessage] = []
+                                if accumulated_tool_calls:
+                                    assistant_response.tool_calls = (
+                                        accumulated_tool_calls
+                                    )
+                                if not has_appended_streaming_message and (
+                                    assistant_response.content
+                                    or assistant_response.tool_calls
+                                ):
+                                    messages_to_save_early.append(assistant_response)
+                                messages_to_save_early.extend(tool_response_messages)
+
+                                if messages_to_save_early:
+                                    session.messages.extend(messages_to_save_early)
+                                    logger.info(
+                                        f"Saving assistant message before StreamFinish: "
+                                        f"content_len={len(assistant_response.content or '')}, "
+                                        f"tool_calls={len(assistant_response.tool_calls or [])}, "
+                                        f"tool_responses={len(tool_response_messages)}"
+                                    )
+                                if (
+                                    messages_to_save_early
+                                    or has_appended_streaming_message
+                                ):
+                                    await upsert_chat_session(session)
+                                    has_saved_assistant_message = True
+
                            has_yielded_end = True
                            yield chunk
                    elif isinstance(chunk, StreamError):
                        has_yielded_error = True
+                        yield chunk
                    elif isinstance(chunk, StreamUsage):
                        session.usage.append(
                            Usage(
@@ -413,6 +475,27 @@ async def stream_chat_completion(
                    langfuse.update_current_trace(output=str(tool_response_messages))
                    langfuse.update_current_span(output=str(tool_response_messages))

+            except CancelledError:
+                if not has_saved_assistant_message:
+                    if accumulated_tool_calls:
+                        assistant_response.tool_calls = accumulated_tool_calls
+                    if assistant_response.content:
+                        assistant_response.content = (
+                            f"{assistant_response.content}\n\n[interrupted]"
+                        )
+                    else:
+                        assistant_response.content = "[interrupted]"
+                    if not has_appended_streaming_message:
+                        session.messages.append(assistant_response)
+                    if tool_response_messages:
+                        session.messages.extend(tool_response_messages)
+                    try:
+                        await upsert_chat_session(session)
+                    except Exception as e:
+                        logger.warning(
+                            f"Failed to save interrupted session {session.session_id}: {e}"
+                        )
+                raise
            except Exception as e:
                logger.error(f"Error during stream: {e!s}", exc_info=True)

@@ -434,14 +517,19 @@ async def stream_chat_completion(
                    # Add assistant message if it has content or tool calls
                    if accumulated_tool_calls:
                        assistant_response.tool_calls = accumulated_tool_calls
-                    if assistant_response.content or assistant_response.tool_calls:
+                    if not has_appended_streaming_message and (
+                        assistant_response.content or assistant_response.tool_calls
+                    ):
                        messages_to_save.append(assistant_response)

                    # Add tool response messages after assistant message
                    messages_to_save.extend(tool_response_messages)

-                    session.messages.extend(messages_to_save)
-                    await upsert_chat_session(session)
+                    if not has_saved_assistant_message:
+                        if messages_to_save:
+                            session.messages.extend(messages_to_save)
+                        if messages_to_save or has_appended_streaming_message:
+                            await upsert_chat_session(session)

                    if not has_yielded_error:
                        error_message = str(e)
@@ -472,38 +560,49 @@ async def stream_chat_completion(
                return  # Exit after retry to avoid double-saving in finally block

            # Normal completion path - save session and handle tool call continuation
-            logger.info(
-                f"Normal completion path: session={session.session_id}, "
-                f"current message_count={len(session.messages)}"
-            )
-
-            # Build the messages list in the correct order
-            messages_to_save: list[ChatMessage] = []
-
-            # Add assistant message with tool_calls if any
-            if accumulated_tool_calls:
-                assistant_response.tool_calls = accumulated_tool_calls
+            # Only save if we haven't already saved when StreamFinish was received
+            if not has_saved_assistant_message:
                logger.info(
-                    f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
-                )
-            if assistant_response.content or assistant_response.tool_calls:
-                messages_to_save.append(assistant_response)
-                logger.info(
-                    f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
+                    f"Normal completion path: session={session.session_id}, "
+                    f"current message_count={len(session.messages)}"
                )

-            # Add tool response messages after assistant message
-            messages_to_save.extend(tool_response_messages)
-            logger.info(
-                f"Saving {len(tool_response_messages)} tool response messages, "
-                f"total_to_save={len(messages_to_save)}"
-            )
+                # Build the messages list in the correct order
+                messages_to_save: list[ChatMessage] = []

-            session.messages.extend(messages_to_save)
-            logger.info(
-                f"Extended session messages, new message_count={len(session.messages)}"
-            )
-            await upsert_chat_session(session)
+                # Add assistant message with tool_calls if any
+                if accumulated_tool_calls:
+                    assistant_response.tool_calls = accumulated_tool_calls
+                    logger.info(
+                        f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
+                    )
+                if not has_appended_streaming_message and (
+                    assistant_response.content or assistant_response.tool_calls
+                ):
+                    messages_to_save.append(assistant_response)
+                    logger.info(
+                        f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
+                    )
+
+                # Add tool response messages after assistant message
+                messages_to_save.extend(tool_response_messages)
+                logger.info(
+                    f"Saving {len(tool_response_messages)} tool response messages, "
+                    f"total_to_save={len(messages_to_save)}"
+                )
+
+                if messages_to_save:
+                    session.messages.extend(messages_to_save)
+                    logger.info(
+                        f"Extended session messages, new message_count={len(session.messages)}"
+                    )
+                if messages_to_save or has_appended_streaming_message:
+                    await upsert_chat_session(session)
+            else:
+                logger.info(
+                    "Assistant message already saved when StreamFinish was received, "
+                    "skipping duplicate save"
+                )

            # If we did a tool call, stream the chat completion again to get the next response
            if has_done_tool_call:
@@ -545,6 +644,12 @@ def _is_retryable_error(error: Exception) -> bool:
    return False


+def _is_region_blocked_error(error: Exception) -> bool:
+    if isinstance(error, PermissionDeniedError):
+        return "not available in your region" in str(error).lower()
+    return "not available in your region" in str(error).lower()
+
+
 async def _stream_chat_chunks(
    session: ChatSession,
    tools: list[ChatCompletionToolParam],
@@ -737,7 +842,18 @@ async def _stream_chat_chunks(
                        f"Error in stream (not retrying): {e!s}",
                        exc_info=True,
                    )
-                    error_response = StreamError(errorText=str(e))
+                    error_code = None
+                    error_text = str(e)
+                    if _is_region_blocked_error(e):
+                        error_code = "MODEL_NOT_AVAILABLE_REGION"
+                        error_text = (
+                            "This model is not available in your region. "
+                            "Please connect via VPN and try again."
+                        )
+                    error_response = StreamError(
+                        errorText=error_text,
+                        code=error_code,
+                    )
                    yield error_response
                    yield StreamFinish()
                    return
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
@@ -1,29 +1,28 @@
 """Agent generator package - Creates agents from natural language."""

 from .core import (
-    apply_agent_patch,
+    AgentGeneratorNotConfiguredError,
    decompose_goal,
    generate_agent,
    generate_agent_patch,
    get_agent_as_json,
+    json_to_graph,
    save_agent_to_library,
 )
-from .fixer import apply_all_fixes
-from .utils import get_blocks_info
-from .validator import validate_agent
+from .service import health_check as check_external_service_health
+from .service import is_external_service_configured

 __all__ = [
    # Core functions
    "decompose_goal",
    "generate_agent",
    "generate_agent_patch",
-    "apply_agent_patch",
    "save_agent_to_library",
    "get_agent_as_json",
-    # Fixer
-    "apply_all_fixes",
-    # Validator
-    "validate_agent",
-    # Utils
-    "get_blocks_info",
+    "json_to_graph",
+    # Exceptions
+    "AgentGeneratorNotConfiguredError",
+    # Service
+    "is_external_service_configured",
+    "check_external_service_health",
 ]
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/client.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/client.py
@@ -1,25 +0,0 @@
-"""OpenRouter client configuration for agent generation."""
-
-import os
-
-from openai import AsyncOpenAI
-
-# Configuration - use OPEN_ROUTER_API_KEY for consistency with chat/config.py
-OPENROUTER_API_KEY = os.getenv("OPEN_ROUTER_API_KEY")
-AGENT_GENERATOR_MODEL = os.getenv("AGENT_GENERATOR_MODEL", "anthropic/claude-opus-4.5")
-
-# OpenRouter client (OpenAI-compatible API)
-_client: AsyncOpenAI | None = None
-
-
-def get_client() -> AsyncOpenAI:
-    """Get or create the OpenRouter client."""
-    global _client
-    if _client is None:
-        if not OPENROUTER_API_KEY:
-            raise ValueError("OPENROUTER_API_KEY environment variable is required")
-        _client = AsyncOpenAI(
-            base_url="https://openrouter.ai/api/v1",
-            api_key=OPENROUTER_API_KEY,
-        )
-    return _client
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
@@ -1,7 +1,5 @@
 """Core agent generation functions."""

-import copy
-import json
 import logging
 import uuid
 from typing import Any
@@ -9,13 +7,35 @@ from typing import Any
 from backend.api.features.library import db as library_db
 from backend.data.graph import Graph, Link, Node, create_graph

-from .client import AGENT_GENERATOR_MODEL, get_client
-from .prompts import DECOMPOSITION_PROMPT, GENERATION_PROMPT, PATCH_PROMPT
-from .utils import get_block_summaries, parse_json_from_llm
+from .service import (
+    decompose_goal_external,
+    generate_agent_external,
+    generate_agent_patch_external,
+    is_external_service_configured,
+)

 logger = logging.getLogger(__name__)


+class AgentGeneratorNotConfiguredError(Exception):
+    """Raised when the external Agent Generator service is not configured."""
+
+    pass
+
+
+def _check_service_configured() -> None:
+    """Check if the external Agent Generator service is configured.
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the service is not configured.
+    """
+    if not is_external_service_configured():
+        raise AgentGeneratorNotConfiguredError(
+            "Agent Generator service is not configured. "
+            "Set AGENTGENERATOR_HOST environment variable to enable agent generation."
+        )
+
+
 async def decompose_goal(description: str, context: str = "") -> dict[str, Any] | None:
    """Break down a goal into steps or return clarifying questions.

@@ -28,40 +48,13 @@ async def decompose_goal(description: str, context: str = "") -> dict[str, Any]
        - {"type": "clarifying_questions", "questions": [...]}
        - {"type": "instructions", "steps": [...]}
        Or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = DECOMPOSITION_PROMPT.format(block_summaries=get_block_summaries())
-
-    full_description = description
-    if context:
-        full_description = f"{description}\n\nAdditional context:\n{context}"
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": full_description},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for decomposition")
-            return None
-
-        result = parse_json_from_llm(content)
-
-        if result is None:
-            logger.error(f"Failed to parse decomposition response: {content[:200]}")
-            return None
-
-        return result
-
-    except Exception as e:
-        logger.error(f"Error decomposing goal: {e}")
-        return None
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for decompose_goal")
+    return await decompose_goal_external(description, context)


 async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:
@@ -72,31 +65,14 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:

    Returns:
        Agent JSON dict or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = GENERATION_PROMPT.format(block_summaries=get_block_summaries())
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": json.dumps(instructions, indent=2)},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for agent generation")
-            return None
-
-        result = parse_json_from_llm(content)
-
-        if result is None:
-            logger.error(f"Failed to parse agent JSON: {content[:200]}")
-            return None
-
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for generate_agent")
+    result = await generate_agent_external(instructions)
+    if result:
        # Ensure required fields
        if "id" not in result:
            result["id"] = str(uuid.uuid4())
@@ -104,12 +80,7 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:
            result["version"] = 1
        if "is_active" not in result:
            result["is_active"] = True
-
-        return result
-
-    except Exception as e:
-        logger.error(f"Error generating agent: {e}")
-        return None
+    return result


 def json_to_graph(agent_json: dict[str, Any]) -> Graph:
@@ -218,6 +189,7 @@ async def save_agent_to_library(
    library_agents = await library_db.create_library_agent(
        graph=created_graph,
        user_id=user_id,
+        sensitive_action_safe_mode=True,
        create_library_agents_for_sub_graphs=False,
    )

@@ -283,108 +255,23 @@ async def get_agent_as_json(
 async def generate_agent_patch(
    update_request: str, current_agent: dict[str, Any]
 ) -> dict[str, Any] | None:
-    """Generate a patch to update an existing agent.
+    """Update an existing agent using natural language.
+
+    The external Agent Generator service handles:
+    - Generating the patch
+    - Applying the patch
+    - Fixing and validating the result

    Args:
        update_request: Natural language description of changes
        current_agent: Current agent JSON

    Returns:
-        Patch dict or clarifying questions, or None on error
+        Updated agent JSON, clarifying questions dict, or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = PATCH_PROMPT.format(
-        current_agent=json.dumps(current_agent, indent=2),
-        block_summaries=get_block_summaries(),
-    )
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": update_request},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for patch generation")
-            return None
-
-        return parse_json_from_llm(content)
-
-    except Exception as e:
-        logger.error(f"Error generating patch: {e}")
-        return None
-
-
-def apply_agent_patch(
-    current_agent: dict[str, Any], patch: dict[str, Any]
-) -> dict[str, Any]:
-    """Apply a patch to an existing agent.
-
-    Args:
-        current_agent: Current agent JSON
-        patch: Patch dict with operations
-
-    Returns:
-        Updated agent JSON
-    """
-    agent = copy.deepcopy(current_agent)
-    patches = patch.get("patches", [])
-
-    for p in patches:
-        patch_type = p.get("type")
-
-        if patch_type == "modify":
-            node_id = p.get("node_id")
-            changes = p.get("changes", {})
-
-            for node in agent.get("nodes", []):
-                if node["id"] == node_id:
-                    _deep_update(node, changes)
-                    logger.debug(f"Modified node {node_id}")
-                    break
-
-        elif patch_type == "add":
-            new_nodes = p.get("new_nodes", [])
-            new_links = p.get("new_links", [])
-
-            agent["nodes"] = agent.get("nodes", []) + new_nodes
-            agent["links"] = agent.get("links", []) + new_links
-            logger.debug(f"Added {len(new_nodes)} nodes, {len(new_links)} links")
-
-        elif patch_type == "remove":
-            node_ids_to_remove = set(p.get("node_ids", []))
-            link_ids_to_remove = set(p.get("link_ids", []))
-
-            # Remove nodes
-            agent["nodes"] = [
-                n for n in agent.get("nodes", []) if n["id"] not in node_ids_to_remove
-            ]
-
-            # Remove links (both explicit and those referencing removed nodes)
-            agent["links"] = [
-                link
-                for link in agent.get("links", [])
-                if link["id"] not in link_ids_to_remove
-                and link["source_id"] not in node_ids_to_remove
-                and link["sink_id"] not in node_ids_to_remove
-            ]
-
-            logger.debug(
-                f"Removed {len(node_ids_to_remove)} nodes, {len(link_ids_to_remove)} links"
-            )
-
-    return agent
-
-
-def _deep_update(target: dict, source: dict) -> None:
-    """Recursively update a dict with another dict."""
-    for key, value in source.items():
-        if key in target and isinstance(target[key], dict) and isinstance(value, dict):
-            _deep_update(target[key], value)
-        else:
-            target[key] = value
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for generate_agent_patch")
+    return await generate_agent_patch_external(update_request, current_agent)
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/fixer.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/fixer.py
@@ -1,606 +0,0 @@
-"""Agent fixer - Fixes common LLM generation errors."""
-
-import logging
-import re
-import uuid
-from typing import Any
-
-from .utils import (
-    ADDTODICTIONARY_BLOCK_ID,
-    ADDTOLIST_BLOCK_ID,
-    CODE_EXECUTION_BLOCK_ID,
-    CONDITION_BLOCK_ID,
-    CREATEDICT_BLOCK_ID,
-    CREATELIST_BLOCK_ID,
-    DATA_SAMPLING_BLOCK_ID,
-    DOUBLE_CURLY_BRACES_BLOCK_IDS,
-    GET_CURRENT_DATE_BLOCK_ID,
-    STORE_VALUE_BLOCK_ID,
-    UNIVERSAL_TYPE_CONVERTER_BLOCK_ID,
-    get_blocks_info,
-    is_valid_uuid,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def fix_agent_ids(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix invalid UUIDs in agent and link IDs."""
-    # Fix agent ID
-    if not is_valid_uuid(agent.get("id", "")):
-        agent["id"] = str(uuid.uuid4())
-        logger.debug(f"Fixed agent ID: {agent['id']}")
-
-    # Fix node IDs
-    id_mapping = {}  # Old ID -> New ID
-    for node in agent.get("nodes", []):
-        if not is_valid_uuid(node.get("id", "")):
-            old_id = node.get("id", "")
-            new_id = str(uuid.uuid4())
-            id_mapping[old_id] = new_id
-            node["id"] = new_id
-            logger.debug(f"Fixed node ID: {old_id} -> {new_id}")
-
-    # Fix link IDs and update references
-    for link in agent.get("links", []):
-        if not is_valid_uuid(link.get("id", "")):
-            link["id"] = str(uuid.uuid4())
-            logger.debug(f"Fixed link ID: {link['id']}")
-
-        # Update source/sink IDs if they were remapped
-        if link.get("source_id") in id_mapping:
-            link["source_id"] = id_mapping[link["source_id"]]
-        if link.get("sink_id") in id_mapping:
-            link["sink_id"] = id_mapping[link["sink_id"]]
-
-    return agent
-
-
-def fix_double_curly_braces(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix single curly braces to double in template blocks."""
-    for node in agent.get("nodes", []):
-        if node.get("block_id") not in DOUBLE_CURLY_BRACES_BLOCK_IDS:
-            continue
-
-        input_data = node.get("input_default", {})
-        for key in ("prompt", "format"):
-            if key in input_data and isinstance(input_data[key], str):
-                original = input_data[key]
-                # Fix simple variable references: {var} -> {{var}}
-                fixed = re.sub(
-                    r"(?<!\{)\{([a-zA-Z_][a-zA-Z0-9_]*)\}(?!\})",
-                    r"{{\1}}",
-                    original,
-                )
-                if fixed != original:
-                    input_data[key] = fixed
-                    logger.debug(f"Fixed curly braces in {key}")
-
-    return agent
-
-
-def fix_storevalue_before_condition(agent: dict[str, Any]) -> dict[str, Any]:
-    """Add StoreValueBlock before ConditionBlock if needed for value2."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-
-    # Find all ConditionBlock nodes
-    condition_node_ids = {
-        node["id"] for node in nodes if node.get("block_id") == CONDITION_BLOCK_ID
-    }
-
-    if not condition_node_ids:
-        return agent
-
-    new_nodes = []
-    new_links = []
-    processed_conditions = set()
-
-    for link in links:
-        sink_id = link.get("sink_id")
-        sink_name = link.get("sink_name")
-
-        # Check if this link goes to a ConditionBlock's value2
-        if sink_id in condition_node_ids and sink_name == "value2":
-            source_node = next(
-                (n for n in nodes if n["id"] == link.get("source_id")), None
-            )
-
-            # Skip if source is already a StoreValueBlock
-            if source_node and source_node.get("block_id") == STORE_VALUE_BLOCK_ID:
-                continue
-
-            # Skip if we already processed this condition
-            if sink_id in processed_conditions:
-                continue
-
-            processed_conditions.add(sink_id)
-
-            # Create StoreValueBlock
-            store_node_id = str(uuid.uuid4())
-            store_node = {
-                "id": store_node_id,
-                "block_id": STORE_VALUE_BLOCK_ID,
-                "input_default": {"data": None},
-                "metadata": {"position": {"x": 0, "y": -100}},
-            }
-            new_nodes.append(store_node)
-
-            # Create link: original source -> StoreValueBlock
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": link["source_id"],
-                    "source_name": link["source_name"],
-                    "sink_id": store_node_id,
-                    "sink_name": "input",
-                    "is_static": False,
-                }
-            )
-
-            # Update original link: StoreValueBlock -> ConditionBlock
-            link["source_id"] = store_node_id
-            link["source_name"] = "output"
-
-            logger.debug(f"Added StoreValueBlock before ConditionBlock {sink_id}")
-
-    if new_nodes:
-        agent["nodes"] = nodes + new_nodes
-
-    return agent
-
-
-def fix_addtolist_blocks(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix AddToList blocks by adding prerequisite empty AddToList block.
-
-    When an AddToList block is found:
-    1. Checks if there's a CreateListBlock before it
-    2. Removes CreateListBlock if linked directly to AddToList
-    3. Adds an empty AddToList block before the original
-    4. Ensures the original has a self-referencing link
-    """
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    new_nodes = []
-    original_addtolist_ids = set()
-    nodes_to_remove = set()
-    links_to_remove = []
-
-    # First pass: identify CreateListBlock nodes to remove
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        sink_node = next((n for n in nodes if n.get("id") == link.get("sink_id")), None)
-
-        if (
-            source_node
-            and sink_node
-            and source_node.get("block_id") == CREATELIST_BLOCK_ID
-            and sink_node.get("block_id") == ADDTOLIST_BLOCK_ID
-        ):
-            nodes_to_remove.add(source_node.get("id"))
-            links_to_remove.append(link)
-            logger.debug(f"Removing CreateListBlock {source_node.get('id')}")
-
-    # Second pass: process AddToList blocks
-    filtered_nodes = []
-    for node in nodes:
-        if node.get("id") in nodes_to_remove:
-            continue
-
-        if node.get("block_id") == ADDTOLIST_BLOCK_ID:
-            original_addtolist_ids.add(node.get("id"))
-            node_id = node.get("id")
-            pos = node.get("metadata", {}).get("position", {"x": 0, "y": 0})
-
-            # Check if already has prerequisite
-            has_prereq = any(
-                link.get("sink_id") == node_id
-                and link.get("sink_name") == "list"
-                and link.get("source_name") == "updated_list"
-                for link in links
-            )
-
-            if not has_prereq:
-                # Remove links to "list" input (except self-reference)
-                for link in links:
-                    if (
-                        link.get("sink_id") == node_id
-                        and link.get("sink_name") == "list"
-                        and link.get("source_id") != node_id
-                        and link not in links_to_remove
-                    ):
-                        links_to_remove.append(link)
-
-                # Create prerequisite AddToList block
-                prereq_id = str(uuid.uuid4())
-                prereq_node = {
-                    "id": prereq_id,
-                    "block_id": ADDTOLIST_BLOCK_ID,
-                    "input_default": {"list": [], "entry": None, "entries": []},
-                    "metadata": {
-                        "position": {"x": pos.get("x", 0) - 800, "y": pos.get("y", 0)}
-                    },
-                }
-                new_nodes.append(prereq_node)
-
-                # Link prerequisite to original
-                links.append(
-                    {
-                        "id": str(uuid.uuid4()),
-                        "source_id": prereq_id,
-                        "source_name": "updated_list",
-                        "sink_id": node_id,
-                        "sink_name": "list",
-                        "is_static": False,
-                    }
-                )
-                logger.debug(f"Added prerequisite AddToList block for {node_id}")
-
-        filtered_nodes.append(node)
-
-    # Remove marked links
-    filtered_links = [link for link in links if link not in links_to_remove]
-
-    # Add self-referencing links for original AddToList blocks
-    for node in filtered_nodes + new_nodes:
-        if (
-            node.get("block_id") == ADDTOLIST_BLOCK_ID
-            and node.get("id") in original_addtolist_ids
-        ):
-            node_id = node.get("id")
-            has_self_ref = any(
-                link["source_id"] == node_id
-                and link["sink_id"] == node_id
-                and link["source_name"] == "updated_list"
-                and link["sink_name"] == "list"
-                for link in filtered_links
-            )
-            if not has_self_ref:
-                filtered_links.append(
-                    {
-                        "id": str(uuid.uuid4()),
-                        "source_id": node_id,
-                        "source_name": "updated_list",
-                        "sink_id": node_id,
-                        "sink_name": "list",
-                        "is_static": False,
-                    }
-                )
-                logger.debug(f"Added self-reference for AddToList {node_id}")
-
-    agent["nodes"] = filtered_nodes + new_nodes
-    agent["links"] = filtered_links
-    return agent
-
-
-def fix_addtodictionary_blocks(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix AddToDictionary blocks by removing empty CreateDictionary nodes."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    nodes_to_remove = set()
-    links_to_remove = []
-
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        sink_node = next((n for n in nodes if n.get("id") == link.get("sink_id")), None)
-
-        if (
-            source_node
-            and sink_node
-            and source_node.get("block_id") == CREATEDICT_BLOCK_ID
-            and sink_node.get("block_id") == ADDTODICTIONARY_BLOCK_ID
-        ):
-            nodes_to_remove.add(source_node.get("id"))
-            links_to_remove.append(link)
-            logger.debug(f"Removing CreateDictionary {source_node.get('id')}")
-
-    agent["nodes"] = [n for n in nodes if n.get("id") not in nodes_to_remove]
-    agent["links"] = [link for link in links if link not in links_to_remove]
-    return agent
-
-
-def fix_code_execution_output(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix CodeExecutionBlock output: change 'response' to 'stdout_logs'."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        if (
-            source_node
-            and source_node.get("block_id") == CODE_EXECUTION_BLOCK_ID
-            and link.get("source_name") == "response"
-        ):
-            link["source_name"] = "stdout_logs"
-            logger.debug("Fixed CodeExecutionBlock output: response -> stdout_logs")
-
-    return agent
-
-
-def fix_data_sampling_sample_size(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix DataSamplingBlock by setting sample_size to 1 as default."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    links_to_remove = []
-
-    for node in nodes:
-        if node.get("block_id") == DATA_SAMPLING_BLOCK_ID:
-            node_id = node.get("id")
-            input_default = node.get("input_default", {})
-
-            # Remove links to sample_size
-            for link in links:
-                if (
-                    link.get("sink_id") == node_id
-                    and link.get("sink_name") == "sample_size"
-                ):
-                    links_to_remove.append(link)
-
-            # Set default
-            input_default["sample_size"] = 1
-            node["input_default"] = input_default
-            logger.debug(f"Fixed DataSamplingBlock {node_id} sample_size to 1")
-
-    if links_to_remove:
-        agent["links"] = [link for link in links if link not in links_to_remove]
-
-    return agent
-
-
-def fix_node_x_coordinates(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix node x-coordinates to ensure 800+ unit spacing between linked nodes."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    node_lookup = {n.get("id"): n for n in nodes}
-
-    for link in links:
-        source_id = link.get("source_id")
-        sink_id = link.get("sink_id")
-
-        source_node = node_lookup.get(source_id)
-        sink_node = node_lookup.get(sink_id)
-
-        if not source_node or not sink_node:
-            continue
-
-        source_pos = source_node.get("metadata", {}).get("position", {})
-        sink_pos = sink_node.get("metadata", {}).get("position", {})
-
-        source_x = source_pos.get("x", 0)
-        sink_x = sink_pos.get("x", 0)
-
-        if abs(sink_x - source_x) < 800:
-            new_x = source_x + 800
-            if "metadata" not in sink_node:
-                sink_node["metadata"] = {}
-            if "position" not in sink_node["metadata"]:
-                sink_node["metadata"]["position"] = {}
-            sink_node["metadata"]["position"]["x"] = new_x
-            logger.debug(f"Fixed node {sink_id} x: {sink_x} -> {new_x}")
-
-    return agent
-
-
-def fix_getcurrentdate_offset(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix GetCurrentDateBlock offset to ensure it's positive."""
-    for node in agent.get("nodes", []):
-        if node.get("block_id") == GET_CURRENT_DATE_BLOCK_ID:
-            input_default = node.get("input_default", {})
-            if "offset" in input_default:
-                offset = input_default["offset"]
-                if isinstance(offset, (int, float)) and offset < 0:
-                    input_default["offset"] = abs(offset)
-                    logger.debug(f"Fixed offset: {offset} -> {abs(offset)}")
-
-    return agent
-
-
-def fix_ai_model_parameter(
-    agent: dict[str, Any],
-    blocks_info: list[dict[str, Any]],
-    default_model: str = "gpt-4o",
-) -> dict[str, Any]:
-    """Add default model parameter to AI blocks if missing."""
-    block_map = {b.get("id"): b for b in blocks_info}
-
-    for node in agent.get("nodes", []):
-        block_id = node.get("block_id")
-        block = block_map.get(block_id)
-
-        if not block:
-            continue
-
-        # Check if block has AI category
-        categories = block.get("categories", [])
-        is_ai_block = any(
-            cat.get("category") == "AI" for cat in categories if isinstance(cat, dict)
-        )
-
-        if is_ai_block:
-            input_default = node.get("input_default", {})
-            if "model" not in input_default:
-                input_default["model"] = default_model
-                node["input_default"] = input_default
-                logger.debug(
-                    f"Added model '{default_model}' to AI block {node.get('id')}"
-                )
-
-    return agent
-
-
-def fix_link_static_properties(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-) -> dict[str, Any]:
-    """Fix is_static property based on source block's staticOutput."""
-    block_map = {b.get("id"): b for b in blocks_info}
-    node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-    for link in agent.get("links", []):
-        source_node = node_lookup.get(link.get("source_id"))
-        if not source_node:
-            continue
-
-        source_block = block_map.get(source_node.get("block_id"))
-        if not source_block:
-            continue
-
-        static_output = source_block.get("staticOutput", False)
-        if link.get("is_static") != static_output:
-            link["is_static"] = static_output
-            logger.debug(f"Fixed link {link.get('id')} is_static to {static_output}")
-
-    return agent
-
-
-def fix_data_type_mismatch(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-) -> dict[str, Any]:
-    """Fix data type mismatches by inserting UniversalTypeConverterBlock."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    block_map = {b.get("id"): b for b in blocks_info}
-    node_lookup = {n.get("id"): n for n in nodes}
-
-    def get_property_type(schema: dict, name: str) -> str | None:
-        if "_#_" in name:
-            parent, child = name.split("_#_", 1)
-            parent_schema = schema.get(parent, {})
-            if "properties" in parent_schema:
-                return parent_schema["properties"].get(child, {}).get("type")
-            return None
-        return schema.get(name, {}).get("type")
-
-    def are_types_compatible(src: str, sink: str) -> bool:
-        if {src, sink} <= {"integer", "number"}:
-            return True
-        return src == sink
-
-    type_mapping = {
-        "string": "string",
-        "text": "string",
-        "integer": "number",
-        "number": "number",
-        "float": "number",
-        "boolean": "boolean",
-        "bool": "boolean",
-        "array": "list",
-        "list": "list",
-        "object": "dictionary",
-        "dict": "dictionary",
-        "dictionary": "dictionary",
-    }
-
-    new_links = []
-    nodes_to_add = []
-
-    for link in links:
-        source_node = node_lookup.get(link.get("source_id"))
-        sink_node = node_lookup.get(link.get("sink_id"))
-
-        if not source_node or not sink_node:
-            new_links.append(link)
-            continue
-
-        source_block = block_map.get(source_node.get("block_id"))
-        sink_block = block_map.get(sink_node.get("block_id"))
-
-        if not source_block or not sink_block:
-            new_links.append(link)
-            continue
-
-        source_outputs = source_block.get("outputSchema", {}).get("properties", {})
-        sink_inputs = sink_block.get("inputSchema", {}).get("properties", {})
-
-        source_type = get_property_type(source_outputs, link.get("source_name", ""))
-        sink_type = get_property_type(sink_inputs, link.get("sink_name", ""))
-
-        if (
-            source_type
-            and sink_type
-            and not are_types_compatible(source_type, sink_type)
-        ):
-            # Insert type converter
-            converter_id = str(uuid.uuid4())
-            target_type = type_mapping.get(sink_type, sink_type)
-
-            converter_node = {
-                "id": converter_id,
-                "block_id": UNIVERSAL_TYPE_CONVERTER_BLOCK_ID,
-                "input_default": {"type": target_type},
-                "metadata": {"position": {"x": 0, "y": 100}},
-            }
-            nodes_to_add.append(converter_node)
-
-            # source -> converter
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": link["source_id"],
-                    "source_name": link["source_name"],
-                    "sink_id": converter_id,
-                    "sink_name": "value",
-                    "is_static": False,
-                }
-            )
-
-            # converter -> sink
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": converter_id,
-                    "source_name": "value",
-                    "sink_id": link["sink_id"],
-                    "sink_name": link["sink_name"],
-                    "is_static": False,
-                }
-            )
-
-            logger.debug(f"Inserted type converter: {source_type} -> {target_type}")
-        else:
-            new_links.append(link)
-
-    if nodes_to_add:
-        agent["nodes"] = nodes + nodes_to_add
-        agent["links"] = new_links
-
-    return agent
-
-
-def apply_all_fixes(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-) -> dict[str, Any]:
-    """Apply all fixes to an agent JSON.
-
-    Args:
-        agent: Agent JSON dict
-        blocks_info: Optional list of block info dicts for advanced fixes
-
-    Returns:
-        Fixed agent JSON
-    """
-    # Basic fixes (no block info needed)
-    agent = fix_agent_ids(agent)
-    agent = fix_double_curly_braces(agent)
-    agent = fix_storevalue_before_condition(agent)
-    agent = fix_addtolist_blocks(agent)
-    agent = fix_addtodictionary_blocks(agent)
-    agent = fix_code_execution_output(agent)
-    agent = fix_data_sampling_sample_size(agent)
-    agent = fix_node_x_coordinates(agent)
-    agent = fix_getcurrentdate_offset(agent)
-
-    # Advanced fixes (require block info)
-    if blocks_info is None:
-        blocks_info = get_blocks_info()
-
-    agent = fix_ai_model_parameter(agent, blocks_info)
-    agent = fix_link_static_properties(agent, blocks_info)
-    agent = fix_data_type_mismatch(agent, blocks_info)
-
-    return agent
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/prompts.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/prompts.py
@@ -1,225 +0,0 @@
-"""Prompt templates for agent generation."""
-
-DECOMPOSITION_PROMPT = """
-You are an expert AutoGPT Workflow Decomposer. Your task is to analyze a user's high-level goal and break it down into a clear, step-by-step plan using the available blocks.
-
-Each step should represent a distinct, automatable action suitable for execution by an AI automation system.
-
---
-
-FIRST: Analyze the user's goal and determine:
-1) Design-time configuration (fixed settings that won't change per run)
-2) Runtime inputs (values the agent's end-user will provide each time it runs)
-
-For anything that can vary per run (email addresses, names, dates, search terms, etc.):
- DO NOT ask for the actual value
- Instead, define it as an Agent Input with a clear name, type, and description
-
-Only ask clarifying questions about design-time config that affects how you build the workflow:
- Which external service to use (e.g., "Gmail vs Outlook", "Notion vs Google Docs")
- Required formats or structures (e.g., "CSV, JSON, or PDF output?")
- Business rules that must be hard-coded
-
-IMPORTANT CLARIFICATIONS POLICY:
- Ask no more than five essential questions
- Do not ask for concrete values that can be provided at runtime as Agent Inputs
- Do not ask for API keys or credentials; the platform handles those directly
- If there is enough information to infer reasonable defaults, prefer to propose defaults
-
---
-
-GUIDELINES:
-1. List each step as a numbered item
-2. Describe the action clearly and specify inputs/outputs
-3. Ensure steps are in logical, sequential order
-4. Mention block names naturally (e.g., "Use GetWeatherByLocationBlock to...")
-5. Help the user reach their goal efficiently
-
---
-
-RULES:
-1. OUTPUT FORMAT: Only output either clarifying questions OR step-by-step instructions, not both
-2. USE ONLY THE BLOCKS PROVIDED
-3. ALL required_input fields must be provided
-4. Data types of linked properties must match
-5. Write expert-level prompts for AI-related blocks
-
---
-
-CRITICAL BLOCK RESTRICTIONS:
-1. AddToListBlock: Outputs updated list EVERY addition, not after all additions
-2. SendEmailBlock: Draft the email for user review; set SMTP config based on email type
-3. ConditionBlock: value2 is reference, value1 is contrast
-4. CodeExecutionBlock: DO NOT USE - use AI blocks instead
-5. ReadCsvBlock: Only use the 'rows' output, not 'row'
-
---
-
-OUTPUT FORMAT:
-
-If more information is needed:
-```json
-{{
-  "type": "clarifying_questions",
-  "questions": [
-    {{
-      "question": "Which email provider should be used? (Gmail, Outlook, custom SMTP)",
-      "keyword": "email_provider",
-      "example": "Gmail"
-    }}
-  ]
-}}
-```
-
-If ready to proceed:
-```json
-{{
-  "type": "instructions",
-  "steps": [
-    {{
-      "step_number": 1,
-      "block_name": "AgentShortTextInputBlock",
-      "description": "Get the URL of the content to analyze.",
-      "inputs": [{{"name": "name", "value": "URL"}}],
-      "outputs": [{{"name": "result", "description": "The URL entered by user"}}]
-    }}
-  ]
-}}
-```
-
---
-
-AVAILABLE BLOCKS:
-{block_summaries}
-"""
-
-GENERATION_PROMPT = """
-You are an expert AI workflow builder. Generate a valid agent JSON from the given instructions.
-
---
-
-NODES:
-Each node must include:
- `id`: Unique UUID v4 (e.g. `a8f5b1e2-c3d4-4e5f-8a9b-0c1d2e3f4a5b`)
- `block_id`: The block identifier (must match an Allowed Block)
- `input_default`: Dict of inputs (can be empty if no static inputs needed)
- `metadata`: Must contain:
-  - `position`: {{"x": number, "y": number}} - adjacent nodes should differ by 800+ in X
-  - `customized_name`: Clear name describing this block's purpose in the workflow
-
---
-
-LINKS:
-Each link connects a source node's output to a sink node's input:
- `id`: MUST be UUID v4 (NOT "link-1", "link-2", etc.)
- `source_id`: ID of the source node
- `source_name`: Output field name from the source block
- `sink_id`: ID of the sink node
- `sink_name`: Input field name on the sink block
- `is_static`: true only if source block has static_output: true
-
-CRITICAL: All IDs must be valid UUID v4 format!
-
---
-
-AGENT (GRAPH):
-Wrap nodes and links in:
- `id`: UUID of the agent
- `name`: Short, generic name (avoid specific company names, URLs)
- `description`: Short, generic description
- `nodes`: List of all nodes
- `links`: List of all links
- `version`: 1
- `is_active`: true
-
---
-
-TIPS:
- All required_input fields must be provided via input_default or a valid link
- Ensure consistent source_id and sink_id references
- Avoid dangling links
- Input/output pins must match block schemas
- Do not invent unknown block_ids
-
---
-
-ALLOWED BLOCKS:
-{block_summaries}
-
---
-
-Generate the complete agent JSON. Output ONLY valid JSON, no explanation.
-"""
-
-PATCH_PROMPT = """
-You are an expert at modifying AutoGPT agent workflows. Given the current agent and a modification request, generate a JSON patch to update the agent.
-
-CURRENT AGENT:
-{current_agent}
-
-AVAILABLE BLOCKS:
-{block_summaries}
-
---
-
-PATCH FORMAT:
-Return a JSON object with the following structure:
-
-```json
-{{
-  "type": "patch",
-  "intent": "Brief description of what the patch does",
-  "patches": [
-    {{
-      "type": "modify",
-      "node_id": "uuid-of-node-to-modify",
-      "changes": {{
-        "input_default": {{"field": "new_value"}},
-        "metadata": {{"customized_name": "New Name"}}
-      }}
-    }},
-    {{
-      "type": "add",
-      "new_nodes": [
-        {{
-          "id": "new-uuid",
-          "block_id": "block-uuid",
-          "input_default": {{}},
-          "metadata": {{"position": {{"x": 0, "y": 0}}, "customized_name": "Name"}}
-        }}
-      ],
-      "new_links": [
-        {{
-          "id": "link-uuid",
-          "source_id": "source-node-id",
-          "source_name": "output_field",
-          "sink_id": "sink-node-id",
-          "sink_name": "input_field"
-        }}
-      ]
-    }},
-    {{
-      "type": "remove",
-      "node_ids": ["uuid-of-node-to-remove"],
-      "link_ids": ["uuid-of-link-to-remove"]
-    }}
-  ]
-}}
-```
-
-If you need more information, return:
-```json
-{{
-  "type": "clarifying_questions",
-  "questions": [
-    {{
-      "question": "What specific change do you want?",
-      "keyword": "change_type",
-      "example": "Add error handling"
-    }}
-  ]
-}}
-```
-
-Generate the minimal patch needed. Output ONLY valid JSON.
-"""
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
@@ -0,0 +1,269 @@
+"""External Agent Generator service client.
+
+This module provides a client for communicating with the external Agent Generator
+microservice. When AGENTGENERATOR_HOST is configured, the agent generation functions
+will delegate to the external service instead of using the built-in LLM-based implementation.
+"""
+
+import logging
+from typing import Any
+
+import httpx
+
+from backend.util.settings import Settings
+
+logger = logging.getLogger(__name__)
+
+_client: httpx.AsyncClient | None = None
+_settings: Settings | None = None
+
+
+def _get_settings() -> Settings:
+    """Get or create settings singleton."""
+    global _settings
+    if _settings is None:
+        _settings = Settings()
+    return _settings
+
+
+def is_external_service_configured() -> bool:
+    """Check if external Agent Generator service is configured."""
+    settings = _get_settings()
+    return bool(settings.config.agentgenerator_host)
+
+
+def _get_base_url() -> str:
+    """Get the base URL for the external service."""
+    settings = _get_settings()
+    host = settings.config.agentgenerator_host
+    port = settings.config.agentgenerator_port
+    return f"http://{host}:{port}"
+
+
+def _get_client() -> httpx.AsyncClient:
+    """Get or create the HTTP client for the external service."""
+    global _client
+    if _client is None:
+        settings = _get_settings()
+        _client = httpx.AsyncClient(
+            base_url=_get_base_url(),
+            timeout=httpx.Timeout(settings.config.agentgenerator_timeout),
+        )
+    return _client
+
+
+async def decompose_goal_external(
+    description: str, context: str = ""
+) -> dict[str, Any] | None:
+    """Call the external service to decompose a goal.
+
+    Args:
+        description: Natural language goal description
+        context: Additional context (e.g., answers to previous questions)
+
+    Returns:
+        Dict with either:
+        - {"type": "clarifying_questions", "questions": [...]}
+        - {"type": "instructions", "steps": [...]}
+        - {"type": "unachievable_goal", ...}
+        - {"type": "vague_goal", ...}
+        Or None on error
+    """
+    client = _get_client()
+
+    # Build the request payload
+    payload: dict[str, Any] = {"description": description}
+    if context:
+        # The external service uses user_instruction for additional context
+        payload["user_instruction"] = context
+
+    try:
+        response = await client.post("/api/decompose-description", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        # Map the response to the expected format
+        response_type = data.get("type")
+        if response_type == "instructions":
+            return {"type": "instructions", "steps": data.get("steps", [])}
+        elif response_type == "clarifying_questions":
+            return {
+                "type": "clarifying_questions",
+                "questions": data.get("questions", []),
+            }
+        elif response_type == "unachievable_goal":
+            return {
+                "type": "unachievable_goal",
+                "reason": data.get("reason"),
+                "suggested_goal": data.get("suggested_goal"),
+            }
+        elif response_type == "vague_goal":
+            return {
+                "type": "vague_goal",
+                "suggested_goal": data.get("suggested_goal"),
+            }
+        else:
+            logger.error(
+                f"Unknown response type from external service: {response_type}"
+            )
+            return None
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def generate_agent_external(
+    instructions: dict[str, Any]
+) -> dict[str, Any] | None:
+    """Call the external service to generate an agent from instructions.
+
+    Args:
+        instructions: Structured instructions from decompose_goal
+
+    Returns:
+        Agent JSON dict or None on error
+    """
+    client = _get_client()
+
+    try:
+        response = await client.post(
+            "/api/generate-agent", json={"instructions": instructions}
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        return data.get("agent_json")
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def generate_agent_patch_external(
+    update_request: str, current_agent: dict[str, Any]
+) -> dict[str, Any] | None:
+    """Call the external service to generate a patch for an existing agent.
+
+    Args:
+        update_request: Natural language description of changes
+        current_agent: Current agent JSON
+
+    Returns:
+        Updated agent JSON, clarifying questions dict, or None on error
+    """
+    client = _get_client()
+
+    try:
+        response = await client.post(
+            "/api/update-agent",
+            json={
+                "update_request": update_request,
+                "current_agent_json": current_agent,
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        # Check if it's clarifying questions
+        if data.get("type") == "clarifying_questions":
+            return {
+                "type": "clarifying_questions",
+                "questions": data.get("questions", []),
+            }
+
+        # Otherwise return the updated agent JSON
+        return data.get("agent_json")
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def get_blocks_external() -> list[dict[str, Any]] | None:
+    """Get available blocks from the external service.
+
+    Returns:
+        List of block info dicts or None on error
+    """
+    client = _get_client()
+
+    try:
+        response = await client.get("/api/blocks")
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error("External service returned error getting blocks")
+            return None
+
+        return data.get("blocks", [])
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error getting blocks from external service: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error getting blocks from external service: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error getting blocks from external service: {e}")
+        return None
+
+
+async def health_check() -> bool:
+    """Check if the external service is healthy.
+
+    Returns:
+        True if healthy, False otherwise
+    """
+    if not is_external_service_configured():
+        return False
+
+    client = _get_client()
+
+    try:
+        response = await client.get("/health")
+        response.raise_for_status()
+        data = response.json()
+        return data.get("status") == "healthy" and data.get("blocks_loaded", False)
+    except Exception as e:
+        logger.warning(f"External agent generator health check failed: {e}")
+        return False
+
+
+async def close_client() -> None:
+    """Close the HTTP client."""
+    global _client
+    if _client is not None:
+        await _client.aclose()
+        _client = None
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/utils.py
@@ -1,213 +0,0 @@
-"""Utilities for agent generation."""
-
-import json
-import re
-from typing import Any
-
-from backend.data.block import get_blocks
-
-# UUID validation regex
-UUID_REGEX = re.compile(
-    r"^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"
-)
-
-# Block IDs for various fixes
-STORE_VALUE_BLOCK_ID = "1ff065e9-88e8-4358-9d82-8dc91f622ba9"
-CONDITION_BLOCK_ID = "715696a0-e1da-45c8-b209-c2fa9c3b0be6"
-ADDTOLIST_BLOCK_ID = "aeb08fc1-2fc1-4141-bc8e-f758f183a822"
-ADDTODICTIONARY_BLOCK_ID = "31d1064e-7446-4693-a7d4-65e5ca1180d1"
-CREATELIST_BLOCK_ID = "a912d5c7-6e00-4542-b2a9-8034136930e4"
-CREATEDICT_BLOCK_ID = "b924ddf4-de4f-4b56-9a85-358930dcbc91"
-CODE_EXECUTION_BLOCK_ID = "0b02b072-abe7-11ef-8372-fb5d162dd712"
-DATA_SAMPLING_BLOCK_ID = "4a448883-71fa-49cf-91cf-70d793bd7d87"
-UNIVERSAL_TYPE_CONVERTER_BLOCK_ID = "95d1b990-ce13-4d88-9737-ba5c2070c97b"
-GET_CURRENT_DATE_BLOCK_ID = "b29c1b50-5d0e-4d9f-8f9d-1b0e6fcbf0b1"
-
-DOUBLE_CURLY_BRACES_BLOCK_IDS = [
-    "44f6c8ad-d75c-4ae1-8209-aad1c0326928",  # FillTextTemplateBlock
-    "6ab085e2-20b3-4055-bc3e-08036e01eca6",
-    "90f8c45e-e983-4644-aa0b-b4ebe2f531bc",
-    "363ae599-353e-4804-937e-b2ee3cef3da4",  # AgentOutputBlock
-    "3b191d9f-356f-482d-8238-ba04b6d18381",
-    "db7d8f02-2f44-4c55-ab7a-eae0941f0c30",
-    "3a7c4b8d-6e2f-4a5d-b9c1-f8d23c5a9b0e",
-    "ed1ae7a0-b770-4089-b520-1f0005fad19a",
-    "a892b8d9-3e4e-4e9c-9c1e-75f8efcf1bfa",
-    "b29c1b50-5d0e-4d9f-8f9d-1b0e6fcbf0b1",
-    "716a67b3-6760-42e7-86dc-18645c6e00fc",
-    "530cf046-2ce0-4854-ae2c-659db17c7a46",
-    "ed55ac19-356e-4243-a6cb-bc599e9b716f",
-    "1f292d4a-41a4-4977-9684-7c8d560b9f91",  # LLM blocks
-    "32a87eab-381e-4dd4-bdb8-4c47151be35a",
-]
-
-
-def is_valid_uuid(value: str) -> bool:
-    """Check if a string is a valid UUID v4."""
-    return isinstance(value, str) and UUID_REGEX.match(value) is not None
-
-
-def _compact_schema(schema: dict) -> dict[str, str]:
-    """Extract compact type info from a JSON schema properties dict.
-
-    Returns a dict of {field_name: type_string} for essential info only.
-    """
-    props = schema.get("properties", {})
-    result = {}
-
-    for name, prop in props.items():
-        # Skip internal/complex fields
-        if name.startswith("_"):
-            continue
-
-        # Get type string
-        type_str = prop.get("type", "any")
-
-        # Handle anyOf/oneOf (optional types)
-        if "anyOf" in prop:
-            types = [t.get("type", "?") for t in prop["anyOf"] if t.get("type")]
-            type_str = "|".join(types) if types else "any"
-        elif "allOf" in prop:
-            type_str = "object"
-
-        # Add array item type if present
-        if type_str == "array" and "items" in prop:
-            items = prop["items"]
-            if isinstance(items, dict):
-                item_type = items.get("type", "any")
-                type_str = f"array[{item_type}]"
-
-        result[name] = type_str
-
-    return result
-
-
-def get_block_summaries(include_schemas: bool = True) -> str:
-    """Generate compact block summaries for prompts.
-
-    Args:
-        include_schemas: Whether to include input/output type info
-
-    Returns:
-        Formatted string of block summaries (compact format)
-    """
-    blocks = get_blocks()
-    summaries = []
-
-    for block_id, block_cls in blocks.items():
-        block = block_cls()
-        name = block.name
-        desc = getattr(block, "description", "") or ""
-
-        # Truncate description
-        if len(desc) > 150:
-            desc = desc[:147] + "..."
-
-        if not include_schemas:
-            summaries.append(f"- {name} (id: {block_id}): {desc}")
-        else:
-            # Compact format with type info only
-            inputs = {}
-            outputs = {}
-            required = []
-
-            if hasattr(block, "input_schema"):
-                try:
-                    schema = block.input_schema.jsonschema()
-                    inputs = _compact_schema(schema)
-                    required = schema.get("required", [])
-                except Exception:
-                    pass
-
-            if hasattr(block, "output_schema"):
-                try:
-                    schema = block.output_schema.jsonschema()
-                    outputs = _compact_schema(schema)
-                except Exception:
-                    pass
-
-            # Build compact line format
-            # Format: NAME (id): desc | in: {field:type, ...} [required] | out: {field:type}
-            in_str = ", ".join(f"{k}:{v}" for k, v in inputs.items())
-            out_str = ", ".join(f"{k}:{v}" for k, v in outputs.items())
-            req_str = f" req=[{','.join(required)}]" if required else ""
-
-            static = " [static]" if getattr(block, "static_output", False) else ""
-
-            line = f"- {name} (id: {block_id}): {desc}"
-            if in_str:
-                line += f"\n  in: {{{in_str}}}{req_str}"
-            if out_str:
-                line += f"\n  out: {{{out_str}}}{static}"
-
-            summaries.append(line)
-
-    return "\n".join(summaries)
-
-
-def get_blocks_info() -> list[dict[str, Any]]:
-    """Get block information with schemas for validation and fixing."""
-    blocks = get_blocks()
-    blocks_info = []
-    for block_id, block_cls in blocks.items():
-        block = block_cls()
-        blocks_info.append(
-            {
-                "id": block_id,
-                "name": block.name,
-                "description": getattr(block, "description", ""),
-                "categories": getattr(block, "categories", []),
-                "staticOutput": getattr(block, "static_output", False),
-                "inputSchema": (
-                    block.input_schema.jsonschema()
-                    if hasattr(block, "input_schema")
-                    else {}
-                ),
-                "outputSchema": (
-                    block.output_schema.jsonschema()
-                    if hasattr(block, "output_schema")
-                    else {}
-                ),
-            }
-        )
-    return blocks_info
-
-
-def parse_json_from_llm(text: str) -> dict[str, Any] | None:
-    """Extract JSON from LLM response (handles markdown code blocks)."""
-    if not text:
-        return None
-
-    # Try fenced code block
-    match = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, re.IGNORECASE)
-    if match:
-        try:
-            return json.loads(match.group(1).strip())
-        except json.JSONDecodeError:
-            pass
-
-    # Try raw text
-    try:
-        return json.loads(text.strip())
-    except json.JSONDecodeError:
-        pass
-
-    # Try finding {...} span
-    start = text.find("{")
-    end = text.rfind("}")
-    if start != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
-
-    # Try finding [...] span
-    start = text.find("[")
-    end = text.rfind("]")
-    if start != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
-
-    return None
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/validator.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/validator.py
@@ -1,279 +0,0 @@
-"""Agent validator - Validates agent structure and connections."""
-
-import logging
-import re
-from typing import Any
-
-from .utils import get_blocks_info
-
-logger = logging.getLogger(__name__)
-
-
-class AgentValidator:
-    """Validator for AutoGPT agents with detailed error reporting."""
-
-    def __init__(self):
-        self.errors: list[str] = []
-
-    def add_error(self, error: str) -> None:
-        """Add an error message."""
-        self.errors.append(error)
-
-    def validate_block_existence(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate all block IDs exist in the blocks library."""
-        valid = True
-        valid_block_ids = {b.get("id") for b in blocks_info if b.get("id")}
-
-        for node in agent.get("nodes", []):
-            block_id = node.get("block_id")
-            node_id = node.get("id")
-
-            if not block_id:
-                self.add_error(f"Node '{node_id}' is missing 'block_id' field.")
-                valid = False
-                continue
-
-            if block_id not in valid_block_ids:
-                self.add_error(
-                    f"Node '{node_id}' references block_id '{block_id}' which does not exist."
-                )
-                valid = False
-
-        return valid
-
-    def validate_link_node_references(self, agent: dict[str, Any]) -> bool:
-        """Validate all node IDs referenced in links exist."""
-        valid = True
-        valid_node_ids = {n.get("id") for n in agent.get("nodes", []) if n.get("id")}
-
-        for link in agent.get("links", []):
-            link_id = link.get("id", "Unknown")
-            source_id = link.get("source_id")
-            sink_id = link.get("sink_id")
-
-            if not source_id:
-                self.add_error(f"Link '{link_id}' is missing 'source_id'.")
-                valid = False
-            elif source_id not in valid_node_ids:
-                self.add_error(
-                    f"Link '{link_id}' references non-existent source_id '{source_id}'."
-                )
-                valid = False
-
-            if not sink_id:
-                self.add_error(f"Link '{link_id}' is missing 'sink_id'.")
-                valid = False
-            elif sink_id not in valid_node_ids:
-                self.add_error(
-                    f"Link '{link_id}' references non-existent sink_id '{sink_id}'."
-                )
-                valid = False
-
-        return valid
-
-    def validate_required_inputs(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate required inputs are provided."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-
-        for node in agent.get("nodes", []):
-            block_id = node.get("block_id")
-            block = block_map.get(block_id)
-
-            if not block:
-                continue
-
-            required_inputs = block.get("inputSchema", {}).get("required", [])
-            input_defaults = node.get("input_default", {})
-            node_id = node.get("id")
-
-            # Get linked inputs
-            linked_inputs = {
-                link["sink_name"]
-                for link in agent.get("links", [])
-                if link.get("sink_id") == node_id
-            }
-
-            for req_input in required_inputs:
-                if (
-                    req_input not in input_defaults
-                    and req_input not in linked_inputs
-                    and req_input != "credentials"
-                ):
-                    block_name = block.get("name", "Unknown Block")
-                    self.add_error(
-                        f"Node '{node_id}' ({block_name}) is missing required input '{req_input}'."
-                    )
-                    valid = False
-
-        return valid
-
-    def validate_data_type_compatibility(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate linked data types are compatible."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-        node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-        def get_type(schema: dict, name: str) -> str | None:
-            if "_#_" in name:
-                parent, child = name.split("_#_", 1)
-                parent_schema = schema.get(parent, {})
-                if "properties" in parent_schema:
-                    return parent_schema["properties"].get(child, {}).get("type")
-                return None
-            return schema.get(name, {}).get("type")
-
-        def are_compatible(src: str, sink: str) -> bool:
-            if {src, sink} <= {"integer", "number"}:
-                return True
-            return src == sink
-
-        for link in agent.get("links", []):
-            source_node = node_lookup.get(link.get("source_id"))
-            sink_node = node_lookup.get(link.get("sink_id"))
-
-            if not source_node or not sink_node:
-                continue
-
-            source_block = block_map.get(source_node.get("block_id"))
-            sink_block = block_map.get(sink_node.get("block_id"))
-
-            if not source_block or not sink_block:
-                continue
-
-            source_outputs = source_block.get("outputSchema", {}).get("properties", {})
-            sink_inputs = sink_block.get("inputSchema", {}).get("properties", {})
-
-            source_type = get_type(source_outputs, link.get("source_name", ""))
-            sink_type = get_type(sink_inputs, link.get("sink_name", ""))
-
-            if source_type and sink_type and not are_compatible(source_type, sink_type):
-                self.add_error(
-                    f"Type mismatch: {source_block.get('name')} output '{link['source_name']}' "
-                    f"({source_type}) -> {sink_block.get('name')} input '{link['sink_name']}' ({sink_type})."
-                )
-                valid = False
-
-        return valid
-
-    def validate_nested_sink_links(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate nested sink links (with _#_ notation)."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-        node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-        for link in agent.get("links", []):
-            sink_name = link.get("sink_name", "")
-
-            if "_#_" in sink_name:
-                parent, child = sink_name.split("_#_", 1)
-
-                sink_node = node_lookup.get(link.get("sink_id"))
-                if not sink_node:
-                    continue
-
-                block = block_map.get(sink_node.get("block_id"))
-                if not block:
-                    continue
-
-                input_props = block.get("inputSchema", {}).get("properties", {})
-                parent_schema = input_props.get(parent)
-
-                if not parent_schema:
-                    self.add_error(
-                        f"Invalid nested link '{sink_name}': parent '{parent}' not found."
-                    )
-                    valid = False
-                    continue
-
-                if not parent_schema.get("additionalProperties"):
-                    if not (
-                        isinstance(parent_schema, dict)
-                        and "properties" in parent_schema
-                        and child in parent_schema.get("properties", {})
-                    ):
-                        self.add_error(
-                            f"Invalid nested link '{sink_name}': child '{child}' not found in '{parent}'."
-                        )
-                        valid = False
-
-        return valid
-
-    def validate_prompt_spaces(self, agent: dict[str, Any]) -> bool:
-        """Validate prompts don't have spaces in template variables."""
-        valid = True
-
-        for node in agent.get("nodes", []):
-            input_default = node.get("input_default", {})
-            prompt = input_default.get("prompt", "")
-
-            if not isinstance(prompt, str):
-                continue
-
-            # Find {{...}} with spaces
-            matches = re.finditer(r"\{\{([^}]+)\}\}", prompt)
-            for match in matches:
-                content = match.group(1)
-                if " " in content:
-                    self.add_error(
-                        f"Node '{node.get('id')}' has spaces in template variable: "
-                        f"'{{{{{content}}}}}' should be '{{{{{content.replace(' ', '_')}}}}}'."
-                    )
-                    valid = False
-
-        return valid
-
-    def validate(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-    ) -> tuple[bool, str | None]:
-        """Run all validations.
-
-        Returns:
-            Tuple of (is_valid, error_message)
-        """
-        self.errors = []
-
-        if blocks_info is None:
-            blocks_info = get_blocks_info()
-
-        checks = [
-            self.validate_block_existence(agent, blocks_info),
-            self.validate_link_node_references(agent),
-            self.validate_required_inputs(agent, blocks_info),
-            self.validate_data_type_compatibility(agent, blocks_info),
-            self.validate_nested_sink_links(agent, blocks_info),
-            self.validate_prompt_spaces(agent),
-        ]
-
-        all_passed = all(checks)
-
-        if all_passed:
-            logger.info("Agent validation successful")
-            return True, None
-
-        error_message = "Agent validation failed:\n"
-        for i, error in enumerate(self.errors, 1):
-            error_message += f"{i}. {error}\n"
-
-        logger.warning(f"Agent validation failed with {len(self.errors)} errors")
-        return False, error_message
-
-
-def validate_agent(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-) -> tuple[bool, str | None]:
-    """Convenience function to validate an agent.
-
-    Returns:
-        Tuple of (is_valid, error_message)
-    """
-    validator = AgentValidator()
-    return validator.validate(agent, blocks_info)
--- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
@@ -8,12 +8,10 @@ from langfuse import observe
 from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
-    apply_all_fixes,
+    AgentGeneratorNotConfiguredError,
    decompose_goal,
    generate_agent,
-    get_blocks_info,
    save_agent_to_library,
-    validate_agent,
 )
 from .base import BaseTool
 from .models import (
@@ -27,9 +25,6 @@ from .models import (

 logger = logging.getLogger(__name__)

-# Maximum retries for agent generation with validation feedback
-MAX_GENERATION_RETRIES = 2
-

 class CreateAgentTool(BaseTool):
    """Tool for creating agents from natural language descriptions."""
@@ -91,9 +86,8 @@ class CreateAgentTool(BaseTool):

        Flow:
        1. Decompose the description into steps (may return clarifying questions)
-        2. Generate agent JSON from the steps
-        3. Apply fixes to correct common LLM errors
-        4. Preview or save based on the save parameter
+        2. Generate agent JSON (external service handles fixing and validation)
+        3. Preview or save based on the save parameter
        """
        description = kwargs.get("description", "").strip()
        context = kwargs.get("context", "")
@@ -110,11 +104,13 @@ class CreateAgentTool(BaseTool):
        # Step 1: Decompose goal into steps
        try:
            decomposition_result = await decompose_goal(description, context)
-        except ValueError as e:
-            # Handle missing API key or configuration errors
+        except AgentGeneratorNotConfiguredError:
            return ErrorResponse(
-                message=f"Agent generation is not configured: {str(e)}",
-                error="configuration_error",
+                message=(
+                    "Agent generation is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
                session_id=session_id,
            )

@@ -171,72 +167,32 @@ class CreateAgentTool(BaseTool):
                session_id=session_id,
            )

-        # Step 2: Generate agent JSON with retry on validation failure
-        blocks_info = get_blocks_info()
-        agent_json = None
-        validation_errors = None
-
-        for attempt in range(MAX_GENERATION_RETRIES + 1):
-            # Generate agent (include validation errors from previous attempt)
-            if attempt == 0:
-                agent_json = await generate_agent(decomposition_result)
-            else:
-                # Retry with validation error feedback
-                logger.info(
-                    f"Retry {attempt}/{MAX_GENERATION_RETRIES} with validation feedback"
-                )
-                retry_instructions = {
-                    **decomposition_result,
-                    "previous_errors": validation_errors,
-                    "retry_instructions": (
-                        "The previous generation had validation errors. "
-                        "Please fix these issues in the new generation:\n"
-                        f"{validation_errors}"
-                    ),
-                }
-                agent_json = await generate_agent(retry_instructions)
-
-            if agent_json is None:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message="Failed to generate the agent. Please try again.",
-                        error="Generation failed",
-                        session_id=session_id,
-                    )
-                continue
-
-            # Step 3: Apply fixes to correct common errors
-            agent_json = apply_all_fixes(agent_json, blocks_info)
-
-            # Step 4: Validate the agent
-            is_valid, validation_errors = validate_agent(agent_json, blocks_info)
-
-            if is_valid:
-                logger.info(f"Agent generated successfully on attempt {attempt + 1}")
-                break
-
-            logger.warning(
-                f"Validation failed on attempt {attempt + 1}: {validation_errors}"
+        # Step 2: Generate agent JSON (external service handles fixing and validation)
+        try:
+            agent_json = await generate_agent(decomposition_result)
+        except AgentGeneratorNotConfiguredError:
+            return ErrorResponse(
+                message=(
+                    "Agent generation is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
+                session_id=session_id,
            )

-            if attempt == MAX_GENERATION_RETRIES:
-                # Return error with validation details
-                return ErrorResponse(
-                    message=(
-                        f"Generated agent has validation errors after {MAX_GENERATION_RETRIES + 1} attempts. "
-                        f"Please try rephrasing your request or simplify the workflow."
-                    ),
-                    error="validation_failed",
-                    details={"validation_errors": validation_errors},
-                    session_id=session_id,
-                )
+        if agent_json is None:
+            return ErrorResponse(
+                message="Failed to generate the agent. Please try again.",
+                error="Generation failed",
+                session_id=session_id,
+            )

        agent_name = agent_json.get("name", "Generated Agent")
        agent_description = agent_json.get("description", "")
        node_count = len(agent_json.get("nodes", []))
        link_count = len(agent_json.get("links", []))

-        # Step 4: Preview or save
+        # Step 3: Preview or save
        if not save:
            return AgentPreviewResponse(
                message=(
--- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
@@ -8,13 +8,10 @@ from langfuse import observe
 from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
-    apply_agent_patch,
-    apply_all_fixes,
+    AgentGeneratorNotConfiguredError,
    generate_agent_patch,
    get_agent_as_json,
-    get_blocks_info,
    save_agent_to_library,
-    validate_agent,
 )
 from .base import BaseTool
 from .models import (
@@ -28,9 +25,6 @@ from .models import (

 logger = logging.getLogger(__name__)

-# Maximum retries for patch generation with validation feedback
-MAX_GENERATION_RETRIES = 2
-

 class EditAgentTool(BaseTool):
    """Tool for editing existing agents using natural language."""
@@ -43,7 +37,7 @@ class EditAgentTool(BaseTool):
    def description(self) -> str:
        return (
            "Edit an existing agent from the user's library using natural language. "
-            "Generates a patch to update the agent while preserving unchanged parts."
+            "Generates updates to the agent while preserving unchanged parts."
        )

    @property
@@ -98,9 +92,8 @@ class EditAgentTool(BaseTool):

        Flow:
        1. Fetch the current agent
-        2. Generate a patch based on the requested changes
-        3. Apply the patch to create an updated agent
-        4. Preview or save based on the save parameter
+        2. Generate updated agent (external service handles fixing and validation)
+        3. Preview or save based on the save parameter
        """
        agent_id = kwargs.get("agent_id", "").strip()
        changes = kwargs.get("changes", "").strip()
@@ -137,121 +130,58 @@ class EditAgentTool(BaseTool):
        if context:
            update_request = f"{changes}\n\nAdditional context:\n{context}"

-        # Step 2: Generate patch with retry on validation failure
-        blocks_info = get_blocks_info()
-        updated_agent = None
-        validation_errors = None
-        intent = "Applied requested changes"
-
-        for attempt in range(MAX_GENERATION_RETRIES + 1):
-            # Generate patch (include validation errors from previous attempt)
-            try:
-                if attempt == 0:
-                    patch_result = await generate_agent_patch(
-                        update_request, current_agent
-                    )
-                else:
-                    # Retry with validation error feedback
-                    logger.info(
-                        f"Retry {attempt}/{MAX_GENERATION_RETRIES} with validation feedback"
-                    )
-                    retry_request = (
-                        f"{update_request}\n\n"
-                        f"IMPORTANT: The previous edit had validation errors. "
-                        f"Please fix these issues:\n{validation_errors}"
-                    )
-                    patch_result = await generate_agent_patch(
-                        retry_request, current_agent
-                    )
-            except ValueError as e:
-                # Handle missing API key or configuration errors
-                return ErrorResponse(
-                    message=f"Agent generation is not configured: {str(e)}",
-                    error="configuration_error",
-                    session_id=session_id,
-                )
-
-            if patch_result is None:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message="Failed to generate changes. Please try rephrasing.",
-                        error="Patch generation failed",
-                        session_id=session_id,
-                    )
-                continue
-
-            # Check if LLM returned clarifying questions
-            if patch_result.get("type") == "clarifying_questions":
-                questions = patch_result.get("questions", [])
-                return ClarificationNeededResponse(
-                    message=(
-                        "I need some more information about the changes. "
-                        "Please answer the following questions:"
-                    ),
-                    questions=[
-                        ClarifyingQuestion(
-                            question=q.get("question", ""),
-                            keyword=q.get("keyword", ""),
-                            example=q.get("example"),
-                        )
-                        for q in questions
-                    ],
-                    session_id=session_id,
-                )
-
-            # Step 3: Apply patch and fixes
-            try:
-                updated_agent = apply_agent_patch(current_agent, patch_result)
-                updated_agent = apply_all_fixes(updated_agent, blocks_info)
-            except Exception as e:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message=f"Failed to apply changes: {str(e)}",
-                        error="patch_apply_failed",
-                        details={"exception": str(e)},
-                        session_id=session_id,
-                    )
-                validation_errors = str(e)
-                continue
-
-            # Step 4: Validate the updated agent
-            is_valid, validation_errors = validate_agent(updated_agent, blocks_info)
-
-            if is_valid:
-                logger.info(f"Agent edited successfully on attempt {attempt + 1}")
-                intent = patch_result.get("intent", "Applied requested changes")
-                break
-
-            logger.warning(
-                f"Validation failed on attempt {attempt + 1}: {validation_errors}"
+        # Step 2: Generate updated agent (external service handles fixing and validation)
+        try:
+            result = await generate_agent_patch(update_request, current_agent)
+        except AgentGeneratorNotConfiguredError:
+            return ErrorResponse(
+                message=(
+                    "Agent editing is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
+                session_id=session_id,
            )

-            if attempt == MAX_GENERATION_RETRIES:
-                # Return error with validation details
-                return ErrorResponse(
-                    message=(
-                        f"Updated agent has validation errors after "
-                        f"{MAX_GENERATION_RETRIES + 1} attempts. "
-                        f"Please try rephrasing your request or simplify the changes."
-                    ),
-                    error="validation_failed",
-                    details={"validation_errors": validation_errors},
-                    session_id=session_id,
-                )
+        if result is None:
+            return ErrorResponse(
+                message="Failed to generate changes. Please try rephrasing.",
+                error="Update generation failed",
+                session_id=session_id,
+            )

-        # At this point, updated_agent is guaranteed to be set (we return on all failure paths)
-        assert updated_agent is not None
+        # Check if LLM returned clarifying questions
+        if result.get("type") == "clarifying_questions":
+            questions = result.get("questions", [])
+            return ClarificationNeededResponse(
+                message=(
+                    "I need some more information about the changes. "
+                    "Please answer the following questions:"
+                ),
+                questions=[
+                    ClarifyingQuestion(
+                        question=q.get("question", ""),
+                        keyword=q.get("keyword", ""),
+                        example=q.get("example"),
+                    )
+                    for q in questions
+                ],
+                session_id=session_id,
+            )
+
+        # Result is the updated agent JSON
+        updated_agent = result

        agent_name = updated_agent.get("name", "Updated Agent")
        agent_description = updated_agent.get("description", "")
        node_count = len(updated_agent.get("nodes", []))
        link_count = len(updated_agent.get("links", []))

-        # Step 5: Preview or save
+        # Step 3: Preview or save
        if not save:
            return AgentPreviewResponse(
                message=(
-                    f"I've updated the agent. Changes: {intent}. "
+                    f"I've updated the agent. "
                    f"The agent now has {node_count} blocks. "
                    f"Review it and call edit_agent with save=true to save the changes."
                ),
@@ -277,10 +207,7 @@ class EditAgentTool(BaseTool):
            )

            return AgentSavedResponse(
-                message=(
-                    f"Updated agent '{created_graph.name}' has been saved to your library! "
-                    f"Changes: {intent}"
-                ),
+                message=f"Updated agent '{created_graph.name}' has been saved to your library!",
                agent_id=created_graph.id,
                agent_name=created_graph.name,
                library_agent_id=library_agent.id,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
@@ -33,7 +33,7 @@ from .models import (
    UserReadiness,
 )
 from .utils import (
-    check_user_has_required_credentials,
+    build_missing_credentials_from_graph,
    extract_credentials_from_schema,
    fetch_graph_from_store_slug,
    get_or_create_library_agent,
@@ -237,15 +237,13 @@ class RunAgentTool(BaseTool):
                # Return credentials needed response with input data info
                # The UI handles credential setup automatically, so the message
                # focuses on asking about input data
-                credentials = extract_credentials_from_schema(
-                    graph.credentials_input_schema
+                requirements_creds_dict = build_missing_credentials_from_graph(
+                    graph, None
                )
-                missing_creds_check = await check_user_has_required_credentials(
-                    user_id, credentials
+                missing_credentials_dict = build_missing_credentials_from_graph(
+                    graph, graph_credentials
                )
-                missing_credentials_dict = {
-                    c.id: c.model_dump() for c in missing_creds_check
-                }
+                requirements_creds_list = list(requirements_creds_dict.values())

                return SetupRequirementsResponse(
                    message=self._build_inputs_message(graph, MSG_WHAT_VALUES_TO_USE),
@@ -259,7 +257,7 @@ class RunAgentTool(BaseTool):
                            ready_to_run=False,
                        ),
                        requirements={
-                            "credentials": [c.model_dump() for c in credentials],
+                            "credentials": requirements_creds_list,
                            "inputs": self._get_inputs_list(graph.input_schema),
                            "execution_modes": self._get_execution_modes(graph),
                        },
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
@@ -29,7 +29,7 @@ def mock_embedding_functions():
        yield


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent(setup_test_data):
    """Test that the run_agent tool successfully executes an approved agent"""
    # Use test data from fixture
@@ -70,7 +70,7 @@ async def test_run_agent(setup_test_data):
    assert result_data["graph_name"] == "Test Agent"


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_missing_inputs(setup_test_data):
    """Test that the run_agent tool returns error when inputs are missing"""
    # Use test data from fixture
@@ -106,7 +106,7 @@ async def test_run_agent_missing_inputs(setup_test_data):
    assert "message" in result_data


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_invalid_agent_id(setup_test_data):
    """Test that the run_agent tool returns error for invalid agent ID"""
    # Use test data from fixture
@@ -141,7 +141,7 @@ async def test_run_agent_invalid_agent_id(setup_test_data):
    )


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_with_llm_credentials(setup_llm_test_data):
    """Test that run_agent works with an agent requiring LLM credentials"""
    # Use test data from fixture
@@ -185,7 +185,7 @@ async def test_run_agent_with_llm_credentials(setup_llm_test_data):
    assert result_data["graph_name"] == "LLM Test Agent"


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_data):
    """Test that run_agent returns available inputs when called without inputs or use_defaults."""
    user = setup_test_data["user"]
@@ -219,7 +219,7 @@ async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_da
    assert "inputs" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_with_use_defaults(setup_test_data):
    """Test that run_agent executes successfully with use_defaults=True."""
    user = setup_test_data["user"]
@@ -251,7 +251,7 @@ async def test_run_agent_with_use_defaults(setup_test_data):
    assert result_data["graph_id"] == graph.id


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
    """Test that run_agent returns setup_requirements when credentials are missing."""
    user = setup_firecrawl_test_data["user"]
@@ -285,7 +285,7 @@ async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
    assert len(setup_info["user_readiness"]["missing_credentials"]) > 0


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_invalid_slug_format(setup_test_data):
    """Test that run_agent returns error for invalid slug format (no slash)."""
    user = setup_test_data["user"]
@@ -313,7 +313,7 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
    assert "username/agent-name" in result_data["message"]


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_unauthenticated():
    """Test that run_agent returns need_login for unauthenticated users."""
    tool = RunAgentTool()
@@ -340,7 +340,7 @@ async def test_run_agent_unauthenticated():
    assert "sign in" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_schedule_without_cron(setup_test_data):
    """Test that run_agent returns error when scheduling without cron expression."""
    user = setup_test_data["user"]
@@ -372,7 +372,7 @@ async def test_run_agent_schedule_without_cron(setup_test_data):
    assert "cron" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_schedule_without_name(setup_test_data):
    """Test that run_agent returns error when scheduling without schedule_name."""
    user = setup_test_data["user"]
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -22,6 +22,7 @@ from .models import (
    ToolResponseBase,
    UserReadiness,
 )
+from .utils import build_missing_credentials_from_field_info

 logger = logging.getLogger(__name__)

@@ -189,7 +190,11 @@ class RunBlockTool(BaseTool):

        if missing_credentials:
            # Return setup requirements response with missing credentials
-            missing_creds_dict = {c.id: c.model_dump() for c in missing_credentials}
+            credentials_fields_info = block.input_schema.get_credentials_fields_info()
+            missing_creds_dict = build_missing_credentials_from_field_info(
+                credentials_fields_info, set(matched_credentials.keys())
+            )
+            missing_creds_list = list(missing_creds_dict.values())

            return SetupRequirementsResponse(
                message=(
@@ -206,7 +211,7 @@ class RunBlockTool(BaseTool):
                        ready_to_run=False,
                    ),
                    requirements={
-                        "credentials": [c.model_dump() for c in missing_credentials],
+                        "credentials": missing_creds_list,
                        "inputs": self._get_inputs_list(block),
                        "execution_modes": ["immediate"],
                    },
--- a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
@@ -8,7 +8,7 @@ from backend.api.features.library import model as library_model
 from backend.api.features.store import db as store_db
 from backend.data import graph as graph_db
 from backend.data.graph import GraphModel
-from backend.data.model import CredentialsMetaInput
+from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import NotFoundError

@@ -89,6 +89,59 @@ def extract_credentials_from_schema(
    return credentials


+def _serialize_missing_credential(
+    field_key: str, field_info: CredentialsFieldInfo
+) -> dict[str, Any]:
+    """
+    Convert credential field info into a serializable dict that preserves all supported
+    credential types (e.g., api_key + oauth2) so the UI can offer multiple options.
+    """
+    supported_types = sorted(field_info.supported_types)
+    provider = next(iter(field_info.provider), "unknown")
+    scopes = sorted(field_info.required_scopes or [])
+
+    return {
+        "id": field_key,
+        "title": field_key.replace("_", " ").title(),
+        "provider": provider,
+        "provider_name": provider.replace("_", " ").title(),
+        "type": supported_types[0] if supported_types else "api_key",
+        "types": supported_types,
+        "scopes": scopes,
+    }
+
+
+def build_missing_credentials_from_graph(
+    graph: GraphModel, matched_credentials: dict[str, CredentialsMetaInput] | None
+) -> dict[str, Any]:
+    """
+    Build a missing_credentials mapping from a graph's aggregated credentials inputs,
+    preserving all supported credential types for each field.
+    """
+    matched_keys = set(matched_credentials.keys()) if matched_credentials else set()
+    aggregated_fields = graph.aggregate_credentials_inputs()
+
+    return {
+        field_key: _serialize_missing_credential(field_key, field_info)
+        for field_key, (field_info, _node_fields) in aggregated_fields.items()
+        if field_key not in matched_keys
+    }
+
+
+def build_missing_credentials_from_field_info(
+    credential_fields: dict[str, CredentialsFieldInfo],
+    matched_keys: set[str],
+) -> dict[str, Any]:
+    """
+    Build missing_credentials mapping from a simple credentials field info dictionary.
+    """
+    return {
+        field_key: _serialize_missing_credential(field_key, field_info)
+        for field_key, field_info in credential_fields.items()
+        if field_key not in matched_keys
+    }
+
+
 def extract_credentials_as_dict(
    credentials_input_schema: dict[str, Any] | None,
 ) -> dict[str, CredentialsMetaInput]:
--- a/autogpt_platform/backend/backend/api/features/executions/review/model.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/model.py
@@ -23,6 +23,7 @@ class PendingHumanReviewModel(BaseModel):
        id: Unique identifier for the review record
        user_id: ID of the user who must perform the review
        node_exec_id: ID of the node execution that created this review
+        node_id: ID of the node definition (for grouping reviews from same node)
        graph_exec_id: ID of the graph execution containing the node
        graph_id: ID of the graph template being executed
        graph_version: Version number of the graph template
@@ -37,6 +38,10 @@ class PendingHumanReviewModel(BaseModel):
    """

    node_exec_id: str = Field(description="Node execution ID (primary key)")
+    node_id: str = Field(
+        description="Node definition ID (for grouping)",
+        default="",  # Temporary default for test compatibility
+    )
    user_id: str = Field(description="User ID associated with the review")
    graph_exec_id: str = Field(description="Graph execution ID")
    graph_id: str = Field(description="Graph ID")
@@ -66,7 +71,9 @@ class PendingHumanReviewModel(BaseModel):
    )

    @classmethod
-    def from_db(cls, review: "PendingHumanReview") -> "PendingHumanReviewModel":
+    def from_db(
+        cls, review: "PendingHumanReview", node_id: str
+    ) -> "PendingHumanReviewModel":
        """
        Convert a database model to a response model.

@@ -74,9 +81,14 @@ class PendingHumanReviewModel(BaseModel):
        payload, instructions, and editable flag.

        Handles invalid data gracefully by using safe defaults.
+
+        Args:
+            review: Database review object
+            node_id: Node definition ID (fetched from NodeExecution)
        """
        return cls(
            node_exec_id=review.nodeExecId,
+            node_id=node_id,
            user_id=review.userId,
            graph_exec_id=review.graphExecId,
            graph_id=review.graphId,
@@ -107,6 +119,13 @@ class ReviewItem(BaseModel):
    reviewed_data: SafeJsonData | None = Field(
        None, description="Optional edited data (ignored if approved=False)"
    )
+    auto_approve_future: bool = Field(
+        default=False,
+        description=(
+            "If true and this review is approved, future executions of this same "
+            "block (node) will be automatically approved. This only affects approved reviews."
+        ),
+    )

    @field_validator("reviewed_data")
    @classmethod
@@ -174,6 +193,9 @@ class ReviewRequest(BaseModel):
    This request must include ALL pending reviews for a graph execution.
    Each review will be either approved (with optional data modifications)
    or rejected (data ignored). The execution will resume only after ALL reviews are processed.
+
+    Each review item can individually specify whether to auto-approve future executions
+    of the same block via the `auto_approve_future` field on ReviewItem.
    """

    reviews: List[ReviewItem] = Field(
--- a/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py
--- a/autogpt_platform/backend/backend/api/features/executions/review/routes.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/routes.py
@@ -1,17 +1,27 @@
+import asyncio
 import logging
-from typing import List
+from typing import Any, List

 import autogpt_libs.auth as autogpt_auth_lib
 from fastapi import APIRouter, HTTPException, Query, Security, status
 from prisma.enums import ReviewStatus

-from backend.data.execution import get_graph_execution_meta
+from backend.data.execution import (
+    ExecutionContext,
+    ExecutionStatus,
+    get_graph_execution_meta,
+)
+from backend.data.graph import get_graph_settings
 from backend.data.human_review import (
+    create_auto_approval_record,
+    get_pending_reviews_by_node_exec_ids,
    get_pending_reviews_for_execution,
    get_pending_reviews_for_user,
    has_pending_reviews_for_graph_exec,
    process_all_reviews_for_execution,
 )
+from backend.data.model import USER_TIMEZONE_NOT_SET
+from backend.data.user import get_user_by_id
 from backend.executor.utils import add_graph_execution

 from .model import PendingHumanReviewModel, ReviewRequest, ReviewResponse
@@ -127,17 +137,70 @@ async def process_review_action(
            detail="At least one review must be provided",
        )

-    # Build review decisions map
+    # Batch fetch all requested reviews
+    reviews_map = await get_pending_reviews_by_node_exec_ids(
+        list(all_request_node_ids), user_id
+    )
+
+    # Validate all reviews were found
+    missing_ids = all_request_node_ids - set(reviews_map.keys())
+    if missing_ids:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"No pending review found for node execution(s): {', '.join(missing_ids)}",
+        )
+
+    # Validate all reviews belong to the same execution
+    graph_exec_ids = {review.graph_exec_id for review in reviews_map.values()}
+    if len(graph_exec_ids) > 1:
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail="All reviews in a single request must belong to the same execution.",
+        )
+
+    graph_exec_id = next(iter(graph_exec_ids))
+
+    # Validate execution status before processing reviews
+    graph_exec_meta = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+
+    if not graph_exec_meta:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Graph execution #{graph_exec_id} not found",
+        )
+
+    # Only allow processing reviews if execution is paused for review
+    # or incomplete (partial execution with some reviews already processed)
+    if graph_exec_meta.status not in (
+        ExecutionStatus.REVIEW,
+        ExecutionStatus.INCOMPLETE,
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail=f"Cannot process reviews while execution status is {graph_exec_meta.status}. "
+            f"Reviews can only be processed when execution is paused (REVIEW status). "
+            f"Current status: {graph_exec_meta.status}",
+        )
+
+    # Build review decisions map and track which reviews requested auto-approval
+    # Auto-approved reviews use original data (no modifications allowed)
    review_decisions = {}
+    auto_approve_requests = {}  # Map node_exec_id -> auto_approve_future flag
+
    for review in request.reviews:
        review_status = (
            ReviewStatus.APPROVED if review.approved else ReviewStatus.REJECTED
        )
+        # If this review requested auto-approval, don't allow data modifications
+        reviewed_data = None if review.auto_approve_future else review.reviewed_data
        review_decisions[review.node_exec_id] = (
            review_status,
-            review.reviewed_data,
+            reviewed_data,
            review.message,
        )
+        auto_approve_requests[review.node_exec_id] = review.auto_approve_future

    # Process all reviews
    updated_reviews = await process_all_reviews_for_execution(
@@ -145,6 +208,87 @@ async def process_review_action(
        review_decisions=review_decisions,
    )

+    # Create auto-approval records for approved reviews that requested it
+    # Deduplicate by node_id to avoid race conditions when multiple reviews
+    # for the same node are processed in parallel
+    async def create_auto_approval_for_node(
+        node_id: str, review_result
+    ) -> tuple[str, bool]:
+        """
+        Create auto-approval record for a node.
+        Returns (node_id, success) tuple for tracking failures.
+        """
+        try:
+            await create_auto_approval_record(
+                user_id=user_id,
+                graph_exec_id=review_result.graph_exec_id,
+                graph_id=review_result.graph_id,
+                graph_version=review_result.graph_version,
+                node_id=node_id,
+                payload=review_result.payload,
+            )
+            return (node_id, True)
+        except Exception as e:
+            logger.error(
+                f"Failed to create auto-approval record for node {node_id}",
+                exc_info=e,
+            )
+            return (node_id, False)
+
+    # Collect node_exec_ids that need auto-approval
+    node_exec_ids_needing_auto_approval = [
+        node_exec_id
+        for node_exec_id, review_result in updated_reviews.items()
+        if review_result.status == ReviewStatus.APPROVED
+        and auto_approve_requests.get(node_exec_id, False)
+    ]
+
+    # Batch-fetch node executions to get node_ids
+    nodes_needing_auto_approval: dict[str, Any] = {}
+    if node_exec_ids_needing_auto_approval:
+        from backend.data.execution import get_node_executions
+
+        node_execs = await get_node_executions(
+            graph_exec_id=graph_exec_id, include_exec_data=False
+        )
+        node_exec_map = {node_exec.node_exec_id: node_exec for node_exec in node_execs}
+
+        for node_exec_id in node_exec_ids_needing_auto_approval:
+            node_exec = node_exec_map.get(node_exec_id)
+            if node_exec:
+                review_result = updated_reviews[node_exec_id]
+                # Use the first approved review for this node (deduplicate by node_id)
+                if node_exec.node_id not in nodes_needing_auto_approval:
+                    nodes_needing_auto_approval[node_exec.node_id] = review_result
+            else:
+                logger.error(
+                    f"Failed to create auto-approval record for {node_exec_id}: "
+                    f"Node execution not found. This may indicate a race condition "
+                    f"or data inconsistency."
+                )
+
+    # Execute all auto-approval creations in parallel (deduplicated by node_id)
+    auto_approval_results = await asyncio.gather(
+        *[
+            create_auto_approval_for_node(node_id, review_result)
+            for node_id, review_result in nodes_needing_auto_approval.items()
+        ],
+        return_exceptions=True,
+    )
+
+    # Count auto-approval failures
+    auto_approval_failed_count = 0
+    for result in auto_approval_results:
+        if isinstance(result, Exception):
+            # Unexpected exception during auto-approval creation
+            auto_approval_failed_count += 1
+            logger.error(
+                f"Unexpected exception during auto-approval creation: {result}"
+            )
+        elif isinstance(result, tuple) and len(result) == 2 and not result[1]:
+            # Auto-approval creation failed (returned False)
+            auto_approval_failed_count += 1
+
    # Count results
    approved_count = sum(
        1
@@ -157,30 +301,53 @@ async def process_review_action(
        if review.status == ReviewStatus.REJECTED
    )

-    # Resume execution if we processed some reviews
+    # Resume execution only if ALL pending reviews for this execution have been processed
    if updated_reviews:
-        # Get graph execution ID from any processed review
-        first_review = next(iter(updated_reviews.values()))
-        graph_exec_id = first_review.graph_exec_id
-
-        # Check if any pending reviews remain for this execution
        still_has_pending = await has_pending_reviews_for_graph_exec(graph_exec_id)

        if not still_has_pending:
-            # Resume execution
+            # Get the graph_id from any processed review
+            first_review = next(iter(updated_reviews.values()))
+
            try:
+                # Fetch user and settings to build complete execution context
+                user = await get_user_by_id(user_id)
+                settings = await get_graph_settings(
+                    user_id=user_id, graph_id=first_review.graph_id
+                )
+
+                # Preserve user's timezone preference when resuming execution
+                user_timezone = (
+                    user.timezone if user.timezone != USER_TIMEZONE_NOT_SET else "UTC"
+                )
+
+                execution_context = ExecutionContext(
+                    human_in_the_loop_safe_mode=settings.human_in_the_loop_safe_mode,
+                    sensitive_action_safe_mode=settings.sensitive_action_safe_mode,
+                    user_timezone=user_timezone,
+                )
+
                await add_graph_execution(
                    graph_id=first_review.graph_id,
                    user_id=user_id,
                    graph_exec_id=graph_exec_id,
+                    execution_context=execution_context,
                )
                logger.info(f"Resumed execution {graph_exec_id}")
            except Exception as e:
                logger.error(f"Failed to resume execution {graph_exec_id}: {str(e)}")

+    # Build error message if auto-approvals failed
+    error_message = None
+    if auto_approval_failed_count > 0:
+        error_message = (
+            f"{auto_approval_failed_count} auto-approval setting(s) could not be saved. "
+            f"You may need to manually approve these reviews in future executions."
+        )
+
    return ReviewResponse(
        approved_count=approved_count,
        rejected_count=rejected_count,
-        failed_count=0,
-        error=None,
+        failed_count=auto_approval_failed_count,
+        error=error_message,
    )
--- a/autogpt_platform/backend/backend/api/features/library/db.py
+++ b/autogpt_platform/backend/backend/api/features/library/db.py
@@ -401,27 +401,11 @@ async def add_generated_agent_image(
    )


-def _initialize_graph_settings(graph: graph_db.GraphModel) -> GraphSettings:
-    """
-    Initialize GraphSettings based on graph content.
-
-    Args:
-        graph: The graph to analyze
-
-    Returns:
-        GraphSettings with appropriate human_in_the_loop_safe_mode value
-    """
-    if graph.has_human_in_the_loop:
-        # Graph has HITL blocks - set safe mode to True by default
-        return GraphSettings(human_in_the_loop_safe_mode=True)
-    else:
-        # Graph has no HITL blocks - keep None
-        return GraphSettings(human_in_the_loop_safe_mode=None)
-
-
 async def create_library_agent(
    graph: graph_db.GraphModel,
    user_id: str,
+    hitl_safe_mode: bool = True,
+    sensitive_action_safe_mode: bool = False,
    create_library_agents_for_sub_graphs: bool = True,
 ) -> list[library_model.LibraryAgent]:
    """
@@ -430,6 +414,8 @@ async def create_library_agent(
    Args:
        agent: The agent/Graph to add to the library.
        user_id: The user to whom the agent will be added.
+        hitl_safe_mode: Whether HITL blocks require manual review (default True).
+        sensitive_action_safe_mode: Whether sensitive action blocks require review.
        create_library_agents_for_sub_graphs: If True, creates LibraryAgent records for sub-graphs as well.

    Returns:
@@ -465,7 +451,11 @@ async def create_library_agent(
                            }
                        },
                        settings=SafeJson(
-                            _initialize_graph_settings(graph_entry).model_dump()
+                            GraphSettings.from_graph(
+                                graph_entry,
+                                hitl_safe_mode=hitl_safe_mode,
+                                sensitive_action_safe_mode=sensitive_action_safe_mode,
+                            ).model_dump()
                        ),
                    ),
                    include=library_agent_include(
@@ -593,7 +583,13 @@ async def update_library_agent(
            )
        update_fields["isDeleted"] = is_deleted
    if settings is not None:
-        update_fields["settings"] = SafeJson(settings.model_dump())
+        existing_agent = await get_library_agent(id=library_agent_id, user_id=user_id)
+        current_settings_dict = (
+            existing_agent.settings.model_dump() if existing_agent.settings else {}
+        )
+        new_settings = settings.model_dump(exclude_unset=True)
+        merged_settings = {**current_settings_dict, **new_settings}
+        update_fields["settings"] = SafeJson(merged_settings)

    try:
        # If graph_version is provided, update to that specific version
@@ -627,33 +623,6 @@ async def update_library_agent(
        raise DatabaseError("Failed to update library agent") from e


-async def update_library_agent_settings(
-    user_id: str,
-    agent_id: str,
-    settings: GraphSettings,
-) -> library_model.LibraryAgent:
-    """
-    Updates the settings for a specific LibraryAgent.
-
-    Args:
-        user_id: The owner of the LibraryAgent.
-        agent_id: The ID of the LibraryAgent to update.
-        settings: New GraphSettings to apply.
-
-    Returns:
-        The updated LibraryAgent.
-
-    Raises:
-        NotFoundError: If the specified LibraryAgent does not exist.
-        DatabaseError: If there's an error in the update operation.
-    """
-    return await update_library_agent(
-        library_agent_id=agent_id,
-        user_id=user_id,
-        settings=settings,
-    )
-
-
 async def delete_library_agent(
    library_agent_id: str, user_id: str, soft_delete: bool = True
 ) -> None:
@@ -838,7 +807,7 @@ async def add_store_agent_to_library(
                "isCreatedByUser": False,
                "useGraphIsActiveVersion": False,
                "settings": SafeJson(
-                    _initialize_graph_settings(graph_model).model_dump()
+                    GraphSettings.from_graph(graph_model).model_dump()
                ),
            },
            include=library_agent_include(
@@ -1228,8 +1197,15 @@ async def fork_library_agent(
        )
        new_graph = await on_graph_activate(new_graph, user_id=user_id)

-        # Create a library agent for the new graph
-        return (await create_library_agent(new_graph, user_id))[0]
+        # Create a library agent for the new graph, preserving safe mode settings
+        return (
+            await create_library_agent(
+                new_graph,
+                user_id,
+                hitl_safe_mode=original_agent.settings.human_in_the_loop_safe_mode,
+                sensitive_action_safe_mode=original_agent.settings.sensitive_action_safe_mode,
+            )
+        )[0]
    except prisma.errors.PrismaError as e:
        logger.error(f"Database error cloning library agent: {e}")
        raise DatabaseError("Failed to fork library agent") from e
--- a/autogpt_platform/backend/backend/api/features/library/model.py
+++ b/autogpt_platform/backend/backend/api/features/library/model.py
@@ -73,6 +73,12 @@ class LibraryAgent(pydantic.BaseModel):
    has_external_trigger: bool = pydantic.Field(
        description="Whether the agent has an external trigger (e.g. webhook) node"
    )
+    has_human_in_the_loop: bool = pydantic.Field(
+        description="Whether the agent has human-in-the-loop blocks"
+    )
+    has_sensitive_action: bool = pydantic.Field(
+        description="Whether the agent has sensitive action blocks"
+    )
    trigger_setup_info: Optional[GraphTriggerInfo] = None

    # Indicates whether there's a new output (based on recent runs)
@@ -180,6 +186,8 @@ class LibraryAgent(pydantic.BaseModel):
                graph.credentials_input_schema if sub_graphs is not None else None
            ),
            has_external_trigger=graph.has_external_trigger,
+            has_human_in_the_loop=graph.has_human_in_the_loop,
+            has_sensitive_action=graph.has_sensitive_action,
            trigger_setup_info=graph.trigger_setup_info,
            new_output=new_output,
            can_access_graph=can_access_graph,
--- a/autogpt_platform/backend/backend/api/features/library/routes_test.py
+++ b/autogpt_platform/backend/backend/api/features/library/routes_test.py
@@ -52,6 +52,8 @@ async def test_get_library_agents_success(
                output_schema={"type": "object", "properties": {}},
                credentials_input_schema={"type": "object", "properties": {}},
                has_external_trigger=False,
+                has_human_in_the_loop=False,
+                has_sensitive_action=False,
                status=library_model.LibraryAgentStatus.COMPLETED,
                recommended_schedule_cron=None,
                new_output=False,
@@ -75,6 +77,8 @@ async def test_get_library_agents_success(
                output_schema={"type": "object", "properties": {}},
                credentials_input_schema={"type": "object", "properties": {}},
                has_external_trigger=False,
+                has_human_in_the_loop=False,
+                has_sensitive_action=False,
                status=library_model.LibraryAgentStatus.COMPLETED,
                recommended_schedule_cron=None,
                new_output=False,
@@ -150,6 +154,8 @@ async def test_get_favorite_library_agents_success(
                output_schema={"type": "object", "properties": {}},
                credentials_input_schema={"type": "object", "properties": {}},
                has_external_trigger=False,
+                has_human_in_the_loop=False,
+                has_sensitive_action=False,
                status=library_model.LibraryAgentStatus.COMPLETED,
                recommended_schedule_cron=None,
                new_output=False,
@@ -218,6 +224,8 @@ def test_add_agent_to_library_success(
        output_schema={"type": "object", "properties": {}},
        credentials_input_schema={"type": "object", "properties": {}},
        has_external_trigger=False,
+        has_human_in_the_loop=False,
+        has_sensitive_action=False,
        status=library_model.LibraryAgentStatus.COMPLETED,
        new_output=False,
        can_access_graph=True,
--- a/autogpt_platform/backend/backend/api/features/oauth_test.py
+++ b/autogpt_platform/backend/backend/api/features/oauth_test.py
@@ -20,6 +20,7 @@ from typing import AsyncGenerator

 import httpx
 import pytest
+import pytest_asyncio
 from autogpt_libs.api_key.keysmith import APIKeySmith
 from prisma.enums import APIKeyPermission
 from prisma.models import OAuthAccessToken as PrismaOAuthAccessToken
@@ -38,13 +39,13 @@ keysmith = APIKeySmith()
 # ============================================================================


-@pytest.fixture
+@pytest.fixture(scope="session")
 def test_user_id() -> str:
    """Test user ID for OAuth tests."""
    return str(uuid.uuid4())


-@pytest.fixture
+@pytest_asyncio.fixture(scope="session", loop_scope="session")
 async def test_user(server, test_user_id: str):
    """Create a test user in the database."""
    await PrismaUser.prisma().create(
@@ -67,7 +68,7 @@ async def test_user(server, test_user_id: str):
    await PrismaUser.prisma().delete(where={"id": test_user_id})


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_oauth_app(test_user: str):
    """Create a test OAuth application in the database."""
    app_id = str(uuid.uuid4())
@@ -122,7 +123,7 @@ def pkce_credentials() -> tuple[str, str]:
    return generate_pkce()


-@pytest.fixture
+@pytest_asyncio.fixture
 async def client(server, test_user: str) -> AsyncGenerator[httpx.AsyncClient, None]:
    """
    Create an async HTTP client that talks directly to the FastAPI app.
@@ -287,7 +288,7 @@ async def test_authorize_invalid_client_returns_error(
    assert query_params["error"][0] == "invalid_client"


-@pytest.fixture
+@pytest_asyncio.fixture
 async def inactive_oauth_app(test_user: str):
    """Create an inactive test OAuth application in the database."""
    app_id = str(uuid.uuid4())
@@ -1004,7 +1005,7 @@ async def test_token_refresh_revoked(
    assert "revoked" in response.json()["detail"].lower()


-@pytest.fixture
+@pytest_asyncio.fixture
 async def other_oauth_app(test_user: str):
    """Create a second OAuth application for cross-app tests."""
    app_id = str(uuid.uuid4())
--- a/autogpt_platform/backend/backend/api/features/store/db.py
+++ b/autogpt_platform/backend/backend/api/features/store/db.py
@@ -1552,7 +1552,7 @@ async def review_store_submission(

                # Generate embedding for approved listing (blocking - admin operation)
                # Inside transaction: if embedding fails, entire transaction rolls back
-                embedding_success = await ensure_embedding(
+                await ensure_embedding(
                    version_id=store_listing_version_id,
                    name=store_listing_version.name,
                    description=store_listing_version.description,
@@ -1560,12 +1560,6 @@ async def review_store_submission(
                    categories=store_listing_version.categories or [],
                    tx=tx,
                )
-                if not embedding_success:
-                    raise ValueError(
-                        f"Failed to generate embedding for listing {store_listing_version_id}. "
-                        "This is likely due to OpenAI API being unavailable. "
-                        "Please try again later or contact support if the issue persists."
-                    )

                await prisma.models.StoreListing.prisma(tx).update(
                    where={"id": store_listing_version.StoreListing.id},
--- a/autogpt_platform/backend/backend/api/features/store/embeddings.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings.py
@@ -21,7 +21,6 @@ from backend.util.json import dumps

 logger = logging.getLogger(__name__)

-
 # OpenAI embedding model configuration
 EMBEDDING_MODEL = "text-embedding-3-small"
 # Embedding dimension for the model above
@@ -63,49 +62,42 @@ def build_searchable_text(
    return " ".join(parts)


-async def generate_embedding(text: str) -> list[float] | None:
+async def generate_embedding(text: str) -> list[float]:
    """
    Generate embedding for text using OpenAI API.

-    Returns None if embedding generation fails.
-    Fail-fast: no retries to maintain consistency with approval flow.
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        client = get_openai_client()
-        if not client:
-            logger.error("openai_internal_api_key not set, cannot generate embedding")
-            return None
+    client = get_openai_client()
+    if not client:
+        raise RuntimeError("openai_internal_api_key not set, cannot generate embedding")

-        # Truncate text to token limit using tiktoken
-        # Character-based truncation is insufficient because token ratios vary by content type
-        enc = encoding_for_model(EMBEDDING_MODEL)
-        tokens = enc.encode(text)
-        if len(tokens) > EMBEDDING_MAX_TOKENS:
-            tokens = tokens[:EMBEDDING_MAX_TOKENS]
-            truncated_text = enc.decode(tokens)
-            logger.info(
-                f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
-            )
-        else:
-            truncated_text = text
-
-        start_time = time.time()
-        response = await client.embeddings.create(
-            model=EMBEDDING_MODEL,
-            input=truncated_text,
-        )
-        latency_ms = (time.time() - start_time) * 1000
-
-        embedding = response.data[0].embedding
+    # Truncate text to token limit using tiktoken
+    # Character-based truncation is insufficient because token ratios vary by content type
+    enc = encoding_for_model(EMBEDDING_MODEL)
+    tokens = enc.encode(text)
+    if len(tokens) > EMBEDDING_MAX_TOKENS:
+        tokens = tokens[:EMBEDDING_MAX_TOKENS]
+        truncated_text = enc.decode(tokens)
        logger.info(
-            f"Generated embedding: {len(embedding)} dims, "
-            f"{len(tokens)} tokens, {latency_ms:.0f}ms"
+            f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
        )
-        return embedding
+    else:
+        truncated_text = text

-    except Exception as e:
-        logger.error(f"Failed to generate embedding: {e}")
-        return None
+    start_time = time.time()
+    response = await client.embeddings.create(
+        model=EMBEDDING_MODEL,
+        input=truncated_text,
+    )
+    latency_ms = (time.time() - start_time) * 1000
+
+    embedding = response.data[0].embedding
+    logger.info(
+        f"Generated embedding: {len(embedding)} dims, "
+        f"{len(tokens)} tokens, {latency_ms:.0f}ms"
+    )
+    return embedding


 async def store_embedding(
@@ -144,48 +136,45 @@ async def store_content_embedding(

    New function for unified content embedding storage.
    Uses raw SQL since Prisma doesn't natively support pgvector.
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        client = tx if tx else prisma.get_client()
+    client = tx if tx else prisma.get_client()

-        # Convert embedding to PostgreSQL vector format
-        embedding_str = embedding_to_vector_string(embedding)
-        metadata_json = dumps(metadata or {})
+    # Convert embedding to PostgreSQL vector format
+    embedding_str = embedding_to_vector_string(embedding)
+    metadata_json = dumps(metadata or {})

-        # Upsert the embedding
-        # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
-        await execute_raw_with_schema(
-            """
-            INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
-                "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
-            )
-            VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
-            ON CONFLICT ("contentType", "contentId", "userId")
-            DO UPDATE SET
-                "embedding" = $4::vector,
-                "searchableText" = $5,
-                "metadata" = $6::jsonb,
-                "updatedAt" = NOW()
-            WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
-                AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
-                AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
-            """,
-            content_type,
-            content_id,
-            user_id,
-            embedding_str,
-            searchable_text,
-            metadata_json,
-            client=client,
-            set_public_search_path=True,
+    # Upsert the embedding
+    # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
+    # Use unqualified ::vector - pgvector is in search_path on all environments
+    await execute_raw_with_schema(
+        """
+        INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
+            "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
        )
+        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
+        ON CONFLICT ("contentType", "contentId", "userId")
+        DO UPDATE SET
+            "embedding" = $4::vector,
+            "searchableText" = $5,
+            "metadata" = $6::jsonb,
+            "updatedAt" = NOW()
+        WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
+            AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
+            AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
+        """,
+        content_type,
+        content_id,
+        user_id,
+        embedding_str,
+        searchable_text,
+        metadata_json,
+        client=client,
+    )

-        logger.info(f"Stored embedding for {content_type}:{content_id}")
-        return True
-
-    except Exception as e:
-        logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
-        return False
+    logger.info(f"Stored embedding for {content_type}:{content_id}")
+    return True


 async def get_embedding(version_id: str) -> dict[str, Any] | None:
@@ -217,35 +206,31 @@ async def get_content_embedding(

    New function for unified content embedding retrieval.
    Returns dict with contentType, contentId, embedding, timestamps or None if not found.
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        result = await query_raw_with_schema(
-            """
-            SELECT
-                "contentType",
-                "contentId",
-                "userId",
-                "embedding"::text as "embedding",
-                "searchableText",
-                "metadata",
-                "createdAt",
-                "updatedAt"
-            FROM {schema_prefix}"UnifiedContentEmbedding"
-            WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
-            """,
-            content_type,
-            content_id,
-            user_id,
-            set_public_search_path=True,
-        )
+    result = await query_raw_with_schema(
+        """
+        SELECT
+            "contentType",
+            "contentId",
+            "userId",
+            "embedding"::text as "embedding",
+            "searchableText",
+            "metadata",
+            "createdAt",
+            "updatedAt"
+        FROM {schema_prefix}"UnifiedContentEmbedding"
+        WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
+        """,
+        content_type,
+        content_id,
+        user_id,
+    )

-        if result and len(result) > 0:
-            return result[0]
-        return None
-
-    except Exception as e:
-        logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
-        return None
+    if result and len(result) > 0:
+        return result[0]
+    return None


 async def ensure_embedding(
@@ -273,46 +258,38 @@ async def ensure_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created, False on failure
+        True if embedding exists/was created
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        # Check if embedding already exists
-        if not force:
-            existing = await get_embedding(version_id)
-            if existing and existing.get("embedding"):
-                logger.debug(f"Embedding for version {version_id} already exists")
-                return True
+    # Check if embedding already exists
+    if not force:
+        existing = await get_embedding(version_id)
+        if existing and existing.get("embedding"):
+            logger.debug(f"Embedding for version {version_id} already exists")
+            return True

-        # Build searchable text for embedding
-        searchable_text = build_searchable_text(
-            name, description, sub_heading, categories
-        )
+    # Build searchable text for embedding
+    searchable_text = build_searchable_text(name, description, sub_heading, categories)

-        # Generate new embedding
-        embedding = await generate_embedding(searchable_text)
-        if embedding is None:
-            logger.warning(f"Could not generate embedding for version {version_id}")
-            return False
+    # Generate new embedding
+    embedding = await generate_embedding(searchable_text)

-        # Store the embedding with metadata using new function
-        metadata = {
-            "name": name,
-            "subHeading": sub_heading,
-            "categories": categories,
-        }
-        return await store_content_embedding(
-            content_type=ContentType.STORE_AGENT,
-            content_id=version_id,
-            embedding=embedding,
-            searchable_text=searchable_text,
-            metadata=metadata,
-            user_id=None,  # Store agents are public
-            tx=tx,
-        )
-
-    except Exception as e:
-        logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
-        return False
+    # Store the embedding with metadata using new function
+    metadata = {
+        "name": name,
+        "subHeading": sub_heading,
+        "categories": categories,
+    }
+    return await store_content_embedding(
+        content_type=ContentType.STORE_AGENT,
+        content_id=version_id,
+        embedding=embedding,
+        searchable_text=searchable_text,
+        metadata=metadata,
+        user_id=None,  # Store agents are public
+        tx=tx,
+    )


 async def delete_embedding(version_id: str) -> bool:
@@ -522,6 +499,24 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
            success = sum(1 for result in results if result is True)
            failed = len(results) - success

+            # Aggregate unique errors to avoid Sentry spam
+            if failed > 0:
+                # Group errors by type and message
+                error_summary: dict[str, int] = {}
+                for result in results:
+                    if isinstance(result, Exception):
+                        error_key = f"{type(result).__name__}: {str(result)}"
+                        error_summary[error_key] = error_summary.get(error_key, 0) + 1
+
+                # Log aggregated error summary
+                error_details = ", ".join(
+                    f"{error} ({count}x)" for error, count in error_summary.items()
+                )
+                logger.error(
+                    f"{content_type.value}: {failed}/{len(results)} embeddings failed. "
+                    f"Errors: {error_details}"
+                )
+
            results_by_type[content_type.value] = {
                "processed": len(missing_items),
                "success": success,
@@ -558,11 +553,12 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
    }


-async def embed_query(query: str) -> list[float] | None:
+async def embed_query(query: str) -> list[float]:
    """
    Generate embedding for a search query.

    Same as generate_embedding but with clearer intent.
+    Raises exceptions on failure - caller should handle.
    """
    return await generate_embedding(query)

@@ -595,40 +591,30 @@ async def ensure_content_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created, False on failure
+        True if embedding exists/was created
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        # Check if embedding already exists
-        if not force:
-            existing = await get_content_embedding(content_type, content_id, user_id)
-            if existing and existing.get("embedding"):
-                logger.debug(
-                    f"Embedding for {content_type}:{content_id} already exists"
-                )
-                return True
+    # Check if embedding already exists
+    if not force:
+        existing = await get_content_embedding(content_type, content_id, user_id)
+        if existing and existing.get("embedding"):
+            logger.debug(f"Embedding for {content_type}:{content_id} already exists")
+            return True

-        # Generate new embedding
-        embedding = await generate_embedding(searchable_text)
-        if embedding is None:
-            logger.warning(
-                f"Could not generate embedding for {content_type}:{content_id}"
-            )
-            return False
+    # Generate new embedding
+    embedding = await generate_embedding(searchable_text)

-        # Store the embedding
-        return await store_content_embedding(
-            content_type=content_type,
-            content_id=content_id,
-            embedding=embedding,
-            searchable_text=searchable_text,
-            metadata=metadata or {},
-            user_id=user_id,
-            tx=tx,
-        )
-
-    except Exception as e:
-        logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
-        return False
+    # Store the embedding
+    return await store_content_embedding(
+        content_type=content_type,
+        content_id=content_id,
+        embedding=embedding,
+        searchable_text=searchable_text,
+        metadata=metadata or {},
+        user_id=user_id,
+        tx=tx,
+    )


 async def cleanup_orphaned_embeddings() -> dict[str, Any]:
@@ -855,9 +841,8 @@ async def semantic_search(
        limit = 100

    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    if query_embedding is not None:
+    try:
+        query_embedding = await embed_query(query)
        # Semantic search with embeddings
        embedding_str = embedding_to_vector_string(query_embedding)

@@ -871,47 +856,58 @@ async def semantic_search(
        # Add content type parameters and build placeholders dynamically
        content_type_start_idx = len(params) + 1
        content_type_placeholders = ", ".join(
-            f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
+            "$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
            for i in range(len(content_types))
        )
        params.extend([ct.value for ct in content_types])

-        sql = f"""
+        # Build min_similarity param index before appending
+        min_similarity_idx = len(params) + 1
+        params.append(min_similarity)
+
+        # Use unqualified ::vector and <=> operator - pgvector is in search_path on all environments
+        sql = (
+            """
            SELECT
                "contentId" as content_id,
                "contentType" as content_type,
                "searchableText" as searchable_text,
                metadata,
-                1 - (embedding <=> '{embedding_str}'::vector) as similarity
-            FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
-            WHERE "contentType" IN ({content_type_placeholders})
-            {user_filter}
-            AND 1 - (embedding <=> '{embedding_str}'::vector) >= ${len(params) + 1}
+                1 - (embedding <=> '"""
+            + embedding_str
+            + """'::vector) as similarity
+            FROM {schema_prefix}"UnifiedContentEmbedding"
+            WHERE "contentType" IN ("""
+            + content_type_placeholders
+            + """)
+            """
+            + user_filter
+            + """
+            AND 1 - (embedding <=> '"""
+            + embedding_str
+            + """'::vector) >= $"""
+            + str(min_similarity_idx)
+            + """
            ORDER BY similarity DESC
            LIMIT $1
        """
-        params.append(min_similarity)
+        )

-        try:
-            results = await query_raw_with_schema(
-                sql, *params, set_public_search_path=True
-            )
-            return [
-                {
-                    "content_id": row["content_id"],
-                    "content_type": row["content_type"],
-                    "searchable_text": row["searchable_text"],
-                    "metadata": row["metadata"],
-                    "similarity": float(row["similarity"]),
-                }
-                for row in results
-            ]
-        except Exception as e:
-            logger.error(f"Semantic search failed: {e}")
-            # Fall through to lexical search below
+        results = await query_raw_with_schema(sql, *params)
+        return [
+            {
+                "content_id": row["content_id"],
+                "content_type": row["content_type"],
+                "searchable_text": row["searchable_text"],
+                "metadata": row["metadata"],
+                "similarity": float(row["similarity"]),
+            }
+            for row in results
+        ]
+    except Exception as e:
+        logger.warning(f"Semantic search failed, falling back to lexical search: {e}")

    # Fallback to lexical search if embeddings unavailable
-    logger.warning("Falling back to lexical search (embeddings unavailable)")

    params_lexical: list[Any] = [limit]
    user_filter = ""
@@ -922,31 +918,41 @@ async def semantic_search(
    # Add content type parameters and build placeholders dynamically
    content_type_start_idx = len(params_lexical) + 1
    content_type_placeholders_lexical = ", ".join(
-        f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
+        "$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
        for i in range(len(content_types))
    )
    params_lexical.extend([ct.value for ct in content_types])

-    sql_lexical = f"""
+    # Build query param index before appending
+    query_param_idx = len(params_lexical) + 1
+    params_lexical.append(f"%{query}%")
+
+    # Use regular string (not f-string) for template to preserve {schema_prefix} placeholders
+    sql_lexical = (
+        """
        SELECT
            "contentId" as content_id,
            "contentType" as content_type,
            "searchableText" as searchable_text,
            metadata,
            0.0 as similarity
-        FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
-        WHERE "contentType" IN ({content_type_placeholders_lexical})
-        {user_filter}
-        AND "searchableText" ILIKE ${len(params_lexical) + 1}
+        FROM {schema_prefix}"UnifiedContentEmbedding"
+        WHERE "contentType" IN ("""
+        + content_type_placeholders_lexical
+        + """)
+        """
+        + user_filter
+        + """
+        AND "searchableText" ILIKE $"""
+        + str(query_param_idx)
+        + """
        ORDER BY "updatedAt" DESC
        LIMIT $1
    """
-    params_lexical.append(f"%{query}%")
+    )

    try:
-        results = await query_raw_with_schema(
-            sql_lexical, *params_lexical, set_public_search_path=True
-        )
+        results = await query_raw_with_schema(sql_lexical, *params_lexical)
        return [
            {
                "content_id": row["content_id"],
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
@@ -298,17 +298,16 @@ async def test_schema_handling_error_cases():
            mock_client.execute_raw.side_effect = Exception("Database error")
            mock_get_client.return_value = mock_client

-            result = await embeddings.store_content_embedding(
-                content_type=ContentType.STORE_AGENT,
-                content_id="test-id",
-                embedding=[0.1] * EMBEDDING_DIM,
-                searchable_text="test",
-                metadata=None,
-                user_id=None,
-            )
-
-            # Should return False on error, not raise
-            assert result is False
+            # Should raise exception on error
+            with pytest.raises(Exception, match="Database error"):
+                await embeddings.store_content_embedding(
+                    content_type=ContentType.STORE_AGENT,
+                    content_id="test-id",
+                    embedding=[0.1] * EMBEDDING_DIM,
+                    searchable_text="test",
+                    metadata=None,
+                    user_id=None,
+                )


 if __name__ == "__main__":
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
@@ -80,9 +80,8 @@ async def test_generate_embedding_no_api_key():
    ) as mock_get_client:
        mock_get_client.return_value = None

-        result = await embeddings.generate_embedding("test text")
-
-        assert result is None
+        with pytest.raises(RuntimeError, match="openai_internal_api_key not set"):
+            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
@@ -97,9 +96,8 @@ async def test_generate_embedding_api_error():
    ) as mock_get_client:
        mock_get_client.return_value = mock_client

-        result = await embeddings.generate_embedding("test text")
-
-        assert result is None
+        with pytest.raises(Exception, match="API Error"):
+            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
@@ -155,18 +153,14 @@ async def test_store_embedding_success(mocker):
    )

    assert result is True
-    # execute_raw is called twice: once for SET search_path, once for INSERT
-    assert mock_client.execute_raw.call_count == 2
+    # execute_raw is called once for INSERT (no separate SET search_path needed)
+    assert mock_client.execute_raw.call_count == 1

-    # First call: SET search_path
-    first_call_args = mock_client.execute_raw.call_args_list[0][0]
-    assert "SET search_path" in first_call_args[0]
-
-    # Second call: INSERT query with the actual data
-    second_call_args = mock_client.execute_raw.call_args_list[1][0]
-    assert "test-version-id" in second_call_args
-    assert "[0.1,0.2,0.3]" in second_call_args
-    assert None in second_call_args  # userId should be None for store agents
+    # Verify the INSERT query with the actual data
+    call_args = mock_client.execute_raw.call_args_list[0][0]
+    assert "test-version-id" in call_args
+    assert "[0.1,0.2,0.3]" in call_args
+    assert None in call_args  # userId should be None for store agents


@pytest.mark.asyncio(loop_scope="session")
@@ -177,11 +171,10 @@ async def test_store_embedding_database_error(mocker):

    embedding = [0.1, 0.2, 0.3]

-    result = await embeddings.store_embedding(
-        version_id="test-version-id", embedding=embedding, tx=mock_client
-    )
-
-    assert result is False
+    with pytest.raises(Exception, match="Database error"):
+        await embeddings.store_embedding(
+            version_id="test-version-id", embedding=embedding, tx=mock_client
+        )


@pytest.mark.asyncio(loop_scope="session")
@@ -281,17 +274,16 @@ async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
 async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
    """Test ensure_embedding when generation fails."""
    mock_get.return_value = None
-    mock_generate.return_value = None
+    mock_generate.side_effect = Exception("Generation failed")

-    result = await embeddings.ensure_embedding(
-        version_id="test-id",
-        name="Test",
-        description="Test description",
-        sub_heading="Test heading",
-        categories=["test"],
-    )
-
-    assert result is False
+    with pytest.raises(Exception, match="Generation failed"):
+        await embeddings.ensure_embedding(
+            version_id="test-id",
+            name="Test",
+            description="Test description",
+            sub_heading="Test heading",
+            categories=["test"],
+        )


@pytest.mark.asyncio(loop_scope="session")
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -12,7 +12,7 @@ from dataclasses import dataclass
 from typing import Any, Literal

 from prisma.enums import ContentType
-from rank_bm25 import BM25Okapi
+from rank_bm25 import BM25Okapi  # type: ignore[import-untyped]

 from backend.api.features.store.embeddings import (
    EMBEDDING_DIM,
@@ -186,13 +186,12 @@ async def unified_hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    # Graceful degradation if embedding unavailable
-    if query_embedding is None or not query_embedding:
+    # Generate query embedding with graceful degradation
+    try:
+        query_embedding = await embed_query(query)
+    except Exception as e:
        logger.warning(
-            "Failed to generate query embedding - falling back to lexical-only search. "
+            f"Failed to generate query embedding - falling back to lexical-only search: {e}. "
            "Check that openai_internal_api_key is configured and OpenAI API is accessible."
        )
        query_embedding = [0.0] * EMBEDDING_DIM
@@ -363,9 +362,7 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(
-        sql_query, *params, set_public_search_path=True
-    )
+    results = await query_raw_with_schema(sql_query, *params)

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking
@@ -466,13 +463,12 @@ async def hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    # Graceful degradation
-    if query_embedding is None or not query_embedding:
+    # Generate query embedding with graceful degradation
+    try:
+        query_embedding = await embed_query(query)
+    except Exception as e:
        logger.warning(
-            "Failed to generate query embedding - falling back to lexical-only search."
+            f"Failed to generate query embedding - falling back to lexical-only search: {e}"
        )
        query_embedding = [0.0] * EMBEDDING_DIM
        total_non_semantic = (
@@ -688,9 +684,7 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(
-        sql_query, *params, set_public_search_path=True
-    )
+    results = await query_raw_with_schema(sql_query, *params)

    total = results[0]["total_count"] if results else 0

--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
@@ -172,8 +172,8 @@ async def test_hybrid_search_without_embeddings():
        with patch(
            "backend.api.features.store.hybrid_search.query_raw_with_schema"
        ) as mock_query:
-            # Simulate embedding failure
-            mock_embed.return_value = None
+            # Simulate embedding failure by raising exception
+            mock_embed.side_effect = Exception("Embedding generation failed")
            mock_query.return_value = mock_results

            # Should NOT raise - graceful degradation
@@ -613,7 +613,9 @@ async def test_unified_hybrid_search_graceful_degradation():
            "backend.api.features.store.hybrid_search.embed_query"
        ) as mock_embed:
            mock_query.return_value = mock_results
-            mock_embed.return_value = None  # Embedding failure
+            mock_embed.side_effect = Exception(
+                "Embedding generation failed"
+            )  # Embedding failure

            # Should NOT raise - graceful degradation
            results, total = await unified_hybrid_search(
--- a/autogpt_platform/backend/backend/api/features/v1.py
+++ b/autogpt_platform/backend/backend/api/features/v1.py
@@ -761,10 +761,8 @@ async def create_new_graph(
    graph.reassign_ids(user_id=user_id, reassign_graph_id=True)
    graph.validate_graph(for_run=False)

-    # The return value of the create graph & library function is intentionally not used here,
-    # as the graph already valid and no sub-graphs are returned back.
    await graph_db.create_graph(graph, user_id=user_id)
-    await library_db.create_library_agent(graph, user_id=user_id)
+    await library_db.create_library_agent(graph, user_id)
    activated_graph = await on_graph_activate(graph, user_id=user_id)

    if create_graph.source == "builder":
@@ -888,21 +886,19 @@ async def set_graph_active_version(
 async def _update_library_agent_version_and_settings(
    user_id: str, agent_graph: graph_db.GraphModel
 ) -> library_model.LibraryAgent:
-    # Keep the library agent up to date with the new active version
    library = await library_db.update_agent_version_in_library(
        user_id, agent_graph.id, agent_graph.version
    )
-    # If the graph has HITL node, initialize the setting if it's not already set.
-    if (
-        agent_graph.has_human_in_the_loop
-        and library.settings.human_in_the_loop_safe_mode is None
-    ):
-        await library_db.update_library_agent_settings(
+    updated_settings = GraphSettings.from_graph(
+        graph=agent_graph,
+        hitl_safe_mode=library.settings.human_in_the_loop_safe_mode,
+        sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode,
+    )
+    if updated_settings != library.settings:
+        library = await library_db.update_library_agent(
+            library_agent_id=library.id,
            user_id=user_id,
-            agent_id=library.id,
-            settings=library.settings.model_copy(
-                update={"human_in_the_loop_safe_mode": True}
-            ),
+            settings=updated_settings,
        )
    return library

@@ -919,21 +915,18 @@ async def update_graph_settings(
    user_id: Annotated[str, Security(get_user_id)],
 ) -> GraphSettings:
    """Update graph settings for the user's library agent."""
-    # Get the library agent for this graph
    library_agent = await library_db.get_library_agent_by_graph_id(
        graph_id=graph_id, user_id=user_id
    )
    if not library_agent:
        raise HTTPException(404, f"Graph #{graph_id} not found in user's library")

-    # Update the library agent settings
-    updated_agent = await library_db.update_library_agent_settings(
+    updated_agent = await library_db.update_library_agent(
+        library_agent_id=library_agent.id,
        user_id=user_id,
-        agent_id=library_agent.id,
        settings=settings,
    )

-    # Return the updated settings
    return GraphSettings.model_validate(updated_agent.settings)


--- a/autogpt_platform/backend/backend/blocks/basic.py
+++ b/autogpt_platform/backend/backend/blocks/basic.py
@@ -116,6 +116,7 @@ class PrintToConsoleBlock(Block):
            input_schema=PrintToConsoleBlock.Input,
            output_schema=PrintToConsoleBlock.Output,
            test_input={"text": "Hello, World!"},
+            is_sensitive_action=True,
            test_output=[
                ("output", "Hello, World!"),
                ("status", "printed"),
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -0,0 +1,659 @@
+import json
+import shlex
+import uuid
+from typing import Literal, Optional
+
+from e2b import AsyncSandbox as BaseAsyncSandbox
+from pydantic import BaseModel, SecretStr
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+
+
+class ClaudeCodeExecutionError(Exception):
+    """Exception raised when Claude Code execution fails.
+
+    Carries the sandbox_id so it can be returned to the user for cleanup
+    when dispose_sandbox=False.
+    """
+
+    def __init__(self, message: str, sandbox_id: str = ""):
+        super().__init__(message)
+        self.sandbox_id = sandbox_id
+
+
+# Test credentials for E2B
+TEST_E2B_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="e2b",
+    api_key=SecretStr("mock-e2b-api-key"),
+    title="Mock E2B API key",
+    expires_at=None,
+)
+TEST_E2B_CREDENTIALS_INPUT = {
+    "provider": TEST_E2B_CREDENTIALS.provider,
+    "id": TEST_E2B_CREDENTIALS.id,
+    "type": TEST_E2B_CREDENTIALS.type,
+    "title": TEST_E2B_CREDENTIALS.title,
+}
+
+# Test credentials for Anthropic
+TEST_ANTHROPIC_CREDENTIALS = APIKeyCredentials(
+    id="2e568a2b-b2ea-475a-8564-9a676bf31c56",
+    provider="anthropic",
+    api_key=SecretStr("mock-anthropic-api-key"),
+    title="Mock Anthropic API key",
+    expires_at=None,
+)
+TEST_ANTHROPIC_CREDENTIALS_INPUT = {
+    "provider": TEST_ANTHROPIC_CREDENTIALS.provider,
+    "id": TEST_ANTHROPIC_CREDENTIALS.id,
+    "type": TEST_ANTHROPIC_CREDENTIALS.type,
+    "title": TEST_ANTHROPIC_CREDENTIALS.title,
+}
+
+
+class ClaudeCodeBlock(Block):
+    """
+    Execute tasks using Claude Code (Anthropic's AI coding assistant) in an E2B sandbox.
+
+    Claude Code can create files, install tools, run commands, and perform complex
+    coding tasks autonomously within a secure sandbox environment.
+    """
+
+    # Use base template - we'll install Claude Code ourselves for latest version
+    DEFAULT_TEMPLATE = "base"
+
+    class Input(BlockSchemaInput):
+        e2b_credentials: CredentialsMetaInput[
+            Literal[ProviderName.E2B], Literal["api_key"]
+        ] = CredentialsField(
+            description=(
+                "API key for the E2B platform to create the sandbox. "
+                "Get one on the [e2b website](https://e2b.dev/docs)"
+            ),
+        )
+
+        anthropic_credentials: CredentialsMetaInput[
+            Literal[ProviderName.ANTHROPIC], Literal["api_key"]
+        ] = CredentialsField(
+            description=(
+                "API key for Anthropic to power Claude Code. "
+                "Get one at [Anthropic's website](https://console.anthropic.com)"
+            ),
+        )
+
+        prompt: str = SchemaField(
+            description=(
+                "The task or instruction for Claude Code to execute. "
+                "Claude Code can create files, install packages, run commands, "
+                "and perform complex coding tasks."
+            ),
+            placeholder="Create a hello world index.html file",
+            default="",
+            advanced=False,
+        )
+
+        timeout: int = SchemaField(
+            description=(
+                "Sandbox timeout in seconds. Claude Code tasks can take "
+                "a while, so set this appropriately for your task complexity. "
+                "Note: This only applies when creating a new sandbox. "
+                "When reconnecting to an existing sandbox via sandbox_id, "
+                "the original timeout is retained."
+            ),
+            default=300,  # 5 minutes default
+            advanced=True,
+        )
+
+        setup_commands: list[str] = SchemaField(
+            description=(
+                "Optional shell commands to run before executing Claude Code. "
+                "Useful for installing dependencies or setting up the environment."
+            ),
+            default_factory=list,
+            advanced=True,
+        )
+
+        working_directory: str = SchemaField(
+            description="Working directory for Claude Code to operate in.",
+            default="/home/user",
+            advanced=True,
+        )
+
+        # Session/continuation support
+        session_id: str = SchemaField(
+            description=(
+                "Session ID to resume a previous conversation. "
+                "Leave empty for a new conversation. "
+                "Use the session_id from a previous run to continue that conversation."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        sandbox_id: str = SchemaField(
+            description=(
+                "Sandbox ID to reconnect to an existing sandbox. "
+                "Required when resuming a session (along with session_id). "
+                "Use the sandbox_id from a previous run where dispose_sandbox was False."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        conversation_history: str = SchemaField(
+            description=(
+                "Previous conversation history to continue from. "
+                "Use this to restore context on a fresh sandbox if the previous one timed out. "
+                "Pass the conversation_history output from a previous run."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        dispose_sandbox: bool = SchemaField(
+            description=(
+                "Whether to dispose of the sandbox immediately after execution. "
+                "Set to False if you want to continue the conversation later "
+                "(you'll need both sandbox_id and session_id from the output)."
+            ),
+            default=True,
+            advanced=True,
+        )
+
+    class FileOutput(BaseModel):
+        """A file extracted from the sandbox."""
+
+        path: str
+        relative_path: str  # Path relative to working directory (for GitHub, etc.)
+        name: str
+        content: str
+
+    class Output(BlockSchemaOutput):
+        response: str = SchemaField(
+            description="The output/response from Claude Code execution"
+        )
+        files: list["ClaudeCodeBlock.FileOutput"] = SchemaField(
+            description=(
+                "List of text files created/modified by Claude Code during this execution. "
+                "Each file has 'path', 'relative_path', 'name', and 'content' fields."
+            )
+        )
+        conversation_history: str = SchemaField(
+            description=(
+                "Full conversation history including this turn. "
+                "Pass this to conversation_history input to continue on a fresh sandbox "
+                "if the previous sandbox timed out."
+            )
+        )
+        session_id: str = SchemaField(
+            description=(
+                "Session ID for this conversation. "
+                "Pass this back along with sandbox_id to continue the conversation."
+            )
+        )
+        sandbox_id: Optional[str] = SchemaField(
+            description=(
+                "ID of the sandbox instance. "
+                "Pass this back along with session_id to continue the conversation. "
+                "This is None if dispose_sandbox was True (sandbox was disposed)."
+            ),
+            default=None,
+        )
+        error: str = SchemaField(description="Error message if execution failed")
+
+    def __init__(self):
+        super().__init__(
+            id="4e34f4a5-9b89-4326-ba77-2dd6750b7194",
+            description=(
+                "Execute tasks using Claude Code in an E2B sandbox. "
+                "Claude Code can create files, install tools, run commands, "
+                "and perform complex coding tasks autonomously."
+            ),
+            categories={BlockCategory.DEVELOPER_TOOLS, BlockCategory.AI},
+            input_schema=ClaudeCodeBlock.Input,
+            output_schema=ClaudeCodeBlock.Output,
+            test_credentials={
+                "e2b_credentials": TEST_E2B_CREDENTIALS,
+                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS,
+            },
+            test_input={
+                "e2b_credentials": TEST_E2B_CREDENTIALS_INPUT,
+                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS_INPUT,
+                "prompt": "Create a hello world HTML file",
+                "timeout": 300,
+                "setup_commands": [],
+                "working_directory": "/home/user",
+                "session_id": "",
+                "sandbox_id": "",
+                "conversation_history": "",
+                "dispose_sandbox": True,
+            },
+            test_output=[
+                ("response", "Created index.html with hello world content"),
+                (
+                    "files",
+                    [
+                        {
+                            "path": "/home/user/index.html",
+                            "relative_path": "index.html",
+                            "name": "index.html",
+                            "content": "<html>Hello World</html>",
+                        }
+                    ],
+                ),
+                (
+                    "conversation_history",
+                    "User: Create a hello world HTML file\n"
+                    "Claude: Created index.html with hello world content",
+                ),
+                ("session_id", str),
+                ("sandbox_id", None),  # None because dispose_sandbox=True in test_input
+            ],
+            test_mock={
+                "execute_claude_code": lambda *args, **kwargs: (
+                    "Created index.html with hello world content",  # response
+                    [
+                        ClaudeCodeBlock.FileOutput(
+                            path="/home/user/index.html",
+                            relative_path="index.html",
+                            name="index.html",
+                            content="<html>Hello World</html>",
+                        )
+                    ],  # files
+                    "User: Create a hello world HTML file\n"
+                    "Claude: Created index.html with hello world content",  # conversation_history
+                    "test-session-id",  # session_id
+                    "sandbox_id",  # sandbox_id
+                ),
+            },
+        )
+
+    async def execute_claude_code(
+        self,
+        e2b_api_key: str,
+        anthropic_api_key: str,
+        prompt: str,
+        timeout: int,
+        setup_commands: list[str],
+        working_directory: str,
+        session_id: str,
+        existing_sandbox_id: str,
+        conversation_history: str,
+        dispose_sandbox: bool,
+    ) -> tuple[str, list["ClaudeCodeBlock.FileOutput"], str, str, str]:
+        """
+        Execute Claude Code in an E2B sandbox.
+
+        Returns:
+            Tuple of (response, files, conversation_history, session_id, sandbox_id)
+        """
+
+        # Validate that sandbox_id is provided when resuming a session
+        if session_id and not existing_sandbox_id:
+            raise ValueError(
+                "sandbox_id is required when resuming a session with session_id. "
+                "The session state is stored in the original sandbox. "
+                "If the sandbox has timed out, use conversation_history instead "
+                "to restore context on a fresh sandbox."
+            )
+
+        sandbox = None
+        sandbox_id = ""
+
+        try:
+            # Either reconnect to existing sandbox or create a new one
+            if existing_sandbox_id:
+                # Reconnect to existing sandbox for conversation continuation
+                sandbox = await BaseAsyncSandbox.connect(
+                    sandbox_id=existing_sandbox_id,
+                    api_key=e2b_api_key,
+                )
+            else:
+                # Create new sandbox
+                sandbox = await BaseAsyncSandbox.create(
+                    template=self.DEFAULT_TEMPLATE,
+                    api_key=e2b_api_key,
+                    timeout=timeout,
+                    envs={"ANTHROPIC_API_KEY": anthropic_api_key},
+                )
+
+                # Install Claude Code from npm (ensures we get the latest version)
+                install_result = await sandbox.commands.run(
+                    "npm install -g @anthropic-ai/claude-code@latest",
+                    timeout=120,  # 2 min timeout for install
+                )
+                if install_result.exit_code != 0:
+                    raise Exception(
+                        f"Failed to install Claude Code: {install_result.stderr}"
+                    )
+
+                # Run any user-provided setup commands
+                for cmd in setup_commands:
+                    setup_result = await sandbox.commands.run(cmd)
+                    if setup_result.exit_code != 0:
+                        raise Exception(
+                            f"Setup command failed: {cmd}\n"
+                            f"Exit code: {setup_result.exit_code}\n"
+                            f"Stdout: {setup_result.stdout}\n"
+                            f"Stderr: {setup_result.stderr}"
+                        )
+
+            # Capture sandbox_id immediately after creation/connection
+            # so it's available for error recovery if dispose_sandbox=False
+            sandbox_id = sandbox.sandbox_id
+
+            # Generate or use provided session ID
+            current_session_id = session_id if session_id else str(uuid.uuid4())
+
+            # Build base Claude flags
+            base_flags = "-p --dangerously-skip-permissions --output-format json"
+
+            # Add conversation history context if provided (for fresh sandbox continuation)
+            history_flag = ""
+            if conversation_history and not session_id:
+                # Inject previous conversation as context via system prompt
+                # Use consistent escaping via _escape_prompt helper
+                escaped_history = self._escape_prompt(
+                    f"Previous conversation context: {conversation_history}"
+                )
+                history_flag = f" --append-system-prompt {escaped_history}"
+
+            # Build Claude command based on whether we're resuming or starting new
+            # Use shlex.quote for working_directory and session IDs to prevent injection
+            safe_working_dir = shlex.quote(working_directory)
+            if session_id:
+                # Resuming existing session (sandbox still alive)
+                safe_session_id = shlex.quote(session_id)
+                claude_command = (
+                    f"cd {safe_working_dir} && "
+                    f"echo {self._escape_prompt(prompt)} | "
+                    f"claude --resume {safe_session_id} {base_flags}"
+                )
+            else:
+                # New session with specific ID
+                safe_current_session_id = shlex.quote(current_session_id)
+                claude_command = (
+                    f"cd {safe_working_dir} && "
+                    f"echo {self._escape_prompt(prompt)} | "
+                    f"claude --session-id {safe_current_session_id} {base_flags}{history_flag}"
+                )
+
+            # Capture timestamp before running Claude Code to filter files later
+            # Capture timestamp 1 second in the past to avoid race condition with file creation
+            timestamp_result = await sandbox.commands.run(
+                "date -u -d '1 second ago' +%Y-%m-%dT%H:%M:%S"
+            )
+            if timestamp_result.exit_code != 0:
+                raise RuntimeError(
+                    f"Failed to capture timestamp: {timestamp_result.stderr}"
+                )
+            start_timestamp = (
+                timestamp_result.stdout.strip() if timestamp_result.stdout else None
+            )
+
+            result = await sandbox.commands.run(
+                claude_command,
+                timeout=0,  # No command timeout - let sandbox timeout handle it
+            )
+
+            # Check for command failure
+            if result.exit_code != 0:
+                error_msg = result.stderr or result.stdout or "Unknown error"
+                raise Exception(
+                    f"Claude Code command failed with exit code {result.exit_code}:\n"
+                    f"{error_msg}"
+                )
+
+            raw_output = result.stdout or ""
+
+            # Parse JSON output to extract response and build conversation history
+            response = ""
+            new_conversation_history = conversation_history or ""
+
+            try:
+                # The JSON output contains the result
+                output_data = json.loads(raw_output)
+                response = output_data.get("result", raw_output)
+
+                # Build conversation history entry
+                turn_entry = f"User: {prompt}\nClaude: {response}"
+                if new_conversation_history:
+                    new_conversation_history = (
+                        f"{new_conversation_history}\n\n{turn_entry}"
+                    )
+                else:
+                    new_conversation_history = turn_entry
+
+            except json.JSONDecodeError:
+                # If not valid JSON, use raw output
+                response = raw_output
+                turn_entry = f"User: {prompt}\nClaude: {response}"
+                if new_conversation_history:
+                    new_conversation_history = (
+                        f"{new_conversation_history}\n\n{turn_entry}"
+                    )
+                else:
+                    new_conversation_history = turn_entry
+
+            # Extract files created/modified during this run
+            files = await self._extract_files(
+                sandbox, working_directory, start_timestamp
+            )
+
+            return (
+                response,
+                files,
+                new_conversation_history,
+                current_session_id,
+                sandbox_id,
+            )
+
+        except Exception as e:
+            # Wrap exception with sandbox_id so caller can access/cleanup
+            # the preserved sandbox when dispose_sandbox=False
+            raise ClaudeCodeExecutionError(str(e), sandbox_id) from e
+
+        finally:
+            if dispose_sandbox and sandbox:
+                await sandbox.kill()
+
+    async def _extract_files(
+        self,
+        sandbox: BaseAsyncSandbox,
+        working_directory: str,
+        since_timestamp: str | None = None,
+    ) -> list["ClaudeCodeBlock.FileOutput"]:
+        """
+        Extract text files created/modified during this Claude Code execution.
+
+        Args:
+            sandbox: The E2B sandbox instance
+            working_directory: Directory to search for files
+            since_timestamp: ISO timestamp - only return files modified after this time
+
+        Returns:
+            List of FileOutput objects with path, relative_path, name, and content
+        """
+        files: list[ClaudeCodeBlock.FileOutput] = []
+
+        # Text file extensions we can safely read as text
+        text_extensions = {
+            ".txt",
+            ".md",
+            ".html",
+            ".htm",
+            ".css",
+            ".js",
+            ".ts",
+            ".jsx",
+            ".tsx",
+            ".json",
+            ".xml",
+            ".yaml",
+            ".yml",
+            ".toml",
+            ".ini",
+            ".cfg",
+            ".conf",
+            ".py",
+            ".rb",
+            ".php",
+            ".java",
+            ".c",
+            ".cpp",
+            ".h",
+            ".hpp",
+            ".cs",
+            ".go",
+            ".rs",
+            ".swift",
+            ".kt",
+            ".scala",
+            ".sh",
+            ".bash",
+            ".zsh",
+            ".sql",
+            ".graphql",
+            ".env",
+            ".gitignore",
+            ".dockerfile",
+            "Dockerfile",
+            ".vue",
+            ".svelte",
+            ".astro",
+            ".mdx",
+            ".rst",
+            ".tex",
+            ".csv",
+            ".log",
+        }
+
+        try:
+            # List files recursively using find command
+            # Exclude node_modules and .git directories, but allow hidden files
+            # like .env and .gitignore (they're filtered by text_extensions later)
+            # Filter by timestamp to only get files created/modified during this run
+            safe_working_dir = shlex.quote(working_directory)
+            timestamp_filter = ""
+            if since_timestamp:
+                timestamp_filter = f"-newermt {shlex.quote(since_timestamp)} "
+            find_result = await sandbox.commands.run(
+                f"find {safe_working_dir} -type f "
+                f"{timestamp_filter}"
+                f"-not -path '*/node_modules/*' "
+                f"-not -path '*/.git/*' "
+                f"2>/dev/null"
+            )
+
+            if find_result.stdout:
+                for file_path in find_result.stdout.strip().split("\n"):
+                    if not file_path:
+                        continue
+
+                    # Check if it's a text file we can read
+                    is_text = any(
+                        file_path.endswith(ext) for ext in text_extensions
+                    ) or file_path.endswith("Dockerfile")
+
+                    if is_text:
+                        try:
+                            content = await sandbox.files.read(file_path)
+                            # Handle bytes or string
+                            if isinstance(content, bytes):
+                                content = content.decode("utf-8", errors="replace")
+
+                            # Extract filename from path
+                            file_name = file_path.split("/")[-1]
+
+                            # Calculate relative path by stripping working directory
+                            relative_path = file_path
+                            if file_path.startswith(working_directory):
+                                relative_path = file_path[len(working_directory) :]
+                                # Remove leading slash if present
+                                if relative_path.startswith("/"):
+                                    relative_path = relative_path[1:]
+
+                            files.append(
+                                ClaudeCodeBlock.FileOutput(
+                                    path=file_path,
+                                    relative_path=relative_path,
+                                    name=file_name,
+                                    content=content,
+                                )
+                            )
+                        except Exception:
+                            # Skip files that can't be read
+                            pass
+
+        except Exception:
+            # If file extraction fails, return empty results
+            pass
+
+        return files
+
+    def _escape_prompt(self, prompt: str) -> str:
+        """Escape the prompt for safe shell execution."""
+        # Use single quotes and escape any single quotes in the prompt
+        escaped = prompt.replace("'", "'\"'\"'")
+        return f"'{escaped}'"
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        e2b_credentials: APIKeyCredentials,
+        anthropic_credentials: APIKeyCredentials,
+        **kwargs,
+    ) -> BlockOutput:
+        try:
+            (
+                response,
+                files,
+                conversation_history,
+                session_id,
+                sandbox_id,
+            ) = await self.execute_claude_code(
+                e2b_api_key=e2b_credentials.api_key.get_secret_value(),
+                anthropic_api_key=anthropic_credentials.api_key.get_secret_value(),
+                prompt=input_data.prompt,
+                timeout=input_data.timeout,
+                setup_commands=input_data.setup_commands,
+                working_directory=input_data.working_directory,
+                session_id=input_data.session_id,
+                existing_sandbox_id=input_data.sandbox_id,
+                conversation_history=input_data.conversation_history,
+                dispose_sandbox=input_data.dispose_sandbox,
+            )
+
+            yield "response", response
+            # Always yield files (empty list if none) to match Output schema
+            yield "files", [f.model_dump() for f in files]
+            # Always yield conversation_history so user can restore context on fresh sandbox
+            yield "conversation_history", conversation_history
+            # Always yield session_id so user can continue conversation
+            yield "session_id", session_id
+            # Always yield sandbox_id (None if disposed) to match Output schema
+            yield "sandbox_id", sandbox_id if not input_data.dispose_sandbox else None
+
+        except ClaudeCodeExecutionError as e:
+            yield "error", str(e)
+            # If sandbox was preserved (dispose_sandbox=False), yield sandbox_id
+            # so user can reconnect to or clean up the orphaned sandbox
+            if not input_data.dispose_sandbox and e.sandbox_id:
+                yield "sandbox_id", e.sandbox_id
+        except Exception as e:
+            yield "error", str(e)
--- a/autogpt_platform/backend/backend/blocks/data_manipulation.py
+++ b/autogpt_platform/backend/backend/blocks/data_manipulation.py
@@ -680,3 +680,58 @@ class ListIsEmptyBlock(Block):

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        yield "is_empty", len(input_data.list) == 0
+
+
+class ConcatenateListsBlock(Block):
+    class Input(BlockSchemaInput):
+        lists: List[List[Any]] = SchemaField(
+            description="A list of lists to concatenate together. All lists will be combined in order into a single list.",
+            placeholder="e.g., [[1, 2], [3, 4], [5, 6]]",
+        )
+
+    class Output(BlockSchemaOutput):
+        concatenated_list: List[Any] = SchemaField(
+            description="The concatenated list containing all elements from all input lists in order."
+        )
+        error: str = SchemaField(
+            description="Error message if concatenation failed due to invalid input types."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3cf9298b-5817-4141-9d80-7c2cc5199c8e",
+            description="Concatenates multiple lists into a single list. All elements from all input lists are combined in order.",
+            categories={BlockCategory.BASIC},
+            input_schema=ConcatenateListsBlock.Input,
+            output_schema=ConcatenateListsBlock.Output,
+            test_input=[
+                {"lists": [[1, 2, 3], [4, 5, 6]]},
+                {"lists": [["a", "b"], ["c"], ["d", "e", "f"]]},
+                {"lists": [[1, 2], []]},
+                {"lists": []},
+            ],
+            test_output=[
+                ("concatenated_list", [1, 2, 3, 4, 5, 6]),
+                ("concatenated_list", ["a", "b", "c", "d", "e", "f"]),
+                ("concatenated_list", [1, 2]),
+                ("concatenated_list", []),
+            ],
+        )
+
+    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
+        concatenated = []
+        for idx, lst in enumerate(input_data.lists):
+            if lst is None:
+                # Skip None values to avoid errors
+                continue
+            if not isinstance(lst, list):
+                # Type validation: each item must be a list
+                # Strings are iterable and would cause extend() to iterate character-by-character
+                # Non-iterable types would raise TypeError
+                yield "error", (
+                    f"Invalid input at index {idx}: expected a list, got {type(lst).__name__}. "
+                    f"All items in 'lists' must be lists (e.g., [[1, 2], [3, 4]])."
+                )
+                return
+            concatenated.extend(lst)
+        yield "concatenated_list", concatenated
--- a/autogpt_platform/backend/backend/blocks/helpers/review.py
+++ b/autogpt_platform/backend/backend/blocks/helpers/review.py
@@ -9,7 +9,7 @@ from typing import Any, Optional
 from prisma.enums import ReviewStatus
 from pydantic import BaseModel

-from backend.data.execution import ExecutionContext, ExecutionStatus
+from backend.data.execution import ExecutionStatus
 from backend.data.human_review import ReviewResult
 from backend.executor.manager import async_update_node_execution_status
 from backend.util.clients import get_database_manager_async_client
@@ -28,6 +28,11 @@ class ReviewDecision(BaseModel):
 class HITLReviewHelper:
    """Helper class for Human-In-The-Loop review operations."""

+    @staticmethod
+    async def check_approval(**kwargs) -> Optional[ReviewResult]:
+        """Check if there's an existing approval for this node execution."""
+        return await get_database_manager_async_client().check_approval(**kwargs)
+
    @staticmethod
    async def get_or_create_human_review(**kwargs) -> Optional[ReviewResult]:
        """Create or retrieve a human review from the database."""
@@ -55,11 +60,11 @@ class HITLReviewHelper:
    async def _handle_review_request(
        input_data: Any,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
        graph_version: int,
-        execution_context: ExecutionContext,
        block_name: str = "Block",
        editable: bool = False,
    ) -> Optional[ReviewResult]:
@@ -69,11 +74,11 @@ class HITLReviewHelper:
        Args:
            input_data: The input data to be reviewed
            user_id: ID of the user requesting the review
+            node_id: ID of the node in the graph definition
            node_exec_id: ID of the node execution
            graph_exec_id: ID of the graph execution
            graph_id: ID of the graph
            graph_version: Version of the graph
-            execution_context: Current execution context
            block_name: Name of the block requesting review
            editable: Whether the reviewer can edit the data

@@ -83,15 +88,41 @@ class HITLReviewHelper:
        Raises:
            Exception: If review creation or status update fails
        """
-        # Skip review if safe mode is disabled - return auto-approved result
-        if not execution_context.safe_mode:
+        # Note: Safe mode checks (human_in_the_loop_safe_mode, sensitive_action_safe_mode)
+        # are handled by the caller:
+        # - HITL blocks check human_in_the_loop_safe_mode in their run() method
+        # - Sensitive action blocks check sensitive_action_safe_mode in is_block_exec_need_review()
+        # This function only handles checking for existing approvals.
+
+        # Check if this node has already been approved (normal or auto-approval)
+        if approval_result := await HITLReviewHelper.check_approval(
+            node_exec_id=node_exec_id,
+            graph_exec_id=graph_exec_id,
+            node_id=node_id,
+            user_id=user_id,
+            input_data=input_data,
+        ):
            logger.info(
-                f"Block {block_name} skipping review for node {node_exec_id} - safe mode disabled"
+                f"Block {block_name} skipping review for node {node_exec_id} - "
+                f"found existing approval"
+            )
+            # Return a new ReviewResult with the current node_exec_id but approved status
+            # For auto-approvals, always use current input_data
+            # For normal approvals, use approval_result.data unless it's None
+            is_auto_approval = approval_result.node_exec_id != node_exec_id
+            approved_data = (
+                input_data
+                if is_auto_approval
+                else (
+                    approval_result.data
+                    if approval_result.data is not None
+                    else input_data
+                )
            )
            return ReviewResult(
-                data=input_data,
+                data=approved_data,
                status=ReviewStatus.APPROVED,
-                message="Auto-approved (safe mode disabled)",
+                message=approval_result.message,
                processed=True,
                node_exec_id=node_exec_id,
            )
@@ -103,7 +134,7 @@ class HITLReviewHelper:
            graph_id=graph_id,
            graph_version=graph_version,
            input_data=input_data,
-            message=f"Review required for {block_name} execution",
+            message=block_name,  # Use block_name directly as the message
            editable=editable,
        )

@@ -129,11 +160,11 @@ class HITLReviewHelper:
    async def handle_review_decision(
        input_data: Any,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
        graph_version: int,
-        execution_context: ExecutionContext,
        block_name: str = "Block",
        editable: bool = False,
    ) -> Optional[ReviewDecision]:
@@ -143,11 +174,11 @@ class HITLReviewHelper:
        Args:
            input_data: The input data to be reviewed
            user_id: ID of the user requesting the review
+            node_id: ID of the node in the graph definition
            node_exec_id: ID of the node execution
            graph_exec_id: ID of the graph execution
            graph_id: ID of the graph
            graph_version: Version of the graph
-            execution_context: Current execution context
            block_name: Name of the block requesting review
            editable: Whether the reviewer can edit the data

@@ -158,11 +189,11 @@ class HITLReviewHelper:
        review_result = await HITLReviewHelper._handle_review_request(
            input_data=input_data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
            block_name=block_name,
            editable=editable,
        )
--- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
+++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
@@ -97,6 +97,7 @@ class HumanInTheLoopBlock(Block):
        input_data: Input,
        *,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
@@ -104,7 +105,7 @@ class HumanInTheLoopBlock(Block):
        execution_context: ExecutionContext,
        **_kwargs,
    ) -> BlockOutput:
-        if not execution_context.safe_mode:
+        if not execution_context.human_in_the_loop_safe_mode:
            logger.info(
                f"HITL block skipping review for node {node_exec_id} - safe mode disabled"
            )
@@ -115,12 +116,12 @@ class HumanInTheLoopBlock(Block):
        decision = await self.handle_review_decision(
            input_data=input_data.data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
-            block_name=self.name,
+            block_name=input_data.name,  # Use user-provided name instead of block type
            editable=input_data.editable,
        )

--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -79,6 +79,10 @@ class ModelMetadata(NamedTuple):
    provider: str
    context_window: int
    max_output_tokens: int | None
+    display_name: str
+    provider_name: str
+    creator_name: str
+    price_tier: Literal[1, 2, 3]


 class LlmModelMeta(EnumMeta):
@@ -171,6 +175,26 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
    V0_1_5_LG = "v0-1.5-lg"
    V0_1_0_MD = "v0-1.0-md"

+    @classmethod
+    def __get_pydantic_json_schema__(cls, schema, handler):
+        json_schema = handler(schema)
+        llm_model_metadata = {}
+        for model in cls:
+            model_name = model.value
+            metadata = model.metadata
+            llm_model_metadata[model_name] = {
+                "creator": metadata.creator_name,
+                "creator_name": metadata.creator_name,
+                "title": metadata.display_name,
+                "provider": metadata.provider,
+                "provider_name": metadata.provider_name,
+                "name": model_name,
+                "price_tier": metadata.price_tier,
+            }
+        json_schema["llm_model"] = True
+        json_schema["llm_model_metadata"] = llm_model_metadata
+        return json_schema
+
    @property
    def metadata(self) -> ModelMetadata:
        return MODEL_METADATA[self]
@@ -190,119 +214,291 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):

 MODEL_METADATA = {
    # https://platform.openai.com/docs/models
-    LlmModel.O3: ModelMetadata("openai", 200000, 100000),
-    LlmModel.O3_MINI: ModelMetadata("openai", 200000, 100000),  # o3-mini-2025-01-31
-    LlmModel.O1: ModelMetadata("openai", 200000, 100000),  # o1-2024-12-17
-    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
+    LlmModel.O3: ModelMetadata("openai", 200000, 100000, "O3", "OpenAI", "OpenAI", 2),
+    LlmModel.O3_MINI: ModelMetadata(
+        "openai", 200000, 100000, "O3 Mini", "OpenAI", "OpenAI", 1
+    ),  # o3-mini-2025-01-31
+    LlmModel.O1: ModelMetadata(
+        "openai", 200000, 100000, "O1", "OpenAI", "OpenAI", 3
+    ),  # o1-2024-12-17
+    LlmModel.O1_MINI: ModelMetadata(
+        "openai", 128000, 65536, "O1 Mini", "OpenAI", "OpenAI", 2
+    ),  # o1-mini-2024-09-12
    # GPT-5 models
-    LlmModel.GPT5_2: ModelMetadata("openai", 400000, 128000),
-    LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
-    LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
-    LlmModel.GPT5_MINI: ModelMetadata("openai", 400000, 128000),
-    LlmModel.GPT5_NANO: ModelMetadata("openai", 400000, 128000),
-    LlmModel.GPT5_CHAT: ModelMetadata("openai", 400000, 16384),
-    LlmModel.GPT41: ModelMetadata("openai", 1047576, 32768),
-    LlmModel.GPT41_MINI: ModelMetadata("openai", 1047576, 32768),
+    LlmModel.GPT5_2: ModelMetadata(
+        "openai", 400000, 128000, "GPT-5.2", "OpenAI", "OpenAI", 3
+    ),
+    LlmModel.GPT5_1: ModelMetadata(
+        "openai", 400000, 128000, "GPT-5.1", "OpenAI", "OpenAI", 2
+    ),
+    LlmModel.GPT5: ModelMetadata(
+        "openai", 400000, 128000, "GPT-5", "OpenAI", "OpenAI", 1
+    ),
+    LlmModel.GPT5_MINI: ModelMetadata(
+        "openai", 400000, 128000, "GPT-5 Mini", "OpenAI", "OpenAI", 1
+    ),
+    LlmModel.GPT5_NANO: ModelMetadata(
+        "openai", 400000, 128000, "GPT-5 Nano", "OpenAI", "OpenAI", 1
+    ),
+    LlmModel.GPT5_CHAT: ModelMetadata(
+        "openai", 400000, 16384, "GPT-5 Chat Latest", "OpenAI", "OpenAI", 2
+    ),
+    LlmModel.GPT41: ModelMetadata(
+        "openai", 1047576, 32768, "GPT-4.1", "OpenAI", "OpenAI", 1
+    ),
+    LlmModel.GPT41_MINI: ModelMetadata(
+        "openai", 1047576, 32768, "GPT-4.1 Mini", "OpenAI", "OpenAI", 1
+    ),
    LlmModel.GPT4O_MINI: ModelMetadata(
-        "openai", 128000, 16384
+        "openai", 128000, 16384, "GPT-4o Mini", "OpenAI", "OpenAI", 1
    ),  # gpt-4o-mini-2024-07-18
-    LlmModel.GPT4O: ModelMetadata("openai", 128000, 16384),  # gpt-4o-2024-08-06
+    LlmModel.GPT4O: ModelMetadata(
+        "openai", 128000, 16384, "GPT-4o", "OpenAI", "OpenAI", 2
+    ),  # gpt-4o-2024-08-06
    LlmModel.GPT4_TURBO: ModelMetadata(
-        "openai", 128000, 4096
+        "openai", 128000, 4096, "GPT-4 Turbo", "OpenAI", "OpenAI", 3
    ),  # gpt-4-turbo-2024-04-09
-    LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385, 4096),  # gpt-3.5-turbo-0125
+    LlmModel.GPT3_5_TURBO: ModelMetadata(
+        "openai", 16385, 4096, "GPT-3.5 Turbo", "OpenAI", "OpenAI", 1
+    ),  # gpt-3.5-turbo-0125
    # https://docs.anthropic.com/en/docs/about-claude/models
    LlmModel.CLAUDE_4_1_OPUS: ModelMetadata(
-        "anthropic", 200000, 32000
+        "anthropic", 200000, 32000, "Claude Opus 4.1", "Anthropic", "Anthropic", 3
    ),  # claude-opus-4-1-20250805
    LlmModel.CLAUDE_4_OPUS: ModelMetadata(
-        "anthropic", 200000, 32000
+        "anthropic", 200000, 32000, "Claude Opus 4", "Anthropic", "Anthropic", 3
    ),  # claude-4-opus-20250514
    LlmModel.CLAUDE_4_SONNET: ModelMetadata(
-        "anthropic", 200000, 64000
+        "anthropic", 200000, 64000, "Claude Sonnet 4", "Anthropic", "Anthropic", 2
    ),  # claude-4-sonnet-20250514
    LlmModel.CLAUDE_4_5_OPUS: ModelMetadata(
-        "anthropic", 200000, 64000
+        "anthropic", 200000, 64000, "Claude Opus 4.5", "Anthropic", "Anthropic", 3
    ),  # claude-opus-4-5-20251101
    LlmModel.CLAUDE_4_5_SONNET: ModelMetadata(
-        "anthropic", 200000, 64000
+        "anthropic", 200000, 64000, "Claude Sonnet 4.5", "Anthropic", "Anthropic", 3
    ),  # claude-sonnet-4-5-20250929
    LlmModel.CLAUDE_4_5_HAIKU: ModelMetadata(
-        "anthropic", 200000, 64000
+        "anthropic", 200000, 64000, "Claude Haiku 4.5", "Anthropic", "Anthropic", 2
    ),  # claude-haiku-4-5-20251001
    LlmModel.CLAUDE_3_7_SONNET: ModelMetadata(
-        "anthropic", 200000, 64000
+        "anthropic", 200000, 64000, "Claude 3.7 Sonnet", "Anthropic", "Anthropic", 2
    ),  # claude-3-7-sonnet-20250219
    LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
-        "anthropic", 200000, 4096
+        "anthropic", 200000, 4096, "Claude 3 Haiku", "Anthropic", "Anthropic", 1
    ),  # claude-3-haiku-20240307
    # https://docs.aimlapi.com/api-overview/model-database/text-models
-    LlmModel.AIML_API_QWEN2_5_72B: ModelMetadata("aiml_api", 32000, 8000),
-    LlmModel.AIML_API_LLAMA3_1_70B: ModelMetadata("aiml_api", 128000, 40000),
-    LlmModel.AIML_API_LLAMA3_3_70B: ModelMetadata("aiml_api", 128000, None),
-    LlmModel.AIML_API_META_LLAMA_3_1_70B: ModelMetadata("aiml_api", 131000, 2000),
-    LlmModel.AIML_API_LLAMA_3_2_3B: ModelMetadata("aiml_api", 128000, None),
-    # https://console.groq.com/docs/models
-    LlmModel.LLAMA3_3_70B: ModelMetadata("groq", 128000, 32768),
-    LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 128000, 8192),
-    # https://ollama.com/library
-    LlmModel.OLLAMA_LLAMA3_3: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, None),
-    LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, None),
-    # https://openrouter.ai/models
-    LlmModel.GEMINI_2_5_PRO: ModelMetadata("open_router", 1050000, 8192),
-    LlmModel.GEMINI_3_PRO_PREVIEW: ModelMetadata("open_router", 1048576, 65535),
-    LlmModel.GEMINI_2_5_FLASH: ModelMetadata("open_router", 1048576, 65535),
-    LlmModel.GEMINI_2_0_FLASH: ModelMetadata("open_router", 1048576, 8192),
-    LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: ModelMetadata(
-        "open_router", 1048576, 65535
+    LlmModel.AIML_API_QWEN2_5_72B: ModelMetadata(
+        "aiml_api", 32000, 8000, "Qwen 2.5 72B Instruct Turbo", "AI/ML", "Qwen", 1
+    ),
+    LlmModel.AIML_API_LLAMA3_1_70B: ModelMetadata(
+        "aiml_api",
+        128000,
+        40000,
+        "Llama 3.1 Nemotron 70B Instruct",
+        "AI/ML",
+        "Nvidia",
+        1,
+    ),
+    LlmModel.AIML_API_LLAMA3_3_70B: ModelMetadata(
+        "aiml_api", 128000, None, "Llama 3.3 70B Instruct Turbo", "AI/ML", "Meta", 1
+    ),
+    LlmModel.AIML_API_META_LLAMA_3_1_70B: ModelMetadata(
+        "aiml_api", 131000, 2000, "Llama 3.1 70B Instruct Turbo", "AI/ML", "Meta", 1
+    ),
+    LlmModel.AIML_API_LLAMA_3_2_3B: ModelMetadata(
+        "aiml_api", 128000, None, "Llama 3.2 3B Instruct Turbo", "AI/ML", "Meta", 1
+    ),
+    # https://console.groq.com/docs/models
+    LlmModel.LLAMA3_3_70B: ModelMetadata(
+        "groq", 128000, 32768, "Llama 3.3 70B Versatile", "Groq", "Meta", 1
+    ),
+    LlmModel.LLAMA3_1_8B: ModelMetadata(
+        "groq", 128000, 8192, "Llama 3.1 8B Instant", "Groq", "Meta", 1
+    ),
+    # https://ollama.com/library
+    LlmModel.OLLAMA_LLAMA3_3: ModelMetadata(
+        "ollama", 8192, None, "Llama 3.3", "Ollama", "Meta", 1
+    ),
+    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata(
+        "ollama", 8192, None, "Llama 3.2", "Ollama", "Meta", 1
+    ),
+    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata(
+        "ollama", 8192, None, "Llama 3", "Ollama", "Meta", 1
+    ),
+    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata(
+        "ollama", 8192, None, "Llama 3.1 405B", "Ollama", "Meta", 1
+    ),
+    LlmModel.OLLAMA_DOLPHIN: ModelMetadata(
+        "ollama", 32768, None, "Dolphin Mistral Latest", "Ollama", "Mistral AI", 1
+    ),
+    # https://openrouter.ai/models
+    LlmModel.GEMINI_2_5_PRO: ModelMetadata(
+        "open_router",
+        1050000,
+        8192,
+        "Gemini 2.5 Pro Preview 03.25",
+        "OpenRouter",
+        "Google",
+        2,
+    ),
+    LlmModel.GEMINI_3_PRO_PREVIEW: ModelMetadata(
+        "open_router", 1048576, 65535, "Gemini 3 Pro Preview", "OpenRouter", "Google", 2
+    ),
+    LlmModel.GEMINI_2_5_FLASH: ModelMetadata(
+        "open_router", 1048576, 65535, "Gemini 2.5 Flash", "OpenRouter", "Google", 1
+    ),
+    LlmModel.GEMINI_2_0_FLASH: ModelMetadata(
+        "open_router", 1048576, 8192, "Gemini 2.0 Flash 001", "OpenRouter", "Google", 1
+    ),
+    LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: ModelMetadata(
+        "open_router",
+        1048576,
+        65535,
+        "Gemini 2.5 Flash Lite Preview 06.17",
+        "OpenRouter",
+        "Google",
+        1,
+    ),
+    LlmModel.GEMINI_2_0_FLASH_LITE: ModelMetadata(
+        "open_router",
+        1048576,
+        8192,
+        "Gemini 2.0 Flash Lite 001",
+        "OpenRouter",
+        "Google",
+        1,
+    ),
+    LlmModel.MISTRAL_NEMO: ModelMetadata(
+        "open_router", 128000, 4096, "Mistral Nemo", "OpenRouter", "Mistral AI", 1
+    ),
+    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata(
+        "open_router", 128000, 4096, "Command R 08.2024", "OpenRouter", "Cohere", 1
+    ),
+    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata(
+        "open_router", 128000, 4096, "Command R Plus 08.2024", "OpenRouter", "Cohere", 2
+    ),
+    LlmModel.DEEPSEEK_CHAT: ModelMetadata(
+        "open_router", 64000, 2048, "DeepSeek Chat", "OpenRouter", "DeepSeek", 1
+    ),
+    LlmModel.DEEPSEEK_R1_0528: ModelMetadata(
+        "open_router", 163840, 163840, "DeepSeek R1 0528", "OpenRouter", "DeepSeek", 1
+    ),
+    LlmModel.PERPLEXITY_SONAR: ModelMetadata(
+        "open_router", 127000, 8000, "Sonar", "OpenRouter", "Perplexity", 1
+    ),
+    LlmModel.PERPLEXITY_SONAR_PRO: ModelMetadata(
+        "open_router", 200000, 8000, "Sonar Pro", "OpenRouter", "Perplexity", 2
    ),
-    LlmModel.GEMINI_2_0_FLASH_LITE: ModelMetadata("open_router", 1048576, 8192),
-    LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 128000, 4096),
-    LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 64000, 2048),
-    LlmModel.DEEPSEEK_R1_0528: ModelMetadata("open_router", 163840, 163840),
-    LlmModel.PERPLEXITY_SONAR: ModelMetadata("open_router", 127000, 8000),
-    LlmModel.PERPLEXITY_SONAR_PRO: ModelMetadata("open_router", 200000, 8000),
    LlmModel.PERPLEXITY_SONAR_DEEP_RESEARCH: ModelMetadata(
        "open_router",
        128000,
        16000,
+        "Sonar Deep Research",
+        "OpenRouter",
+        "Perplexity",
+        3,
    ),
    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
-        "open_router", 131000, 4096
+        "open_router",
+        131000,
+        4096,
+        "Hermes 3 Llama 3.1 405B",
+        "OpenRouter",
+        "Nous Research",
+        1,
    ),
    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
-        "open_router", 12288, 12288
+        "open_router",
+        12288,
+        12288,
+        "Hermes 3 Llama 3.1 70B",
+        "OpenRouter",
+        "Nous Research",
+        1,
+    ),
+    LlmModel.OPENAI_GPT_OSS_120B: ModelMetadata(
+        "open_router", 131072, 131072, "GPT-OSS 120B", "OpenRouter", "OpenAI", 1
+    ),
+    LlmModel.OPENAI_GPT_OSS_20B: ModelMetadata(
+        "open_router", 131072, 32768, "GPT-OSS 20B", "OpenRouter", "OpenAI", 1
+    ),
+    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata(
+        "open_router", 300000, 5120, "Nova Lite V1", "OpenRouter", "Amazon", 1
+    ),
+    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata(
+        "open_router", 128000, 5120, "Nova Micro V1", "OpenRouter", "Amazon", 1
+    ),
+    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata(
+        "open_router", 300000, 5120, "Nova Pro V1", "OpenRouter", "Amazon", 1
+    ),
+    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata(
+        "open_router", 65536, 4096, "WizardLM 2 8x22B", "OpenRouter", "Microsoft", 1
+    ),
+    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata(
+        "open_router", 4096, 4096, "MythoMax L2 13B", "OpenRouter", "Gryphe", 1
+    ),
+    LlmModel.META_LLAMA_4_SCOUT: ModelMetadata(
+        "open_router", 131072, 131072, "Llama 4 Scout", "OpenRouter", "Meta", 1
+    ),
+    LlmModel.META_LLAMA_4_MAVERICK: ModelMetadata(
+        "open_router", 1048576, 1000000, "Llama 4 Maverick", "OpenRouter", "Meta", 1
+    ),
+    LlmModel.GROK_4: ModelMetadata(
+        "open_router", 256000, 256000, "Grok 4", "OpenRouter", "xAI", 3
+    ),
+    LlmModel.GROK_4_FAST: ModelMetadata(
+        "open_router", 2000000, 30000, "Grok 4 Fast", "OpenRouter", "xAI", 1
+    ),
+    LlmModel.GROK_4_1_FAST: ModelMetadata(
+        "open_router", 2000000, 30000, "Grok 4.1 Fast", "OpenRouter", "xAI", 1
+    ),
+    LlmModel.GROK_CODE_FAST_1: ModelMetadata(
+        "open_router", 256000, 10000, "Grok Code Fast 1", "OpenRouter", "xAI", 1
+    ),
+    LlmModel.KIMI_K2: ModelMetadata(
+        "open_router", 131000, 131000, "Kimi K2", "OpenRouter", "Moonshot AI", 1
+    ),
+    LlmModel.QWEN3_235B_A22B_THINKING: ModelMetadata(
+        "open_router",
+        262144,
+        262144,
+        "Qwen 3 235B A22B Thinking 2507",
+        "OpenRouter",
+        "Qwen",
+        1,
+    ),
+    LlmModel.QWEN3_CODER: ModelMetadata(
+        "open_router", 262144, 262144, "Qwen 3 Coder", "OpenRouter", "Qwen", 3
    ),
-    LlmModel.OPENAI_GPT_OSS_120B: ModelMetadata("open_router", 131072, 131072),
-    LlmModel.OPENAI_GPT_OSS_20B: ModelMetadata("open_router", 131072, 32768),
-    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 128000, 5120),
-    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 300000, 5120),
-    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 65536, 4096),
-    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
-    LlmModel.META_LLAMA_4_SCOUT: ModelMetadata("open_router", 131072, 131072),
-    LlmModel.META_LLAMA_4_MAVERICK: ModelMetadata("open_router", 1048576, 1000000),
-    LlmModel.GROK_4: ModelMetadata("open_router", 256000, 256000),
-    LlmModel.GROK_4_FAST: ModelMetadata("open_router", 2000000, 30000),
-    LlmModel.GROK_4_1_FAST: ModelMetadata("open_router", 2000000, 30000),
-    LlmModel.GROK_CODE_FAST_1: ModelMetadata("open_router", 256000, 10000),
-    LlmModel.KIMI_K2: ModelMetadata("open_router", 131000, 131000),
-    LlmModel.QWEN3_235B_A22B_THINKING: ModelMetadata("open_router", 262144, 262144),
-    LlmModel.QWEN3_CODER: ModelMetadata("open_router", 262144, 262144),
    # Llama API models
-    LlmModel.LLAMA_API_LLAMA_4_SCOUT: ModelMetadata("llama_api", 128000, 4028),
-    LlmModel.LLAMA_API_LLAMA4_MAVERICK: ModelMetadata("llama_api", 128000, 4028),
-    LlmModel.LLAMA_API_LLAMA3_3_8B: ModelMetadata("llama_api", 128000, 4028),
-    LlmModel.LLAMA_API_LLAMA3_3_70B: ModelMetadata("llama_api", 128000, 4028),
+    LlmModel.LLAMA_API_LLAMA_4_SCOUT: ModelMetadata(
+        "llama_api",
+        128000,
+        4028,
+        "Llama 4 Scout 17B 16E Instruct FP8",
+        "Llama API",
+        "Meta",
+        1,
+    ),
+    LlmModel.LLAMA_API_LLAMA4_MAVERICK: ModelMetadata(
+        "llama_api",
+        128000,
+        4028,
+        "Llama 4 Maverick 17B 128E Instruct FP8",
+        "Llama API",
+        "Meta",
+        1,
+    ),
+    LlmModel.LLAMA_API_LLAMA3_3_8B: ModelMetadata(
+        "llama_api", 128000, 4028, "Llama 3.3 8B Instruct", "Llama API", "Meta", 1
+    ),
+    LlmModel.LLAMA_API_LLAMA3_3_70B: ModelMetadata(
+        "llama_api", 128000, 4028, "Llama 3.3 70B Instruct", "Llama API", "Meta", 1
+    ),
    # v0 by Vercel models
-    LlmModel.V0_1_5_MD: ModelMetadata("v0", 128000, 64000),
-    LlmModel.V0_1_5_LG: ModelMetadata("v0", 512000, 64000),
-    LlmModel.V0_1_0_MD: ModelMetadata("v0", 128000, 64000),
+    LlmModel.V0_1_5_MD: ModelMetadata("v0", 128000, 64000, "v0 1.5 MD", "V0", "V0", 1),
+    LlmModel.V0_1_5_LG: ModelMetadata("v0", 512000, 64000, "v0 1.5 LG", "V0", "V0", 1),
+    LlmModel.V0_1_0_MD: ModelMetadata("v0", 128000, 64000, "v0 1.0 MD", "V0", "V0", 1),
 }

 DEFAULT_LLM_MODEL = LlmModel.GPT5_2
--- a/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker.py
@@ -242,7 +242,7 @@ async def test_smart_decision_maker_tracks_llm_stats():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -343,7 +343,7 @@ async def test_smart_decision_maker_parameter_validation():

        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -409,7 +409,7 @@ async def test_smart_decision_maker_parameter_validation():

        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -471,7 +471,7 @@ async def test_smart_decision_maker_parameter_validation():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -535,7 +535,7 @@ async def test_smart_decision_maker_parameter_validation():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -658,7 +658,7 @@ async def test_smart_decision_maker_raw_response_conversion():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -730,7 +730,7 @@ async def test_smart_decision_maker_raw_response_conversion():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -786,7 +786,7 @@ async def test_smart_decision_maker_raw_response_conversion():
        outputs = {}
        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

@@ -905,7 +905,7 @@ async def test_smart_decision_maker_agent_mode():
        # Create a mock execution context

        mock_execution_context = ExecutionContext(
-            safe_mode=False,
+            human_in_the_loop_safe_mode=False,
        )

        # Create a mock execution processor for agent mode tests
@@ -1027,7 +1027,7 @@ async def test_smart_decision_maker_traditional_mode_default():

        # Create execution context

-        mock_execution_context = ExecutionContext(safe_mode=False)
+        mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)

        # Create a mock execution processor for tests

--- a/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker_dynamic_fields.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker_dynamic_fields.py
@@ -386,7 +386,7 @@ async def test_output_yielding_with_dynamic_fields():
            outputs = {}
            from backend.data.execution import ExecutionContext

-            mock_execution_context = ExecutionContext(safe_mode=False)
+            mock_execution_context = ExecutionContext(human_in_the_loop_safe_mode=False)
            mock_execution_processor = MagicMock()

            async for output_name, output_value in block.run(
@@ -609,7 +609,9 @@ async def test_validation_errors_dont_pollute_conversation():
                outputs = {}
                from backend.data.execution import ExecutionContext

-                mock_execution_context = ExecutionContext(safe_mode=False)
+                mock_execution_context = ExecutionContext(
+                    human_in_the_loop_safe_mode=False
+                )

                # Create a proper mock execution processor for agent mode
                from collections import defaultdict
--- a/autogpt_platform/backend/backend/conftest.py
+++ b/autogpt_platform/backend/backend/conftest.py
@@ -1,7 +1,7 @@
 import logging
 import os

-import pytest
+import pytest_asyncio
 from dotenv import load_dotenv

 from backend.util.logging import configure_logging
@@ -19,7 +19,7 @@ if not os.getenv("PRISMA_DEBUG"):
    prisma_logger.setLevel(logging.INFO)


-@pytest.fixture(scope="session")
+@pytest_asyncio.fixture(scope="session", loop_scope="session")
 async def server():
    from backend.util.test import SpinTestServer

@@ -27,7 +27,7 @@ async def server():
        yield server


-@pytest.fixture(scope="session", autouse=True)
+@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
 async def graph_cleanup(server):
    created_graph_ids = []
    original_create_graph = server.agent_server.test_create_graph
--- a/autogpt_platform/backend/backend/data/block.py
+++ b/autogpt_platform/backend/backend/data/block.py
@@ -441,6 +441,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        static_output: bool = False,
        block_type: BlockType = BlockType.STANDARD,
        webhook_config: Optional[BlockWebhookConfig | BlockManualWebhookConfig] = None,
+        is_sensitive_action: bool = False,
    ):
        """
        Initialize the block with the given schema.
@@ -473,8 +474,8 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        self.static_output = static_output
        self.block_type = block_type
        self.webhook_config = webhook_config
+        self.is_sensitive_action = is_sensitive_action
        self.execution_stats: NodeExecutionStats = NodeExecutionStats()
-        self.requires_human_review: bool = False

        if self.webhook_config:
            if isinstance(self.webhook_config, BlockWebhookConfig):
@@ -622,6 +623,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        input_data: BlockInput,
        *,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
@@ -637,8 +639,9 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
            - should_pause: True if execution should be paused for review
            - input_data_to_use: The input data to use (may be modified by reviewer)
        """
-        # Skip review if not required or safe mode is disabled
-        if not self.requires_human_review or not execution_context.safe_mode:
+        if not (
+            self.is_sensitive_action and execution_context.sensitive_action_safe_mode
+        ):
            return False, input_data

        from backend.blocks.helpers.review import HITLReviewHelper
@@ -647,11 +650,11 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        decision = await HITLReviewHelper.handle_review_decision(
            input_data=input_data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
            block_name=self.name,
            editable=True,
        )
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -99,10 +99,15 @@ MODEL_COST: dict[LlmModel, int] = {
    LlmModel.OPENAI_GPT_OSS_20B: 1,
    LlmModel.GEMINI_2_5_PRO: 4,
    LlmModel.GEMINI_3_PRO_PREVIEW: 5,
+    LlmModel.GEMINI_2_5_FLASH: 1,
+    LlmModel.GEMINI_2_0_FLASH: 1,
+    LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: 1,
+    LlmModel.GEMINI_2_0_FLASH_LITE: 1,
    LlmModel.MISTRAL_NEMO: 1,
    LlmModel.COHERE_COMMAND_R_08_2024: 1,
    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: 3,
    LlmModel.DEEPSEEK_CHAT: 2,
+    LlmModel.DEEPSEEK_R1_0528: 1,
    LlmModel.PERPLEXITY_SONAR: 1,
    LlmModel.PERPLEXITY_SONAR_PRO: 5,
    LlmModel.PERPLEXITY_SONAR_DEEP_RESEARCH: 10,
@@ -126,11 +131,6 @@ MODEL_COST: dict[LlmModel, int] = {
    LlmModel.KIMI_K2: 1,
    LlmModel.QWEN3_235B_A22B_THINKING: 1,
    LlmModel.QWEN3_CODER: 9,
-    LlmModel.GEMINI_2_5_FLASH: 1,
-    LlmModel.GEMINI_2_0_FLASH: 1,
-    LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: 1,
-    LlmModel.GEMINI_2_0_FLASH_LITE: 1,
-    LlmModel.DEEPSEEK_R1_0528: 1,
    # v0 by Vercel models
    LlmModel.V0_1_5_MD: 1,
    LlmModel.V0_1_5_LG: 2,
--- a/autogpt_platform/backend/backend/data/db.py
+++ b/autogpt_platform/backend/backend/data/db.py
@@ -38,20 +38,6 @@ POOL_TIMEOUT = os.getenv("DB_POOL_TIMEOUT")
 if POOL_TIMEOUT:
    DATABASE_URL = add_param(DATABASE_URL, "pool_timeout", POOL_TIMEOUT)

-# Add public schema to search_path for pgvector type access
-# The vector extension is in public schema, but search_path is determined by schema parameter
-# Extract the schema from DATABASE_URL or default to 'public' (matching get_database_schema())
-parsed_url = urlparse(DATABASE_URL)
-url_params = dict(parse_qsl(parsed_url.query))
-db_schema = url_params.get("schema", "public")
-# Build search_path, avoiding duplicates if db_schema is already 'public'
-search_path_schemas = list(
-    dict.fromkeys([db_schema, "public"])
-)  # Preserves order, removes duplicates
-search_path = ",".join(search_path_schemas)
-# This allows using ::vector without schema qualification
-DATABASE_URL = add_param(DATABASE_URL, "options", f"-c search_path={search_path}")
-
 HTTP_TIMEOUT = int(POOL_TIMEOUT) if POOL_TIMEOUT else None

 prisma = Prisma(
@@ -127,38 +113,48 @@ async def _raw_with_schema(
    *args,
    execute: bool = False,
    client: Prisma | None = None,
-    set_public_search_path: bool = False,
 ) -> list[dict] | int:
    """Internal: Execute raw SQL with proper schema handling.

    Use query_raw_with_schema() or execute_raw_with_schema() instead.

+    Supports placeholders:
+        - {schema_prefix}: Table/type prefix (e.g., "platform".)
+        - {schema}: Raw schema name for application tables (e.g., platform)
+
+    Note on pgvector types:
+        Use unqualified ::vector and <=> operator in queries. PostgreSQL resolves
+        these via search_path, which includes the schema where pgvector is installed
+        on all environments (local, CI, dev).
+
    Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
        *args: Query parameters
        execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
        client: Optional Prisma client for transactions (only used when execute=True).
-        set_public_search_path: If True, sets search_path to include public schema.
-                                Needed for pgvector types and other public schema objects.

    Returns:
        - list[dict] if execute=False (query results)
        - int if execute=True (number of affected rows)
+
+    Example with vector type:
+        await execute_raw_with_schema(
+            'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::vector)',
+            embedding_data
+        )
    """
    schema = get_database_schema()
    schema_prefix = f'"{schema}".' if schema != "public" else ""
-    formatted_query = query_template.format(schema_prefix=schema_prefix)
+
+    formatted_query = query_template.format(
+        schema_prefix=schema_prefix,
+        schema=schema,
+    )

    import prisma as prisma_module

    db_client = client if client else prisma_module.get_client()

-    # Set search_path to include public schema if requested
-    # Prisma doesn't support the 'options' connection parameter, so we set it per-session
-    # This is idempotent and safe to call multiple times
-    if set_public_search_path:
-        await db_client.execute_raw(f"SET search_path = {schema}, public")  # type: ignore
-
    if execute:
        result = await db_client.execute_raw(formatted_query, *args)  # type: ignore
    else:
@@ -167,16 +163,12 @@ async def _raw_with_schema(
    return result


-async def query_raw_with_schema(
-    query_template: str, *args, set_public_search_path: bool = False
-) -> list[dict]:
+async def query_raw_with_schema(query_template: str, *args) -> list[dict]:
    """Execute raw SQL SELECT query with proper schema handling.

    Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
        *args: Query parameters
-        set_public_search_path: If True, sets search_path to include public schema.
-                                Needed for pgvector types and other public schema objects.

    Returns:
        List of result rows as dictionaries
@@ -187,23 +179,20 @@ async def query_raw_with_schema(
            user_id
        )
    """
-    return await _raw_with_schema(query_template, *args, execute=False, set_public_search_path=set_public_search_path)  # type: ignore
+    return await _raw_with_schema(query_template, *args, execute=False)  # type: ignore


 async def execute_raw_with_schema(
    query_template: str,
    *args,
    client: Prisma | None = None,
-    set_public_search_path: bool = False,
 ) -> int:
    """Execute raw SQL command (INSERT/UPDATE/DELETE) with proper schema handling.

    Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
        *args: Query parameters
        client: Optional Prisma client for transactions
-        set_public_search_path: If True, sets search_path to include public schema.
-                                Needed for pgvector types and other public schema objects.

    Returns:
        Number of affected rows
@@ -215,7 +204,7 @@ async def execute_raw_with_schema(
            client=tx  # Optional transaction client
        )
    """
-    return await _raw_with_schema(query_template, *args, execute=True, client=client, set_public_search_path=set_public_search_path)  # type: ignore
+    return await _raw_with_schema(query_template, *args, execute=True, client=client)  # type: ignore


 class BaseDbModel(BaseModel):
--- a/autogpt_platform/backend/backend/data/event_bus.py
+++ b/autogpt_platform/backend/backend/data/event_bus.py
@@ -103,8 +103,18 @@ class RedisEventBus(BaseRedisEventBus[M], ABC):
        return redis.get_redis()

    def publish_event(self, event: M, channel_key: str):
-        message, full_channel_name = self._serialize_message(event, channel_key)
-        self.connection.publish(full_channel_name, message)
+        """
+        Publish an event to Redis. Gracefully handles connection failures
+        by logging the error instead of raising exceptions.
+        """
+        try:
+            message, full_channel_name = self._serialize_message(event, channel_key)
+            self.connection.publish(full_channel_name, message)
+        except Exception:
+            logger.exception(
+                f"Failed to publish event to Redis channel {channel_key}. "
+                "Event bus operation will continue without Redis connectivity."
+            )

    def listen_events(self, channel_key: str) -> Generator[M, None, None]:
        pubsub, full_channel_name = self._get_pubsub_channel(
@@ -128,9 +138,19 @@ class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
        return await redis.get_redis_async()

    async def publish_event(self, event: M, channel_key: str):
-        message, full_channel_name = self._serialize_message(event, channel_key)
-        connection = await self.connection
-        await connection.publish(full_channel_name, message)
+        """
+        Publish an event to Redis. Gracefully handles connection failures
+        by logging the error instead of raising exceptions.
+        """
+        try:
+            message, full_channel_name = self._serialize_message(event, channel_key)
+            connection = await self.connection
+            await connection.publish(full_channel_name, message)
+        except Exception:
+            logger.exception(
+                f"Failed to publish event to Redis channel {channel_key}. "
+                "Event bus operation will continue without Redis connectivity."
+            )

    async def listen_events(self, channel_key: str) -> AsyncGenerator[M, None]:
        pubsub, full_channel_name = self._get_pubsub_channel(
--- a/autogpt_platform/backend/backend/data/event_bus_test.py
+++ b/autogpt_platform/backend/backend/data/event_bus_test.py
@@ -0,0 +1,56 @@
+"""
+Tests for event_bus graceful degradation when Redis is unavailable.
+"""
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from pydantic import BaseModel
+
+from backend.data.event_bus import AsyncRedisEventBus
+
+
+class TestEvent(BaseModel):
+    """Test event model."""
+
+    message: str
+
+
+class TestNotificationBus(AsyncRedisEventBus[TestEvent]):
+    """Test implementation of AsyncRedisEventBus."""
+
+    Model = TestEvent
+
+    @property
+    def event_bus_name(self) -> str:
+        return "test_event_bus"
+
+
+@pytest.mark.asyncio
+async def test_publish_event_handles_connection_failure_gracefully():
+    """Test that publish_event logs exception instead of raising when Redis is unavailable."""
+    bus = TestNotificationBus()
+    event = TestEvent(message="test message")
+
+    # Mock get_redis_async to raise connection error
+    with patch(
+        "backend.data.event_bus.redis.get_redis_async",
+        side_effect=ConnectionError("Authentication required."),
+    ):
+        # Should not raise exception
+        await bus.publish_event(event, "test_channel")
+
+
+@pytest.mark.asyncio
+async def test_publish_event_works_with_redis_available():
+    """Test that publish_event works normally when Redis is available."""
+    bus = TestNotificationBus()
+    event = TestEvent(message="test message")
+
+    # Mock successful Redis connection
+    mock_redis = AsyncMock()
+    mock_redis.publish = AsyncMock()
+
+    with patch("backend.data.event_bus.redis.get_redis_async", return_value=mock_redis):
+        await bus.publish_event(event, "test_channel")
+        mock_redis.publish.assert_called_once()
--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -81,7 +81,10 @@ class ExecutionContext(BaseModel):
    This includes information needed by blocks, sub-graphs, and execution management.
    """

-    safe_mode: bool = True
+    model_config = {"extra": "ignore"}
+
+    human_in_the_loop_safe_mode: bool = True
+    sensitive_action_safe_mode: bool = False
    user_timezone: str = "UTC"
    root_execution_id: Optional[str] = None
    parent_execution_id: Optional[str] = None
--- a/autogpt_platform/backend/backend/data/graph.py
+++ b/autogpt_platform/backend/backend/data/graph.py
@@ -3,7 +3,7 @@ import logging
 import uuid
 from collections import defaultdict
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any, Literal, Optional, cast
+from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, cast

 from prisma.enums import SubmissionStatus
 from prisma.models import (
@@ -20,7 +20,7 @@ from prisma.types import (
    AgentNodeLinkCreateInput,
    StoreListingVersionWhereInput,
 )
-from pydantic import BaseModel, Field, create_model
+from pydantic import BaseModel, BeforeValidator, Field, create_model
 from pydantic.fields import computed_field

 from backend.blocks.agent import AgentExecutorBlock
@@ -62,7 +62,31 @@ logger = logging.getLogger(__name__)


 class GraphSettings(BaseModel):
-    human_in_the_loop_safe_mode: bool | None = None
+    # Use Annotated with BeforeValidator to coerce None to default values.
+    # This handles cases where the database has null values for these fields.
+    model_config = {"extra": "ignore"}
+
+    human_in_the_loop_safe_mode: Annotated[
+        bool, BeforeValidator(lambda v: v if v is not None else True)
+    ] = True
+    sensitive_action_safe_mode: Annotated[
+        bool, BeforeValidator(lambda v: v if v is not None else False)
+    ] = False
+
+    @classmethod
+    def from_graph(
+        cls,
+        graph: "GraphModel",
+        hitl_safe_mode: bool | None = None,
+        sensitive_action_safe_mode: bool = False,
+    ) -> "GraphSettings":
+        # Default to True if not explicitly set
+        if hitl_safe_mode is None:
+            hitl_safe_mode = True
+        return cls(
+            human_in_the_loop_safe_mode=hitl_safe_mode,
+            sensitive_action_safe_mode=sensitive_action_safe_mode,
+        )


 class Link(BaseDbModel):
@@ -244,10 +268,14 @@ class BaseGraph(BaseDbModel):
        return any(
            node.block_id
            for node in self.nodes
-            if (
-                node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
-                or node.block.requires_human_review
-            )
+            if node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
+        )
+
+    @computed_field
+    @property
+    def has_sensitive_action(self) -> bool:
+        return any(
+            node.block_id for node in self.nodes if node.block.is_sensitive_action
        )

    @property
--- a/autogpt_platform/backend/backend/data/human_review.py
+++ b/autogpt_platform/backend/backend/data/human_review.py
@@ -6,10 +6,10 @@ Handles all database operations for pending human reviews.
 import asyncio
 import logging
 from datetime import datetime, timezone
-from typing import Optional
+from typing import TYPE_CHECKING, Optional

 from prisma.enums import ReviewStatus
-from prisma.models import PendingHumanReview
+from prisma.models import AgentNodeExecution, PendingHumanReview
 from prisma.types import PendingHumanReviewUpdateInput
 from pydantic import BaseModel

@@ -17,8 +17,12 @@ from backend.api.features.executions.review.model import (
    PendingHumanReviewModel,
    SafeJsonData,
 )
+from backend.data.execution import get_graph_execution_meta
 from backend.util.json import SafeJson

+if TYPE_CHECKING:
+    pass
+
 logger = logging.getLogger(__name__)


@@ -32,6 +36,125 @@ class ReviewResult(BaseModel):
    node_exec_id: str


+def get_auto_approve_key(graph_exec_id: str, node_id: str) -> str:
+    """Generate the special nodeExecId key for auto-approval records."""
+    return f"auto_approve_{graph_exec_id}_{node_id}"
+
+
+async def check_approval(
+    node_exec_id: str,
+    graph_exec_id: str,
+    node_id: str,
+    user_id: str,
+    input_data: SafeJsonData | None = None,
+) -> Optional[ReviewResult]:
+    """
+    Check if there's an existing approval for this node execution.
+
+    Checks both:
+    1. Normal approval by node_exec_id (previous run of the same node execution)
+    2. Auto-approval by special key pattern "auto_approve_{graph_exec_id}_{node_id}"
+
+    Args:
+        node_exec_id: ID of the node execution
+        graph_exec_id: ID of the graph execution
+        node_id: ID of the node definition (not execution)
+        user_id: ID of the user (for data isolation)
+        input_data: Current input data (used for auto-approvals to avoid stale data)
+
+    Returns:
+        ReviewResult if approval found (either normal or auto), None otherwise
+    """
+    auto_approve_key = get_auto_approve_key(graph_exec_id, node_id)
+
+    # Check for either normal approval or auto-approval in a single query
+    existing_review = await PendingHumanReview.prisma().find_first(
+        where={
+            "OR": [
+                {"nodeExecId": node_exec_id},
+                {"nodeExecId": auto_approve_key},
+            ],
+            "status": ReviewStatus.APPROVED,
+            "userId": user_id,
+        },
+    )
+
+    if existing_review:
+        is_auto_approval = existing_review.nodeExecId == auto_approve_key
+        logger.info(
+            f"Found {'auto-' if is_auto_approval else ''}approval for node {node_id} "
+            f"(exec: {node_exec_id}) in execution {graph_exec_id}"
+        )
+        # For auto-approvals, use current input_data to avoid replaying stale payload
+        # For normal approvals, use the stored payload (which may have been edited)
+        return ReviewResult(
+            data=(
+                input_data
+                if is_auto_approval and input_data is not None
+                else existing_review.payload
+            ),
+            status=ReviewStatus.APPROVED,
+            message=(
+                "Auto-approved (user approved all future actions for this node)"
+                if is_auto_approval
+                else existing_review.reviewMessage or ""
+            ),
+            processed=True,
+            node_exec_id=existing_review.nodeExecId,
+        )
+
+    return None
+
+
+async def create_auto_approval_record(
+    user_id: str,
+    graph_exec_id: str,
+    graph_id: str,
+    graph_version: int,
+    node_id: str,
+    payload: SafeJsonData,
+) -> None:
+    """
+    Create an auto-approval record for a node in this execution.
+
+    This is stored as a PendingHumanReview with a special nodeExecId pattern
+    and status=APPROVED, so future executions of the same node can skip review.
+
+    Raises:
+        ValueError: If the graph execution doesn't belong to the user
+    """
+    # Validate that the graph execution belongs to this user (defense in depth)
+    graph_exec = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+    if not graph_exec:
+        raise ValueError(
+            f"Graph execution {graph_exec_id} not found or doesn't belong to user {user_id}"
+        )
+
+    auto_approve_key = get_auto_approve_key(graph_exec_id, node_id)
+
+    await PendingHumanReview.prisma().upsert(
+        where={"nodeExecId": auto_approve_key},
+        data={
+            "create": {
+                "nodeExecId": auto_approve_key,
+                "userId": user_id,
+                "graphExecId": graph_exec_id,
+                "graphId": graph_id,
+                "graphVersion": graph_version,
+                "payload": SafeJson(payload),
+                "instructions": "Auto-approval record",
+                "editable": False,
+                "status": ReviewStatus.APPROVED,
+                "processed": True,
+                "reviewedAt": datetime.now(timezone.utc),
+            },
+            "update": {},  # Already exists, no update needed
+        },
+    )
+
+
 async def get_or_create_human_review(
    user_id: str,
    node_exec_id: str,
@@ -108,6 +231,87 @@ async def get_or_create_human_review(
        )


+async def get_pending_review_by_node_exec_id(
+    node_exec_id: str, user_id: str
+) -> Optional["PendingHumanReviewModel"]:
+    """
+    Get a pending review by its node execution ID.
+
+    Args:
+        node_exec_id: The node execution ID to look up
+        user_id: User ID for authorization (only returns if review belongs to this user)
+
+    Returns:
+        The pending review if found and belongs to user, None otherwise
+    """
+    review = await PendingHumanReview.prisma().find_first(
+        where={
+            "nodeExecId": node_exec_id,
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        }
+    )
+
+    if not review:
+        return None
+
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
+    node_exec = await get_node_execution(review.nodeExecId)
+    node_id = node_exec.node_id if node_exec else review.nodeExecId
+    return PendingHumanReviewModel.from_db(review, node_id=node_id)
+
+
+async def get_pending_reviews_by_node_exec_ids(
+    node_exec_ids: list[str], user_id: str
+) -> dict[str, "PendingHumanReviewModel"]:
+    """
+    Get multiple pending reviews by their node execution IDs in a single batch query.
+
+    Args:
+        node_exec_ids: List of node execution IDs to look up
+        user_id: User ID for authorization (only returns reviews belonging to this user)
+
+    Returns:
+        Dictionary mapping node_exec_id -> PendingHumanReviewModel for found reviews
+    """
+    if not node_exec_ids:
+        return {}
+
+    reviews = await PendingHumanReview.prisma().find_many(
+        where={
+            "nodeExecId": {"in": node_exec_ids},
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        }
+    )
+
+    if not reviews:
+        return {}
+
+    # Batch fetch all node executions to avoid N+1 queries
+    node_exec_ids_to_fetch = [review.nodeExecId for review in reviews]
+    node_execs = await AgentNodeExecution.prisma().find_many(
+        where={"id": {"in": node_exec_ids_to_fetch}},
+        include={"Node": True},
+    )
+
+    # Create mapping from node_exec_id to node_id
+    node_exec_id_to_node_id = {
+        node_exec.id: node_exec.agentNodeId for node_exec in node_execs
+    }
+
+    result = {}
+    for review in reviews:
+        node_id = node_exec_id_to_node_id.get(review.nodeExecId, review.nodeExecId)
+        result[review.nodeExecId] = PendingHumanReviewModel.from_db(
+            review, node_id=node_id
+        )
+
+    return result
+
+
 async def has_pending_reviews_for_graph_exec(graph_exec_id: str) -> bool:
    """
    Check if a graph execution has any pending reviews.
@@ -137,8 +341,11 @@ async def get_pending_reviews_for_user(
        page_size: Number of reviews per page

    Returns:
-        List of pending review models
+        List of pending review models with node_id included
    """
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
    # Calculate offset for pagination
    offset = (page - 1) * page_size

@@ -149,7 +356,14 @@ async def get_pending_reviews_for_user(
        take=page_size,
    )

-    return [PendingHumanReviewModel.from_db(review) for review in reviews]
+    # Fetch node_id for each review from NodeExecution
+    result = []
+    for review in reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result.append(PendingHumanReviewModel.from_db(review, node_id=node_id))
+
+    return result


 async def get_pending_reviews_for_execution(
@@ -163,8 +377,11 @@ async def get_pending_reviews_for_execution(
        user_id: User ID for security validation

    Returns:
-        List of pending review models
+        List of pending review models with node_id included
    """
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
    reviews = await PendingHumanReview.prisma().find_many(
        where={
            "userId": user_id,
@@ -174,7 +391,14 @@ async def get_pending_reviews_for_execution(
        order={"createdAt": "asc"},
    )

-    return [PendingHumanReviewModel.from_db(review) for review in reviews]
+    # Fetch node_id for each review from NodeExecution
+    result = []
+    for review in reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result.append(PendingHumanReviewModel.from_db(review, node_id=node_id))
+
+    return result


 async def process_all_reviews_for_execution(
@@ -244,11 +468,19 @@ async def process_all_reviews_for_execution(
    # Note: Execution resumption is now handled at the API layer after ALL reviews
    # for an execution are processed (both approved and rejected)

-    # Return as dict for easy access
-    return {
-        review.nodeExecId: PendingHumanReviewModel.from_db(review)
-        for review in updated_reviews
-    }
+    # Fetch node_id for each review and return as dict for easy access
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
+    result = {}
+    for review in updated_reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result[review.nodeExecId] = PendingHumanReviewModel.from_db(
+            review, node_id=node_id
+        )
+
+    return result


 async def update_review_processed_status(node_exec_id: str, processed: bool) -> None:
@@ -256,3 +488,44 @@ async def update_review_processed_status(node_exec_id: str, processed: bool) ->
    await PendingHumanReview.prisma().update(
        where={"nodeExecId": node_exec_id}, data={"processed": processed}
    )
+
+
+async def cancel_pending_reviews_for_execution(graph_exec_id: str, user_id: str) -> int:
+    """
+    Cancel all pending reviews for a graph execution (e.g., when execution is stopped).
+
+    Marks all WAITING reviews as REJECTED with a message indicating the execution was stopped.
+
+    Args:
+        graph_exec_id: The graph execution ID
+        user_id: User ID who owns the execution (for security validation)
+
+    Returns:
+        Number of reviews cancelled
+
+    Raises:
+        ValueError: If the graph execution doesn't belong to the user
+    """
+    # Validate user ownership before cancelling reviews
+    graph_exec = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+    if not graph_exec:
+        raise ValueError(
+            f"Graph execution {graph_exec_id} not found or doesn't belong to user {user_id}"
+        )
+
+    result = await PendingHumanReview.prisma().update_many(
+        where={
+            "graphExecId": graph_exec_id,
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        },
+        data={
+            "status": ReviewStatus.REJECTED,
+            "reviewMessage": "Execution was stopped by user",
+            "processed": True,
+            "reviewedAt": datetime.now(timezone.utc),
+        },
+    )
+    return result
--- a/autogpt_platform/backend/backend/data/human_review_test.py
+++ b/autogpt_platform/backend/backend/data/human_review_test.py
@@ -36,7 +36,7 @@ def sample_db_review():
    return mock_review


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_or_create_human_review_new(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -46,8 +46,8 @@ async def test_get_or_create_human_review_new(
    sample_db_review.status = ReviewStatus.WAITING
    sample_db_review.processed = False

-    mock_upsert = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
-    mock_upsert.return_value.upsert = AsyncMock(return_value=sample_db_review)
+    mock_prisma = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
+    mock_prisma.return_value.upsert = AsyncMock(return_value=sample_db_review)

    result = await get_or_create_human_review(
        user_id="test-user-123",
@@ -64,7 +64,7 @@ async def test_get_or_create_human_review_new(
    assert result is None


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_or_create_human_review_approved(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -75,8 +75,8 @@ async def test_get_or_create_human_review_approved(
    sample_db_review.processed = False
    sample_db_review.reviewMessage = "Looks good"

-    mock_upsert = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
-    mock_upsert.return_value.upsert = AsyncMock(return_value=sample_db_review)
+    mock_prisma = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
+    mock_prisma.return_value.upsert = AsyncMock(return_value=sample_db_review)

    result = await get_or_create_human_review(
        user_id="test-user-123",
@@ -96,7 +96,7 @@ async def test_get_or_create_human_review_approved(
    assert result.message == "Looks good"


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_has_pending_reviews_for_graph_exec_true(
    mocker: pytest_mock.MockFixture,
 ):
@@ -109,7 +109,7 @@ async def test_has_pending_reviews_for_graph_exec_true(
    assert result is True


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_has_pending_reviews_for_graph_exec_false(
    mocker: pytest_mock.MockFixture,
 ):
@@ -122,7 +122,7 @@ async def test_has_pending_reviews_for_graph_exec_false(
    assert result is False


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_pending_reviews_for_user(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -131,10 +131,19 @@ async def test_get_pending_reviews_for_user(
    mock_find_many = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
    mock_find_many.return_value.find_many = AsyncMock(return_value=[sample_db_review])

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await get_pending_reviews_for_user("test_user", page=2, page_size=10)

    assert len(result) == 1
    assert result[0].node_exec_id == "test_node_123"
+    assert result[0].node_id == "test_node_def_789"

    # Verify pagination parameters
    call_args = mock_find_many.return_value.find_many.call_args
@@ -142,7 +151,7 @@ async def test_get_pending_reviews_for_user(
    assert call_args.kwargs["take"] == 10


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_pending_reviews_for_execution(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -151,12 +160,21 @@ async def test_get_pending_reviews_for_execution(
    mock_find_many = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
    mock_find_many.return_value.find_many = AsyncMock(return_value=[sample_db_review])

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await get_pending_reviews_for_execution(
        "test_graph_exec_456", "test-user-123"
    )

    assert len(result) == 1
    assert result[0].graph_exec_id == "test_graph_exec_456"
+    assert result[0].node_id == "test_node_def_789"

    # Verify it filters by execution and user
    call_args = mock_find_many.return_value.find_many.call_args
@@ -166,7 +184,7 @@ async def test_get_pending_reviews_for_execution(
    assert where_clause["status"] == ReviewStatus.WAITING


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_for_execution_success(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -201,6 +219,14 @@ async def test_process_all_reviews_for_execution_success(
        new=AsyncMock(return_value=[updated_review]),
    )

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await process_all_reviews_for_execution(
        user_id="test-user-123",
        review_decisions={
@@ -211,9 +237,10 @@ async def test_process_all_reviews_for_execution_success(
    assert len(result) == 1
    assert "test_node_123" in result
    assert result["test_node_123"].status == ReviewStatus.APPROVED
+    assert result["test_node_123"].node_id == "test_node_def_789"


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_for_execution_validation_errors(
    mocker: pytest_mock.MockFixture,
 ):
@@ -233,7 +260,7 @@ async def test_process_all_reviews_for_execution_validation_errors(
        )


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_edit_permission_error(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -259,7 +286,7 @@ async def test_process_all_reviews_edit_permission_error(
        )


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_mixed_approval_rejection(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -329,6 +356,14 @@ async def test_process_all_reviews_mixed_approval_rejection(
        new=AsyncMock(return_value=[approved_review, rejected_review]),
    )

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await process_all_reviews_for_execution(
        user_id="test-user-123",
        review_decisions={
@@ -340,3 +375,5 @@ async def test_process_all_reviews_mixed_approval_rejection(
    assert len(result) == 2
    assert "test_node_123" in result
    assert "test_node_456" in result
+    assert result["test_node_123"].node_id == "test_node_def_789"
+    assert result["test_node_456"].node_id == "test_node_def_789"
--- a/autogpt_platform/backend/backend/executor/database.py
+++ b/autogpt_platform/backend/backend/executor/database.py
@@ -50,6 +50,8 @@ from backend.data.graph import (
    validate_graph_execution_permissions,
 )
 from backend.data.human_review import (
+    cancel_pending_reviews_for_execution,
+    check_approval,
    get_or_create_human_review,
    has_pending_reviews_for_graph_exec,
    update_review_processed_status,
@@ -190,6 +192,8 @@ class DatabaseManager(AppService):
    get_user_notification_preference = _(get_user_notification_preference)

    # Human In The Loop
+    cancel_pending_reviews_for_execution = _(cancel_pending_reviews_for_execution)
+    check_approval = _(check_approval)
    get_or_create_human_review = _(get_or_create_human_review)
    has_pending_reviews_for_graph_exec = _(has_pending_reviews_for_graph_exec)
    update_review_processed_status = _(update_review_processed_status)
@@ -313,6 +317,8 @@ class DatabaseManagerAsyncClient(AppServiceClient):
    set_execution_kv_data = d.set_execution_kv_data

    # Human In The Loop
+    cancel_pending_reviews_for_execution = d.cancel_pending_reviews_for_execution
+    check_approval = d.check_approval
    get_or_create_human_review = d.get_or_create_human_review
    update_review_processed_status = d.update_review_processed_status

--- a/autogpt_platform/backend/backend/executor/scheduler.py
+++ b/autogpt_platform/backend/backend/executor/scheduler.py
@@ -309,7 +309,7 @@ def ensure_embeddings_coverage():

        # Process in batches until no more missing embeddings
        while True:
-            result = db_client.backfill_missing_embeddings(batch_size=10)
+            result = db_client.backfill_missing_embeddings(batch_size=100)

            total_processed += result["processed"]
            total_success += result["success"]
--- a/autogpt_platform/backend/backend/executor/utils.py
+++ b/autogpt_platform/backend/backend/executor/utils.py
@@ -10,6 +10,7 @@ from pydantic import BaseModel, JsonValue, ValidationError

 from backend.data import execution as execution_db
 from backend.data import graph as graph_db
+from backend.data import human_review as human_review_db
 from backend.data import onboarding as onboarding_db
 from backend.data import user as user_db
 from backend.data.block import (
@@ -749,9 +750,27 @@ async def stop_graph_execution(
        if graph_exec.status in [
            ExecutionStatus.QUEUED,
            ExecutionStatus.INCOMPLETE,
+            ExecutionStatus.REVIEW,
        ]:
-            # If the graph is still on the queue, we can prevent them from being executed
-            # by setting the status to TERMINATED.
+            # If the graph is queued/incomplete/paused for review, terminate immediately
+            # No need to wait for executor since it's not actively running
+
+            # If graph is in REVIEW status, clean up pending reviews before terminating
+            if graph_exec.status == ExecutionStatus.REVIEW:
+                # Use human_review_db if Prisma connected, else database manager
+                review_db = (
+                    human_review_db
+                    if prisma.is_connected()
+                    else get_database_manager_async_client()
+                )
+                # Mark all pending reviews as rejected/cancelled
+                cancelled_count = await review_db.cancel_pending_reviews_for_execution(
+                    graph_exec_id, user_id
+                )
+                logger.info(
+                    f"Cancelled {cancelled_count} pending review(s) for stopped execution {graph_exec_id}"
+                )
+
            graph_exec.status = ExecutionStatus.TERMINATED

            await asyncio.gather(
@@ -873,11 +892,8 @@ async def add_graph_execution(
        settings = await gdb.get_graph_settings(user_id=user_id, graph_id=graph_id)

        execution_context = ExecutionContext(
-            safe_mode=(
-                settings.human_in_the_loop_safe_mode
-                if settings.human_in_the_loop_safe_mode is not None
-                else True
-            ),
+            human_in_the_loop_safe_mode=settings.human_in_the_loop_safe_mode,
+            sensitive_action_safe_mode=settings.sensitive_action_safe_mode,
            user_timezone=(
                user.timezone if user.timezone != USER_TIMEZONE_NOT_SET else "UTC"
            ),
@@ -890,9 +906,28 @@ async def add_graph_execution(
            nodes_to_skip=nodes_to_skip,
            execution_context=execution_context,
        )
-        logger.info(f"Publishing execution {graph_exec.id} to execution queue")
+        logger.info(f"Queueing execution {graph_exec.id}")
+
+        # Update execution status to QUEUED BEFORE publishing to prevent race condition
+        # where two concurrent requests could both publish the same execution
+        updated_exec = await edb.update_graph_execution_stats(
+            graph_exec_id=graph_exec.id,
+            status=ExecutionStatus.QUEUED,
+        )
+
+        # Verify the status update succeeded (prevents duplicate queueing in race conditions)
+        # If another request already updated the status, this execution will not be QUEUED
+        if not updated_exec or updated_exec.status != ExecutionStatus.QUEUED:
+            logger.warning(
+                f"Skipping queue publish for execution {graph_exec.id} - "
+                f"status update failed or execution already queued by another request"
+            )
+            return graph_exec
+
+        graph_exec.status = ExecutionStatus.QUEUED

        # Publish to execution queue for executor to pick up
+        # This happens AFTER status update to ensure only one request publishes
        exec_queue = await get_async_execution_queue()
        await exec_queue.publish_message(
            routing_key=GRAPH_EXECUTION_ROUTING_KEY,
@@ -900,13 +935,6 @@ async def add_graph_execution(
            exchange=GRAPH_EXECUTION_EXCHANGE,
        )
        logger.info(f"Published execution {graph_exec.id} to RabbitMQ queue")
-
-        # Update execution status to QUEUED
-        graph_exec.status = ExecutionStatus.QUEUED
-        await edb.update_graph_execution_stats(
-            graph_exec_id=graph_exec.id,
-            status=graph_exec.status,
-        )
    except BaseException as e:
        err = str(e) or type(e).__name__
        if not graph_exec:
--- a/autogpt_platform/backend/backend/executor/utils_test.py
+++ b/autogpt_platform/backend/backend/executor/utils_test.py
@@ -4,6 +4,7 @@ import pytest
 from pytest_mock import MockerFixture

 from backend.data.dynamic_fields import merge_execution_input, parse_execution_output
+from backend.data.execution import ExecutionStatus
 from backend.util.mock import MockObject


@@ -346,6 +347,7 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
    mock_graph_exec.id = "execution-id-123"
    mock_graph_exec.node_executions = []  # Add this to avoid AttributeError
+    mock_graph_exec.status = ExecutionStatus.QUEUED  # Required for race condition check
    mock_graph_exec.to_graph_execution_entry.return_value = mocker.MagicMock()

    # Mock the queue and event bus
@@ -386,6 +388,7 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
    mock_user.timezone = "UTC"
    mock_settings = mocker.MagicMock()
    mock_settings.human_in_the_loop_safe_mode = True
+    mock_settings.sensitive_action_safe_mode = False

    mock_udb.get_user_by_id = mocker.AsyncMock(return_value=mock_user)
    mock_gdb.get_graph_settings = mocker.AsyncMock(return_value=mock_settings)
@@ -610,6 +613,7 @@ async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
    mock_graph_exec.id = "execution-id-123"
    mock_graph_exec.node_executions = []
+    mock_graph_exec.status = ExecutionStatus.QUEUED  # Required for race condition check

    # Track what's passed to to_graph_execution_entry
    captured_kwargs = {}
@@ -651,6 +655,7 @@ async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
    mock_user.timezone = "UTC"
    mock_settings = mocker.MagicMock()
    mock_settings.human_in_the_loop_safe_mode = True
+    mock_settings.sensitive_action_safe_mode = False

    mock_udb.get_user_by_id = mocker.AsyncMock(return_value=mock_user)
    mock_gdb.get_graph_settings = mocker.AsyncMock(return_value=mock_settings)
@@ -668,3 +673,232 @@ async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
    # Verify nodes_to_skip was passed to to_graph_execution_entry
    assert "nodes_to_skip" in captured_kwargs
    assert captured_kwargs["nodes_to_skip"] == nodes_to_skip
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_in_review_status_cancels_pending_reviews(
+    mocker: MockerFixture,
+):
+    """Test that stopping an execution in REVIEW status cancels pending reviews."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    graph_exec_id = "test-exec-123"
+
+    # Mock graph execution in REVIEW status
+    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_graph_exec.id = graph_exec_id
+    mock_graph_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = True
+
+    mock_human_review_db = mocker.patch("backend.executor.utils.human_review_db")
+    mock_human_review_db.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=2  # 2 reviews cancelled
+    )
+
+    mock_execution_db = mocker.patch("backend.executor.utils.execution_db")
+    mock_execution_db.get_graph_execution_meta = mocker.AsyncMock(
+        return_value=mock_graph_exec
+    )
+    mock_execution_db.update_graph_execution_stats = mocker.AsyncMock()
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+    mock_get_child_executions.return_value = []  # No children
+
+    # Call stop_graph_execution with timeout to allow status check
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=graph_exec_id,
+        wait_timeout=1.0,  # Wait to allow status check
+        cascade=True,
+    )
+
+    # Verify pending reviews were cancelled
+    mock_human_review_db.cancel_pending_reviews_for_execution.assert_called_once_with(
+        graph_exec_id, user_id
+    )
+
+    # Verify execution status was updated to TERMINATED
+    mock_execution_db.update_graph_execution_stats.assert_called_once()
+    call_kwargs = mock_execution_db.update_graph_execution_stats.call_args[1]
+    assert call_kwargs["graph_exec_id"] == graph_exec_id
+    assert call_kwargs["status"] == ExecutionStatus.TERMINATED
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_with_database_manager_when_prisma_disconnected(
+    mocker: MockerFixture,
+):
+    """Test that stop uses database manager when Prisma is not connected."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    graph_exec_id = "test-exec-456"
+
+    # Mock graph execution in REVIEW status
+    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_graph_exec.id = graph_exec_id
+    mock_graph_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    # Prisma is NOT connected
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = False
+
+    # Mock database manager client
+    mock_get_db_manager = mocker.patch(
+        "backend.executor.utils.get_database_manager_async_client"
+    )
+    mock_db_manager = mocker.AsyncMock()
+    mock_db_manager.get_graph_execution_meta = mocker.AsyncMock(
+        return_value=mock_graph_exec
+    )
+    mock_db_manager.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=3  # 3 reviews cancelled
+    )
+    mock_db_manager.update_graph_execution_stats = mocker.AsyncMock()
+    mock_get_db_manager.return_value = mock_db_manager
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+    mock_get_child_executions.return_value = []  # No children
+
+    # Call stop_graph_execution with timeout
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=graph_exec_id,
+        wait_timeout=1.0,
+        cascade=True,
+    )
+
+    # Verify database manager was used for cancel_pending_reviews
+    mock_db_manager.cancel_pending_reviews_for_execution.assert_called_once_with(
+        graph_exec_id, user_id
+    )
+
+    # Verify execution status was updated via database manager
+    mock_db_manager.update_graph_execution_stats.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_cascades_to_child_with_reviews(
+    mocker: MockerFixture,
+):
+    """Test that stopping parent execution cascades to children and cancels their reviews."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    parent_exec_id = "parent-exec"
+    child_exec_id = "child-exec"
+
+    # Mock parent execution in RUNNING status
+    mock_parent_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_parent_exec.id = parent_exec_id
+    mock_parent_exec.status = ExecutionStatus.RUNNING
+
+    # Mock child execution in REVIEW status
+    mock_child_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_child_exec.id = child_exec_id
+    mock_child_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = True
+
+    mock_human_review_db = mocker.patch("backend.executor.utils.human_review_db")
+    mock_human_review_db.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=1  # 1 child review cancelled
+    )
+
+    # Mock execution_db to return different status based on which execution is queried
+    mock_execution_db = mocker.patch("backend.executor.utils.execution_db")
+
+    # Track call count to simulate status transition
+    call_count = {"count": 0}
+
+    async def get_exec_meta_side_effect(execution_id, user_id):
+        call_count["count"] += 1
+        if execution_id == parent_exec_id:
+            # After a few calls (child processing happens), transition parent to TERMINATED
+            # This simulates the executor service processing the stop request
+            if call_count["count"] > 3:
+                mock_parent_exec.status = ExecutionStatus.TERMINATED
+            return mock_parent_exec
+        elif execution_id == child_exec_id:
+            return mock_child_exec
+        return None
+
+    mock_execution_db.get_graph_execution_meta = mocker.AsyncMock(
+        side_effect=get_exec_meta_side_effect
+    )
+    mock_execution_db.update_graph_execution_stats = mocker.AsyncMock()
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    # Mock _get_child_executions to return the child
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+
+    def get_children_side_effect(parent_id):
+        if parent_id == parent_exec_id:
+            return [mock_child_exec]
+        return []
+
+    mock_get_child_executions.side_effect = get_children_side_effect
+
+    # Call stop_graph_execution on parent with cascade=True
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=parent_exec_id,
+        wait_timeout=1.0,
+        cascade=True,
+    )
+
+    # Verify child reviews were cancelled
+    mock_human_review_db.cancel_pending_reviews_for_execution.assert_called_once_with(
+        child_exec_id, user_id
+    )
+
+    # Verify both parent and child status updates
+    assert mock_execution_db.update_graph_execution_stats.call_count >= 1
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -350,6 +350,19 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
        description="Whether to mark failed scans as clean or not",
    )

+    agentgenerator_host: str = Field(
+        default="",
+        description="The host for the Agent Generator service (empty to use built-in)",
+    )
+    agentgenerator_port: int = Field(
+        default=8000,
+        description="The port for the Agent Generator service",
+    )
+    agentgenerator_timeout: int = Field(
+        default=120,
+        description="The timeout in seconds for Agent Generator service requests",
+    )
+
    enable_example_blocks: bool = Field(
        default=False,
        description="Whether to enable example blocks in production",
--- a/autogpt_platform/backend/backend/util/test.py
+++ b/autogpt_platform/backend/backend/util/test.py
@@ -1,3 +1,4 @@
+import asyncio
 import inspect
 import logging
 import time
@@ -58,6 +59,11 @@ class SpinTestServer:
        self.db_api.__exit__(exc_type, exc_val, exc_tb)
        self.notif_manager.__exit__(exc_type, exc_val, exc_tb)

+        # Give services time to fully shut down
+        #  This prevents event loop issues where services haven't fully cleaned up
+        # before the next test starts
+        await asyncio.sleep(0.5)
+
    def setup_dependency_overrides(self):
        # Override get_user_id for testing
        self.agent_server.set_test_dependency_overrides(
--- a/autogpt_platform/backend/migrations/20260109181714_add_docs_embedding/migration.sql
+++ b/autogpt_platform/backend/migrations/20260109181714_add_docs_embedding/migration.sql
@@ -1,11 +1,37 @@
 -- CreateExtension
 -- Supabase: pgvector must be enabled via Dashboard → Database → Extensions first
-- Create in public schema so vector type is available across all schemas
+-- Ensures vector extension is in the current schema (from DATABASE_URL ?schema= param)
+-- If it exists in a different schema (e.g., public), we drop and recreate it in the current schema
+-- This ensures vector type is in the same schema as tables, making ::vector work without explicit qualification
 DO $$
+DECLARE
+    current_schema_name text;
+    vector_schema text;
 BEGIN
-    CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "public";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'vector extension not available or already exists, skipping';
+    -- Get the current schema from search_path
+    SELECT current_schema() INTO current_schema_name;
+
+    -- Check if vector extension exists and which schema it's in
+    SELECT n.nspname INTO vector_schema
+    FROM pg_extension e
+    JOIN pg_namespace n ON e.extnamespace = n.oid
+    WHERE e.extname = 'vector';
+
+    -- Handle removal if in wrong schema
+    IF vector_schema IS NOT NULL AND vector_schema != current_schema_name THEN
+        BEGIN
+            -- Vector exists in a different schema, drop it first
+            RAISE WARNING 'pgvector found in schema "%" but need it in "%". Dropping and reinstalling...',
+                vector_schema, current_schema_name;
+            EXECUTE 'DROP EXTENSION IF EXISTS vector CASCADE';
+        EXCEPTION WHEN OTHERS THEN
+            RAISE EXCEPTION 'Failed to drop pgvector from schema "%": %. You may need to drop it manually.',
+                vector_schema, SQLERRM;
+        END;
+    END IF;
+
+    -- Create extension in current schema (let it fail naturally if not available)
+    EXECUTE format('CREATE EXTENSION IF NOT EXISTS vector SCHEMA %I', current_schema_name);
 END $$;

 -- CreateEnum
@@ -19,7 +45,7 @@ CREATE TABLE "UnifiedContentEmbedding" (
    "contentType" "ContentType" NOT NULL,
    "contentId" TEXT NOT NULL,
    "userId" TEXT,
-    "embedding" public.vector(1536) NOT NULL,
+    "embedding" vector(1536) NOT NULL,
    "searchableText" TEXT NOT NULL,
    "metadata" JSONB NOT NULL DEFAULT '{}',

@@ -45,4 +71,4 @@ CREATE UNIQUE INDEX "UnifiedContentEmbedding_contentType_contentId_userId_key" O
 -- Uses cosine distance operator (<=>), which matches the query in hybrid_search.py
 -- Note: Drop first in case Prisma created a btree index (Prisma doesn't support HNSW)
 DROP INDEX IF EXISTS "UnifiedContentEmbedding_embedding_idx";
-CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" public.vector_cosine_ops);
+CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" vector_cosine_ops);
--- a/autogpt_platform/backend/migrations/20260112173500_add_supabase_extensions_to_platform_schema/migration.sql
+++ b/autogpt_platform/backend/migrations/20260112173500_add_supabase_extensions_to_platform_schema/migration.sql
@@ -1,71 +0,0 @@
-- Acknowledge Supabase-managed extensions to prevent drift warnings
-- These extensions are pre-installed by Supabase in specific schemas
-- This migration ensures they exist where available (Supabase) or skips gracefully (CI)
-
-- Create schemas (safe in both CI and Supabase)
-CREATE SCHEMA IF NOT EXISTS "extensions";
-
-- Extensions that exist in both CI and Supabase
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pgcrypto" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgcrypto extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'uuid-ossp extension not available, skipping';
-END $$;
-
-- Supabase-specific extensions (skip gracefully in CI)
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pg_stat_statements" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_stat_statements extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pg_net" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_net extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pgjwt" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgjwt extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "graphql";
-    CREATE EXTENSION IF NOT EXISTS "pg_graphql" WITH SCHEMA "graphql";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_graphql extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "pgsodium";
-    CREATE EXTENSION IF NOT EXISTS "pgsodium" WITH SCHEMA "pgsodium";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgsodium extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "vault";
-    CREATE EXTENSION IF NOT EXISTS "supabase_vault" WITH SCHEMA "vault";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'supabase_vault extension not available, skipping';
-END $$;
-
-
-- Return to platform
-CREATE SCHEMA IF NOT EXISTS "platform";
--- a/autogpt_platform/backend/migrations/20260121200000_remove_node_execution_fk_from_pending_human_review/migration.sql
+++ b/autogpt_platform/backend/migrations/20260121200000_remove_node_execution_fk_from_pending_human_review/migration.sql
@@ -0,0 +1,7 @@
+-- Remove NodeExecution foreign key from PendingHumanReview
+-- The nodeExecId column remains as the primary key, but we remove the FK constraint
+-- to AgentNodeExecution since PendingHumanReview records can persist after node
+-- execution records are deleted.
+
+-- Drop foreign key constraint that linked PendingHumanReview.nodeExecId to AgentNodeExecution.id
+ALTER TABLE "PendingHumanReview" DROP CONSTRAINT IF EXISTS "PendingHumanReview_nodeExecId_fkey";
--- a/autogpt_platform/backend/schema.prisma
+++ b/autogpt_platform/backend/schema.prisma
@@ -517,8 +517,6 @@ model AgentNodeExecution {

  stats Json?

-  PendingHumanReview PendingHumanReview?
-
  @@index([agentGraphExecutionId, agentNodeId, executionStatus])
  @@index([agentNodeId, executionStatus])
  @@index([addedTime, queuedTime])
@@ -567,6 +565,7 @@ enum ReviewStatus {
 }

 // Pending human reviews for Human-in-the-loop blocks
+// Also stores auto-approval records with special nodeExecId patterns (e.g., "auto_approve_{graph_exec_id}_{node_id}")
 model PendingHumanReview {
  nodeExecId    String       @id
  userId        String
@@ -585,7 +584,6 @@ model PendingHumanReview {
  reviewedAt    DateTime?

  User           User                @relation(fields: [userId], references: [id], onDelete: Cascade)
-  NodeExecution  AgentNodeExecution  @relation(fields: [nodeExecId], references: [id], onDelete: Cascade)
  GraphExecution AgentGraphExecution @relation(fields: [graphExecId], references: [id], onDelete: Cascade)

  @@unique([nodeExecId]) // One pending review per node execution
--- a/autogpt_platform/backend/scripts/generate_block_docs.py
+++ b/autogpt_platform/backend/scripts/generate_block_docs.py
@@ -34,7 +34,10 @@ logger = logging.getLogger(__name__)

 # Default output directory relative to repo root
 DEFAULT_OUTPUT_DIR = (
-    Path(__file__).parent.parent.parent.parent / "docs" / "integrations"
+    Path(__file__).parent.parent.parent.parent
+    / "docs"
+    / "integrations"
+    / "block-integrations"
 )


@@ -366,12 +369,12 @@ def generate_block_markdown(
    lines.append("")

    # What it is (full description)
-    lines.append(f"### What it is")
+    lines.append("### What it is")
    lines.append(block.description or "No description available.")
    lines.append("")

    # How it works (manual section)
-    lines.append(f"### How it works")
+    lines.append("### How it works")
    how_it_works = manual_content.get(
        "how_it_works", "_Add technical explanation here._"
    )
@@ -383,7 +386,7 @@ def generate_block_markdown(
    # Inputs table (auto-generated)
    visible_inputs = [f for f in block.inputs if not f.hidden]
    if visible_inputs:
-        lines.append(f"### Inputs")
+        lines.append("### Inputs")
        lines.append("")
        lines.append("| Input | Description | Type | Required |")
        lines.append("|-------|-------------|------|----------|")
@@ -400,7 +403,7 @@ def generate_block_markdown(
    # Outputs table (auto-generated)
    visible_outputs = [f for f in block.outputs if not f.hidden]
    if visible_outputs:
-        lines.append(f"### Outputs")
+        lines.append("### Outputs")
        lines.append("")
        lines.append("| Output | Description | Type |")
        lines.append("|--------|-------------|------|")
@@ -414,13 +417,21 @@ def generate_block_markdown(
        lines.append("")

    # Possible use case (manual section)
-    lines.append(f"### Possible use case")
+    lines.append("### Possible use case")
    use_case = manual_content.get("use_case", "_Add practical use case examples here._")
    lines.append("<!-- MANUAL: use_case -->")
    lines.append(use_case)
    lines.append("<!-- END MANUAL -->")
    lines.append("")

+    # Optional per-block extras (only include if has content)
+    extras = manual_content.get("extras", "")
+    if extras:
+        lines.append("<!-- MANUAL: extras -->")
+        lines.append(extras)
+        lines.append("<!-- END MANUAL -->")
+        lines.append("")
+
    lines.append("---")
    lines.append("")

@@ -456,25 +467,52 @@ def get_block_file_mapping(blocks: list[BlockDoc]) -> dict[str, list[BlockDoc]]:
    return dict(file_mapping)


-def generate_overview_table(blocks: list[BlockDoc]) -> str:
-    """Generate the overview table markdown (blocks.md)."""
+def generate_overview_table(blocks: list[BlockDoc], block_dir_prefix: str = "") -> str:
+    """Generate the overview table markdown (blocks.md).
+
+    Args:
+        blocks: List of block documentation objects
+        block_dir_prefix: Prefix for block file links (e.g., "block-integrations/")
+    """
    lines = []

+    # GitBook YAML frontmatter
+    lines.append("---")
+    lines.append("layout:")
+    lines.append("  width: default")
+    lines.append("  title:")
+    lines.append("    visible: true")
+    lines.append("  description:")
+    lines.append("    visible: true")
+    lines.append("  tableOfContents:")
+    lines.append("    visible: false")
+    lines.append("  outline:")
+    lines.append("    visible: true")
+    lines.append("  pagination:")
+    lines.append("    visible: true")
+    lines.append("  metadata:")
+    lines.append("    visible: true")
+    lines.append("---")
+    lines.append("")
+
    lines.append("# AutoGPT Blocks Overview")
    lines.append("")
    lines.append(
        'AutoGPT uses a modular approach with various "blocks" to handle different tasks. These blocks are the building blocks of AutoGPT workflows, allowing users to create complex automations by combining simple, specialized components.'
    )
    lines.append("")
-    lines.append('!!! info "Creating Your Own Blocks"')
-    lines.append("    Want to create your own custom blocks? Check out our guides:")
-    lines.append("    ")
+    lines.append('{% hint style="info" %}')
+    lines.append("**Creating Your Own Blocks**")
+    lines.append("")
+    lines.append("Want to create your own custom blocks? Check out our guides:")
+    lines.append("")
    lines.append(
-        "    - [Build your own Blocks](https://docs.agpt.co/platform/new_blocks/) - Step-by-step tutorial with examples"
+        "* [Build your own Blocks](https://docs.agpt.co/platform/new_blocks/) - Step-by-step tutorial with examples"
    )
    lines.append(
-        "    - [Block SDK Guide](https://docs.agpt.co/platform/block-sdk-guide/) - Advanced SDK patterns with OAuth, webhooks, and provider configuration"
+        "* [Block SDK Guide](https://docs.agpt.co/platform/block-sdk-guide/) - Advanced SDK patterns with OAuth, webhooks, and provider configuration"
    )
+    lines.append("{% endhint %}")
    lines.append("")
    lines.append(
        "Below is a comprehensive list of all available blocks, categorized by their primary function. Click on any block name to view its detailed documentation."
@@ -537,7 +575,8 @@ def generate_overview_table(blocks: list[BlockDoc]) -> str:
                    else "No description"
                )
                short_desc = short_desc.replace("\n", " ").replace("|", "\\|")
-                lines.append(f"| [{block.name}]({file_path}#{anchor}) | {short_desc} |")
+                link_path = f"{block_dir_prefix}{file_path}"
+                lines.append(f"| [{block.name}]({link_path}#{anchor}) | {short_desc} |")
            lines.append("")
            continue

@@ -563,13 +602,55 @@ def generate_overview_table(blocks: list[BlockDoc]) -> str:
            )
            short_desc = short_desc.replace("\n", " ").replace("|", "\\|")

-            lines.append(f"| [{block.name}]({file_path}#{anchor}) | {short_desc} |")
+            link_path = f"{block_dir_prefix}{file_path}"
+            lines.append(f"| [{block.name}]({link_path}#{anchor}) | {short_desc} |")

        lines.append("")

    return "\n".join(lines)


+def generate_summary_md(
+    blocks: list[BlockDoc], root_dir: Path, block_dir_prefix: str = ""
+) -> str:
+    """Generate SUMMARY.md for GitBook navigation.
+
+    Args:
+        blocks: List of block documentation objects
+        root_dir: The root docs directory (e.g., docs/integrations/)
+        block_dir_prefix: Prefix for block file links (e.g., "block-integrations/")
+    """
+    lines = []
+    lines.append("# Table of contents")
+    lines.append("")
+    lines.append("* [AutoGPT Blocks Overview](README.md)")
+    lines.append("")
+
+    # Check for guides/ directory at the root level (docs/integrations/guides/)
+    guides_dir = root_dir / "guides"
+    if guides_dir.exists():
+        lines.append("## Guides")
+        lines.append("")
+        for guide_file in sorted(guides_dir.glob("*.md")):
+            # Use just the file name for title (replace hyphens/underscores with spaces)
+            title = file_path_to_title(guide_file.stem.replace("-", "_") + ".md")
+            lines.append(f"* [{title}](guides/{guide_file.name})")
+        lines.append("")
+
+    lines.append("## Block Integrations")
+    lines.append("")
+
+    file_mapping = get_block_file_mapping(blocks)
+    for file_path in sorted(file_mapping.keys()):
+        title = file_path_to_title(file_path)
+        link_path = f"{block_dir_prefix}{file_path}"
+        lines.append(f"* [{title}]({link_path})")
+
+    lines.append("")
+
+    return "\n".join(lines)
+
+
 def load_all_blocks_for_docs() -> list[BlockDoc]:
    """Load all blocks and extract documentation."""
    from backend.blocks import load_all_blocks
@@ -653,6 +734,16 @@ def write_block_docs(
                )
            )

+        # Add file-level additional_content section if present
+        file_additional = extract_manual_content(existing_content).get(
+            "additional_content", ""
+        )
+        if file_additional:
+            content_parts.append("<!-- MANUAL: additional_content -->")
+            content_parts.append(file_additional)
+            content_parts.append("<!-- END MANUAL -->")
+            content_parts.append("")
+
        full_content = file_header + "\n" + "\n".join(content_parts)
        generated_files[str(file_path)] = full_content

@@ -661,14 +752,28 @@ def write_block_docs(

        full_path.write_text(full_content)

-    # Generate overview file
-    overview_content = generate_overview_table(blocks)
-    overview_path = output_dir / "README.md"
+    # Generate overview file at the parent directory (docs/integrations/)
+    # with links prefixed to point into block-integrations/
+    root_dir = output_dir.parent
+    block_dir_name = output_dir.name  # "block-integrations"
+    block_dir_prefix = f"{block_dir_name}/"
+
+    overview_content = generate_overview_table(blocks, block_dir_prefix)
+    overview_path = root_dir / "README.md"
    generated_files["README.md"] = overview_content
    overview_path.write_text(overview_content)

    if verbose:
-        print("  Writing README.md (overview)")
+        print("  Writing README.md (overview) to parent directory")
+
+    # Generate SUMMARY.md for GitBook navigation at the parent directory
+    summary_content = generate_summary_md(blocks, root_dir, block_dir_prefix)
+    summary_path = root_dir / "SUMMARY.md"
+    generated_files["SUMMARY.md"] = summary_content
+    summary_path.write_text(summary_content)
+
+    if verbose:
+        print("  Writing SUMMARY.md (navigation) to parent directory")

    return generated_files

@@ -748,6 +853,16 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
            elif block_match.group(1).strip() != expected_block_content.strip():
                mismatched_blocks.append(block.name)

+        # Add file-level additional_content to expected content (matches write_block_docs)
+        file_additional = extract_manual_content(existing_content).get(
+            "additional_content", ""
+        )
+        if file_additional:
+            content_parts.append("<!-- MANUAL: additional_content -->")
+            content_parts.append(file_additional)
+            content_parts.append("<!-- END MANUAL -->")
+            content_parts.append("")
+
        expected_content = file_header + "\n" + "\n".join(content_parts)

        if existing_content.strip() != expected_content.strip():
@@ -757,11 +872,15 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
            out_of_sync_details.append((file_path, mismatched_blocks))
            all_match = False

-    # Check overview
-    overview_path = output_dir / "README.md"
+    # Check overview at the parent directory (docs/integrations/)
+    root_dir = output_dir.parent
+    block_dir_name = output_dir.name  # "block-integrations"
+    block_dir_prefix = f"{block_dir_name}/"
+
+    overview_path = root_dir / "README.md"
    if overview_path.exists():
        existing_overview = overview_path.read_text()
-        expected_overview = generate_overview_table(blocks)
+        expected_overview = generate_overview_table(blocks, block_dir_prefix)
        if existing_overview.strip() != expected_overview.strip():
            print("OUT OF SYNC: README.md (overview)")
            print("  The blocks overview table needs regeneration")
@@ -772,6 +891,21 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
        out_of_sync_details.append(("README.md", ["overview table"]))
        all_match = False

+    # Check SUMMARY.md at the parent directory
+    summary_path = root_dir / "SUMMARY.md"
+    if summary_path.exists():
+        existing_summary = summary_path.read_text()
+        expected_summary = generate_summary_md(blocks, root_dir, block_dir_prefix)
+        if existing_summary.strip() != expected_summary.strip():
+            print("OUT OF SYNC: SUMMARY.md (navigation)")
+            print("  The GitBook navigation needs regeneration")
+            out_of_sync_details.append(("SUMMARY.md", ["navigation"]))
+            all_match = False
+    else:
+        print("MISSING: SUMMARY.md (navigation)")
+        out_of_sync_details.append(("SUMMARY.md", ["navigation"]))
+        all_match = False
+
    # Check for unfilled manual sections
    unfilled_patterns = [
        "_Add a description of this category of blocks._",
--- a/autogpt_platform/backend/snapshots/grph_single
+++ b/autogpt_platform/backend/snapshots/grph_single
@@ -11,6 +11,7 @@
  "forked_from_version": null,
  "has_external_trigger": false,
  "has_human_in_the_loop": false,
+  "has_sensitive_action": false,
  "id": "graph-123",
  "input_schema": {
    "properties": {},
--- a/autogpt_platform/backend/snapshots/grphs_all
+++ b/autogpt_platform/backend/snapshots/grphs_all
@@ -11,6 +11,7 @@
    "forked_from_version": null,
    "has_external_trigger": false,
    "has_human_in_the_loop": false,
+    "has_sensitive_action": false,
    "id": "graph-123",
    "input_schema": {
      "properties": {},
--- a/autogpt_platform/backend/snapshots/lib_agts_search
+++ b/autogpt_platform/backend/snapshots/lib_agts_search
@@ -27,6 +27,8 @@
        "properties": {}
      },
      "has_external_trigger": false,
+      "has_human_in_the_loop": false,
+      "has_sensitive_action": false,
      "trigger_setup_info": null,
      "new_output": false,
      "can_access_graph": true,
@@ -34,7 +36,8 @@
      "is_favorite": false,
      "recommended_schedule_cron": null,
      "settings": {
-        "human_in_the_loop_safe_mode": null
+        "human_in_the_loop_safe_mode": true,
+        "sensitive_action_safe_mode": false
      },
      "marketplace_listing": null
    },
@@ -65,6 +68,8 @@
        "properties": {}
      },
      "has_external_trigger": false,
+      "has_human_in_the_loop": false,
+      "has_sensitive_action": false,
      "trigger_setup_info": null,
      "new_output": false,
      "can_access_graph": false,
@@ -72,7 +77,8 @@
      "is_favorite": false,
      "recommended_schedule_cron": null,
      "settings": {
-        "human_in_the_loop_safe_mode": null
+        "human_in_the_loop_safe_mode": true,
+        "sensitive_action_safe_mode": false
      },
      "marketplace_listing": null
    }
--- a/autogpt_platform/backend/test/agent_generator/init.py
+++ b/autogpt_platform/backend/test/agent_generator/init.py
@@ -0,0 +1 @@
+"""Tests for agent generator module."""
--- a/autogpt_platform/backend/test/agent_generator/test_core_integration.py
+++ b/autogpt_platform/backend/test/agent_generator/test_core_integration.py
@@ -0,0 +1,273 @@
+"""
+Tests for the Agent Generator core module.
+
+This test suite verifies that the core functions correctly delegate to
+the external Agent Generator service.
+"""
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.agent_generator import core
+from backend.api.features.chat.tools.agent_generator.core import (
+    AgentGeneratorNotConfiguredError,
+)
+
+
+class TestServiceNotConfigured:
+    """Test that functions raise AgentGeneratorNotConfiguredError when service is not configured."""
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_raises_when_not_configured(self):
+        """Test that decompose_goal raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.decompose_goal("Build a chatbot")
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_raises_when_not_configured(self):
+        """Test that generate_agent raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.generate_agent({"steps": []})
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_patch_raises_when_not_configured(self):
+        """Test that generate_agent_patch raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.generate_agent_patch("Add a node", {"nodes": []})
+
+
+class TestDecomposeGoal:
+    """Test decompose_goal function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that decompose_goal calls the external service."""
+        expected_result = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            result = await core.decompose_goal("Build a chatbot")
+
+            mock_external.assert_called_once_with("Build a chatbot", "")
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_passes_context_to_external_service(self):
+        """Test that decompose_goal passes context to external service."""
+        expected_result = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            await core.decompose_goal("Build a chatbot", "Use Python")
+
+            mock_external.assert_called_once_with("Build a chatbot", "Use Python")
+
+    @pytest.mark.asyncio
+    async def test_returns_none_on_service_failure(self):
+        """Test that decompose_goal returns None when external service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.decompose_goal("Build a chatbot")
+
+            assert result is None
+
+
+class TestGenerateAgent:
+    """Test generate_agent function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that generate_agent calls the external service."""
+        expected_result = {"name": "Test Agent", "nodes": [], "links": []}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            instructions = {"type": "instructions", "steps": ["Step 1"]}
+            result = await core.generate_agent(instructions)
+
+            mock_external.assert_called_once_with(instructions)
+            # Result should have id, version, is_active added if not present
+            assert result is not None
+            assert result["name"] == "Test Agent"
+            assert "id" in result
+            assert result["version"] == 1
+            assert result["is_active"] is True
+
+    @pytest.mark.asyncio
+    async def test_preserves_existing_id_and_version(self):
+        """Test that external service result preserves existing id and version."""
+        expected_result = {
+            "id": "existing-id",
+            "version": 3,
+            "is_active": False,
+            "name": "Test Agent",
+        }
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result.copy()
+
+            result = await core.generate_agent({"steps": []})
+
+            assert result is not None
+            assert result["id"] == "existing-id"
+            assert result["version"] == 3
+            assert result["is_active"] is False
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_external_service_fails(self):
+        """Test that generate_agent returns None when external service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.generate_agent({"steps": []})
+
+            assert result is None
+
+
+class TestGenerateAgentPatch:
+    """Test generate_agent_patch function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that generate_agent_patch calls the external service."""
+        expected_result = {"name": "Updated Agent", "nodes": [], "links": []}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            current_agent = {"nodes": [], "links": []}
+            result = await core.generate_agent_patch("Add a node", current_agent)
+
+            mock_external.assert_called_once_with("Add a node", current_agent)
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_returns_clarifying_questions(self):
+        """Test that generate_agent_patch returns clarifying questions."""
+        expected_result = {
+            "type": "clarifying_questions",
+            "questions": [{"question": "What type of node?"}],
+        }
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            result = await core.generate_agent_patch("Add a node", {"nodes": []})
+
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_external_service_fails(self):
+        """Test that generate_agent_patch returns None when service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.generate_agent_patch("Add a node", {"nodes": []})
+
+            assert result is None
+
+
+class TestJsonToGraph:
+    """Test json_to_graph function."""
+
+    def test_converts_agent_json_to_graph(self):
+        """Test conversion of agent JSON to Graph model."""
+        agent_json = {
+            "id": "test-id",
+            "version": 2,
+            "is_active": True,
+            "name": "Test Agent",
+            "description": "A test agent",
+            "nodes": [
+                {
+                    "id": "node1",
+                    "block_id": "block1",
+                    "input_default": {"key": "value"},
+                    "metadata": {"x": 100},
+                }
+            ],
+            "links": [
+                {
+                    "id": "link1",
+                    "source_id": "node1",
+                    "sink_id": "output",
+                    "source_name": "result",
+                    "sink_name": "input",
+                    "is_static": False,
+                }
+            ],
+        }
+
+        graph = core.json_to_graph(agent_json)
+
+        assert graph.id == "test-id"
+        assert graph.version == 2
+        assert graph.is_active is True
+        assert graph.name == "Test Agent"
+        assert graph.description == "A test agent"
+        assert len(graph.nodes) == 1
+        assert graph.nodes[0].id == "node1"
+        assert graph.nodes[0].block_id == "block1"
+        assert len(graph.links) == 1
+        assert graph.links[0].source_id == "node1"
+
+    def test_generates_ids_if_missing(self):
+        """Test that missing IDs are generated."""
+        agent_json = {
+            "name": "Test Agent",
+            "nodes": [{"block_id": "block1"}],
+            "links": [],
+        }
+
+        graph = core.json_to_graph(agent_json)
+
+        assert graph.id is not None
+        assert graph.nodes[0].id is not None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/autogpt_platform/backend/test/agent_generator/test_service.py
+++ b/autogpt_platform/backend/test/agent_generator/test_service.py
@@ -0,0 +1,422 @@
+"""
+Tests for the Agent Generator external service client.
+
+This test suite verifies the external Agent Generator service integration,
+including service detection, API calls, and error handling.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from backend.api.features.chat.tools.agent_generator import service
+
+
+class TestServiceConfiguration:
+    """Test service configuration detection."""
+
+    def setup_method(self):
+        """Reset settings singleton before each test."""
+        service._settings = None
+        service._client = None
+
+    def test_external_service_not_configured_when_host_empty(self):
+        """Test that external service is not configured when host is empty."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = ""
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            assert service.is_external_service_configured() is False
+
+    def test_external_service_configured_when_host_set(self):
+        """Test that external service is configured when host is set."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = "agent-generator.local"
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            assert service.is_external_service_configured() is True
+
+    def test_get_base_url(self):
+        """Test base URL construction."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = "agent-generator.local"
+        mock_settings.config.agentgenerator_port = 8000
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            url = service._get_base_url()
+            assert url == "http://agent-generator.local:8000"
+
+
+class TestDecomposeGoalExternal:
+    """Test decompose_goal_external function."""
+
+    def setup_method(self):
+        """Reset client singleton before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_instructions(self):
+        """Test successful decomposition returning instructions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "instructions",
+            "steps": ["Step 1", "Step 2"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result == {"type": "instructions", "steps": ["Step 1", "Step 2"]}
+        mock_client.post.assert_called_once_with(
+            "/api/decompose-description", json={"description": "Build a chatbot"}
+        )
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_clarifying_questions(self):
+        """Test decomposition returning clarifying questions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "clarifying_questions",
+            "questions": ["What platform?", "What language?"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build something")
+
+        assert result == {
+            "type": "clarifying_questions",
+            "questions": ["What platform?", "What language?"],
+        }
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_with_context(self):
+        """Test decomposition with additional context."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "instructions",
+            "steps": ["Step 1"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            await service.decompose_goal_external(
+                "Build a chatbot", context="Use Python"
+            )
+
+        mock_client.post.assert_called_once_with(
+            "/api/decompose-description",
+            json={"description": "Build a chatbot", "user_instruction": "Use Python"},
+        )
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_unachievable_goal(self):
+        """Test decomposition returning unachievable goal response."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "unachievable_goal",
+            "reason": "Cannot do X",
+            "suggested_goal": "Try Y instead",
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Do something impossible")
+
+        assert result == {
+            "type": "unachievable_goal",
+            "reason": "Cannot do X",
+            "suggested_goal": "Try Y instead",
+        }
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_http_error(self):
+        """Test decomposition handles HTTP errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.HTTPStatusError(
+            "Server error", request=MagicMock(), response=MagicMock()
+        )
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_request_error(self):
+        """Test decomposition handles request errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_service_error(self):
+        """Test decomposition handles service returning error."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": False,
+            "error": "Internal error",
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+
+class TestGenerateAgentExternal:
+    """Test generate_agent_external function."""
+
+    def setup_method(self):
+        """Reset client singleton before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_success(self):
+        """Test successful agent generation."""
+        agent_json = {
+            "name": "Test Agent",
+            "nodes": [],
+            "links": [],
+        }
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "agent_json": agent_json,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        instructions = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_external(instructions)
+
+        assert result == agent_json
+        mock_client.post.assert_called_once_with(
+            "/api/generate-agent", json={"instructions": instructions}
+        )
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_handles_error(self):
+        """Test agent generation handles errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_external({"steps": []})
+
+        assert result is None
+
+
+class TestGenerateAgentPatchExternal:
+    """Test generate_agent_patch_external function."""
+
+    def setup_method(self):
+        """Reset client singleton before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_generate_patch_returns_updated_agent(self):
+        """Test successful patch generation returning updated agent."""
+        updated_agent = {
+            "name": "Updated Agent",
+            "nodes": [{"id": "1", "block_id": "test"}],
+            "links": [],
+        }
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "agent_json": updated_agent,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        current_agent = {"name": "Old Agent", "nodes": [], "links": []}
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_patch_external(
+                "Add a new node", current_agent
+            )
+
+        assert result == updated_agent
+        mock_client.post.assert_called_once_with(
+            "/api/update-agent",
+            json={
+                "update_request": "Add a new node",
+                "current_agent_json": current_agent,
+            },
+        )
+
+    @pytest.mark.asyncio
+    async def test_generate_patch_returns_clarifying_questions(self):
+        """Test patch generation returning clarifying questions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "clarifying_questions",
+            "questions": ["What type of node?"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_patch_external(
+                "Add something", {"nodes": []}
+            )
+
+        assert result == {
+            "type": "clarifying_questions",
+            "questions": ["What type of node?"],
+        }
+
+
+class TestHealthCheck:
+    """Test health_check function."""
+
+    def setup_method(self):
+        """Reset singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_when_not_configured(self):
+        """Test health check returns False when service not configured."""
+        with patch.object(
+            service, "is_external_service_configured", return_value=False
+        ):
+            result = await service.health_check()
+            assert result is False
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_true_when_healthy(self):
+        """Test health check returns True when service is healthy."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "status": "healthy",
+            "blocks_loaded": True,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is True
+        mock_client.get.assert_called_once_with("/health")
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_when_not_healthy(self):
+        """Test health check returns False when service is not healthy."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "status": "unhealthy",
+            "blocks_loaded": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_on_error(self):
+        """Test health check returns False on connection error."""
+        mock_client = AsyncMock()
+        mock_client.get.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is False
+
+
+class TestGetBlocksExternal:
+    """Test get_blocks_external function."""
+
+    def setup_method(self):
+        """Reset client singleton before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_get_blocks_success(self):
+        """Test successful blocks retrieval."""
+        blocks = [
+            {"id": "block1", "name": "Block 1"},
+            {"id": "block2", "name": "Block 2"},
+        ]
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "blocks": blocks,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.get_blocks_external()
+
+        assert result == blocks
+        mock_client.get.assert_called_once_with("/api/blocks")
+
+    @pytest.mark.asyncio
+    async def test_get_blocks_handles_error(self):
+        """Test blocks retrieval handles errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.get.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.get_blocks_external()
+
+        assert result is None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/autogpt_platform/frontend/.env.default
+++ b/autogpt_platform/frontend/.env.default
@@ -29,4 +29,4 @@ NEXT_PUBLIC_CLOUDFLARE_TURNSTILE_SITE_KEY=
 NEXT_PUBLIC_TURNSTILE=disabled

 # PR previews
-NEXT_PUBLIC_PREVIEW_STEALING_DEV=
+NEXT_PUBLIC_PREVIEW_STEALING_DEV=
--- a/autogpt_platform/frontend/CONTRIBUTING.md
+++ b/autogpt_platform/frontend/CONTRIBUTING.md
@@ -175,6 +175,8 @@ While server components and actions are cool and cutting-edge, they introduce a

 - Prefer [React Query](https://tanstack.com/query/latest/docs/framework/react/overview) for server state, colocated near consumers (see [state colocation](https://kentcdodds.com/blog/state-colocation-will-make-your-react-app-faster))
 - Co-locate UI state inside components/hooks; keep global state minimal
+- Avoid `useMemo` and `useCallback` unless you have a measured performance issue
+- Do not abuse `useEffect`; prefer state colocation and derive values directly when possible

 ### Styling and components

@@ -549,9 +551,48 @@ Files:
 Types:

 - Prefer `interface` for object shapes
- Component props should be `interface Props { ... }`
+- Component props should be `interface Props { ... }` (not exported)
+- Only use specific exported names (e.g., `export interface MyComponentProps`) when the interface needs to be used outside the component
+- Keep type definitions inline with the component - do not create separate `types.ts` files unless types are shared across multiple files
 - Use precise types; avoid `any` and unsafe casts

+**Props naming examples:**
+
+```tsx
+// ✅ Good - internal props, not exported
+interface Props {
+  title: string;
+  onClose: () => void;
+}
+
+export function Modal({ title, onClose }: Props) {
+  // ...
+}
+
+// ✅ Good - exported when needed externally
+export interface ModalProps {
+  title: string;
+  onClose: () => void;
+}
+
+export function Modal({ title, onClose }: ModalProps) {
+  // ...
+}
+
+// ❌ Bad - unnecessarily specific name for internal use
+interface ModalComponentProps {
+  title: string;
+  onClose: () => void;
+}
+
+// ❌ Bad - separate types.ts file for single component
+// types.ts
+export interface ModalProps { ... }
+
+// Modal.tsx
+import type { ModalProps } from './types';
+```
+
 Parameters:

 - If more than one parameter is needed, pass a single `Args` object for clarity
--- a/autogpt_platform/frontend/orval.config.ts
+++ b/autogpt_platform/frontend/orval.config.ts
@@ -16,6 +16,12 @@ export default defineConfig({
      client: "react-query",
      httpClient: "fetch",
      indexFiles: false,
+      mock: {
+        type: "msw",
+        baseUrl: "http://localhost:3000/api/proxy",
+        generateEachHttpStatus: true,
+        delay: 0,
+      },
      override: {
        mutator: {
          path: "./mutators/custom-mutator.ts",
--- a/autogpt_platform/frontend/package.json
+++ b/autogpt_platform/frontend/package.json
@@ -15,6 +15,8 @@
    "types": "tsc --noEmit",
    "test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test",
    "test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test --ui",
+    "test:unit": "vitest run",
+    "test:unit:watch": "vitest",
    "test:no-build": "playwright test",
    "gentests": "playwright codegen http://localhost:3000",
    "storybook": "storybook dev -p 6006",
@@ -118,6 +120,7 @@
  },
  "devDependencies": {
    "@chromatic-com/storybook": "4.1.2",
+    "happy-dom": "20.3.4",
    "@opentelemetry/instrumentation": "0.209.0",
    "@playwright/test": "1.56.1",
    "@storybook/addon-a11y": "9.1.5",
@@ -127,6 +130,8 @@
    "@storybook/nextjs": "9.1.5",
    "@tanstack/eslint-plugin-query": "5.91.2",
    "@tanstack/react-query-devtools": "5.90.2",
+    "@testing-library/dom": "10.4.1",
+    "@testing-library/react": "16.3.2",
    "@types/canvas-confetti": "1.9.0",
    "@types/lodash": "4.17.20",
    "@types/negotiator": "0.6.4",
@@ -135,6 +140,7 @@
    "@types/react-dom": "18.3.5",
    "@types/react-modal": "3.16.3",
    "@types/react-window": "1.8.8",
+    "@vitejs/plugin-react": "5.1.2",
    "axe-playwright": "2.2.2",
    "chromatic": "13.3.3",
    "concurrently": "9.2.1",
@@ -153,7 +159,9 @@
    "require-in-the-middle": "8.0.1",
    "storybook": "9.1.5",
    "tailwindcss": "3.4.17",
-    "typescript": "5.9.3"
+    "typescript": "5.9.3",
+    "vite-tsconfig-paths": "6.0.4",
+    "vitest": "4.0.17"
  },
  "msw": {
    "workerDirectory": [
--- a/autogpt_platform/frontend/pnpm-lock.yaml
+++ b/autogpt_platform/frontend/pnpm-lock.yaml
--- a/autogpt_platform/frontend/public/integrations/amazon.png
+++ b/autogpt_platform/frontend/public/integrations/amazon.png
--- a/autogpt_platform/frontend/public/integrations/anthropic-color.png
+++ b/autogpt_platform/frontend/public/integrations/anthropic-color.png
--- a/autogpt_platform/frontend/public/integrations/cohere.png
+++ b/autogpt_platform/frontend/public/integrations/cohere.png
--- a/autogpt_platform/frontend/public/integrations/deepseek.png
+++ b/autogpt_platform/frontend/public/integrations/deepseek.png
--- a/autogpt_platform/frontend/public/integrations/gemini.png
+++ b/autogpt_platform/frontend/public/integrations/gemini.png
--- a/autogpt_platform/frontend/public/integrations/gryphe.png
+++ b/autogpt_platform/frontend/public/integrations/gryphe.png
--- a/autogpt_platform/frontend/public/integrations/microsoft.webp
+++ b/autogpt_platform/frontend/public/integrations/microsoft.webp
--- a/autogpt_platform/frontend/public/integrations/mistral.png
+++ b/autogpt_platform/frontend/public/integrations/mistral.png
--- a/autogpt_platform/frontend/public/integrations/moonshot.png
+++ b/autogpt_platform/frontend/public/integrations/moonshot.png
--- a/autogpt_platform/frontend/public/integrations/nousresearch.avif
+++ b/autogpt_platform/frontend/public/integrations/nousresearch.avif
--- a/autogpt_platform/frontend/public/integrations/perplexity.webp
+++ b/autogpt_platform/frontend/public/integrations/perplexity.webp
--- a/autogpt_platform/frontend/public/integrations/qwen.png
+++ b/autogpt_platform/frontend/public/integrations/qwen.png
--- a/autogpt_platform/frontend/public/integrations/xai.webp
+++ b/autogpt_platform/frontend/public/integrations/xai.webp
--- a/autogpt_platform/frontend/src/app/(no-navbar)/logout/page.tsx
+++ b/autogpt_platform/frontend/src/app/(no-navbar)/logout/page.tsx
@@ -0,0 +1,58 @@
+"use client";
+
+import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
+import { Text } from "@/components/atoms/Text/Text";
+import { useToast } from "@/components/molecules/Toast/use-toast";
+import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
+import { useRouter } from "next/navigation";
+import { useEffect, useRef } from "react";
+
+const LOGOUT_REDIRECT_DELAY_MS = 400;
+
+function wait(ms: number): Promise<void> {
+  return new Promise(function resolveAfterDelay(resolve) {
+    setTimeout(resolve, ms);
+  });
+}
+
+export default function LogoutPage() {
+  const { logOut } = useSupabase();
+  const { toast } = useToast();
+  const router = useRouter();
+  const hasStartedRef = useRef(false);
+
+  useEffect(
+    function handleLogoutEffect() {
+      if (hasStartedRef.current) return;
+      hasStartedRef.current = true;
+
+      async function runLogout() {
+        try {
+          await logOut();
+        } catch {
+          toast({
+            title: "Failed to log out. Redirecting to login.",
+            variant: "destructive",
+          });
+        } finally {
+          await wait(LOGOUT_REDIRECT_DELAY_MS);
+          router.replace("/login");
+        }
+      }
+
+      void runLogout();
+    },
+    [logOut, router, toast],
+  );
+
+  return (
+    <div className="flex min-h-screen items-center justify-center px-4">
+      <div className="flex flex-col items-center justify-center gap-4 py-8">
+        <LoadingSpinner size="large" />
+        <Text variant="body" className="text-center">
+          Logging you out...
+        </Text>
+      </div>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/auth/callback/route.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/auth/callback/route.ts
@@ -9,7 +9,7 @@ export async function GET(request: Request) {
  const { searchParams, origin } = new URL(request.url);
  const code = searchParams.get("code");

-  let next = "/marketplace";
+  let next = "/";

  if (code) {
    const supabase = await getServerSupabase();
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/AgentOutputs/AgentOutputs.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/AgentOutputs/AgentOutputs.tsx
@@ -38,8 +38,12 @@ export const AgentOutputs = ({ flowID }: { flowID: string | null }) => {

    return outputNodes
      .map((node) => {
-        const executionResult = node.data.nodeExecutionResult;
-        const outputData = executionResult?.output_data?.output;
+        const executionResults = node.data.nodeExecutionResults || [];
+        const latestResult =
+          executionResults.length > 0
+            ? executionResults[executionResults.length - 1]
+            : undefined;
+        const outputData = latestResult?.output_data?.output;

        const renderer = globalRegistry.getRenderer(outputData);

--- a/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/RunGraph/RunGraph.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/RunGraph/RunGraph.tsx
@@ -5,10 +5,11 @@ import {
  TooltipContent,
  TooltipTrigger,
 } from "@/components/atoms/Tooltip/BaseTooltip";
-import { PlayIcon, StopIcon } from "@phosphor-icons/react";
+import { CircleNotchIcon, PlayIcon, StopIcon } from "@phosphor-icons/react";
 import { useShallow } from "zustand/react/shallow";
 import { RunInputDialog } from "../RunInputDialog/RunInputDialog";
 import { useRunGraph } from "./useRunGraph";
+import { cn } from "@/lib/utils";

 export const RunGraph = ({ flowID }: { flowID: string | null }) => {
  const {
@@ -24,6 +25,31 @@ export const RunGraph = ({ flowID }: { flowID: string | null }) => {
    useShallow((state) => state.isGraphRunning),
  );

+  const isLoading = isExecutingGraph || isTerminatingGraph || isSaving;
+
+  // Determine which icon to show with proper animation
+  const renderIcon = () => {
+    const iconClass = cn(
+      "size-4 transition-transform duration-200 ease-out",
+      !isLoading && "group-hover:scale-110",
+    );
+
+    if (isLoading) {
+      return (
+        <CircleNotchIcon
+          className={cn(iconClass, "animate-spin")}
+          weight="bold"
+        />
+      );
+    }
+
+    if (isGraphRunning) {
+      return <StopIcon className={iconClass} weight="fill" />;
+    }
+
+    return <PlayIcon className={iconClass} weight="fill" />;
+  };
+
  return (
    <>
      <Tooltip>
@@ -33,18 +59,18 @@ export const RunGraph = ({ flowID }: { flowID: string | null }) => {
            variant={isGraphRunning ? "destructive" : "primary"}
            data-id={isGraphRunning ? "stop-graph-button" : "run-graph-button"}
            onClick={isGraphRunning ? handleStopGraph : handleRunGraph}
-            disabled={!flowID || isExecutingGraph || isTerminatingGraph}
-            loading={isExecutingGraph || isTerminatingGraph || isSaving}
+            disabled={!flowID || isLoading}
+            className="group"
          >
-            {!isGraphRunning ? (
-              <PlayIcon className="size-4" />
-            ) : (
-              <StopIcon className="size-4" />
-            )}
+            {renderIcon()}
          </Button>
        </TooltipTrigger>
        <TooltipContent>
-          {isGraphRunning ? "Stop agent" : "Run agent"}
+          {isLoading
+            ? "Processing..."
+            : isGraphRunning
+              ? "Stop agent"
+              : "Run agent"}
        </TooltipContent>
      </Tooltip>
      <RunInputDialog
--- a/Show More
+++ b/Show More