Mirror of https://github.com/Significant-Gravitas/AutoGPT.git
Synced 2026-01-19 20:18:22 -05:00

Compare commits: make-old-w...add-llm-ma (96 commits)
| SHA1 |
|---|
| 54084fe597 |
| 8f5d851908 |
| 358a21c6fc |
| 336fc43b24 |
| cfb1613877 |
| 386eea741c |
| e5c6809d9c |
| 963b8090cc |
| eab93aba2b |
| 47a70cdbd0 |
| 8b83bb8647 |
| e80e4d9cbb |
| 375d33cca9 |
| 3b1b2fe30c |
| af63b3678e |
| 631f1bd50a |
| 5ac941fe2f |
| b01ea3fcbd |
| 3b09a94e3f |
| 61efee4139 |
| e539280e98 |
| db8b43bb3d |
| 69c9136060 |
| 923d8baedc |
| a55b2e02dc |
| 6ed8bb4f14 |
| 6b6648b290 |
| c0a9c0410b |
| 17a77b02c7 |
| 701fce83ca |
| 78d89d0faf |
| f482eb668b |
| 4a52b7eca0 |
| 97847f59f7 |
| 22ca8955c5 |
| 43cbe2e011 |
| a318832414 |
| 843c487500 |
| 6cf28e58d3 |
| 47a3a5ef41 |
| ec00aa951a |
| 36fb1ea004 |
| a81ac150da |
| 49ee087496 |
| fc25e008b3 |
| b0855e8cf2 |
| 5e2146dd76 |
| 103a62c9da |
| 632ef24408 |
| 6dc767aafa |
| fc8434fb30 |
| 3ae08cd48e |
| 4db13837b9 |
| df87867625 |
| e503126170 |
| 7ee28197a3 |
| 818de26d24 |
| 23e37fd163 |
| 63869fe710 |
| 90ae75d475 |
| 9b6dc3be12 |
| 9b8b6252c5 |
| 0d321323f5 |
| 3ee3ea8f02 |
| 7a842d35ae |
| cb08def96c |
| 07e8568f57 |
| ac2daee5f8 |
| 266e0d79d4 |
| 01f443190e |
| 13a0caa5d8 |
| 664523a721 |
| 33b103d09b |
| bdba0033de |
| b87c64ce38 |
| 2e3fc99caa |
| 003affca43 |
| 290d0d9a9b |
| fba61c72ed |
| 79d45a15d0 |
| 66f0d97ca2 |
| 5894a8fcdf |
| dff8efa35d |
| e26822998f |
| 52c7b223df |
| 24d86fde30 |
| 4a7bc006a8 |
| df7be39724 |
| 8c7b1af409 |
| b6e2f05b63 |
| 7435739053 |
| a97fdba554 |
| ec705bbbcf |
| 7fe6b576ae |
| dfc42003a1 |
| 6bbeb22943 |
37  .branchlet.json  Normal file

@@ -0,0 +1,37 @@
{
  "worktreeCopyPatterns": [
    ".env*",
    ".vscode/**",
    ".auth/**",
    ".claude/**",
    "autogpt_platform/.env*",
    "autogpt_platform/backend/.env*",
    "autogpt_platform/frontend/.env*",
    "autogpt_platform/frontend/.auth/**",
    "autogpt_platform/db/docker/.env*"
  ],
  "worktreeCopyIgnores": [
    "**/node_modules/**",
    "**/dist/**",
    "**/.git/**",
    "**/Thumbs.db",
    "**/.DS_Store",
    "**/.next/**",
    "**/__pycache__/**",
    "**/.ruff_cache/**",
    "**/.pytest_cache/**",
    "**/*.pyc",
    "**/playwright-report/**",
    "**/logs/**",
    "**/site/**"
  ],
  "worktreePathTemplate": "$BASE_PATH.worktree",
  "postCreateCmd": [
    "cd autogpt_platform/autogpt_libs && poetry install",
    "cd autogpt_platform/backend && poetry install && poetry run prisma generate",
    "cd autogpt_platform/frontend && pnpm install",
    "cd docs && pip install -r requirements.txt"
  ],
  "terminalCommand": "code .",
  "deleteBranchWithWorktree": false
}
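The config above drives a git-worktree helper: the copy patterns are globs copied from the main checkout into each new worktree, the copy-ignores are globs excluded from that copy, and `postCreateCmd` runs once the worktree exists. A minimal Python sketch of that copy step, assuming plain glob-style matching (the real branchlet tool's semantics are an assumption here, and fnmatch only approximates `**`):

```python
import fnmatch
import shutil
from pathlib import Path


def copy_worktree_files(src_repo: Path, worktree: Path,
                        copy_patterns: list[str], ignores: list[str]) -> None:
    """Copy files matching copy_patterns from src_repo into worktree, skipping ignores."""
    for path in src_repo.rglob("*"):
        if not path.is_file():
            continue
        rel = path.relative_to(src_repo).as_posix()
        if not any(fnmatch.fnmatch(rel, pat) for pat in copy_patterns):
            continue  # not selected by worktreeCopyPatterns
        if any(fnmatch.fnmatch(rel, pat) for pat in ignores):
            continue  # excluded by worktreeCopyIgnores
        dest = worktree / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, dest)
```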
@@ -1,6 +1,9 @@
# Ignore everything by default, selectively add things to context
*

# Documentation (for embeddings/search)
!docs/

# Platform - Libs
!autogpt_platform/autogpt_libs/autogpt_libs/
!autogpt_platform/autogpt_libs/pyproject.toml
@@ -16,6 +19,7 @@
!autogpt_platform/backend/poetry.lock
!autogpt_platform/backend/README.md
!autogpt_platform/backend/.env
!autogpt_platform/backend/gen_prisma_types_stub.py

# Platform - Market
!autogpt_platform/market/market/
2  .github/workflows/classic-autogpt-ci.yml  vendored

@@ -29,7 +29,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12", "3.13", "3.14"]
        python-version: ["3.10"]
        platform-os: [ubuntu, macos, macos-arm64, windows]
    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
13  .github/workflows/classic-autogpts-ci.yml  vendored

@@ -11,6 +11,9 @@ on:
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
      - 'classic/benchmark/**'
      - 'classic/run'
      - 'classic/cli.py'
      - 'classic/setup.py'
      - '!**/*.md'
  pull_request:
    branches: [ master, dev, release-* ]
@@ -19,6 +22,9 @@ on:
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
      - 'classic/benchmark/**'
      - 'classic/run'
      - 'classic/cli.py'
      - 'classic/setup.py'
      - '!**/*.md'

defaults:
@@ -53,15 +59,10 @@ jobs:
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        working-directory: ./classic/${{ matrix.agent-name }}/
        run: poetry install

      - name: Run regression tests
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd ${{ matrix.agent-name }}
          poetry run serve &
          sleep 10 # Wait for server to start
          poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
          poetry run agbenchmark --test=WriteFile
        env:
11  .github/workflows/classic-benchmark-ci.yml  vendored

@@ -23,7 +23,7 @@ defaults:
    shell: bash

env:
  min-python-version: '3.12'
  min-python-version: '3.10'

jobs:
  test:
@@ -33,7 +33,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12", "3.13", "3.14"]
        python-version: ["3.10"]
        platform-os: [ubuntu, macos, macos-arm64, windows]
    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
    defaults:
@@ -128,16 +128,11 @@ jobs:
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install agent dependencies
        working-directory: classic/${{ matrix.agent-name }}
        run: poetry install

      - name: Run regression tests
        working-directory: classic
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd ${{ matrix.agent-name }}
          poetry run python -m forge &
          sleep 10 # Wait for server to start

          set +e # Ignore non-zero exit codes and continue execution
          echo "Running the following command: poetry run agbenchmark --maintain --mock"
2  .github/workflows/classic-forge-ci.yml  vendored

@@ -31,7 +31,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12", "3.13", "3.14"]
        python-version: ["3.10"]
        platform-os: [ubuntu, macos, macos-arm64, windows]
    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
60  .github/workflows/classic-frontend-ci.yml  vendored  Normal file

@@ -0,0 +1,60 @@
name: Classic - Frontend CI/CD

on:
  push:
    branches:
      - master
      - dev
      - 'ci-test*' # This will match any branch that starts with "ci-test"
    paths:
      - 'classic/frontend/**'
      - '.github/workflows/classic-frontend-ci.yml'
  pull_request:
    paths:
      - 'classic/frontend/**'
      - '.github/workflows/classic-frontend-ci.yml'

jobs:
  build:
    permissions:
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    env:
      BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Setup Flutter
        uses: subosito/flutter-action@v2
        with:
          flutter-version: '3.13.2'

      - name: Build Flutter to Web
        run: |
          cd classic/frontend
          flutter build web --base-href /app/

      # - name: Commit and Push to ${{ env.BUILD_BRANCH }}
      #   if: github.event_name == 'push'
      #   run: |
      #     git config --local user.email "action@github.com"
      #     git config --local user.name "GitHub Action"
      #     git add classic/frontend/build/web
      #     git checkout -B ${{ env.BUILD_BRANCH }}
      #     git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
      #     git push -f origin ${{ env.BUILD_BRANCH }}

      - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
        if: github.event_name == 'push'
        uses: peter-evans/create-pull-request@v7
        with:
          add-paths: classic/frontend/build/web
          base: ${{ github.ref_name }}
          branch: ${{ env.BUILD_BRANCH }}
          delete-branch: true
          title: "Update frontend build in `${{ github.ref_name }}`"
          body: "This PR updates the frontend build based on commit ${{ github.sha }}."
          commit-message: "Update frontend build based on commit ${{ github.sha }}"
4  .github/workflows/classic-python-checks.yml  vendored

@@ -59,7 +59,7 @@ jobs:
    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
      min-python-version: "3.12"
      min-python-version: "3.10"

    strategy:
      matrix:
@@ -111,7 +111,7 @@ jobs:
    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
      min-python-version: "3.12"
      min-python-version: "3.10"

    strategy:
      matrix:
2  .github/workflows/claude-dependabot.yml  vendored

@@ -74,7 +74,7 @@ jobs:

      - name: Generate Prisma Client
        working-directory: autogpt_platform/backend
        run: poetry run prisma generate
        run: poetry run prisma generate && poetry run gen-prisma-stub

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
2  .github/workflows/claude.yml  vendored

@@ -90,7 +90,7 @@ jobs:

      - name: Generate Prisma Client
        working-directory: autogpt_platform/backend
        run: poetry run prisma generate
        run: poetry run prisma generate && poetry run gen-prisma-stub

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
12  .github/workflows/copilot-setup-steps.yml  vendored

@@ -72,7 +72,7 @@ jobs:

      - name: Generate Prisma Client
        working-directory: autogpt_platform/backend
        run: poetry run prisma generate
        run: poetry run prisma generate && poetry run gen-prisma-stub

      # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
      - name: Set up Node.js
@@ -108,6 +108,16 @@ jobs:
      #   run: pnpm playwright install --with-deps chromium

      # Docker setup for development environment
      - name: Free up disk space
        run: |
          # Remove large unused tools to free disk space for Docker builds
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker system prune -af
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
4  .github/workflows/platform-backend-ci.yml  vendored

@@ -134,7 +134,7 @@ jobs:
        run: poetry install

      - name: Generate Prisma Client
        run: poetry run prisma generate
        run: poetry run prisma generate && poetry run gen-prisma-stub

      - id: supabase
        name: Start Supabase
@@ -176,7 +176,7 @@ jobs:
          }

      - name: Run Database Migrations
        run: poetry run prisma migrate dev --name updates
        run: poetry run prisma migrate deploy
        env:
          DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
          DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
25  .github/workflows/platform-frontend-ci.yml  vendored

@@ -11,6 +11,7 @@ on:
      - ".github/workflows/platform-frontend-ci.yml"
      - "autogpt_platform/frontend/**"
  merge_group:
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'merge_group' && format('merge-queue-{0}', github.ref) || format('{0}-{1}', github.ref, github.event.pull_request.number || github.sha) }}
@@ -151,6 +152,14 @@ jobs:
        run: |
          cp ../.env.default ../.env

      - name: Copy backend .env and set OpenAI API key
        run: |
          cp ../backend/.env.default ../backend/.env
          echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
        env:
          # Used by E2E test data script to generate embeddings for approved store agents
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

@@ -226,13 +235,25 @@ jobs:

      - name: Run Playwright tests
        run: pnpm test:no-build
        continue-on-error: false

      - name: Upload Playwright artifacts
        if: failure()
      - name: Upload Playwright report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: playwright-report
          path: playwright-report
          if-no-files-found: ignore
          retention-days: 3

      - name: Upload Playwright test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: playwright-test-results
          path: test-results
          if-no-files-found: ignore
          retention-days: 3

      - name: Print Final Docker Compose logs
        if: always()
3  .gitignore  vendored

@@ -3,7 +3,6 @@
classic/original_autogpt/keys.py
classic/original_autogpt/*.json
auto_gpt_workspace/*
.autogpt/
*.mpeg
.env
# Root .env files
@@ -178,5 +177,5 @@ autogpt_platform/backend/settings.py

*.ign.*
.test-contents
**/.claude/settings.local.json
.claude/settings.local.json
/autogpt_platform/backend/logs
@@ -6,12 +6,14 @@ start-core:

# Stop core services
stop-core:
	docker compose stop deps
	docker compose stop

reset-db:
	docker compose stop db
	rm -rf db/docker/volumes/db/data
	cd backend && poetry run prisma migrate deploy
	cd backend && poetry run prisma generate
	cd backend && poetry run gen-prisma-stub

# View logs for core services
logs-core:
@@ -33,6 +35,7 @@ init-env:
migrate:
	cd backend && poetry run prisma migrate deploy
	cd backend && poetry run prisma generate
	cd backend && poetry run gen-prisma-stub

run-backend:
	cd backend && poetry run app
@@ -58,4 +61,4 @@ help:
	@echo "  run-backend - Run the backend FastAPI server"
	@echo "  run-frontend - Run the frontend Next.js development server"
	@echo "  test-data - Run the test data creator"
	@echo "  load-store-agents - Load store agents from agents/ folder into test database"
	@echo "  load-store-agents - Load store agents from agents/ folder into test database"
@@ -58,6 +58,13 @@ V0_API_KEY=
OPEN_ROUTER_API_KEY=
NVIDIA_API_KEY=

# Langfuse Prompt Management
# Used for managing the CoPilot system prompt externally
# Get credentials from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_PUBLIC_KEY=
LANGFUSE_SECRET_KEY=
LANGFUSE_HOST=https://cloud.langfuse.com

# OAuth Credentials
# For the OAuth callback URL, use <your_frontend_url>/auth/integrations/oauth_callback,
# e.g. http://localhost:3000/auth/integrations/oauth_callback
1  autogpt_platform/backend/.gitignore  vendored

@@ -18,3 +18,4 @@ load-tests/results/
load-tests/*.json
load-tests/*.log
load-tests/node_modules/*
migrations/*/rollback*.sql
@@ -48,7 +48,8 @@ RUN poetry install --no-ansi --no-root
# Generate Prisma client
COPY autogpt_platform/backend/schema.prisma ./
COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/partial_types.py
RUN poetry run prisma generate
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
RUN poetry run prisma generate && poetry run gen-prisma-stub

FROM debian:13-slim AS server_dependencies

@@ -99,6 +100,7 @@ COPY autogpt_platform/backend/migrations /app/autogpt_platform/backend/migration
FROM server_dependencies AS server

COPY autogpt_platform/backend /app/autogpt_platform/backend
COPY docs /app/docs
RUN poetry install --no-ansi --only-root

ENV PORT=8000
@@ -122,6 +122,24 @@ class ConnectionManager:

        return len(connections)

    async def broadcast_to_all(self, *, method: WSMethod, data: dict) -> int:
        """Broadcast a message to all active websocket connections."""
        message = WSMessage(
            method=method,
            data=data,
        ).model_dump_json()

        connections = tuple(self.active_connections)
        if not connections:
            return 0

        await asyncio.gather(
            *(connection.send_text(message) for connection in connections),
            return_exceptions=True,
        )

        return len(connections)

    async def _subscribe(self, channel_key: str, websocket: WebSocket) -> str:
        if channel_key not in self.subscriptions:
            self.subscriptions[channel_key] = set()
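A minimal usage sketch of the new broadcast_to_all helper from an async caller. The manager instance and WSMethod enum come from the websocket module shown above (its import path is not visible in this diff), and the specific enum member and payload shape are illustrative assumptions:

```python
async def announce(manager, method, text: str) -> int:
    # Fan out one message to every open websocket connection.
    # `manager` is the ConnectionManager from the module above; `method` is a
    # WSMethod member (assumed here, e.g. a notification-style method).
    # Send errors are swallowed by gather(return_exceptions=True), so the
    # return value is the number of connections attempted, not confirmed.
    sent = await manager.broadcast_to_all(method=method, data={"message": text})
    return sent
```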
@@ -70,7 +70,7 @@ class RunAgentRequest(BaseModel):
    )


def _create_ephemeral_session(user_id: str | None) -> ChatSession:
def _create_ephemeral_session(user_id: str) -> ChatSession:
    """Create an ephemeral session for stateless API requests."""
    return ChatSession.new(user_id)
@@ -173,30 +173,64 @@ async def get_execution_analytics_config(
        # Return with provider prefix for clarity
        return f"{provider_name}: {model_name}"

    # Include all LlmModel values (no more filtering by hardcoded list)
    recommended_model = LlmModel.GPT4O_MINI.value
    for model in LlmModel:
        label = generate_model_label(model)
    # Get all models from the registry (dynamic, not hardcoded enum)
    from backend.data import llm_registry
    from backend.server.v2.llm import db as llm_db

    # Get the recommended model from the database (configurable via admin UI)
    recommended_model_slug = await llm_db.get_recommended_model_slug()

    # Build the available models list
    first_enabled_slug = None
    for registry_model in llm_registry.iter_dynamic_models():
        # Only include enabled models in the list
        if not registry_model.is_enabled:
            continue

        # Track first enabled model as fallback
        if first_enabled_slug is None:
            first_enabled_slug = registry_model.slug

        model_enum = LlmModel(registry_model.slug)  # Create enum instance from slug
        label = generate_model_label(model_enum)
        # Add "(Recommended)" suffix to the recommended model
        if model.value == recommended_model:
        if registry_model.slug == recommended_model_slug:
            label += " (Recommended)"

        available_models.append(
            ModelInfo(
                value=model.value,
                value=registry_model.slug,
                label=label,
                provider=model.provider,
                provider=registry_model.metadata.provider,
            )
        )

    # Sort models by provider and name for better UX
    available_models.sort(key=lambda x: (x.provider, x.label))

    # Handle case where no models are available
    if not available_models:
        logger.warning(
            "No enabled LLM models found in registry. "
            "Ensure models are configured and enabled in the LLM Registry."
        )
        # Provide a placeholder entry so admins see meaningful feedback
        available_models.append(
            ModelInfo(
                value="",
                label="No models available - configure in LLM Registry",
                provider="none",
            )
        )

    # Use the DB recommended model, or fallback to first enabled model
    final_recommended = recommended_model_slug or first_enabled_slug or ""

    return ExecutionAnalyticsConfig(
        available_models=available_models,
        default_system_prompt=DEFAULT_SYSTEM_PROMPT,
        default_user_prompt=DEFAULT_USER_PROMPT,
        recommended_model=recommended_model,
        recommended_model=final_recommended,
    )
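The recommended-model resolution above is a simple fallback chain: the database-configured slug wins, then the first enabled registry model, then an empty string. A tiny standalone sketch of the same logic (names are local to this example, not part of the diff):

```python
def resolve_recommended(db_slug: str | None, first_enabled: str | None) -> str:
    # DB-configured recommendation wins, then the first enabled registry model,
    # then an empty string when the registry has no enabled models at all.
    return db_slug or first_enabled or ""


assert resolve_recommended("gpt-4o-mini", "claude-3-haiku") == "gpt-4o-mini"
assert resolve_recommended(None, "claude-3-haiku") == "claude-3-haiku"
assert resolve_recommended(None, None) == ""
```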
@@ -0,0 +1,557 @@
import logging

import autogpt_libs.auth
import fastapi

from backend.data import llm_registry
from backend.data.block_cost_config import refresh_llm_costs
from backend.server.v2.llm import db as llm_db
from backend.server.v2.llm import model as llm_model

logger = logging.getLogger(__name__)

router = fastapi.APIRouter(
    tags=["llm", "admin"],
    dependencies=[fastapi.Security(autogpt_libs.auth.requires_admin_user)],
)


async def _refresh_runtime_state() -> None:
    """Refresh the LLM registry and clear all related caches to ensure real-time updates."""
    logger.info("Refreshing LLM registry runtime state...")
    try:
        # Refresh registry from database
        await llm_registry.refresh_llm_registry()
        refresh_llm_costs()

        # Clear block schema caches so they're regenerated with updated model options
        from backend.data.block import BlockSchema

        BlockSchema.clear_all_schema_caches()
        logger.info("Cleared all block schema caches")

        # Clear the /blocks endpoint cache so frontend gets updated schemas
        try:
            from backend.api.features.v1 import _get_cached_blocks

            _get_cached_blocks.cache_clear()
            logger.info("Cleared /blocks endpoint cache")
        except Exception as e:
            logger.warning("Failed to clear /blocks cache: %s", e)

        # Clear the v2 builder providers cache (if it exists)
        try:
            from backend.api.features.builder import db as builder_db

            if hasattr(builder_db, "_get_all_providers"):
                builder_db._get_all_providers.cache_clear()
                logger.info("Cleared v2 builder providers cache")
        except Exception as e:
            logger.debug("Could not clear v2 builder cache: %s", e)

        # Notify all executor services to refresh their registry cache
        from backend.data.llm_registry import publish_registry_refresh_notification

        await publish_registry_refresh_notification()
        logger.info("Published registry refresh notification")
    except Exception as exc:
        logger.exception(
            "LLM runtime state refresh failed; caches may be stale: %s", exc
        )


@router.get(
    "/providers",
    summary="List LLM providers",
    response_model=llm_model.LlmProvidersResponse,
)
async def list_llm_providers(include_models: bool = True):
    providers = await llm_db.list_providers(include_models=include_models)
    return llm_model.LlmProvidersResponse(providers=providers)


@router.post(
    "/providers",
    summary="Create LLM provider",
    response_model=llm_model.LlmProvider,
)
async def create_llm_provider(request: llm_model.UpsertLlmProviderRequest):
    provider = await llm_db.upsert_provider(request=request)
    await _refresh_runtime_state()
    return provider


@router.patch(
    "/providers/{provider_id}",
    summary="Update LLM provider",
    response_model=llm_model.LlmProvider,
)
async def update_llm_provider(
    provider_id: str,
    request: llm_model.UpsertLlmProviderRequest,
):
    provider = await llm_db.upsert_provider(request=request, provider_id=provider_id)
    await _refresh_runtime_state()
    return provider


@router.get(
    "/models",
    summary="List LLM models",
    response_model=llm_model.LlmModelsResponse,
)
async def list_llm_models(provider_id: str | None = fastapi.Query(default=None)):
    models = await llm_db.list_models(provider_id=provider_id)
    return llm_model.LlmModelsResponse(models=models)


@router.post(
    "/models",
    summary="Create LLM model",
    response_model=llm_model.LlmModel,
)
async def create_llm_model(request: llm_model.CreateLlmModelRequest):
    model = await llm_db.create_model(request=request)
    await _refresh_runtime_state()
    return model


@router.patch(
    "/models/{model_id}",
    summary="Update LLM model",
    response_model=llm_model.LlmModel,
)
async def update_llm_model(
    model_id: str,
    request: llm_model.UpdateLlmModelRequest,
):
    model = await llm_db.update_model(model_id=model_id, request=request)
    await _refresh_runtime_state()
    return model


@router.patch(
    "/models/{model_id}/toggle",
    summary="Toggle LLM model availability",
    response_model=llm_model.ToggleLlmModelResponse,
)
async def toggle_llm_model(
    model_id: str,
    request: llm_model.ToggleLlmModelRequest,
):
    """
    Toggle a model's enabled status, optionally migrating workflows when disabling.

    If disabling a model and `migrate_to_slug` is provided, all workflows using
    this model will be migrated to the specified replacement model before disabling.
    A migration record is created which can be reverted later using the revert endpoint.

    Optional fields:
    - `migration_reason`: Reason for the migration (e.g., "Provider outage")
    - `custom_credit_cost`: Custom pricing override for billing during migration
    """
    try:
        result = await llm_db.toggle_model(
            model_id=model_id,
            is_enabled=request.is_enabled,
            migrate_to_slug=request.migrate_to_slug,
            migration_reason=request.migration_reason,
            custom_credit_cost=request.custom_credit_cost,
        )
        await _refresh_runtime_state()
        if result.nodes_migrated > 0:
            logger.info(
                "Toggled model '%s' to %s and migrated %d nodes to '%s' (migration_id=%s)",
                result.model.slug,
                "enabled" if request.is_enabled else "disabled",
                result.nodes_migrated,
                result.migrated_to_slug,
                result.migration_id,
            )
        return result
    except ValueError as exc:
        logger.warning("Model toggle validation failed: %s", exc)
        raise fastapi.HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to toggle LLM model %s: %s", model_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to toggle model availability",
        ) from exc


@router.get(
    "/models/{model_id}/usage",
    summary="Get model usage count",
    response_model=llm_model.LlmModelUsageResponse,
)
async def get_llm_model_usage(model_id: str):
    """Get the number of workflow nodes using this model."""
    try:
        return await llm_db.get_model_usage(model_id=model_id)
    except ValueError as exc:
        raise fastapi.HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to get model usage %s: %s", model_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to get model usage",
        ) from exc


@router.delete(
    "/models/{model_id}",
    summary="Delete LLM model and migrate workflows",
    response_model=llm_model.DeleteLlmModelResponse,
)
async def delete_llm_model(
    model_id: str,
    replacement_model_slug: str = fastapi.Query(
        ..., description="Slug of the model to migrate existing workflows to"
    ),
):
    """
    Delete a model and automatically migrate all workflows using it to a replacement model.

    This endpoint:
    1. Validates the replacement model exists and is enabled
    2. Counts how many workflow nodes use the model being deleted
    3. Updates all AgentNode.constantInput->model fields to the replacement
    4. Deletes the model record
    5. Refreshes all caches and notifies executors

    Example: DELETE /admin/llm/models/{id}?replacement_model_slug=gpt-4o
    """
    try:
        result = await llm_db.delete_model(
            model_id=model_id, replacement_model_slug=replacement_model_slug
        )
        await _refresh_runtime_state()
        logger.info(
            "Deleted model '%s' and migrated %d nodes to '%s'",
            result.deleted_model_slug,
            result.nodes_migrated,
            result.replacement_model_slug,
        )
        return result
    except ValueError as exc:
        # Validation errors (model not found, replacement invalid, etc.)
        logger.warning("Model deletion validation failed: %s", exc)
        raise fastapi.HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to delete LLM model %s: %s", model_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to delete model and migrate workflows",
        ) from exc


# ============================================================================
# Migration Management Endpoints
# ============================================================================


@router.get(
    "/migrations",
    summary="List model migrations",
    response_model=llm_model.LlmMigrationsResponse,
)
async def list_llm_migrations(
    include_reverted: bool = fastapi.Query(
        default=False, description="Include reverted migrations in the list"
    ),
):
    """
    List all model migrations.

    Migrations are created when disabling a model with the migrate_to_slug option.
    They can be reverted to restore the original model configuration.
    """
    try:
        migrations = await llm_db.list_migrations(include_reverted=include_reverted)
        return llm_model.LlmMigrationsResponse(migrations=migrations)
    except Exception as exc:
        logger.exception("Failed to list migrations: %s", exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to list migrations",
        ) from exc


@router.get(
    "/migrations/{migration_id}",
    summary="Get migration details",
    response_model=llm_model.LlmModelMigration,
)
async def get_llm_migration(migration_id: str):
    """Get details of a specific migration."""
    try:
        migration = await llm_db.get_migration(migration_id)
        if not migration:
            raise fastapi.HTTPException(
                status_code=404, detail=f"Migration '{migration_id}' not found"
            )
        return migration
    except fastapi.HTTPException:
        raise
    except Exception as exc:
        logger.exception("Failed to get migration %s: %s", migration_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to get migration",
        ) from exc


@router.post(
    "/migrations/{migration_id}/revert",
    summary="Revert a model migration",
    response_model=llm_model.RevertMigrationResponse,
)
async def revert_llm_migration(
    migration_id: str,
    request: llm_model.RevertMigrationRequest | None = None,
):
    """
    Revert a model migration, restoring affected workflows to their original model.

    This only reverts the specific nodes that were part of the migration.
    The source model must exist for the revert to succeed.

    Options:
    - `re_enable_source_model`: Whether to re-enable the source model if disabled (default: True)

    Response includes:
    - `nodes_reverted`: Number of nodes successfully reverted
    - `nodes_already_changed`: Number of nodes that were modified since migration (not reverted)
    - `source_model_re_enabled`: Whether the source model was re-enabled

    Requirements:
    - Migration must not already be reverted
    - Source model must exist
    """
    try:
        re_enable = request.re_enable_source_model if request else True
        result = await llm_db.revert_migration(
            migration_id,
            re_enable_source_model=re_enable,
        )
        await _refresh_runtime_state()
        logger.info(
            "Reverted migration '%s': %d nodes restored from '%s' to '%s' "
            "(%d already changed, source re-enabled=%s)",
            migration_id,
            result.nodes_reverted,
            result.target_model_slug,
            result.source_model_slug,
            result.nodes_already_changed,
            result.source_model_re_enabled,
        )
        return result
    except ValueError as exc:
        logger.warning("Migration revert validation failed: %s", exc)
        raise fastapi.HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to revert migration %s: %s", migration_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to revert migration",
        ) from exc


# ============================================================================
# Creator Management Endpoints
# ============================================================================


@router.get(
    "/creators",
    summary="List model creators",
    response_model=llm_model.LlmCreatorsResponse,
)
async def list_llm_creators():
    """
    List all model creators.

    Creators are organizations that create/train models (e.g., OpenAI, Meta, Anthropic).
    This is distinct from providers who host/serve the models (e.g., OpenRouter).
    """
    try:
        creators = await llm_db.list_creators()
        return llm_model.LlmCreatorsResponse(creators=creators)
    except Exception as exc:
        logger.exception("Failed to list creators: %s", exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to list creators",
        ) from exc


@router.get(
    "/creators/{creator_id}",
    summary="Get creator details",
    operation_id="getV2GetLlmCreatorDetails",
    response_model=llm_model.LlmModelCreator,
)
async def get_llm_creator(creator_id: str):
    """Get details of a specific model creator."""
    try:
        creator = await llm_db.get_creator(creator_id)
        if not creator:
            raise fastapi.HTTPException(
                status_code=404, detail=f"Creator '{creator_id}' not found"
            )
        return creator
    except fastapi.HTTPException:
        raise
    except Exception as exc:
        logger.exception("Failed to get creator %s: %s", creator_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to get creator",
        ) from exc


@router.post(
    "/creators",
    summary="Create model creator",
    response_model=llm_model.LlmModelCreator,
)
async def create_llm_creator(request: llm_model.UpsertLlmCreatorRequest):
    """
    Create a new model creator.

    A creator represents an organization that creates/trains AI models,
    such as OpenAI, Anthropic, Meta, or Google.
    """
    try:
        creator = await llm_db.upsert_creator(request=request)
        await _refresh_runtime_state()
        logger.info("Created model creator '%s' (%s)", creator.display_name, creator.id)
        return creator
    except Exception as exc:
        logger.exception("Failed to create creator: %s", exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to create creator",
        ) from exc


@router.patch(
    "/creators/{creator_id}",
    summary="Update model creator",
    response_model=llm_model.LlmModelCreator,
)
async def update_llm_creator(
    creator_id: str,
    request: llm_model.UpsertLlmCreatorRequest,
):
    """Update an existing model creator."""
    try:
        creator = await llm_db.upsert_creator(request=request, creator_id=creator_id)
        await _refresh_runtime_state()
        logger.info("Updated model creator '%s' (%s)", creator.display_name, creator_id)
        return creator
    except Exception as exc:
        logger.exception("Failed to update creator %s: %s", creator_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to update creator",
        ) from exc


@router.delete(
    "/creators/{creator_id}",
    summary="Delete model creator",
    response_model=dict,
)
async def delete_llm_creator(creator_id: str):
    """
    Delete a model creator.

    This will remove the creator association from all models that reference it
    (sets creatorId to NULL), but will not delete the models themselves.
    """
    try:
        await llm_db.delete_creator(creator_id)
        await _refresh_runtime_state()
        logger.info("Deleted model creator '%s'", creator_id)
        return {"success": True, "message": f"Creator '{creator_id}' deleted"}
    except ValueError as exc:
        logger.warning("Creator deletion validation failed: %s", exc)
        raise fastapi.HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to delete creator %s: %s", creator_id, exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to delete creator",
        ) from exc


# ============================================================================
# Recommended Model Endpoints
# ============================================================================


@router.get(
    "/recommended-model",
    summary="Get recommended model",
    response_model=llm_model.RecommendedModelResponse,
)
async def get_recommended_model():
    """
    Get the currently recommended LLM model.

    The recommended model is shown to users as the default/suggested option
    in model selection dropdowns.
    """
    try:
        model = await llm_db.get_recommended_model()
        return llm_model.RecommendedModelResponse(
            model=model,
            slug=model.slug if model else None,
        )
    except Exception as exc:
        logger.exception("Failed to get recommended model: %s", exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to get recommended model",
        ) from exc


@router.post(
    "/recommended-model",
    summary="Set recommended model",
    response_model=llm_model.SetRecommendedModelResponse,
)
async def set_recommended_model(request: llm_model.SetRecommendedModelRequest):
    """
    Set a model as the recommended model.

    This clears the recommended flag from any other model and sets it on
    the specified model. The model must be enabled to be set as recommended.

    The recommended model is displayed to users as the default/suggested
    option in model selection dropdowns throughout the platform.
    """
    try:
        model, previous_slug = await llm_db.set_recommended_model(request.model_id)
        await _refresh_runtime_state()
        logger.info(
            "Set recommended model to '%s' (previous: %s)",
            model.slug,
            previous_slug or "none",
        )
        return llm_model.SetRecommendedModelResponse(
            model=model,
            previous_recommended_slug=previous_slug,
            message=f"Model '{model.display_name}' is now the recommended model",
        )
    except ValueError as exc:
        logger.warning("Set recommended model validation failed: %s", exc)
        raise fastapi.HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Failed to set recommended model: %s", exc)
        raise fastapi.HTTPException(
            status_code=500,
            detail="Failed to set recommended model",
        ) from exc
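For context, a hedged sketch of how an admin client might disable a model while migrating its workflows, using the toggle endpoint defined above. The router prefix mirrors the tests below, the model id and slugs are placeholders, and it assumes admin auth is satisfied or overridden; the payload fields follow the request fields used in the handler:

```python
import fastapi
import fastapi.testclient

# `llm_routes` is the module defined above.
app = fastapi.FastAPI()
app.include_router(llm_routes.router, prefix="/admin/llm")
client = fastapi.testclient.TestClient(app)

resp = client.patch(
    "/admin/llm/models/some-model-id/toggle",   # placeholder model id
    json={
        "is_enabled": False,
        "migrate_to_slug": "gpt-4o-mini",       # example replacement slug
        "migration_reason": "Provider outage",  # optional, per the docstring
    },
)
print(resp.status_code, resp.json())
```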
@@ -0,0 +1,436 @@
import json
from unittest.mock import AsyncMock

import fastapi
import fastapi.testclient
import pytest
import pytest_mock
from autogpt_libs.auth.jwt_utils import get_jwt_payload
from pytest_snapshot.plugin import Snapshot

import backend.api.features.admin.llm_routes as llm_routes
from backend.server.v2.llm import model as llm_model

app = fastapi.FastAPI()
app.include_router(llm_routes.router, prefix="/admin/llm")

client = fastapi.testclient.TestClient(app)


@pytest.fixture(autouse=True)
def setup_app_admin_auth(mock_jwt_admin):
    """Setup admin auth overrides for all tests in this module"""
    app.dependency_overrides[get_jwt_payload] = mock_jwt_admin["get_jwt_payload"]
    yield
    app.dependency_overrides.clear()


def test_list_llm_providers_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful listing of LLM providers"""
    # Mock the database function
    mock_providers = [
        {
            "id": "provider-1",
            "name": "openai",
            "display_name": "OpenAI",
            "description": "OpenAI LLM provider",
            "supports_tools": True,
            "supports_json_output": True,
            "supports_reasoning": False,
            "supports_parallel_tool": True,
            "metadata": {},
            "models": [],
        },
        {
            "id": "provider-2",
            "name": "anthropic",
            "display_name": "Anthropic",
            "description": "Anthropic LLM provider",
            "supports_tools": True,
            "supports_json_output": True,
            "supports_reasoning": False,
            "supports_parallel_tool": True,
            "metadata": {},
            "models": [],
        },
    ]

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.list_providers",
        new=AsyncMock(return_value=mock_providers),
    )

    response = client.get("/admin/llm/providers")

    assert response.status_code == 200
    response_data = response.json()
    assert len(response_data["providers"]) == 2
    assert response_data["providers"][0]["name"] == "openai"

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "list_llm_providers_success.json",
    )


def test_list_llm_models_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful listing of LLM models"""
    # Mock the database function
    mock_models = [
        {
            "id": "model-1",
            "slug": "gpt-4o",
            "display_name": "GPT-4o",
            "description": "GPT-4 Optimized",
            "provider_id": "provider-1",
            "context_window": 128000,
            "max_output_tokens": 16384,
            "is_enabled": True,
            "capabilities": {},
            "metadata": {},
            "costs": [
                {
                    "id": "cost-1",
                    "credit_cost": 10,
                    "credential_provider": "openai",
                    "metadata": {},
                }
            ],
        }
    ]

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.list_models",
        new=AsyncMock(return_value=mock_models),
    )

    response = client.get("/admin/llm/models")

    assert response.status_code == 200
    response_data = response.json()
    assert len(response_data["models"]) == 1
    assert response_data["models"][0]["slug"] == "gpt-4o"

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "list_llm_models_success.json",
    )


def test_create_llm_provider_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful creation of LLM provider"""
    mock_provider = {
        "id": "new-provider-id",
        "name": "groq",
        "display_name": "Groq",
        "description": "Groq LLM provider",
        "supports_tools": True,
        "supports_json_output": True,
        "supports_reasoning": False,
        "supports_parallel_tool": False,
        "metadata": {},
    }

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.upsert_provider",
        new=AsyncMock(return_value=mock_provider),
    )

    mock_refresh = mocker.patch(
        "backend.api.features.admin.llm_routes._refresh_runtime_state",
        new=AsyncMock(),
    )

    request_data = {
        "name": "groq",
        "display_name": "Groq",
        "description": "Groq LLM provider",
        "supports_tools": True,
        "supports_json_output": True,
        "supports_reasoning": False,
        "supports_parallel_tool": False,
        "metadata": {},
    }

    response = client.post("/admin/llm/providers", json=request_data)

    assert response.status_code == 200
    response_data = response.json()
    assert response_data["name"] == "groq"
    assert response_data["display_name"] == "Groq"

    # Verify refresh was called
    mock_refresh.assert_called_once()

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "create_llm_provider_success.json",
    )


def test_create_llm_model_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful creation of LLM model"""
    mock_model = {
        "id": "new-model-id",
        "slug": "gpt-4.1-mini",
        "display_name": "GPT-4.1 Mini",
        "description": "Latest GPT-4.1 Mini model",
        "provider_id": "provider-1",
        "context_window": 128000,
        "max_output_tokens": 16384,
        "is_enabled": True,
        "capabilities": {},
        "metadata": {},
        "costs": [
            {
                "id": "cost-id",
                "credit_cost": 5,
                "credential_provider": "openai",
                "metadata": {},
            }
        ],
    }

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.create_model",
        new=AsyncMock(return_value=mock_model),
    )

    mock_refresh = mocker.patch(
        "backend.api.features.admin.llm_routes._refresh_runtime_state",
        new=AsyncMock(),
    )

    request_data = {
        "slug": "gpt-4.1-mini",
        "display_name": "GPT-4.1 Mini",
        "description": "Latest GPT-4.1 Mini model",
        "provider_id": "provider-1",
        "context_window": 128000,
        "max_output_tokens": 16384,
        "is_enabled": True,
        "capabilities": {},
        "metadata": {},
        "costs": [
            {
                "credit_cost": 5,
                "credential_provider": "openai",
                "metadata": {},
            }
        ],
    }

    response = client.post("/admin/llm/models", json=request_data)

    assert response.status_code == 200
    response_data = response.json()
    assert response_data["slug"] == "gpt-4.1-mini"
    assert response_data["is_enabled"] is True

    # Verify refresh was called
    mock_refresh.assert_called_once()

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "create_llm_model_success.json",
    )


def test_update_llm_model_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful update of LLM model"""
    mock_model = {
        "id": "model-1",
        "slug": "gpt-4o",
        "display_name": "GPT-4o Updated",
        "description": "Updated description",
        "provider_id": "provider-1",
        "context_window": 256000,
        "max_output_tokens": 32768,
        "is_enabled": True,
        "capabilities": {},
        "metadata": {},
        "costs": [
            {
                "id": "cost-1",
                "credit_cost": 15,
                "credential_provider": "openai",
                "metadata": {},
            }
        ],
    }

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.update_model",
        new=AsyncMock(return_value=mock_model),
    )

    mock_refresh = mocker.patch(
        "backend.api.features.admin.llm_routes._refresh_runtime_state",
        new=AsyncMock(),
    )

    request_data = {
        "display_name": "GPT-4o Updated",
        "description": "Updated description",
        "context_window": 256000,
        "max_output_tokens": 32768,
    }

    response = client.patch("/admin/llm/models/model-1", json=request_data)

    assert response.status_code == 200
    response_data = response.json()
    assert response_data["display_name"] == "GPT-4o Updated"
    assert response_data["context_window"] == 256000

    # Verify refresh was called
    mock_refresh.assert_called_once()

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "update_llm_model_success.json",
    )


def test_toggle_llm_model_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful toggling of LLM model enabled status"""
    # Create a proper mock model object
    mock_model = llm_model.LlmModel(
        id="model-1",
        slug="gpt-4o",
        display_name="GPT-4o",
        description="GPT-4 Optimized",
        provider_id="provider-1",
        context_window=128000,
        max_output_tokens=16384,
        is_enabled=False,
        capabilities={},
        metadata={},
        costs=[],
    )

    # Create a proper ToggleLlmModelResponse
    mock_response = llm_model.ToggleLlmModelResponse(
        model=mock_model,
        nodes_migrated=0,
        migrated_to_slug=None,
        migration_id=None,
    )

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.toggle_model",
        new=AsyncMock(return_value=mock_response),
    )

    mock_refresh = mocker.patch(
        "backend.api.features.admin.llm_routes._refresh_runtime_state",
        new=AsyncMock(),
    )

    request_data = {"is_enabled": False}

    response = client.patch("/admin/llm/models/model-1/toggle", json=request_data)

    assert response.status_code == 200
    response_data = response.json()
    assert response_data["model"]["is_enabled"] is False

    # Verify refresh was called
    mock_refresh.assert_called_once()

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "toggle_llm_model_success.json",
    )


def test_delete_llm_model_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test successful deletion of LLM model with migration"""
    # Create a proper DeleteLlmModelResponse
    mock_response = llm_model.DeleteLlmModelResponse(
        deleted_model_slug="gpt-3.5-turbo",
        deleted_model_display_name="GPT-3.5 Turbo",
        replacement_model_slug="gpt-4o-mini",
        nodes_migrated=42,
        message="Successfully deleted model 'GPT-3.5 Turbo' (gpt-3.5-turbo) "
        "and migrated 42 workflow node(s) to 'gpt-4o-mini'.",
    )

    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.delete_model",
        new=AsyncMock(return_value=mock_response),
    )

    mock_refresh = mocker.patch(
        "backend.api.features.admin.llm_routes._refresh_runtime_state",
        new=AsyncMock(),
    )

    response = client.delete(
        "/admin/llm/models/model-1?replacement_model_slug=gpt-4o-mini"
    )

    assert response.status_code == 200
    response_data = response.json()
    assert response_data["deleted_model_slug"] == "gpt-3.5-turbo"
    assert response_data["nodes_migrated"] == 42
    assert response_data["replacement_model_slug"] == "gpt-4o-mini"

    # Verify refresh was called
    mock_refresh.assert_called_once()

    # Snapshot test the response (must be string)
    configured_snapshot.assert_match(
        json.dumps(response_data, indent=2, sort_keys=True),
        "delete_llm_model_success.json",
    )


def test_delete_llm_model_validation_error(
    mocker: pytest_mock.MockFixture,
) -> None:
    """Test deletion fails with proper error when validation fails"""
    mocker.patch(
        "backend.api.features.admin.llm_routes.llm_db.delete_model",
        new=AsyncMock(side_effect=ValueError("Replacement model 'invalid' not found")),
    )

    response = client.delete("/admin/llm/models/model-1?replacement_model_slug=invalid")

    assert response.status_code == 400
    assert "Replacement model 'invalid' not found" in response.json()["detail"]


def test_delete_llm_model_missing_replacement() -> None:
    """Test deletion fails when replacement_model_slug is not provided"""
    response = client.delete("/admin/llm/models/model-1")

    # FastAPI will return 422 for missing required query params
    assert response.status_code == 422
@@ -15,6 +15,7 @@ from backend.blocks import load_all_blocks
from backend.blocks.llm import LlmModel
from backend.data.block import AnyBlockSchema, BlockCategory, BlockInfo, BlockSchema
from backend.data.db import query_raw_with_schema
from backend.data.llm_registry import get_all_model_slugs_for_validation
from backend.integrations.providers import ProviderName
from backend.util.cache import cached
from backend.util.models import Pagination
@@ -31,7 +32,14 @@ from .model import (
)

logger = logging.getLogger(__name__)
llm_models = [name.name.lower().replace("_", " ") for name in LlmModel]


def _get_llm_models() -> list[str]:
    """Get LLM model names for search matching from the registry."""
    return [
        slug.lower().replace("-", " ") for slug in get_all_model_slugs_for_validation()
    ]


MAX_LIBRARY_AGENT_RESULTS = 100
MAX_MARKETPLACE_AGENT_RESULTS = 100
@@ -496,8 +504,8 @@ async def _get_static_counts():
def _matches_llm_model(schema_cls: type[BlockSchema], query: str) -> bool:
    for field in schema_cls.model_fields.values():
        if field.annotation == LlmModel:
            # Check if query matches any value in llm_models
            if any(query in name for name in llm_models):
            # Check if query matches any value in llm_models from registry
            if any(query in name for name in _get_llm_models()):
                return True
    return False
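A quick illustration of the slug normalization that `_get_llm_models` relies on for substring matching; the registry slugs below are hypothetical examples, not values taken from this diff.

```python
# Sketch: registry slugs are lowercased and hyphens become spaces,
# so a search query like "gpt 4o" can match "gpt-4o-mini".
slugs = ["gpt-4o-mini", "claude-3-5-sonnet"]  # hypothetical registry contents
names = [slug.lower().replace("-", " ") for slug in slugs]
assert any("gpt 4o" in name for name in names)
```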
@@ -1,7 +1,6 @@
"""Configuration management for chat system."""

import os
from pathlib import Path

from pydantic import Field, field_validator
from pydantic_settings import BaseSettings
@@ -12,7 +11,11 @@ class ChatConfig(BaseSettings):

    # OpenAI API Configuration
    model: str = Field(
        default="qwen/qwen3-235b-a22b-2507", description="Default model to use"
        default="anthropic/claude-opus-4.5", description="Default model to use"
    )
    title_model: str = Field(
        default="openai/gpt-4o-mini",
        description="Model to use for generating session titles (should be fast/cheap)",
    )
    api_key: str | None = Field(default=None, description="OpenAI API key")
    base_url: str | None = Field(
@@ -23,12 +26,6 @@ class ChatConfig(BaseSettings):
    # Session TTL Configuration - 12 hours
    session_ttl: int = Field(default=43200, description="Session TTL in seconds")

    # System Prompt Configuration
    system_prompt_path: str = Field(
        default="prompts/chat_system.md",
        description="Path to system prompt file relative to chat module",
    )

    # Streaming Configuration
    max_context_messages: int = Field(
        default=50, ge=1, le=200, description="Maximum context messages"
@@ -41,6 +38,13 @@ class ChatConfig(BaseSettings):
        default=3, description="Maximum number of agent schedules"
    )

    # Langfuse Prompt Management Configuration
    # Note: Langfuse credentials are in Settings().secrets (settings.py)
    langfuse_prompt_name: str = Field(
        default="CoPilot Prompt",
        description="Name of the prompt in Langfuse to fetch",
    )

    @field_validator("api_key", mode="before")
    @classmethod
    def get_api_key(cls, v):
@@ -72,43 +76,11 @@ class ChatConfig(BaseSettings):
            v = "https://openrouter.ai/api/v1"
        return v

    def get_system_prompt(self, **template_vars) -> str:
        """Load and render the system prompt from file.

        Args:
            **template_vars: Variables to substitute in the template

        Returns:
            Rendered system prompt string

        """
        # Get the path relative to this module
        module_dir = Path(__file__).parent
        prompt_path = module_dir / self.system_prompt_path

        # Check for .j2 extension first (Jinja2 template)
        j2_path = Path(str(prompt_path) + ".j2")
        if j2_path.exists():
            try:
                from jinja2 import Template

                template = Template(j2_path.read_text())
                return template.render(**template_vars)
            except ImportError:
                # Jinja2 not installed, fall back to reading as plain text
                return j2_path.read_text()

        # Check for markdown file
        if prompt_path.exists():
            content = prompt_path.read_text()

            # Simple variable substitution if Jinja2 is not available
            for key, value in template_vars.items():
                placeholder = f"{{{key}}}"
                content = content.replace(placeholder, str(value))

            return content
        raise FileNotFoundError(f"System prompt file not found: {prompt_path}")
    # Prompt paths for different contexts
    PROMPT_PATHS: dict[str, str] = {
        "default": "prompts/chat_system.md",
        "onboarding": "prompts/onboarding_system.md",
    }

    class Config:
        """Pydantic config."""

autogpt_platform/backend/backend/api/features/chat/db.py (new file, 249 lines)
@@ -0,0 +1,249 @@
"""Database operations for chat sessions."""

import asyncio
import logging
from datetime import UTC, datetime
from typing import Any, cast

from prisma.models import ChatMessage as PrismaChatMessage
from prisma.models import ChatSession as PrismaChatSession
from prisma.types import (
    ChatMessageCreateInput,
    ChatSessionCreateInput,
    ChatSessionUpdateInput,
    ChatSessionWhereInput,
)

from backend.data.db import transaction
from backend.util.json import SafeJson

logger = logging.getLogger(__name__)


async def get_chat_session(session_id: str) -> PrismaChatSession | None:
    """Get a chat session by ID from the database."""
    session = await PrismaChatSession.prisma().find_unique(
        where={"id": session_id},
        include={"Messages": True},
    )
    if session and session.Messages:
        # Sort messages by sequence in Python - Prisma Python client doesn't support
        # order_by in include clauses (unlike Prisma JS), so we sort after fetching
        session.Messages.sort(key=lambda m: m.sequence)
    return session


async def create_chat_session(
    session_id: str,
    user_id: str,
) -> PrismaChatSession:
    """Create a new chat session in the database."""
    data = ChatSessionCreateInput(
        id=session_id,
        userId=user_id,
        credentials=SafeJson({}),
        successfulAgentRuns=SafeJson({}),
        successfulAgentSchedules=SafeJson({}),
    )
    return await PrismaChatSession.prisma().create(
        data=data,
        include={"Messages": True},
    )


async def update_chat_session(
    session_id: str,
    credentials: dict[str, Any] | None = None,
    successful_agent_runs: dict[str, Any] | None = None,
    successful_agent_schedules: dict[str, Any] | None = None,
    total_prompt_tokens: int | None = None,
    total_completion_tokens: int | None = None,
    title: str | None = None,
) -> PrismaChatSession | None:
    """Update a chat session's metadata."""
    data: ChatSessionUpdateInput = {"updatedAt": datetime.now(UTC)}

    if credentials is not None:
        data["credentials"] = SafeJson(credentials)
    if successful_agent_runs is not None:
        data["successfulAgentRuns"] = SafeJson(successful_agent_runs)
    if successful_agent_schedules is not None:
        data["successfulAgentSchedules"] = SafeJson(successful_agent_schedules)
    if total_prompt_tokens is not None:
        data["totalPromptTokens"] = total_prompt_tokens
    if total_completion_tokens is not None:
        data["totalCompletionTokens"] = total_completion_tokens
    if title is not None:
        data["title"] = title

    session = await PrismaChatSession.prisma().update(
        where={"id": session_id},
        data=data,
        include={"Messages": True},
    )
    if session and session.Messages:
        # Sort in Python - Prisma Python doesn't support order_by in include clauses
        session.Messages.sort(key=lambda m: m.sequence)
    return session


async def add_chat_message(
    session_id: str,
    role: str,
    sequence: int,
    content: str | None = None,
    name: str | None = None,
    tool_call_id: str | None = None,
    refusal: str | None = None,
    tool_calls: list[dict[str, Any]] | None = None,
    function_call: dict[str, Any] | None = None,
) -> PrismaChatMessage:
    """Add a message to a chat session."""
    # Build input dict dynamically rather than using ChatMessageCreateInput directly
    # because Prisma's TypedDict validation rejects optional fields set to None.
    # We only include fields that have values, then cast at the end.
    data: dict[str, Any] = {
        "Session": {"connect": {"id": session_id}},
        "role": role,
        "sequence": sequence,
    }

    # Add optional string fields
    if content is not None:
        data["content"] = content
    if name is not None:
        data["name"] = name
    if tool_call_id is not None:
        data["toolCallId"] = tool_call_id
    if refusal is not None:
        data["refusal"] = refusal

    # Add optional JSON fields only when they have values
    if tool_calls is not None:
        data["toolCalls"] = SafeJson(tool_calls)
    if function_call is not None:
        data["functionCall"] = SafeJson(function_call)

    # Run message create and session timestamp update in parallel for lower latency
    _, message = await asyncio.gather(
        PrismaChatSession.prisma().update(
            where={"id": session_id},
            data={"updatedAt": datetime.now(UTC)},
        ),
        PrismaChatMessage.prisma().create(data=cast(ChatMessageCreateInput, data)),
    )
    return message


async def add_chat_messages_batch(
    session_id: str,
    messages: list[dict[str, Any]],
    start_sequence: int,
) -> list[PrismaChatMessage]:
    """Add multiple messages to a chat session in a batch.

    Uses a transaction for atomicity - if any message creation fails,
    the entire batch is rolled back.
    """
    if not messages:
        return []

    created_messages = []

    async with transaction() as tx:
        for i, msg in enumerate(messages):
            # Build input dict dynamically rather than using ChatMessageCreateInput
            # directly because Prisma's TypedDict validation rejects optional fields
            # set to None. We only include fields that have values, then cast.
            data: dict[str, Any] = {
                "Session": {"connect": {"id": session_id}},
                "role": msg["role"],
                "sequence": start_sequence + i,
            }

            # Add optional string fields
            if msg.get("content") is not None:
                data["content"] = msg["content"]
            if msg.get("name") is not None:
                data["name"] = msg["name"]
            if msg.get("tool_call_id") is not None:
                data["toolCallId"] = msg["tool_call_id"]
            if msg.get("refusal") is not None:
                data["refusal"] = msg["refusal"]

            # Add optional JSON fields only when they have values
            if msg.get("tool_calls") is not None:
                data["toolCalls"] = SafeJson(msg["tool_calls"])
            if msg.get("function_call") is not None:
                data["functionCall"] = SafeJson(msg["function_call"])

            created = await PrismaChatMessage.prisma(tx).create(
                data=cast(ChatMessageCreateInput, data)
            )
            created_messages.append(created)

        # Update session's updatedAt timestamp within the same transaction.
        # Note: Token usage (total_prompt_tokens, total_completion_tokens) is updated
        # separately via update_chat_session() after streaming completes.
        await PrismaChatSession.prisma(tx).update(
            where={"id": session_id},
            data={"updatedAt": datetime.now(UTC)},
        )

    return created_messages


async def get_user_chat_sessions(
    user_id: str,
    limit: int = 50,
    offset: int = 0,
) -> list[PrismaChatSession]:
    """Get chat sessions for a user, ordered by most recent."""
    return await PrismaChatSession.prisma().find_many(
        where={"userId": user_id},
        order={"updatedAt": "desc"},
        take=limit,
        skip=offset,
    )


async def get_user_session_count(user_id: str) -> int:
    """Get the total number of chat sessions for a user."""
    return await PrismaChatSession.prisma().count(where={"userId": user_id})


async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
    """Delete a chat session and all its messages.

    Args:
        session_id: The session ID to delete.
        user_id: If provided, validates that the session belongs to this user
            before deletion. This prevents unauthorized deletion of other
            users' sessions.

    Returns:
        True if deleted successfully, False otherwise.
    """
    try:
        # Build typed where clause with optional user_id validation
        where_clause: ChatSessionWhereInput = {"id": session_id}
        if user_id is not None:
            where_clause["userId"] = user_id

        result = await PrismaChatSession.prisma().delete_many(where=where_clause)
        if result == 0:
            logger.warning(
                f"No session deleted for {session_id} "
                f"(user_id validation: {user_id is not None})"
            )
            return False
        return True
    except Exception as e:
        logger.error(f"Failed to delete chat session {session_id}: {e}")
        return False


async def get_chat_session_message_count(session_id: str) -> int:
    """Get the number of messages in a chat session."""
    count = await PrismaChatMessage.prisma().count(where={"sessionId": session_id})
    return count
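A small usage sketch of these helpers, assuming an async context, a configured Prisma connection, and an existing user row; the IDs below are placeholders, not values from this changeset.

```python
# Sketch only: persisting a short exchange with the chat DB helpers above.
from backend.api.features.chat import db as chat_db


async def demo(user_id: str) -> None:
    session = await chat_db.create_chat_session("session-123", user_id)  # placeholder ID
    await chat_db.add_chat_messages_batch(
        session_id=session.id,
        messages=[
            {"role": "user", "content": "Hi"},
            {"role": "assistant", "content": "Hello! How can I help?"},
        ],
        start_sequence=0,
    )
    loaded = await chat_db.get_chat_session(session.id)
    assert loaded is not None and len(loaded.Messages or []) == 2
```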
@@ -1,6 +1,9 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from weakref import WeakValueDictionary
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionAssistantMessageParam,
|
||||
@@ -16,17 +19,63 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||
ChatCompletionMessageToolCallParam,
|
||||
Function,
|
||||
)
|
||||
from prisma.models import ChatMessage as PrismaChatMessage
|
||||
from prisma.models import ChatSession as PrismaChatSession
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.util.exceptions import RedisError
|
||||
from backend.util import json
|
||||
from backend.util.exceptions import DatabaseError, RedisError
|
||||
|
||||
from . import db as chat_db
|
||||
from .config import ChatConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
config = ChatConfig()
|
||||
|
||||
|
||||
def _parse_json_field(value: str | dict | list | None, default: Any = None) -> Any:
|
||||
"""Parse a JSON field that may be stored as string or already parsed."""
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, str):
|
||||
return json.loads(value)
|
||||
return value
|
||||
|
||||
|
||||
# Redis cache key prefix for chat sessions
|
||||
CHAT_SESSION_CACHE_PREFIX = "chat:session:"
|
||||
|
||||
|
||||
def _get_session_cache_key(session_id: str) -> str:
|
||||
"""Get the Redis cache key for a chat session."""
|
||||
return f"{CHAT_SESSION_CACHE_PREFIX}{session_id}"
|
||||
|
||||
|
||||
# Session-level locks to prevent race conditions during concurrent upserts.
|
||||
# Uses WeakValueDictionary to automatically garbage collect locks when no longer referenced,
|
||||
# preventing unbounded memory growth while maintaining lock semantics for active sessions.
|
||||
# Invalidation: Locks are auto-removed by GC when no coroutine holds a reference (after
|
||||
# async with lock: completes). Explicit cleanup also occurs in delete_chat_session().
|
||||
_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
|
||||
_session_locks_mutex = asyncio.Lock()
|
||||
|
||||
|
||||
async def _get_session_lock(session_id: str) -> asyncio.Lock:
|
||||
"""Get or create a lock for a specific session to prevent concurrent upserts.
|
||||
|
||||
Uses WeakValueDictionary for automatic cleanup: locks are garbage collected
|
||||
when no coroutine holds a reference to them, preventing memory leaks from
|
||||
unbounded growth of session locks.
|
||||
"""
|
||||
async with _session_locks_mutex:
|
||||
lock = _session_locks.get(session_id)
|
||||
if lock is None:
|
||||
lock = asyncio.Lock()
|
||||
_session_locks[session_id] = lock
|
||||
return lock
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
|
||||
role: str
|
||||
content: str | None = None
|
||||
@@ -45,7 +94,8 @@ class Usage(BaseModel):
|
||||
|
||||
class ChatSession(BaseModel):
|
||||
session_id: str
|
||||
user_id: str | None
|
||||
user_id: str
|
||||
title: str | None = None
|
||||
messages: list[ChatMessage]
|
||||
usage: list[Usage]
|
||||
credentials: dict[str, dict] = {} # Map of provider -> credential metadata
|
||||
@@ -55,10 +105,11 @@ class ChatSession(BaseModel):
|
||||
successful_agent_schedules: dict[str, int] = {}
|
||||
|
||||
@staticmethod
|
||||
def new(user_id: str | None) -> "ChatSession":
|
||||
def new(user_id: str) -> "ChatSession":
|
||||
return ChatSession(
|
||||
session_id=str(uuid.uuid4()),
|
||||
user_id=user_id,
|
||||
title=None,
|
||||
messages=[],
|
||||
usage=[],
|
||||
credentials={},
|
||||
@@ -66,6 +117,61 @@ class ChatSession(BaseModel):
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def from_db(
|
||||
prisma_session: PrismaChatSession,
|
||||
prisma_messages: list[PrismaChatMessage] | None = None,
|
||||
) -> "ChatSession":
|
||||
"""Convert Prisma models to Pydantic ChatSession."""
|
||||
messages = []
|
||||
if prisma_messages:
|
||||
for msg in prisma_messages:
|
||||
messages.append(
|
||||
ChatMessage(
|
||||
role=msg.role,
|
||||
content=msg.content,
|
||||
name=msg.name,
|
||||
tool_call_id=msg.toolCallId,
|
||||
refusal=msg.refusal,
|
||||
tool_calls=_parse_json_field(msg.toolCalls),
|
||||
function_call=_parse_json_field(msg.functionCall),
|
||||
)
|
||||
)
|
||||
|
||||
# Parse JSON fields from Prisma
|
||||
credentials = _parse_json_field(prisma_session.credentials, default={})
|
||||
successful_agent_runs = _parse_json_field(
|
||||
prisma_session.successfulAgentRuns, default={}
|
||||
)
|
||||
successful_agent_schedules = _parse_json_field(
|
||||
prisma_session.successfulAgentSchedules, default={}
|
||||
)
|
||||
|
||||
# Calculate usage from token counts
|
||||
usage = []
|
||||
if prisma_session.totalPromptTokens or prisma_session.totalCompletionTokens:
|
||||
usage.append(
|
||||
Usage(
|
||||
prompt_tokens=prisma_session.totalPromptTokens or 0,
|
||||
completion_tokens=prisma_session.totalCompletionTokens or 0,
|
||||
total_tokens=(prisma_session.totalPromptTokens or 0)
|
||||
+ (prisma_session.totalCompletionTokens or 0),
|
||||
)
|
||||
)
|
||||
|
||||
return ChatSession(
|
||||
session_id=prisma_session.id,
|
||||
user_id=prisma_session.userId,
|
||||
title=prisma_session.title,
|
||||
messages=messages,
|
||||
usage=usage,
|
||||
credentials=credentials,
|
||||
started_at=prisma_session.createdAt,
|
||||
updated_at=prisma_session.updatedAt,
|
||||
successful_agent_runs=successful_agent_runs,
|
||||
successful_agent_schedules=successful_agent_schedules,
|
||||
)
|
||||
|
||||
def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
|
||||
messages = []
|
||||
for message in self.messages:
|
||||
@@ -155,50 +261,337 @@ class ChatSession(BaseModel):
|
||||
return messages
|
||||
|
||||
|
||||
async def get_chat_session(
|
||||
session_id: str,
|
||||
user_id: str | None,
|
||||
) -> ChatSession | None:
|
||||
"""Get a chat session by ID."""
|
||||
redis_key = f"chat:session:{session_id}"
|
||||
async def _get_session_from_cache(session_id: str) -> ChatSession | None:
|
||||
"""Get a chat session from Redis cache."""
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async_redis = await get_redis_async()
|
||||
|
||||
raw_session: bytes | None = await async_redis.get(redis_key)
|
||||
|
||||
if raw_session is None:
|
||||
logger.warning(f"Session {session_id} not found in Redis")
|
||||
return None
|
||||
|
||||
try:
|
||||
session = ChatSession.model_validate_json(raw_session)
|
||||
logger.info(
|
||||
f"Loading session {session_id} from cache: "
|
||||
f"message_count={len(session.messages)}, "
|
||||
f"roles={[m.role for m in session.messages]}"
|
||||
)
|
||||
return session
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to deserialize session {session_id}: {e}", exc_info=True)
|
||||
raise RedisError(f"Corrupted session data for {session_id}") from e
|
||||
|
||||
if session.user_id is not None and session.user_id != user_id:
|
||||
|
||||
async def _cache_session(session: ChatSession) -> None:
|
||||
"""Cache a chat session in Redis."""
|
||||
redis_key = _get_session_cache_key(session.session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())
|
||||
|
||||
|
||||
async def _get_session_from_db(session_id: str) -> ChatSession | None:
|
||||
"""Get a chat session from the database."""
|
||||
prisma_session = await chat_db.get_chat_session(session_id)
|
||||
if not prisma_session:
|
||||
return None
|
||||
|
||||
messages = prisma_session.Messages
|
||||
logger.info(
|
||||
f"Loading session {session_id} from DB: "
|
||||
f"has_messages={messages is not None}, "
|
||||
f"message_count={len(messages) if messages else 0}, "
|
||||
f"roles={[m.role for m in messages] if messages else []}"
|
||||
)
|
||||
|
||||
return ChatSession.from_db(prisma_session, messages)
|
||||
|
||||
|
||||
async def _save_session_to_db(
|
||||
session: ChatSession, existing_message_count: int
|
||||
) -> None:
|
||||
"""Save or update a chat session in the database."""
|
||||
# Check if session exists in DB
|
||||
existing = await chat_db.get_chat_session(session.session_id)
|
||||
|
||||
if not existing:
|
||||
# Create new session
|
||||
await chat_db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
existing_message_count = 0
|
||||
|
||||
# Calculate total tokens from usage
|
||||
total_prompt = sum(u.prompt_tokens for u in session.usage)
|
||||
total_completion = sum(u.completion_tokens for u in session.usage)
|
||||
|
||||
# Update session metadata
|
||||
await chat_db.update_chat_session(
|
||||
session_id=session.session_id,
|
||||
credentials=session.credentials,
|
||||
successful_agent_runs=session.successful_agent_runs,
|
||||
successful_agent_schedules=session.successful_agent_schedules,
|
||||
total_prompt_tokens=total_prompt,
|
||||
total_completion_tokens=total_completion,
|
||||
)
|
||||
|
||||
# Add new messages (only those after existing count)
|
||||
new_messages = session.messages[existing_message_count:]
|
||||
if new_messages:
|
||||
messages_data = []
|
||||
for msg in new_messages:
|
||||
messages_data.append(
|
||||
{
|
||||
"role": msg.role,
|
||||
"content": msg.content,
|
||||
"name": msg.name,
|
||||
"tool_call_id": msg.tool_call_id,
|
||||
"refusal": msg.refusal,
|
||||
"tool_calls": msg.tool_calls,
|
||||
"function_call": msg.function_call,
|
||||
}
|
||||
)
|
||||
logger.info(
|
||||
f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
|
||||
f"roles={[m['role'] for m in messages_data]}, "
|
||||
f"start_sequence={existing_message_count}"
|
||||
)
|
||||
await chat_db.add_chat_messages_batch(
|
||||
session_id=session.session_id,
|
||||
messages=messages_data,
|
||||
start_sequence=existing_message_count,
|
||||
)
|
||||
|
||||
|
||||
async def get_chat_session(
|
||||
session_id: str,
|
||||
user_id: str | None = None,
|
||||
) -> ChatSession | None:
|
||||
"""Get a chat session by ID.
|
||||
|
||||
Checks Redis cache first, falls back to database if not found.
|
||||
Caches database results back to Redis.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to fetch.
|
||||
user_id: If provided, validates that the session belongs to this user.
|
||||
If None, ownership is not validated (admin/system access).
|
||||
"""
|
||||
# Try cache first
|
||||
try:
|
||||
session = await _get_session_from_cache(session_id)
|
||||
if session:
|
||||
# Verify user ownership if user_id was provided for validation
|
||||
if user_id is not None and session.user_id != user_id:
|
||||
logger.warning(
|
||||
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
|
||||
)
|
||||
return None
|
||||
return session
|
||||
except RedisError:
|
||||
logger.warning(f"Cache error for session {session_id}, trying database")
|
||||
except Exception as e:
|
||||
logger.warning(f"Unexpected cache error for session {session_id}: {e}")
|
||||
|
||||
# Fall back to database
|
||||
logger.info(f"Session {session_id} not in cache, checking database")
|
||||
session = await _get_session_from_db(session_id)
|
||||
|
||||
if session is None:
|
||||
logger.warning(f"Session {session_id} not found in cache or database")
|
||||
return None
|
||||
|
||||
# Verify user ownership if user_id was provided for validation
|
||||
if user_id is not None and session.user_id != user_id:
|
||||
logger.warning(
|
||||
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
# Cache the session from DB
|
||||
try:
|
||||
await _cache_session(session)
|
||||
logger.info(f"Cached session {session_id} from database")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cache session {session_id}: {e}")
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def upsert_chat_session(
|
||||
session: ChatSession,
|
||||
) -> ChatSession:
|
||||
"""Update a chat session with the given messages."""
|
||||
"""Update a chat session in both cache and database.
|
||||
|
||||
redis_key = f"chat:session:{session.session_id}"
|
||||
Uses session-level locking to prevent race conditions when concurrent
|
||||
operations (e.g., background title update and main stream handler)
|
||||
attempt to upsert the same session simultaneously.
|
||||
|
||||
async_redis = await get_redis_async()
|
||||
resp = await async_redis.setex(
|
||||
redis_key, config.session_ttl, session.model_dump_json()
|
||||
)
|
||||
Raises:
|
||||
DatabaseError: If the database write fails. The cache is still updated
|
||||
as a best-effort optimization, but the error is propagated to ensure
|
||||
callers are aware of the persistence failure.
|
||||
RedisError: If the cache write fails (after successful DB write).
|
||||
"""
|
||||
# Acquire session-specific lock to prevent concurrent upserts
|
||||
lock = await _get_session_lock(session.session_id)
|
||||
|
||||
if not resp:
|
||||
raise RedisError(
|
||||
f"Failed to persist chat session {session.session_id} to Redis: {resp}"
|
||||
async with lock:
|
||||
# Get existing message count from DB for incremental saves
|
||||
existing_message_count = await chat_db.get_chat_session_message_count(
|
||||
session.session_id
|
||||
)
|
||||
|
||||
db_error: Exception | None = None
|
||||
|
||||
# Save to database (primary storage)
|
||||
try:
|
||||
await _save_session_to_db(session, existing_message_count)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to save session {session.session_id} to database: {e}"
|
||||
)
|
||||
db_error = e
|
||||
|
||||
# Save to cache (best-effort, even if DB failed)
|
||||
try:
|
||||
await _cache_session(session)
|
||||
except Exception as e:
|
||||
# If DB succeeded but cache failed, raise cache error
|
||||
if db_error is None:
|
||||
raise RedisError(
|
||||
f"Failed to persist chat session {session.session_id} to Redis: {e}"
|
||||
) from e
|
||||
# If both failed, log cache error but raise DB error (more critical)
|
||||
logger.warning(
|
||||
f"Cache write also failed for session {session.session_id}: {e}"
|
||||
)
|
||||
|
||||
# Propagate DB error after attempting cache (prevents data loss)
|
||||
if db_error is not None:
|
||||
raise DatabaseError(
|
||||
f"Failed to persist chat session {session.session_id} to database"
|
||||
) from db_error
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def create_chat_session(user_id: str) -> ChatSession:
|
||||
"""Create a new chat session and persist it.
|
||||
|
||||
Raises:
|
||||
DatabaseError: If the database write fails. We fail fast to ensure
|
||||
callers never receive a non-persisted session that only exists
|
||||
in cache (which would be lost when the cache expires).
|
||||
"""
|
||||
session = ChatSession.new(user_id)
|
||||
|
||||
# Create in database first - fail fast if this fails
|
||||
try:
|
||||
await chat_db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create session {session.session_id} in database: {e}")
|
||||
raise DatabaseError(
|
||||
f"Failed to create chat session {session.session_id} in database"
|
||||
) from e
|
||||
|
||||
# Cache the session (best-effort optimization, DB is source of truth)
|
||||
try:
|
||||
await _cache_session(session)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cache new session {session.session_id}: {e}")
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def get_user_sessions(
|
||||
user_id: str,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[ChatSession], int]:
|
||||
"""Get chat sessions for a user from the database with total count.
|
||||
|
||||
Returns:
|
||||
A tuple of (sessions, total_count) where total_count is the overall
|
||||
number of sessions for the user (not just the current page).
|
||||
"""
|
||||
prisma_sessions = await chat_db.get_user_chat_sessions(user_id, limit, offset)
|
||||
total_count = await chat_db.get_user_session_count(user_id)
|
||||
|
||||
sessions = []
|
||||
for prisma_session in prisma_sessions:
|
||||
# Convert without messages for listing (lighter weight)
|
||||
sessions.append(ChatSession.from_db(prisma_session, None))
|
||||
|
||||
return sessions, total_count
|
||||
|
||||
|
||||
async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
|
||||
"""Delete a chat session from both cache and database.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to delete.
|
||||
user_id: If provided, validates that the session belongs to this user
|
||||
before deletion. This prevents unauthorized deletion.
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False otherwise.
|
||||
"""
|
||||
# Delete from database first (with optional user_id validation)
|
||||
# This confirms ownership before invalidating cache
|
||||
deleted = await chat_db.delete_chat_session(session_id, user_id)
|
||||
|
||||
if not deleted:
|
||||
return False
|
||||
|
||||
# Only invalidate cache and clean up lock after DB confirms deletion
|
||||
try:
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.delete(redis_key)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete session {session_id} from cache: {e}")
|
||||
|
||||
# Clean up session lock (belt-and-suspenders with WeakValueDictionary)
|
||||
async with _session_locks_mutex:
|
||||
_session_locks.pop(session_id, None)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
async def update_session_title(session_id: str, title: str) -> bool:
|
||||
"""Update only the title of a chat session.
|
||||
|
||||
This is a lightweight operation that doesn't touch messages, avoiding
|
||||
race conditions with concurrent message updates. Use this for background
|
||||
title generation instead of upsert_chat_session.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to update.
|
||||
title: The new title to set.
|
||||
|
||||
Returns:
|
||||
True if updated successfully, False otherwise.
|
||||
"""
|
||||
try:
|
||||
result = await chat_db.update_chat_session(session_id=session_id, title=title)
|
||||
if result is None:
|
||||
logger.warning(f"Session {session_id} not found for title update")
|
||||
return False
|
||||
|
||||
# Invalidate cache so next fetch gets updated title
|
||||
try:
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.delete(redis_key)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update title for session {session_id}: {e}")
|
||||
return False
|
||||
|
||||
@@ -43,9 +43,9 @@ async def test_chatsession_serialization_deserialization():


@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_redis_storage():
async def test_chatsession_redis_storage(setup_test_user, test_user_id):

    s = ChatSession.new(user_id=None)
    s = ChatSession.new(user_id=test_user_id)
    s.messages = messages

    s = await upsert_chat_session(s)
@@ -59,12 +59,61 @@ async def test_chatsession_redis_storage():


@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_redis_storage_user_id_mismatch():
async def test_chatsession_redis_storage_user_id_mismatch(
    setup_test_user, test_user_id
):

    s = ChatSession.new(user_id="abc123")
    s = ChatSession.new(user_id=test_user_id)
    s.messages = messages
    s = await upsert_chat_session(s)

    s2 = await get_chat_session(s.session_id, None)
    s2 = await get_chat_session(s.session_id, "different_user_id")

    assert s2 is None


@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_db_storage(setup_test_user, test_user_id):
    """Test that messages are correctly saved to and loaded from DB (not cache)."""
    from backend.data.redis_client import get_redis_async

    # Create session with messages including assistant message
    s = ChatSession.new(user_id=test_user_id)
    s.messages = messages  # Contains user, assistant, and tool messages
    assert s.session_id is not None, "Session id is not set"
    # Upsert to save to both cache and DB
    s = await upsert_chat_session(s)

    # Clear the Redis cache to force DB load
    redis_key = f"chat:session:{s.session_id}"
    async_redis = await get_redis_async()
    await async_redis.delete(redis_key)

    # Load from DB (cache was cleared)
    s2 = await get_chat_session(
        session_id=s.session_id,
        user_id=s.user_id,
    )

    assert s2 is not None, "Session not found after loading from DB"
    assert len(s2.messages) == len(
        s.messages
    ), f"Message count mismatch: expected {len(s.messages)}, got {len(s2.messages)}"

    # Verify all roles are present
    roles = [m.role for m in s2.messages]
    assert "user" in roles, f"User message missing. Roles found: {roles}"
    assert "assistant" in roles, f"Assistant message missing. Roles found: {roles}"
    assert "tool" in roles, f"Tool message missing. Roles found: {roles}"

    # Verify message content
    for orig, loaded in zip(s.messages, s2.messages):
        assert orig.role == loaded.role, f"Role mismatch: {orig.role} != {loaded.role}"
        assert (
            orig.content == loaded.content
        ), f"Content mismatch for {orig.role}: {orig.content} != {loaded.content}"
        if orig.tool_calls:
            assert (
                loaded.tool_calls is not None
            ), f"Tool calls missing for {orig.role} message"
            assert len(orig.tool_calls) == len(loaded.tool_calls)

@@ -1,104 +0,0 @@
You are Otto, an AI Co-Pilot and Forward Deployed Engineer for AutoGPT, an AI Business Automation tool. Your mission is to help users quickly find and set up AutoGPT agents to solve their business problems.

Here are the functions available to you:

<functions>
1. **find_agent** - Search for agents that solve the user's problem
2. **run_agent** - Run or schedule an agent (automatically handles setup)
</functions>

## HOW run_agent WORKS

The `run_agent` tool automatically handles the entire setup flow:

1. **First call** (no inputs) → Returns available inputs so user can decide what values to use
2. **Credentials check** → If missing, UI automatically prompts user to add them (you don't need to mention this)
3. **Execution** → Runs when you provide `inputs` OR set `use_defaults=true`

Parameters:
- `username_agent_slug` (required): Agent identifier like "creator/agent-name"
- `inputs`: Object with input values for the agent
- `use_defaults`: Set to `true` to run with default values (only after user confirms)
- `schedule_name` + `cron`: For scheduled execution

## WORKFLOW

1. **find_agent** - Search for agents that solve the user's problem
2. **run_agent** (first call, no inputs) - Get available inputs for the agent
3. **Ask user** what values they want to use OR if they want to use defaults
4. **run_agent** (second call) - Either with `inputs={...}` or `use_defaults=true`

## YOUR APPROACH

**Step 1: Understand the Problem**
- Ask maximum 1-2 targeted questions
- Focus on: What business problem are they solving?
- Move quickly to searching for solutions

**Step 2: Find Agents**
- Use `find_agent` immediately with relevant keywords
- Suggest the best option from search results
- Explain briefly how it solves their problem

**Step 3: Get Agent Inputs**
- Call `run_agent(username_agent_slug="creator/agent-name")` without inputs
- This returns the available inputs (required and optional)
- Present these to the user and ask what values they want

**Step 4: Run with User's Choice**
- If user provides values: `run_agent(username_agent_slug="...", inputs={...})`
- If user says "use defaults": `run_agent(username_agent_slug="...", use_defaults=true)`
- On success, share the agent link with the user

**For Scheduled Execution:**
- Add `schedule_name` and `cron` parameters
- Example: `run_agent(username_agent_slug="...", inputs={...}, schedule_name="Daily Report", cron="0 9 * * *")`

## FUNCTION CALL FORMAT

To call a function, use this exact format:
`<function_call>function_name(parameter="value")</function_call>`

Examples:
- `<function_call>find_agent(query="social media automation")</function_call>`
- `<function_call>run_agent(username_agent_slug="creator/agent-name")</function_call>` (get inputs)
- `<function_call>run_agent(username_agent_slug="creator/agent-name", inputs={"topic": "AI news"})</function_call>`
- `<function_call>run_agent(username_agent_slug="creator/agent-name", use_defaults=true)</function_call>`

## KEY RULES

**What You DON'T Do:**
- Don't help with login (frontend handles this)
- Don't mention or explain credentials to the user (frontend handles this automatically)
- Don't run agents without first showing available inputs to the user
- Don't use `use_defaults=true` without user explicitly confirming
- Don't write responses longer than 3 sentences

**What You DO:**
- Always call run_agent first without inputs to see what's available
- Ask user what values they want OR if they want to use defaults
- Keep all responses to maximum 3 sentences
- Include the agent link in your response after successful execution

**Error Handling:**
- Authentication needed → "Please sign in via the interface"
- Credentials missing → The UI handles this automatically. Focus on asking the user about input values instead.

## RESPONSE STRUCTURE

Before responding, wrap your analysis in <thinking> tags to systematically plan your approach:
- Extract the key business problem or request from the user's message
- Determine what function call (if any) you need to make next
- Plan your response to stay under the 3-sentence maximum

Example interaction:
```
User: "Run the AI news agent for me"
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news")</function_call>
[Tool returns: Agent accepts inputs - Required: topic. Optional: num_articles (default: 5)]
Otto: The AI News agent needs a topic. What topic would you like news about, or should I use the defaults?
User: "Use defaults"
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news", use_defaults=true)</function_call>
```

KEEP ANSWERS TO 3 SENTENCES

@@ -1,3 +1,10 @@
"""
Response models for Vercel AI SDK UI Stream Protocol.

This module implements the AI SDK UI Stream Protocol (v1) for streaming chat responses.
See: https://ai-sdk.dev/docs/ai-sdk-ui/stream-protocol
"""

from enum import Enum
from typing import Any

@@ -5,97 +12,133 @@ from pydantic import BaseModel, Field


class ResponseType(str, Enum):
    """Types of streaming responses."""
    """Types of streaming responses following AI SDK protocol."""

    TEXT_CHUNK = "text_chunk"
    TEXT_ENDED = "text_ended"
    TOOL_CALL = "tool_call"
    TOOL_CALL_START = "tool_call_start"
    TOOL_RESPONSE = "tool_response"
    # Message lifecycle
    START = "start"
    FINISH = "finish"

    # Text streaming
    TEXT_START = "text-start"
    TEXT_DELTA = "text-delta"
    TEXT_END = "text-end"

    # Tool interaction
    TOOL_INPUT_START = "tool-input-start"
    TOOL_INPUT_AVAILABLE = "tool-input-available"
    TOOL_OUTPUT_AVAILABLE = "tool-output-available"

    # Other
    ERROR = "error"
    USAGE = "usage"
    STREAM_END = "stream_end"


class StreamBaseResponse(BaseModel):
    """Base response model for all streaming responses."""

    type: ResponseType
    timestamp: str | None = None

    def to_sse(self) -> str:
        """Convert to SSE format."""
        return f"data: {self.model_dump_json()}\n\n"


class StreamTextChunk(StreamBaseResponse):
    """Streaming text content from the assistant."""

    type: ResponseType = ResponseType.TEXT_CHUNK
    content: str = Field(..., description="Text content chunk")
# ========== Message Lifecycle ==========


class StreamToolCallStart(StreamBaseResponse):
class StreamStart(StreamBaseResponse):
    """Start of a new message."""

    type: ResponseType = ResponseType.START
    messageId: str = Field(..., description="Unique message ID")


class StreamFinish(StreamBaseResponse):
    """End of message/stream."""

    type: ResponseType = ResponseType.FINISH


# ========== Text Streaming ==========


class StreamTextStart(StreamBaseResponse):
    """Start of a text block."""

    type: ResponseType = ResponseType.TEXT_START
    id: str = Field(..., description="Text block ID")


class StreamTextDelta(StreamBaseResponse):
    """Streaming text content delta."""

    type: ResponseType = ResponseType.TEXT_DELTA
    id: str = Field(..., description="Text block ID")
    delta: str = Field(..., description="Text content delta")


class StreamTextEnd(StreamBaseResponse):
    """End of a text block."""

    type: ResponseType = ResponseType.TEXT_END
    id: str = Field(..., description="Text block ID")


# ========== Tool Interaction ==========


class StreamToolInputStart(StreamBaseResponse):
    """Tool call started notification."""

    type: ResponseType = ResponseType.TOOL_CALL_START
    tool_name: str = Field(..., description="Name of the tool that was executed")
    tool_id: str = Field(..., description="Unique tool call ID")
    type: ResponseType = ResponseType.TOOL_INPUT_START
    toolCallId: str = Field(..., description="Unique tool call ID")
    toolName: str = Field(..., description="Name of the tool being called")


class StreamToolCall(StreamBaseResponse):
    """Tool invocation notification."""
class StreamToolInputAvailable(StreamBaseResponse):
    """Tool input is ready for execution."""

    type: ResponseType = ResponseType.TOOL_CALL
    tool_id: str = Field(..., description="Unique tool call ID")
    tool_name: str = Field(..., description="Name of the tool being called")
    arguments: dict[str, Any] = Field(
        default_factory=dict, description="Tool arguments"
    type: ResponseType = ResponseType.TOOL_INPUT_AVAILABLE
    toolCallId: str = Field(..., description="Unique tool call ID")
    toolName: str = Field(..., description="Name of the tool being called")
    input: dict[str, Any] = Field(
        default_factory=dict, description="Tool input arguments"
    )


class StreamToolExecutionResult(StreamBaseResponse):
class StreamToolOutputAvailable(StreamBaseResponse):
    """Tool execution result."""

    type: ResponseType = ResponseType.TOOL_RESPONSE
    tool_id: str = Field(..., description="Tool call ID this responds to")
    tool_name: str = Field(..., description="Name of the tool that was executed")
    result: str | dict[str, Any] = Field(..., description="Tool execution result")
    type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
    toolCallId: str = Field(..., description="Tool call ID this responds to")
    output: str | dict[str, Any] = Field(..., description="Tool execution output")
    # Additional fields for internal use (not part of AI SDK spec but useful)
    toolName: str | None = Field(
        default=None, description="Name of the tool that was executed"
    )
    success: bool = Field(
        default=True, description="Whether the tool execution succeeded"
    )


# ========== Other ==========


class StreamUsage(StreamBaseResponse):
    """Token usage statistics."""

    type: ResponseType = ResponseType.USAGE
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    promptTokens: int = Field(..., description="Number of prompt tokens")
    completionTokens: int = Field(..., description="Number of completion tokens")
    totalTokens: int = Field(..., description="Total number of tokens")


class StreamError(StreamBaseResponse):
    """Error response."""

    type: ResponseType = ResponseType.ERROR
    message: str = Field(..., description="Error message")
    errorText: str = Field(..., description="Error message text")
    code: str | None = Field(default=None, description="Error code")
    details: dict[str, Any] | None = Field(
        default=None, description="Additional error details"
    )


class StreamTextEnded(StreamBaseResponse):
    """Text streaming completed marker."""

    type: ResponseType = ResponseType.TEXT_ENDED


class StreamEnd(StreamBaseResponse):
    """End of stream marker."""

    type: ResponseType = ResponseType.STREAM_END
    summary: dict[str, Any] | None = Field(
        default=None, description="Stream summary statistics"
    )
|
||||
|
||||
from . import service as chat_service
|
||||
from .config import ChatConfig
|
||||
from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
|
||||
|
||||
config = ChatConfig()
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _validate_and_get_session(
|
||||
session_id: str,
|
||||
user_id: str | None,
|
||||
) -> ChatSession:
|
||||
"""Validate session exists and belongs to user."""
|
||||
session = await get_chat_session(session_id, user_id)
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found.")
|
||||
return session
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
tags=["chat"],
|
||||
)
|
||||
@@ -26,6 +39,14 @@ router = APIRouter(
|
||||
# ========== Request/Response Models ==========
|
||||
|
||||
|
||||
class StreamChatRequest(BaseModel):
|
||||
"""Request model for streaming chat with optional context."""
|
||||
|
||||
message: str
|
||||
is_user_message: bool = True
|
||||
context: dict[str, str] | None = None # {url: str, content: str}
|
||||
|
||||
|
||||
class CreateSessionResponse(BaseModel):
|
||||
"""Response model containing information on a newly created chat session."""
|
||||
|
||||
@@ -44,22 +65,77 @@ class SessionDetailResponse(BaseModel):
|
||||
messages: list[dict]
|
||||
|
||||
|
||||
class SessionSummaryResponse(BaseModel):
|
||||
"""Response model for a session summary (without messages)."""
|
||||
|
||||
id: str
|
||||
created_at: str
|
||||
updated_at: str
|
||||
title: str | None = None
|
||||
|
||||
|
||||
class ListSessionsResponse(BaseModel):
|
||||
"""Response model for listing chat sessions."""
|
||||
|
||||
sessions: list[SessionSummaryResponse]
|
||||
total: int
|
||||
|
||||
|
||||
# ========== Routes ==========
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions",
|
||||
dependencies=[Security(auth.requires_user)],
|
||||
)
|
||||
async def list_sessions(
|
||||
user_id: Annotated[str, Security(auth.get_user_id)],
|
||||
limit: int = Query(default=50, ge=1, le=100),
|
||||
offset: int = Query(default=0, ge=0),
|
||||
) -> ListSessionsResponse:
|
||||
"""
|
||||
List chat sessions for the authenticated user.
|
||||
|
||||
Returns a paginated list of chat sessions belonging to the current user,
|
||||
ordered by most recently updated.
|
||||
|
||||
Args:
|
||||
user_id: The authenticated user's ID.
|
||||
limit: Maximum number of sessions to return (1-100).
|
||||
offset: Number of sessions to skip for pagination.
|
||||
|
||||
Returns:
|
||||
ListSessionsResponse: List of session summaries and total count.
|
||||
"""
|
||||
sessions, total_count = await get_user_sessions(user_id, limit, offset)
|
||||
|
||||
return ListSessionsResponse(
|
||||
sessions=[
|
||||
SessionSummaryResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
updated_at=session.updated_at.isoformat(),
|
||||
title=session.title,
|
||||
)
|
||||
for session in sessions
|
||||
],
|
||||
total=total_count,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/sessions",
|
||||
)
|
||||
async def create_session(
|
||||
user_id: Annotated[str | None, Depends(auth.get_user_id)],
|
||||
user_id: Annotated[str, Depends(auth.get_user_id)],
|
||||
) -> CreateSessionResponse:
|
||||
"""
|
||||
Create a new chat session.
|
||||
|
||||
Initiates a new chat session for either an authenticated or anonymous user.
|
||||
Initiates a new chat session for the authenticated user.
|
||||
|
||||
Args:
|
||||
user_id: The optional authenticated user ID parsed from the JWT. If missing, creates an anonymous session.
|
||||
user_id: The authenticated user ID parsed from the JWT (required).
|
||||
|
||||
Returns:
|
||||
CreateSessionResponse: Details of the created session.
|
||||
@@ -67,15 +143,15 @@ async def create_session(
|
||||
"""
|
||||
logger.info(
|
||||
f"Creating session with user_id: "
|
||||
f"...{user_id[-8:] if user_id and len(user_id) > 8 else '<redacted>'}"
|
||||
f"...{user_id[-8:] if len(user_id) > 8 else '<redacted>'}"
|
||||
)
|
||||
|
||||
session = await chat_service.create_chat_session(user_id)
|
||||
session = await create_chat_session(user_id)
|
||||
|
||||
return CreateSessionResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
user_id=session.user_id or None,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -99,29 +175,88 @@ async def get_session(
|
||||
SessionDetailResponse: Details for the requested session; raises NotFoundError if not found.
|
||||
|
||||
"""
|
||||
session = await chat_service.get_session(session_id, user_id)
|
||||
session = await get_chat_session(session_id, user_id)
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found")
|
||||
|
||||
messages = [message.model_dump() for message in session.messages]
|
||||
logger.info(
|
||||
f"Returning session {session_id}: "
|
||||
f"message_count={len(messages)}, "
|
||||
f"roles={[m.get('role') for m in messages]}"
|
||||
)
|
||||
|
||||
return SessionDetailResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
updated_at=session.updated_at.isoformat(),
|
||||
user_id=session.user_id or None,
|
||||
messages=[message.model_dump() for message in session.messages],
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/sessions/{session_id}/stream",
|
||||
)
|
||||
async def stream_chat_post(
|
||||
session_id: str,
|
||||
request: StreamChatRequest,
|
||||
user_id: str | None = Depends(auth.get_user_id),
|
||||
):
|
||||
"""
|
||||
Stream chat responses for a session (POST with context support).
|
||||
|
||||
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
|
||||
- Text fragments as they are generated
|
||||
- Tool call UI elements (if invoked)
|
||||
- Tool execution results
|
||||
|
||||
Args:
|
||||
session_id: The chat session identifier to associate with the streamed messages.
|
||||
request: Request body containing message, is_user_message, and optional context.
|
||||
user_id: Optional authenticated user ID.
|
||||
Returns:
|
||||
StreamingResponse: SSE-formatted response chunks.
|
||||
|
||||
"""
|
||||
session = await _validate_and_get_session(session_id, user_id)
|
||||
|
||||
async def event_generator() -> AsyncGenerator[str, None]:
|
||||
async for chunk in chat_service.stream_chat_completion(
|
||||
session_id,
|
||||
request.message,
|
||||
is_user_message=request.is_user_message,
|
||||
user_id=user_id,
|
||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
||||
context=request.context,
|
||||
):
|
||||
yield chunk.to_sse()
|
||||
# AI SDK protocol termination
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
||||
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions/{session_id}/stream",
|
||||
)
|
||||
async def stream_chat(
|
||||
async def stream_chat_get(
|
||||
session_id: str,
|
||||
message: Annotated[str, Query(min_length=1, max_length=10000)],
|
||||
user_id: str | None = Depends(auth.get_user_id),
|
||||
is_user_message: bool = Query(default=True),
|
||||
):
|
||||
"""
|
||||
Stream chat responses for a session.
|
||||
Stream chat responses for a session (GET - legacy endpoint).
|
||||
|
||||
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
|
||||
- Text fragments as they are generated
|
||||
@@ -137,14 +272,7 @@ async def stream_chat(
|
||||
StreamingResponse: SSE-formatted response chunks.
|
||||
|
||||
"""
|
||||
# Validate session exists before starting the stream
|
||||
# This prevents errors after the response has already started
|
||||
session = await chat_service.get_session(session_id, user_id)
|
||||
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found. ")
|
||||
if session.user_id is None and user_id is not None:
|
||||
session = await chat_service.assign_user_to_session(session_id, user_id)
|
||||
session = await _validate_and_get_session(session_id, user_id)
|
||||
|
||||
async def event_generator() -> AsyncGenerator[str, None]:
|
||||
async for chunk in chat_service.stream_chat_completion(
|
||||
@@ -155,6 +283,8 @@ async def stream_chat(
|
||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
||||
):
|
||||
yield chunk.to_sse()
|
||||
# AI SDK protocol termination
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
@@ -163,6 +293,7 @@ async def stream_chat(
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
||||
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
|
||||
},
|
||||
)
|
||||
|
||||
@@ -201,16 +332,28 @@ async def health_check() -> dict:
    """
    Health check endpoint for the chat service.

    Performs a full cycle test of session creation, assignment, and retrieval. Should always return healthy
    Performs a full cycle test of session creation and retrieval. Should always return healthy
    if the service and data layer are operational.

    Returns:
        dict: A status dictionary indicating health, service name, and API version.

    """
    session = await chat_service.create_chat_session(None)
    await chat_service.assign_user_to_session(session.session_id, "test_user")
    await chat_service.get_session(session.session_id, "test_user")
    from backend.data.user import get_or_create_user

    # Ensure health check user exists (required for FK constraint)
    health_check_user_id = "health-check-user"
    await get_or_create_user(
        {
            "sub": health_check_user_id,
            "email": "health-check@system.local",
            "user_metadata": {"name": "Health Check User"},
        }
    )

    # Create and retrieve session to verify full data layer
    session = await create_chat_session(health_check_user_id)
    await get_chat_session(session.session_id, health_check_user_id)

    return {
        "status": "healthy",
|
||||
|
||||
File diff suppressed because it is too large
@@ -4,18 +4,19 @@ from os import getenv
|
||||
import pytest
|
||||
|
||||
from . import service as chat_service
|
||||
from .model import create_chat_session, get_chat_session, upsert_chat_session
|
||||
from .response_model import (
|
||||
StreamEnd,
|
||||
StreamError,
|
||||
StreamTextChunk,
|
||||
StreamToolExecutionResult,
|
||||
StreamFinish,
|
||||
StreamTextDelta,
|
||||
StreamToolOutputAvailable,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_stream_chat_completion():
|
||||
async def test_stream_chat_completion(setup_test_user, test_user_id):
|
||||
"""
|
||||
Test the stream_chat_completion function.
|
||||
"""
|
||||
@@ -23,7 +24,7 @@ async def test_stream_chat_completion():
|
||||
if not api_key:
|
||||
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
|
||||
|
||||
session = await chat_service.create_chat_session()
|
||||
session = await create_chat_session(test_user_id)
|
||||
|
||||
has_errors = False
|
||||
has_ended = False
|
||||
@@ -34,9 +35,9 @@ async def test_stream_chat_completion():
|
||||
logger.info(chunk)
|
||||
if isinstance(chunk, StreamError):
|
||||
has_errors = True
|
||||
if isinstance(chunk, StreamTextChunk):
|
||||
assistant_message += chunk.content
|
||||
if isinstance(chunk, StreamEnd):
|
||||
if isinstance(chunk, StreamTextDelta):
|
||||
assistant_message += chunk.delta
|
||||
if isinstance(chunk, StreamFinish):
|
||||
has_ended = True
|
||||
|
||||
assert has_ended, "Chat completion did not end"
|
||||
@@ -45,7 +46,7 @@ async def test_stream_chat_completion():
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_stream_chat_completion_with_tool_calls():
|
||||
async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user_id):
|
||||
"""
|
||||
Test the stream_chat_completion function.
|
||||
"""
|
||||
@@ -53,8 +54,8 @@ async def test_stream_chat_completion_with_tool_calls():
|
||||
if not api_key:
|
||||
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
|
||||
|
||||
session = await chat_service.create_chat_session()
|
||||
session = await chat_service.upsert_chat_session(session)
|
||||
session = await create_chat_session(test_user_id)
|
||||
session = await upsert_chat_session(session)
|
||||
|
||||
has_errors = False
|
||||
has_ended = False
|
||||
@@ -68,14 +69,14 @@ async def test_stream_chat_completion_with_tool_calls():
|
||||
if isinstance(chunk, StreamError):
|
||||
has_errors = True
|
||||
|
||||
if isinstance(chunk, StreamEnd):
|
||||
if isinstance(chunk, StreamFinish):
|
||||
has_ended = True
|
||||
if isinstance(chunk, StreamToolExecutionResult):
|
||||
if isinstance(chunk, StreamToolOutputAvailable):
|
||||
had_tool_calls = True
|
||||
|
||||
assert has_ended, "Chat completion did not end"
|
||||
assert not has_errors, "Error occurred while streaming chat completion"
|
||||
assert had_tool_calls, "Tool calls did not occur"
|
||||
session = await chat_service.get_session(session.session_id)
|
||||
session = await get_chat_session(session.session_id)
|
||||
assert session, "Session not found"
|
||||
assert session.usage, "Usage is empty"
|
||||
|
||||
@@ -4,21 +4,32 @@ from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .add_understanding import AddUnderstandingTool
|
||||
from .agent_output import AgentOutputTool
|
||||
from .base import BaseTool
|
||||
from .find_agent import FindAgentTool
|
||||
from .find_library_agent import FindLibraryAgentTool
|
||||
from .run_agent import RunAgentTool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from backend.api.features.chat.response_model import StreamToolExecutionResult
|
||||
from backend.api.features.chat.response_model import StreamToolOutputAvailable
|
||||
|
||||
# Initialize tool instances
find_agent_tool = FindAgentTool()
run_agent_tool = RunAgentTool()
# Single source of truth for all tools
TOOL_REGISTRY: dict[str, BaseTool] = {
    "add_understanding": AddUnderstandingTool(),
    "find_agent": FindAgentTool(),
    "find_library_agent": FindLibraryAgentTool(),
    "run_agent": RunAgentTool(),
    "agent_output": AgentOutputTool(),
}

# Export tools as OpenAI format
# Export individual tool instances for backwards compatibility
find_agent_tool = TOOL_REGISTRY["find_agent"]
run_agent_tool = TOOL_REGISTRY["run_agent"]

# Generated from registry for OpenAI API
tools: list[ChatCompletionToolParam] = [
    find_agent_tool.as_openai_tool(),
    run_agent_tool.as_openai_tool(),
    tool.as_openai_tool() for tool in TOOL_REGISTRY.values()
]
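With TOOL_REGISTRY as the single source of truth, adding a tool is one registration line plus its module. A hedged sketch (SummarizeRunTool and its module are hypothetical; only the registration pattern comes from the code above):

from .summarize_run import SummarizeRunTool  # hypothetical new tool module

TOOL_REGISTRY["summarize_run"] = SummarizeRunTool()

# As long as the entry is registered before the `tools` list comprehension above runs
# (i.e., alongside the other entries in this module), the new tool is exposed to the
# OpenAI API and dispatchable via execute_tool() with no further wiring.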
|
||||
|
||||
|
||||
@@ -28,14 +39,9 @@ async def execute_tool(
    user_id: str | None,
    session: ChatSession,
    tool_call_id: str,
) -> "StreamToolExecutionResult":

    tool_map: dict[str, BaseTool] = {
        "find_agent": find_agent_tool,
        "run_agent": run_agent_tool,
    }
    if tool_name not in tool_map:
) -> "StreamToolOutputAvailable":
    """Execute a tool by name."""
    tool = TOOL_REGISTRY.get(tool_name)
    if not tool:
        raise ValueError(f"Tool {tool_name} not found")
    return await tool_map[tool_name].execute(
        user_id, session, tool_call_id, **parameters
    )
    return await tool.execute(user_id, session, tool_call_id, **parameters)
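A hedged usage sketch of the simplified dispatcher (argument values are illustrative; keyword names follow the signature and body shown above):

result = await execute_tool(
    tool_name="find_agent",
    parameters={"query": "email summarizer"},
    user_id=user_id,
    session=session,
    tool_call_id="call_123",
)
# `result` is a StreamToolOutputAvailable whose `output` field carries the tool's
# JSON-serialized response, ready to be streamed back to the client.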
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import UTC, datetime
|
||||
from os import getenv
|
||||
|
||||
import pytest
|
||||
from prisma.types import ProfileCreateInput
|
||||
from pydantic import SecretStr
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
@@ -17,7 +18,7 @@ from backend.data.user import get_or_create_user
|
||||
from backend.integrations.credentials_store import IntegrationCredentialsStore
|
||||
|
||||
|
||||
def make_session(user_id: str | None = None):
|
||||
def make_session(user_id: str):
|
||||
return ChatSession(
|
||||
session_id=str(uuid.uuid4()),
|
||||
user_id=user_id,
|
||||
@@ -49,13 +50,13 @@ async def setup_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
)
|
||||
|
||||
# 2. Create a test graph with agent input -> agent output
|
||||
@@ -172,13 +173,13 @@ async def setup_llm_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile for LLM tests",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile for LLM tests",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
)
|
||||
|
||||
# 2. Create test OpenAI credentials for the user
|
||||
@@ -332,13 +333,13 @@ async def setup_firecrawl_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile for Firecrawl tests",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile for Firecrawl tests",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
)
|
||||
|
||||
# NOTE: We deliberately do NOT create Firecrawl credentials for this user
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
"""Tool for capturing user business understanding incrementally."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.data.understanding import (
|
||||
BusinessUnderstandingInput,
|
||||
upsert_business_understanding,
|
||||
)
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import ErrorResponse, ToolResponseBase, UnderstandingUpdatedResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AddUnderstandingTool(BaseTool):
|
||||
"""Tool for capturing user's business understanding incrementally."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "add_understanding"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Capture and store information about the user's business context,
|
||||
workflows, pain points, and automation goals. Call this tool whenever the user
|
||||
shares information about their business. Each call incrementally adds to the
|
||||
existing understanding - you don't need to provide all fields at once.
|
||||
|
||||
Use this to build a comprehensive profile that helps recommend better agents
|
||||
and automations for the user's specific needs."""
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
# Auto-generate from Pydantic model schema
|
||||
schema = BusinessUnderstandingInput.model_json_schema()
|
||||
properties = {}
|
||||
for field_name, field_schema in schema.get("properties", {}).items():
|
||||
prop: dict[str, Any] = {"description": field_schema.get("description", "")}
|
||||
# Handle anyOf for Optional types
|
||||
if "anyOf" in field_schema:
|
||||
for option in field_schema["anyOf"]:
|
||||
if option.get("type") != "null":
|
||||
prop["type"] = option.get("type", "string")
|
||||
if "items" in option:
|
||||
prop["items"] = option["items"]
|
||||
break
|
||||
else:
|
||||
prop["type"] = field_schema.get("type", "string")
|
||||
if "items" in field_schema:
|
||||
prop["items"] = field_schema["items"]
|
||||
properties[field_name] = prop
|
||||
return {"type": "object", "properties": properties, "required": []}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
"""Requires authentication to store user-specific data."""
|
||||
return True
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
"""
|
||||
Capture and store business understanding incrementally.
|
||||
|
||||
Each call merges new data with existing understanding:
|
||||
- String fields are overwritten if provided
|
||||
- List fields are appended (with deduplication)
|
||||
"""
|
||||
session_id = session.session_id
|
||||
|
||||
if not user_id:
|
||||
return ErrorResponse(
|
||||
message="Authentication required to save business understanding.",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Check if any data was provided
|
||||
if not any(v is not None for v in kwargs.values()):
|
||||
return ErrorResponse(
|
||||
message="Please provide at least one field to update.",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Build input model from kwargs (only include fields defined in the model)
|
||||
valid_fields = set(BusinessUnderstandingInput.model_fields.keys())
|
||||
input_data = BusinessUnderstandingInput(
|
||||
**{k: v for k, v in kwargs.items() if k in valid_fields}
|
||||
)
|
||||
|
||||
# Track which fields were updated
|
||||
updated_fields = [
|
||||
k for k, v in kwargs.items() if k in valid_fields and v is not None
|
||||
]
|
||||
|
||||
# Upsert with merge
|
||||
understanding = await upsert_business_understanding(user_id, input_data)
|
||||
|
||||
# Build current understanding summary (filter out empty values)
|
||||
current_understanding = {
|
||||
k: v
|
||||
for k, v in understanding.model_dump(
|
||||
exclude={"id", "user_id", "created_at", "updated_at"}
|
||||
).items()
|
||||
if v is not None and v != [] and v != ""
|
||||
}
|
||||
|
||||
return UnderstandingUpdatedResponse(
|
||||
message=f"Updated understanding with: {', '.join(updated_fields)}. "
|
||||
"I now have a better picture of your business context.",
|
||||
session_id=session_id,
|
||||
updated_fields=updated_fields,
|
||||
current_understanding=current_understanding,
|
||||
)
|
||||
@@ -0,0 +1,446 @@
|
||||
"""Tool for retrieving agent execution outputs from user's library."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.api.features.library.model import LibraryAgent
|
||||
from backend.data import execution as execution_db
|
||||
from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import (
|
||||
AgentOutputResponse,
|
||||
ErrorResponse,
|
||||
ExecutionOutputInfo,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
from .utils import fetch_graph_from_store_slug
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AgentOutputInput(BaseModel):
|
||||
"""Input parameters for the agent_output tool."""
|
||||
|
||||
agent_name: str = ""
|
||||
library_agent_id: str = ""
|
||||
store_slug: str = ""
|
||||
execution_id: str = ""
|
||||
run_time: str = "latest"
|
||||
|
||||
@field_validator(
|
||||
"agent_name",
|
||||
"library_agent_id",
|
||||
"store_slug",
|
||||
"execution_id",
|
||||
"run_time",
|
||||
mode="before",
|
||||
)
|
||||
@classmethod
|
||||
def strip_strings(cls, v: Any) -> Any:
|
||||
"""Strip whitespace from string fields."""
|
||||
return v.strip() if isinstance(v, str) else v
|
||||
|
||||
|
||||
def parse_time_expression(
|
||||
time_expr: str | None,
|
||||
) -> tuple[datetime | None, datetime | None]:
|
||||
"""
|
||||
Parse time expression into datetime range (start, end).
|
||||
|
||||
Supports: "latest", "yesterday", "today", "last week", "last 7 days",
|
||||
"last month", "last 30 days", ISO date "YYYY-MM-DD", ISO datetime.
|
||||
"""
|
||||
if not time_expr or time_expr.lower() == "latest":
|
||||
return None, None
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
expr = time_expr.lower().strip()
|
||||
|
||||
# Relative time expressions lookup
|
||||
relative_times: dict[str, tuple[datetime, datetime]] = {
|
||||
"yesterday": (today_start - timedelta(days=1), today_start),
|
||||
"today": (today_start, now),
|
||||
"last week": (now - timedelta(days=7), now),
|
||||
"last 7 days": (now - timedelta(days=7), now),
|
||||
"last month": (now - timedelta(days=30), now),
|
||||
"last 30 days": (now - timedelta(days=30), now),
|
||||
}
|
||||
if expr in relative_times:
|
||||
return relative_times[expr]
|
||||
|
||||
# Try ISO date format (YYYY-MM-DD)
|
||||
date_match = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", expr)
|
||||
if date_match:
|
||||
try:
|
||||
year, month, day = map(int, date_match.groups())
|
||||
start = datetime(year, month, day, 0, 0, 0, tzinfo=timezone.utc)
|
||||
return start, start + timedelta(days=1)
|
||||
except ValueError:
|
||||
# Invalid date components (e.g., month=13, day=32)
|
||||
pass
|
||||
|
||||
# Try ISO datetime
|
||||
try:
|
||||
parsed = datetime.fromisoformat(expr.replace("Z", "+00:00"))
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed - timedelta(hours=1), parsed + timedelta(hours=1)
|
||||
except ValueError:
|
||||
return None, None
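A hedged usage sketch of the parser above (all returned datetimes are UTC; values illustrative):

start, end = parse_time_expression("latest")                # (None, None): no time filter
start, end = parse_time_expression("yesterday")             # previous midnight-to-midnight window
start, end = parse_time_expression("2024-06-01")            # that calendar day, [start, start + 1 day)
start, end = parse_time_expression("2024-06-01T12:00:00Z")  # +/- 1 hour around the given instant
start, end = parse_time_expression("not a date")            # unparseable -> (None, None)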
|
||||
|
||||
|
||||
class AgentOutputTool(BaseTool):
|
||||
"""Tool for retrieving execution outputs from user's library agents."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "agent_output"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Retrieve execution outputs from agents in the user's library.
|
||||
|
||||
Identify the agent using one of:
|
||||
- agent_name: Fuzzy search in user's library
|
||||
- library_agent_id: Exact library agent ID
|
||||
- store_slug: Marketplace format 'username/agent-name'
|
||||
|
||||
Select which run to retrieve using:
|
||||
- execution_id: Specific execution ID
|
||||
- run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
|
||||
"""
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_name": {
|
||||
"type": "string",
|
||||
"description": "Agent name to search for in user's library (fuzzy match)",
|
||||
},
|
||||
"library_agent_id": {
|
||||
"type": "string",
|
||||
"description": "Exact library agent ID",
|
||||
},
|
||||
"store_slug": {
|
||||
"type": "string",
|
||||
"description": "Marketplace identifier: 'username/agent-slug'",
|
||||
},
|
||||
"execution_id": {
|
||||
"type": "string",
|
||||
"description": "Specific execution ID to retrieve",
|
||||
},
|
||||
"run_time": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
async def _resolve_agent(
|
||||
self,
|
||||
user_id: str,
|
||||
agent_name: str | None,
|
||||
library_agent_id: str | None,
|
||||
store_slug: str | None,
|
||||
) -> tuple[LibraryAgent | None, str | None]:
|
||||
"""
|
||||
Resolve agent from provided identifiers.
|
||||
Returns (library_agent, error_message).
|
||||
"""
|
||||
# Priority 1: Exact library agent ID
|
||||
if library_agent_id:
|
||||
try:
|
||||
agent = await library_db.get_library_agent(library_agent_id, user_id)
|
||||
return agent, None
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get library agent by ID: {e}")
|
||||
return None, f"Library agent '{library_agent_id}' not found"
|
||||
|
||||
# Priority 2: Store slug (username/agent-name)
|
||||
if store_slug and "/" in store_slug:
|
||||
username, agent_slug = store_slug.split("/", 1)
|
||||
graph, _ = await fetch_graph_from_store_slug(username, agent_slug)
|
||||
if not graph:
|
||||
return None, f"Agent '{store_slug}' not found in marketplace"
|
||||
|
||||
# Find in user's library by graph_id
|
||||
agent = await library_db.get_library_agent_by_graph_id(user_id, graph.id)
|
||||
if not agent:
|
||||
return (
|
||||
None,
|
||||
f"Agent '{store_slug}' is not in your library. "
|
||||
"Add it first to see outputs.",
|
||||
)
|
||||
return agent, None
|
||||
|
||||
# Priority 3: Fuzzy name search in library
|
||||
if agent_name:
|
||||
try:
|
||||
response = await library_db.list_library_agents(
|
||||
user_id=user_id,
|
||||
search_term=agent_name,
|
||||
page_size=5,
|
||||
)
|
||||
if not response.agents:
|
||||
return (
|
||||
None,
|
||||
f"No agents matching '{agent_name}' found in your library",
|
||||
)
|
||||
|
||||
# Return best match (first result from search)
|
||||
return response.agents[0], None
|
||||
except Exception as e:
|
||||
logger.error(f"Error searching library agents: {e}")
|
||||
return None, f"Error searching for agent: {e}"
|
||||
|
||||
return (
|
||||
None,
|
||||
"Please specify an agent name, library_agent_id, or store_slug",
|
||||
)
|
||||
|
||||
async def _get_execution(
|
||||
self,
|
||||
user_id: str,
|
||||
graph_id: str,
|
||||
execution_id: str | None,
|
||||
time_start: datetime | None,
|
||||
time_end: datetime | None,
|
||||
) -> tuple[GraphExecution | None, list[GraphExecutionMeta], str | None]:
|
||||
"""
|
||||
Fetch execution(s) based on filters.
|
||||
Returns (single_execution, available_executions_meta, error_message).
|
||||
"""
|
||||
# If specific execution_id provided, fetch it directly
|
||||
if execution_id:
|
||||
execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=execution_id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
if not execution:
|
||||
return None, [], f"Execution '{execution_id}' not found"
|
||||
return execution, [], None
|
||||
|
||||
# Get completed executions with time filters
|
||||
executions = await execution_db.get_graph_executions(
|
||||
graph_id=graph_id,
|
||||
user_id=user_id,
|
||||
statuses=[ExecutionStatus.COMPLETED],
|
||||
created_time_gte=time_start,
|
||||
created_time_lte=time_end,
|
||||
limit=10,
|
||||
)
|
||||
|
||||
if not executions:
|
||||
return None, [], None # No error, just no executions
|
||||
|
||||
# If only one execution, fetch full details
|
||||
if len(executions) == 1:
|
||||
full_execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=executions[0].id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
return full_execution, [], None
|
||||
|
||||
# Multiple executions - return latest with full details, plus list of available
|
||||
full_execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=executions[0].id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
return full_execution, executions, None
|
||||
|
||||
def _build_response(
|
||||
self,
|
||||
agent: LibraryAgent,
|
||||
execution: GraphExecution | None,
|
||||
available_executions: list[GraphExecutionMeta],
|
||||
session_id: str | None,
|
||||
) -> AgentOutputResponse:
|
||||
"""Build the response based on execution data."""
|
||||
library_agent_link = f"/library/agents/{agent.id}"
|
||||
|
||||
if not execution:
|
||||
return AgentOutputResponse(
|
||||
message=f"No completed executions found for agent '{agent.name}'",
|
||||
session_id=session_id,
|
||||
agent_name=agent.name,
|
||||
agent_id=agent.graph_id,
|
||||
library_agent_id=agent.id,
|
||||
library_agent_link=library_agent_link,
|
||||
total_executions=0,
|
||||
)
|
||||
|
||||
execution_info = ExecutionOutputInfo(
|
||||
execution_id=execution.id,
|
||||
status=execution.status.value,
|
||||
started_at=execution.started_at,
|
||||
ended_at=execution.ended_at,
|
||||
outputs=dict(execution.outputs),
|
||||
inputs_summary=execution.inputs if execution.inputs else None,
|
||||
)
|
||||
|
||||
available_list = None
|
||||
if len(available_executions) > 1:
|
||||
available_list = [
|
||||
{
|
||||
"id": e.id,
|
||||
"status": e.status.value,
|
||||
"started_at": e.started_at.isoformat() if e.started_at else None,
|
||||
}
|
||||
for e in available_executions[:5]
|
||||
]
|
||||
|
||||
message = f"Found execution outputs for agent '{agent.name}'"
|
||||
if len(available_executions) > 1:
|
||||
message += (
|
||||
f". Showing latest of {len(available_executions)} matching executions."
|
||||
)
|
||||
|
||||
return AgentOutputResponse(
|
||||
message=message,
|
||||
session_id=session_id,
|
||||
agent_name=agent.name,
|
||||
agent_id=agent.graph_id,
|
||||
library_agent_id=agent.id,
|
||||
library_agent_link=library_agent_link,
|
||||
execution=execution_info,
|
||||
available_executions=available_list,
|
||||
total_executions=len(available_executions) if available_executions else 1,
|
||||
)
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
"""Execute the agent_output tool."""
|
||||
session_id = session.session_id
|
||||
|
||||
# Parse and validate input
|
||||
try:
|
||||
input_data = AgentOutputInput(**kwargs)
|
||||
except Exception as e:
|
||||
logger.error(f"Invalid input: {e}")
|
||||
return ErrorResponse(
|
||||
message="Invalid input parameters",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Ensure user_id is present (should be guaranteed by requires_auth)
|
||||
if not user_id:
|
||||
return ErrorResponse(
|
||||
message="User authentication required",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Check if at least one identifier is provided
|
||||
if not any(
|
||||
[
|
||||
input_data.agent_name,
|
||||
input_data.library_agent_id,
|
||||
input_data.store_slug,
|
||||
input_data.execution_id,
|
||||
]
|
||||
):
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
"Please specify at least one of: agent_name, "
|
||||
"library_agent_id, store_slug, or execution_id"
|
||||
),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# If only execution_id provided, we need to find the agent differently
|
||||
if (
|
||||
input_data.execution_id
|
||||
and not input_data.agent_name
|
||||
and not input_data.library_agent_id
|
||||
and not input_data.store_slug
|
||||
):
|
||||
# Fetch execution directly to get graph_id
|
||||
execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=input_data.execution_id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
if not execution:
|
||||
return ErrorResponse(
|
||||
message=f"Execution '{input_data.execution_id}' not found",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Find library agent by graph_id
|
||||
agent = await library_db.get_library_agent_by_graph_id(
|
||||
user_id, execution.graph_id
|
||||
)
|
||||
if not agent:
|
||||
return NoResultsResponse(
|
||||
message=(
|
||||
f"Execution found but agent not in your library. "
|
||||
f"Graph ID: {execution.graph_id}"
|
||||
),
|
||||
session_id=session_id,
|
||||
suggestions=["Add the agent to your library to see more details"],
|
||||
)
|
||||
|
||||
return self._build_response(agent, execution, [], session_id)
|
||||
|
||||
# Resolve agent from identifiers
|
||||
agent, error = await self._resolve_agent(
|
||||
user_id=user_id,
|
||||
agent_name=input_data.agent_name or None,
|
||||
library_agent_id=input_data.library_agent_id or None,
|
||||
store_slug=input_data.store_slug or None,
|
||||
)
|
||||
|
||||
if error or not agent:
|
||||
return NoResultsResponse(
|
||||
message=error or "Agent not found",
|
||||
session_id=session_id,
|
||||
suggestions=[
|
||||
"Check the agent name or ID",
|
||||
"Make sure the agent is in your library",
|
||||
],
|
||||
)
|
||||
|
||||
# Parse time expression
|
||||
time_start, time_end = parse_time_expression(input_data.run_time)
|
||||
|
||||
# Fetch execution(s)
|
||||
execution, available_executions, exec_error = await self._get_execution(
|
||||
user_id=user_id,
|
||||
graph_id=agent.graph_id,
|
||||
execution_id=input_data.execution_id or None,
|
||||
time_start=time_start,
|
||||
time_end=time_end,
|
||||
)
|
||||
|
||||
if exec_error:
|
||||
return ErrorResponse(
|
||||
message=exec_error,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
return self._build_response(agent, execution, available_executions, session_id)
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Shared agent search functionality for find_agent and find_library_agent tools."""
|
||||
|
||||
import logging
|
||||
from typing import Literal
|
||||
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.api.features.store import db as store_db
|
||||
from backend.util.exceptions import DatabaseError, NotFoundError
|
||||
|
||||
from .models import (
|
||||
AgentInfo,
|
||||
AgentsFoundResponse,
|
||||
ErrorResponse,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SearchSource = Literal["marketplace", "library"]
|
||||
|
||||
|
||||
async def search_agents(
|
||||
query: str,
|
||||
source: SearchSource,
|
||||
session_id: str | None,
|
||||
user_id: str | None = None,
|
||||
) -> ToolResponseBase:
|
||||
"""
|
||||
Search for agents in marketplace or user library.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
source: "marketplace" or "library"
|
||||
session_id: Chat session ID
|
||||
user_id: User ID (required for library search)
|
||||
|
||||
Returns:
|
||||
AgentsFoundResponse, NoResultsResponse, or ErrorResponse
|
||||
"""
|
||||
if not query:
|
||||
return ErrorResponse(
|
||||
message="Please provide a search query", session_id=session_id
|
||||
)
|
||||
|
||||
if source == "library" and not user_id:
|
||||
return ErrorResponse(
|
||||
message="User authentication required to search library",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
agents: list[AgentInfo] = []
|
||||
try:
|
||||
if source == "marketplace":
|
||||
logger.info(f"Searching marketplace for: {query}")
|
||||
results = await store_db.get_store_agents(search_query=query, page_size=5)
|
||||
for agent in results.agents:
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=f"{agent.creator}/{agent.slug}",
|
||||
name=agent.agent_name,
|
||||
description=agent.description or "",
|
||||
source="marketplace",
|
||||
in_library=False,
|
||||
creator=agent.creator,
|
||||
category="general",
|
||||
rating=agent.rating,
|
||||
runs=agent.runs,
|
||||
is_featured=False,
|
||||
)
|
||||
)
|
||||
else: # library
|
||||
logger.info(f"Searching user library for: {query}")
|
||||
results = await library_db.list_library_agents(
|
||||
user_id=user_id, # type: ignore[arg-type]
|
||||
search_term=query,
|
||||
page_size=10,
|
||||
)
|
||||
for agent in results.agents:
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=agent.id,
|
||||
name=agent.name,
|
||||
description=agent.description or "",
|
||||
source="library",
|
||||
in_library=True,
|
||||
creator=agent.creator_name,
|
||||
status=agent.status.value,
|
||||
can_access_graph=agent.can_access_graph,
|
||||
has_external_trigger=agent.has_external_trigger,
|
||||
new_output=agent.new_output,
|
||||
graph_id=agent.graph_id,
|
||||
)
|
||||
)
|
||||
logger.info(f"Found {len(agents)} agents in {source}")
|
||||
except NotFoundError:
|
||||
pass
|
||||
except DatabaseError as e:
|
||||
logger.error(f"Error searching {source}: {e}", exc_info=True)
|
||||
return ErrorResponse(
|
||||
message=f"Failed to search {source}. Please try again.",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
if not agents:
|
||||
suggestions = (
|
||||
[
|
||||
"Try more general terms",
|
||||
"Browse categories in the marketplace",
|
||||
"Check spelling",
|
||||
]
|
||||
if source == "marketplace"
|
||||
else [
|
||||
"Try different keywords",
|
||||
"Use find_agent to search the marketplace",
|
||||
"Check your library at /library",
|
||||
]
|
||||
)
|
||||
no_results_msg = (
|
||||
f"No agents found matching '{query}'. Try different keywords or browse the marketplace."
|
||||
if source == "marketplace"
|
||||
else f"No agents matching '{query}' found in your library."
|
||||
)
|
||||
return NoResultsResponse(
|
||||
message=no_results_msg, session_id=session_id, suggestions=suggestions
|
||||
)
|
||||
|
||||
title = f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} "
|
||||
title += (
|
||||
f"for '{query}'"
|
||||
if source == "marketplace"
|
||||
else f"in your library for '{query}'"
|
||||
)
|
||||
|
||||
    message = (
        "Now you have found some options for the user to choose from. "
        "You can add a link to a recommended agent at: /marketplace/agent/agent_id "
        "Please ask the user if they would like to use any of these agents."
        if source == "marketplace"
        else "Found agents in the user's library. You can provide a link to view an agent at: "
        "/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute."
    )

    return AgentsFoundResponse(
        message=message,
        title=title,
        agents=agents,
        count=len(agents),
        session_id=session_id,
    )
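Both find_agent and find_library_agent now delegate to this helper, so each tool reduces to a one-line call. A hedged usage sketch (values illustrative):

response = await search_agents(
    query="newsletter writer",
    source="library",               # or "marketplace"
    session_id=session.session_id,
    user_id=user_id,                # required when source == "library"
)
# `response` is an AgentsFoundResponse, NoResultsResponse, or ErrorResponse;
# the caller serializes it as the tool output without branching on the source.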
|
||||
@@ -6,7 +6,7 @@ from typing import Any
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.chat.response_model import StreamToolExecutionResult
|
||||
from backend.api.features.chat.response_model import StreamToolOutputAvailable
|
||||
|
||||
from .models import ErrorResponse, NeedLoginResponse, ToolResponseBase
|
||||
|
||||
@@ -53,7 +53,7 @@ class BaseTool:
|
||||
session: ChatSession,
|
||||
tool_call_id: str,
|
||||
**kwargs,
|
||||
) -> StreamToolExecutionResult:
|
||||
) -> StreamToolOutputAvailable:
|
||||
"""Execute the tool with authentication check.
|
||||
|
||||
Args:
|
||||
@@ -69,10 +69,10 @@ class BaseTool:
|
||||
logger.error(
|
||||
f"Attempted tool call for {self.name} but user not authenticated"
|
||||
)
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=NeedLoginResponse(
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=NeedLoginResponse(
|
||||
message=f"Please sign in to use {self.name}",
|
||||
session_id=session.session_id,
|
||||
).model_dump_json(),
|
||||
@@ -81,17 +81,17 @@ class BaseTool:
|
||||
|
||||
try:
|
||||
result = await self._execute(user_id, session, **kwargs)
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=result.model_dump_json(),
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=result.model_dump_json(),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in {self.name}: {e}", exc_info=True)
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=ErrorResponse(
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=ErrorResponse(
|
||||
message=f"An error occurred while executing {self.name}",
|
||||
error=str(e),
|
||||
session_id=session.session_id,
|
||||
|
||||
@@ -1,26 +1,16 @@
|
||||
"""Tool for discovering agents from marketplace and user library."""
|
||||
"""Tool for discovering agents from marketplace."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.store import db as store_db
|
||||
from backend.util.exceptions import DatabaseError, NotFoundError
|
||||
|
||||
from .agent_search import search_agents
|
||||
from .base import BaseTool
|
||||
from .models import (
|
||||
AgentCarouselResponse,
|
||||
AgentInfo,
|
||||
ErrorResponse,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from .models import ToolResponseBase
|
||||
|
||||
|
||||
class FindAgentTool(BaseTool):
|
||||
"""Tool for discovering agents based on user needs."""
|
||||
"""Tool for discovering agents from the marketplace."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@@ -46,84 +36,11 @@ class FindAgentTool(BaseTool):
|
||||
}
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
) -> ToolResponseBase:
|
||||
"""Search for agents in the marketplace.
|
||||
|
||||
Args:
|
||||
user_id: User ID (may be anonymous)
|
||||
session_id: Chat session ID
|
||||
query: Search query
|
||||
|
||||
Returns:
|
||||
AgentCarouselResponse: List of agents found in the marketplace
|
||||
NoResultsResponse: No agents found in the marketplace
|
||||
ErrorResponse: Error message
|
||||
"""
|
||||
query = kwargs.get("query", "").strip()
|
||||
session_id = session.session_id
|
||||
if not query:
|
||||
return ErrorResponse(
|
||||
message="Please provide a search query",
|
||||
session_id=session_id,
|
||||
)
|
||||
agents = []
|
||||
try:
|
||||
logger.info(f"Searching marketplace for: {query}")
|
||||
store_results = await store_db.get_store_agents(
|
||||
search_query=query,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
logger.info(f"Find agents tool found {len(store_results.agents)} agents")
|
||||
for agent in store_results.agents:
|
||||
agent_id = f"{agent.creator}/{agent.slug}"
|
||||
logger.info(f"Building agent ID = {agent_id}")
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=agent_id,
|
||||
name=agent.agent_name,
|
||||
description=agent.description or "",
|
||||
source="marketplace",
|
||||
in_library=False,
|
||||
creator=agent.creator,
|
||||
category="general",
|
||||
rating=agent.rating,
|
||||
runs=agent.runs,
|
||||
is_featured=False,
|
||||
),
|
||||
)
|
||||
except NotFoundError:
|
||||
pass
|
||||
except DatabaseError as e:
|
||||
logger.error(f"Error searching agents: {e}", exc_info=True)
|
||||
return ErrorResponse(
|
||||
message="Failed to search for agents. Please try again.",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
if not agents:
|
||||
return NoResultsResponse(
|
||||
message=f"No agents found matching '{query}'. Try different keywords or browse the marketplace. If you have 3 consecutive find_agent tool calls results and found no agents. Please stop trying and ask the user if there is anything else you can help with.",
|
||||
session_id=session_id,
|
||||
suggestions=[
|
||||
"Try more general terms",
|
||||
"Browse categories in the marketplace",
|
||||
"Check spelling",
|
||||
],
|
||||
)
|
||||
|
||||
# Return formatted carousel
|
||||
title = (
|
||||
f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} for '{query}'"
|
||||
)
|
||||
return AgentCarouselResponse(
|
||||
message="Now you have found some options for the user to choose from. You can add a link to a recommended agent at: /marketplace/agent/agent_id Please ask the user if they would like to use any of these agents. If they do, please call the get_agent_details tool for this agent.",
|
||||
title=title,
|
||||
agents=agents,
|
||||
count=len(agents),
|
||||
session_id=session_id,
|
||||
return await search_agents(
|
||||
query=kwargs.get("query", "").strip(),
|
||||
source="marketplace",
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Tool for searching agents in the user's library."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_search import search_agents
|
||||
from .base import BaseTool
|
||||
from .models import ToolResponseBase
|
||||
|
||||
|
||||
class FindLibraryAgentTool(BaseTool):
|
||||
"""Tool for searching agents in the user's library."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "find_library_agent"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Search for agents in the user's library. Use this to find agents "
|
||||
"the user has already added to their library, including agents they "
|
||||
"created or added from the marketplace."
|
||||
)
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query to find agents by name or description.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
async def _execute(
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
) -> ToolResponseBase:
|
||||
return await search_agents(
|
||||
query=kwargs.get("query", "").strip(),
|
||||
source="library",
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Pydantic models for tool responses."""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
@@ -11,14 +12,15 @@ from backend.data.model import CredentialsMetaInput
|
||||
class ResponseType(str, Enum):
|
||||
"""Types of tool responses."""
|
||||
|
||||
AGENT_CAROUSEL = "agent_carousel"
|
||||
AGENTS_FOUND = "agents_found"
|
||||
AGENT_DETAILS = "agent_details"
|
||||
SETUP_REQUIREMENTS = "setup_requirements"
|
||||
EXECUTION_STARTED = "execution_started"
|
||||
NEED_LOGIN = "need_login"
|
||||
ERROR = "error"
|
||||
NO_RESULTS = "no_results"
|
||||
SUCCESS = "success"
|
||||
AGENT_OUTPUT = "agent_output"
|
||||
UNDERSTANDING_UPDATED = "understanding_updated"
|
||||
|
||||
|
||||
# Base response model
|
||||
@@ -51,14 +53,14 @@ class AgentInfo(BaseModel):
|
||||
graph_id: str | None = None
|
||||
|
||||
|
||||
class AgentCarouselResponse(ToolResponseBase):
class AgentsFoundResponse(ToolResponseBase):
    """Response for find_agent tool."""

    type: ResponseType = ResponseType.AGENT_CAROUSEL
    type: ResponseType = ResponseType.AGENTS_FOUND
    title: str = "Available Agents"
    agents: list[AgentInfo]
    count: int
    name: str = "agent_carousel"
    name: str = "agents_found"
|
||||
|
||||
|
||||
class NoResultsResponse(ToolResponseBase):
|
||||
@@ -173,3 +175,37 @@ class ErrorResponse(ToolResponseBase):
|
||||
type: ResponseType = ResponseType.ERROR
|
||||
error: str | None = None
|
||||
details: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# Agent output models
|
||||
class ExecutionOutputInfo(BaseModel):
|
||||
"""Summary of a single execution's outputs."""
|
||||
|
||||
execution_id: str
|
||||
status: str
|
||||
started_at: datetime | None = None
|
||||
ended_at: datetime | None = None
|
||||
outputs: dict[str, list[Any]]
|
||||
inputs_summary: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class AgentOutputResponse(ToolResponseBase):
|
||||
"""Response for agent_output tool."""
|
||||
|
||||
type: ResponseType = ResponseType.AGENT_OUTPUT
|
||||
agent_name: str
|
||||
agent_id: str
|
||||
library_agent_id: str | None = None
|
||||
library_agent_link: str | None = None
|
||||
execution: ExecutionOutputInfo | None = None
|
||||
available_executions: list[dict[str, Any]] | None = None
|
||||
total_executions: int = 0
|
||||
|
||||
|
||||
# Business understanding models
|
||||
class UnderstandingUpdatedResponse(ToolResponseBase):
|
||||
"""Response for add_understanding tool."""
|
||||
|
||||
type: ResponseType = ResponseType.UNDERSTANDING_UPDATED
|
||||
updated_fields: list[str] = Field(default_factory=list)
|
||||
current_understanding: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
@@ -7,6 +7,7 @@ from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from backend.api.features.chat.config import ChatConfig
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.data.graph import GraphModel
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
from backend.data.user import get_user_by_id
|
||||
@@ -57,6 +58,7 @@ class RunAgentInput(BaseModel):
|
||||
"""Input parameters for the run_agent tool."""
|
||||
|
||||
username_agent_slug: str = ""
|
||||
library_agent_id: str = ""
|
||||
inputs: dict[str, Any] = Field(default_factory=dict)
|
||||
use_defaults: bool = False
|
||||
schedule_name: str = ""
|
||||
@@ -64,7 +66,12 @@ class RunAgentInput(BaseModel):
|
||||
timezone: str = "UTC"
|
||||
|
||||
@field_validator(
|
||||
"username_agent_slug", "schedule_name", "cron", "timezone", mode="before"
|
||||
"username_agent_slug",
|
||||
"library_agent_id",
|
||||
"schedule_name",
|
||||
"cron",
|
||||
"timezone",
|
||||
mode="before",
|
||||
)
|
||||
@classmethod
|
||||
def strip_strings(cls, v: Any) -> Any:
|
||||
@@ -90,7 +97,7 @@ class RunAgentTool(BaseTool):
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Run or schedule an agent from the marketplace.
|
||||
return """Run or schedule an agent from the marketplace or user's library.
|
||||
|
||||
The tool automatically handles the setup flow:
|
||||
- Returns missing inputs if required fields are not provided
|
||||
@@ -98,6 +105,10 @@ class RunAgentTool(BaseTool):
|
||||
- Executes immediately if all requirements are met
|
||||
- Schedules execution if cron expression is provided
|
||||
|
||||
Identify the agent using either:
|
||||
- username_agent_slug: Marketplace format 'username/agent-name'
|
||||
- library_agent_id: ID of an agent in the user's library
|
||||
|
||||
For scheduled execution, provide: schedule_name, cron, and optionally timezone."""
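Hedged examples of the two ways the model can now address an agent through this tool (identifiers and cron values are illustrative):

# 1. Marketplace slug, run immediately with explicit inputs
run_now_args = {"username_agent_slug": "acme/weekly-report", "inputs": {"topic": "sales"}}

# 2. Library agent, scheduled via cron
schedule_args = {
    "library_agent_id": "lib_abc123",
    "schedule_name": "Morning digest",
    "cron": "0 8 * * *",
    "timezone": "Europe/London",
}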
|
||||
|
||||
@property
|
||||
@@ -109,6 +120,10 @@ class RunAgentTool(BaseTool):
|
||||
"type": "string",
|
||||
"description": "Agent identifier in format 'username/agent-name'",
|
||||
},
|
||||
"library_agent_id": {
|
||||
"type": "string",
|
||||
"description": "Library agent ID from user's library",
|
||||
},
|
||||
"inputs": {
|
||||
"type": "object",
|
||||
"description": "Input values for the agent",
|
||||
@@ -131,7 +146,7 @@ class RunAgentTool(BaseTool):
|
||||
"description": "IANA timezone for schedule (default: UTC)",
|
||||
},
|
||||
},
|
||||
"required": ["username_agent_slug"],
|
||||
"required": [],
|
||||
}
|
||||
|
||||
@property
|
||||
@@ -149,10 +164,16 @@ class RunAgentTool(BaseTool):
|
||||
params = RunAgentInput(**kwargs)
|
||||
session_id = session.session_id
|
||||
|
||||
# Validate agent slug format
|
||||
if not params.username_agent_slug or "/" not in params.username_agent_slug:
|
||||
# Validate at least one identifier is provided
|
||||
has_slug = params.username_agent_slug and "/" in params.username_agent_slug
|
||||
has_library_id = bool(params.library_agent_id)
|
||||
|
||||
if not has_slug and not has_library_id:
|
||||
return ErrorResponse(
|
||||
message="Please provide an agent slug in format 'username/agent-name'",
|
||||
message=(
|
||||
"Please provide either a username_agent_slug "
|
||||
"(format 'username/agent-name') or a library_agent_id"
|
||||
),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
@@ -167,13 +188,41 @@ class RunAgentTool(BaseTool):
|
||||
is_schedule = bool(params.schedule_name or params.cron)
|
||||
|
||||
try:
|
||||
# Step 1: Fetch agent details (always happens first)
|
||||
username, agent_name = params.username_agent_slug.split("/", 1)
|
||||
graph, store_agent = await fetch_graph_from_store_slug(username, agent_name)
|
||||
# Step 1: Fetch agent details
|
||||
graph: GraphModel | None = None
|
||||
library_agent = None
|
||||
|
||||
# Priority: library_agent_id if provided
|
||||
if has_library_id:
|
||||
library_agent = await library_db.get_library_agent(
|
||||
params.library_agent_id, user_id
|
||||
)
|
||||
if not library_agent:
|
||||
return ErrorResponse(
|
||||
message=f"Library agent '{params.library_agent_id}' not found",
|
||||
session_id=session_id,
|
||||
)
|
||||
# Get the graph from the library agent
|
||||
from backend.data.graph import get_graph
|
||||
|
||||
graph = await get_graph(
|
||||
library_agent.graph_id,
|
||||
library_agent.graph_version,
|
||||
user_id=user_id,
|
||||
)
|
||||
else:
|
||||
# Fetch from marketplace slug
|
||||
username, agent_name = params.username_agent_slug.split("/", 1)
|
||||
graph, _ = await fetch_graph_from_store_slug(username, agent_name)
|
||||
|
||||
if not graph:
|
||||
identifier = (
|
||||
params.library_agent_id
|
||||
if has_library_id
|
||||
else params.username_agent_slug
|
||||
)
|
||||
return ErrorResponse(
|
||||
message=f"Agent '{params.username_agent_slug}' not found in marketplace",
|
||||
message=f"Agent '{identifier}' not found",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import uuid
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import orjson
|
||||
import pytest
|
||||
@@ -17,6 +18,17 @@ setup_test_data = setup_test_data
|
||||
setup_firecrawl_test_data = setup_firecrawl_test_data
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def mock_embedding_functions():
|
||||
"""Mock embedding functions for all tests to avoid database/API dependencies."""
|
||||
with patch(
|
||||
"backend.api.features.store.db.ensure_embedding",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(scope="session")
|
||||
async def test_run_agent(setup_test_data):
|
||||
"""Test that the run_agent tool successfully executes an approved agent"""
|
||||
@@ -46,11 +58,11 @@ async def test_run_agent(setup_test_data):
|
||||
|
||||
# Verify the response
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert hasattr(response, "output")
|
||||
# Parse the result JSON to verify the execution started
|
||||
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert "execution_id" in result_data
|
||||
assert "graph_id" in result_data
|
||||
assert result_data["graph_id"] == graph.id
|
||||
@@ -86,11 +98,11 @@ async def test_run_agent_missing_inputs(setup_test_data):
|
||||
|
||||
# Verify that we get an error response
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert hasattr(response, "output")
|
||||
# The tool should return an ErrorResponse when setup info indicates not ready
|
||||
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert "message" in result_data
|
||||
|
||||
|
||||
@@ -118,10 +130,10 @@ async def test_run_agent_invalid_agent_id(setup_test_data):
|
||||
|
||||
# Verify that we get an error response
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert hasattr(response, "output")
|
||||
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert "message" in result_data
|
||||
# Should get an error about failed setup or not found
|
||||
assert any(
|
||||
@@ -158,12 +170,12 @@ async def test_run_agent_with_llm_credentials(setup_llm_test_data):
|
||||
|
||||
# Verify the response
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert hasattr(response, "output")
|
||||
|
||||
# Parse the result JSON to verify the execution started
|
||||
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should successfully start execution since credentials are available
|
||||
assert "execution_id" in result_data
|
||||
@@ -195,9 +207,9 @@ async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_da
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return agent_details type showing available inputs
|
||||
assert result_data.get("type") == "agent_details"
|
||||
@@ -230,9 +242,9 @@ async def test_run_agent_with_use_defaults(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should execute successfully
|
||||
assert "execution_id" in result_data
|
||||
@@ -260,9 +272,9 @@ async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return setup_requirements type with missing credentials
|
||||
assert result_data.get("type") == "setup_requirements"
|
||||
@@ -292,9 +304,9 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return error
|
||||
assert result_data.get("type") == "error"
|
||||
@@ -305,9 +317,10 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
|
||||
async def test_run_agent_unauthenticated():
|
||||
"""Test that run_agent returns need_login for unauthenticated users."""
|
||||
tool = RunAgentTool()
|
||||
session = make_session(user_id=None)
|
||||
# Session has a user_id (session owner), but we test tool execution without user_id
|
||||
session = make_session(user_id="test-session-owner")
|
||||
|
||||
# Execute without user_id
|
||||
# Execute without user_id to test unauthenticated behavior
|
||||
response = await tool.execute(
|
||||
user_id=None,
|
||||
session_id=str(uuid.uuid4()),
|
||||
@@ -318,9 +331,9 @@ async def test_run_agent_unauthenticated():
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Base tool returns need_login type for unauthenticated users
|
||||
assert result_data.get("type") == "need_login"
|
||||
@@ -350,9 +363,9 @@ async def test_run_agent_schedule_without_cron(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return error about missing cron
|
||||
assert result_data.get("type") == "error"
|
||||
@@ -382,9 +395,9 @@ async def test_run_agent_schedule_without_name(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return error about missing schedule_name
|
||||
assert result_data.get("type") == "error"
|
||||
|
||||
@@ -35,11 +35,7 @@ from backend.data.model import (
|
||||
OAuth2Credentials,
|
||||
UserIntegrations,
|
||||
)
|
||||
from backend.data.onboarding import (
|
||||
OnboardingStep,
|
||||
complete_onboarding_step,
|
||||
increment_runs,
|
||||
)
|
||||
from backend.data.onboarding import OnboardingStep, complete_onboarding_step
|
||||
from backend.data.user import get_user_integrations
|
||||
from backend.executor.utils import add_graph_execution
|
||||
from backend.integrations.ayrshare import AyrshareClient, SocialPlatform
|
||||
@@ -175,6 +171,7 @@ async def callback(
|
||||
f"Successfully processed OAuth callback for user {user_id} "
|
||||
f"and provider {provider.value}"
|
||||
)
|
||||
|
||||
return CredentialsMetaResponse(
|
||||
id=credentials.id,
|
||||
provider=credentials.provider,
|
||||
@@ -193,6 +190,7 @@ async def list_credentials(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> list[CredentialsMetaResponse]:
|
||||
credentials = await creds_manager.store.get_all_creds(user_id)
|
||||
|
||||
return [
|
||||
CredentialsMetaResponse(
|
||||
id=cred.id,
|
||||
@@ -215,6 +213,7 @@ async def list_credentials_by_provider(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> list[CredentialsMetaResponse]:
|
||||
credentials = await creds_manager.store.get_creds_by_provider(user_id, provider)
|
||||
|
||||
return [
|
||||
CredentialsMetaResponse(
|
||||
id=cred.id,
|
||||
@@ -378,7 +377,6 @@ async def webhook_ingress_generic(
|
||||
return
|
||||
|
||||
await complete_onboarding_step(user_id, OnboardingStep.TRIGGER_WEBHOOK)
|
||||
await increment_runs(user_id)
|
||||
|
||||
# Execute all triggers concurrently for better performance
|
||||
tasks = []
|
||||
@@ -831,6 +829,18 @@ async def list_providers() -> List[str]:
|
||||
return all_providers
|
||||
|
||||
|
||||
@router.get("/providers/system", response_model=List[str])
|
||||
async def list_system_providers() -> List[str]:
|
||||
"""
|
||||
Get a list of providers that have platform credits (system credentials) available.
|
||||
|
||||
These providers can be used without the user providing their own API keys.
|
||||
"""
|
||||
from backend.integrations.credentials_store import SYSTEM_PROVIDERS
|
||||
|
||||
return list(SYSTEM_PROVIDERS)
|
||||
|
||||
|
||||
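For reference, a rough sketch of calling the new endpoint from a client. The router prefix and port are assumptions here (the mount point of this router is not shown in this diff), and platform authentication may still apply even though the handler itself takes no user_id:

import httpx

# Assumed mount point for this integrations/credentials router; adjust to the real prefix.
BASE_URL = "http://localhost:8006/api/integrations"

def fetch_system_providers() -> list[str]:
    # Providers returned here are backed by platform credits (system credentials),
    # so blocks using them work without user-supplied API keys.
    response = httpx.get(f"{BASE_URL}/providers/system")
    response.raise_for_status()
    return response.json()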
@router.get("/providers/names", response_model=ProviderNamesResponse)
|
||||
async def get_provider_names() -> ProviderNamesResponse:
|
||||
"""
|
||||
|
||||
@@ -489,7 +489,7 @@ async def update_agent_version_in_library(
|
||||
agent_graph_version: int,
|
||||
) -> library_model.LibraryAgent:
|
||||
"""
|
||||
Updates the agent version in the library if useGraphIsActiveVersion is True.
|
||||
Updates the agent version in the library for any agent owned by the user.
|
||||
|
||||
Args:
|
||||
user_id: Owner of the LibraryAgent.
|
||||
@@ -498,20 +498,31 @@ async def update_agent_version_in_library(
|
||||
|
||||
Raises:
|
||||
DatabaseError: If there's an error with the update.
|
||||
NotFoundError: If no library agent is found for this user and agent.
|
||||
"""
|
||||
logger.debug(
|
||||
f"Updating agent version in library for user #{user_id}, "
|
||||
f"agent #{agent_graph_id} v{agent_graph_version}"
|
||||
)
|
||||
try:
|
||||
library_agent = await prisma.models.LibraryAgent.prisma().find_first_or_raise(
|
||||
async with transaction() as tx:
|
||||
library_agent = await prisma.models.LibraryAgent.prisma(tx).find_first_or_raise(
|
||||
where={
|
||||
"userId": user_id,
|
||||
"agentGraphId": agent_graph_id,
|
||||
"useGraphIsActiveVersion": True,
|
||||
},
|
||||
)
|
||||
lib = await prisma.models.LibraryAgent.prisma().update(
|
||||
|
||||
# Delete any conflicting LibraryAgent for the target version
|
||||
await prisma.models.LibraryAgent.prisma(tx).delete_many(
|
||||
where={
|
||||
"userId": user_id,
|
||||
"agentGraphId": agent_graph_id,
|
||||
"agentGraphVersion": agent_graph_version,
|
||||
"id": {"not": library_agent.id},
|
||||
}
|
||||
)
|
||||
|
||||
lib = await prisma.models.LibraryAgent.prisma(tx).update(
|
||||
where={"id": library_agent.id},
|
||||
data={
|
||||
"AgentGraph": {
|
||||
@@ -525,13 +536,13 @@ async def update_agent_version_in_library(
|
||||
},
|
||||
include={"AgentGraph": True},
|
||||
)
|
||||
if lib is None:
|
||||
raise NotFoundError(f"Library agent {library_agent.id} not found")
|
||||
|
||||
return library_model.LibraryAgent.from_db(lib)
|
||||
except prisma.errors.PrismaError as e:
|
||||
logger.error(f"Database error updating agent version in library: {e}")
|
||||
raise DatabaseError("Failed to update agent version in library") from e
|
||||
if lib is None:
|
||||
raise NotFoundError(
|
||||
f"Failed to update library agent for {agent_graph_id} v{agent_graph_version}"
|
||||
)
|
||||
|
||||
return library_model.LibraryAgent.from_db(lib)
|
||||
|
||||
|
||||
async def update_library_agent(
|
||||
@@ -825,6 +836,7 @@ async def add_store_agent_to_library(
|
||||
}
|
||||
},
|
||||
"isCreatedByUser": False,
|
||||
"useGraphIsActiveVersion": False,
|
||||
"settings": SafeJson(
|
||||
_initialize_graph_settings(graph_model).model_dump()
|
||||
),
|
||||
|
||||
@@ -48,6 +48,7 @@ class LibraryAgent(pydantic.BaseModel):
|
||||
id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
owner_user_id: str # ID of user who owns/created this agent graph
|
||||
|
||||
image_url: str | None
|
||||
|
||||
@@ -163,6 +164,7 @@ class LibraryAgent(pydantic.BaseModel):
|
||||
id=agent.id,
|
||||
graph_id=agent.agentGraphId,
|
||||
graph_version=agent.agentGraphVersion,
|
||||
owner_user_id=agent.userId,
|
||||
image_url=agent.imageUrl,
|
||||
creator_name=creator_name,
|
||||
creator_image_url=creator_image_url,
|
||||
|
||||
@@ -8,7 +8,6 @@ from backend.data.execution import GraphExecutionMeta
|
||||
from backend.data.graph import get_graph
|
||||
from backend.data.integrations import get_webhook
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
from backend.data.onboarding import increment_runs
|
||||
from backend.executor.utils import add_graph_execution, make_node_credentials_input_map
|
||||
from backend.integrations.creds_manager import IntegrationCredentialsManager
|
||||
from backend.integrations.webhooks import get_webhook_manager
|
||||
@@ -403,8 +402,6 @@ async def execute_preset(
|
||||
merged_node_input = preset.inputs | inputs
|
||||
merged_credential_inputs = preset.credentials | credential_inputs
|
||||
|
||||
await increment_runs(user_id)
|
||||
|
||||
return await add_graph_execution(
|
||||
user_id=user_id,
|
||||
graph_id=preset.graph_id,
|
||||
|
||||
@@ -42,6 +42,7 @@ async def test_get_library_agents_success(
|
||||
id="test-agent-1",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 1",
|
||||
description="Test Description 1",
|
||||
image_url=None,
|
||||
@@ -64,6 +65,7 @@ async def test_get_library_agents_success(
|
||||
id="test-agent-2",
|
||||
graph_id="test-agent-2",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 2",
|
||||
description="Test Description 2",
|
||||
image_url=None,
|
||||
@@ -138,6 +140,7 @@ async def test_get_favorite_library_agents_success(
|
||||
id="test-agent-1",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Favorite Agent 1",
|
||||
description="Test Favorite Description 1",
|
||||
image_url=None,
|
||||
@@ -205,6 +208,7 @@ def test_add_agent_to_library_success(
|
||||
id="test-library-agent-id",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 1",
|
||||
description="Test Description 1",
|
||||
image_url=None,

@@ -0,0 +1,431 @@
|
||||
"""
|
||||
Content Type Handlers for Unified Embeddings
|
||||
|
||||
Pluggable system for different content sources (store agents, blocks, docs).
|
||||
Each handler knows how to fetch and process its content type for embedding.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.data.db import query_raw_with_schema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContentItem:
|
||||
"""Represents a piece of content to be embedded."""
|
||||
|
||||
content_id: str # Unique identifier (DB ID or file path)
|
||||
content_type: ContentType
|
||||
searchable_text: str # Combined text for embedding
|
||||
metadata: dict[str, Any] # Content-specific metadata
|
||||
user_id: str | None = None # For user-scoped content
|
||||
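As a quick illustration of the shape of this dataclass, a hand-built item might look like the following; the values are invented for the example, and real items are produced by the handlers below:

example_item = ContentItem(
    content_id="platform/getting-started.md",  # invented path, doubles as the unique ID
    content_type=ContentType.DOCUMENTATION,
    searchable_text="Getting Started How to build and run your first agent",
    metadata={"title": "Getting Started", "path": "platform/getting-started.md"},
    user_id=None,  # public content is not scoped to a user
)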
|
||||
|
||||
class ContentHandler(ABC):
|
||||
"""Base handler for fetching and processing content for embeddings."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def content_type(self) -> ContentType:
|
||||
"""The ContentType this handler manages."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""
|
||||
Fetch items that don't have embeddings yet.
|
||||
|
||||
Args:
|
||||
batch_size: Maximum number of items to return
|
||||
|
||||
Returns:
|
||||
List of ContentItem objects ready for embedding
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""
|
||||
Get statistics about embedding coverage.
|
||||
|
||||
Returns:
|
||||
Dict with keys: total, with_embeddings, without_embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
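To make the pluggable design concrete, here is a rough sketch of what adding a new content source could look like. PromptTemplateHandler and ContentType.PROMPT_TEMPLATE are hypothetical: that enum member does not exist in the current schema, and a real handler would also need an entry in CONTENT_HANDLERS at the bottom of this module.

class PromptTemplateHandler(ContentHandler):
    """Hypothetical handler for user-authored prompt templates (illustration only)."""

    @property
    def content_type(self) -> ContentType:
        return ContentType.PROMPT_TEMPLATE  # assumed enum member, not in the current schema

    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        # Would query the templates table for rows lacking an embedding,
        # mirroring the LEFT JOIN ... IS NULL pattern used by StoreAgentHandler below.
        return []

    async def get_stats(self) -> dict[str, int]:
        return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}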
|
||||
class StoreAgentHandler(ContentHandler):
|
||||
"""Handler for marketplace store agent listings."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.STORE_AGENT
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch approved store listings without embeddings."""
|
||||
from backend.api.features.store.embeddings import build_searchable_text
|
||||
|
||||
missing = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT
|
||||
slv.id,
|
||||
slv.name,
|
||||
slv.description,
|
||||
slv."subHeading",
|
||||
slv.categories
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
LEFT JOIN {schema_prefix}"UnifiedContentEmbedding" uce
|
||||
ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
AND uce."contentId" IS NULL
|
||||
LIMIT $1
|
||||
""",
|
||||
batch_size,
|
||||
)
|
||||
|
||||
return [
|
||||
ContentItem(
|
||||
content_id=row["id"],
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text=build_searchable_text(
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
sub_heading=row["subHeading"],
|
||||
categories=row["categories"] or [],
|
||||
),
|
||||
metadata={
|
||||
"name": row["name"],
|
||||
"categories": row["categories"] or [],
|
||||
},
|
||||
user_id=None, # Store agents are public
|
||||
)
|
||||
for row in missing
|
||||
]
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about store agent embedding coverage."""
|
||||
# Count approved versions
|
||||
approved_result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {schema_prefix}"StoreListingVersion"
|
||||
WHERE "submissionStatus" = 'APPROVED'
|
||||
AND "isDeleted" = false
|
||||
"""
|
||||
)
|
||||
total_approved = approved_result[0]["count"] if approved_result else 0
|
||||
|
||||
# Count versions with embeddings
|
||||
embedded_result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
JOIN {schema_prefix}"UnifiedContentEmbedding" uce ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
"""
|
||||
)
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_approved,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_approved - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
class BlockHandler(ContentHandler):
|
||||
"""Handler for block definitions (Python classes)."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.BLOCK
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch blocks without embeddings."""
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
# Get all available blocks
|
||||
all_blocks = get_blocks()
|
||||
|
||||
# Check which ones have embeddings
|
||||
if not all_blocks:
|
||||
return []
|
||||
|
||||
block_ids = list(all_blocks.keys())
|
||||
|
||||
# Query for existing embeddings
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])
|
||||
existing_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT "contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*block_ids,
|
||||
)
|
||||
|
||||
existing_ids = {row["contentId"] for row in existing_result}
|
||||
missing_blocks = [
|
||||
(block_id, block_cls)
|
||||
for block_id, block_cls in all_blocks.items()
|
||||
if block_id not in existing_ids
|
||||
]
|
||||
|
||||
# Convert to ContentItem
|
||||
items = []
|
||||
for block_id, block_cls in missing_blocks[:batch_size]:
|
||||
try:
|
||||
block_instance = block_cls()
|
||||
|
||||
# Build searchable text from block metadata
|
||||
parts = []
|
||||
if hasattr(block_instance, "name") and block_instance.name:
|
||||
parts.append(block_instance.name)
|
||||
if (
|
||||
hasattr(block_instance, "description")
|
||||
and block_instance.description
|
||||
):
|
||||
parts.append(block_instance.description)
|
||||
if hasattr(block_instance, "categories") and block_instance.categories:
|
||||
# Convert BlockCategory enum to strings
|
||||
parts.append(
|
||||
" ".join(str(cat.value) for cat in block_instance.categories)
|
||||
)
|
||||
|
||||
# Add input/output schema info
|
||||
if hasattr(block_instance, "input_schema"):
|
||||
schema = block_instance.input_schema
|
||||
if hasattr(schema, "model_json_schema"):
|
||||
schema_dict = schema.model_json_schema()
|
||||
if "properties" in schema_dict:
|
||||
for prop_name, prop_info in schema_dict[
|
||||
"properties"
|
||||
].items():
|
||||
if "description" in prop_info:
|
||||
parts.append(
|
||||
f"{prop_name}: {prop_info['description']}"
|
||||
)
|
||||
|
||||
searchable_text = " ".join(parts)
|
||||
|
||||
# Convert categories set of enums to list of strings for JSON serialization
|
||||
categories = getattr(block_instance, "categories", set())
|
||||
categories_list = (
|
||||
[cat.value for cat in categories] if categories else []
|
||||
)
|
||||
|
||||
items.append(
|
||||
ContentItem(
|
||||
content_id=block_id,
|
||||
content_type=ContentType.BLOCK,
|
||||
searchable_text=searchable_text,
|
||||
metadata={
|
||||
"name": getattr(block_instance, "name", ""),
|
||||
"categories": categories_list,
|
||||
},
|
||||
user_id=None, # Blocks are public
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process block {block_id}: {e}")
|
||||
continue
|
||||
|
||||
return items
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about block embedding coverage."""
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
all_blocks = get_blocks()
|
||||
total_blocks = len(all_blocks)
|
||||
|
||||
if total_blocks == 0:
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
block_ids = list(all_blocks.keys())
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])
|
||||
|
||||
embedded_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*block_ids,
|
||||
)
|
||||
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_blocks,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_blocks - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
class DocumentationHandler(ContentHandler):
|
||||
"""Handler for documentation files (.md/.mdx)."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.DOCUMENTATION
|
||||
|
||||
def _get_docs_root(self) -> Path:
|
||||
"""Get the documentation root directory."""
|
||||
# content_handlers.py is at: backend/backend/api/features/store/content_handlers.py
|
||||
# Need to go up to project root then into docs/
|
||||
# In container: /app/autogpt_platform/backend/backend/api/features/store -> /app/docs
|
||||
# In development: /repo/autogpt_platform/backend/backend/api/features/store -> /repo/docs
|
||||
this_file = Path(
|
||||
__file__
|
||||
) # .../backend/backend/api/features/store/content_handlers.py
|
||||
project_root = (
|
||||
this_file.parent.parent.parent.parent.parent.parent.parent
|
||||
) # -> /app or /repo
|
||||
docs_root = project_root / "docs"
|
||||
return docs_root
|
||||
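A side note on the chained .parent calls above: pathlib's parents sequence expresses the same walk more compactly. This assumes the file really lives at backend/backend/api/features/store/ under the project root, as the comments state:

    # Equivalent, arguably clearer form of the traversal above (same location assumption):
    # project_root = Path(__file__).resolve().parents[6]
    # docs_root = project_root / "docs"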
|
||||
def _extract_title_and_content(self, file_path: Path) -> tuple[str, str]:
|
||||
"""Extract title and content from markdown file."""
|
||||
try:
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
|
||||
# Try to extract title from first # heading
|
||||
lines = content.split("\n")
|
||||
title = ""
|
||||
body_lines = []
|
||||
|
||||
for line in lines:
|
||||
if line.startswith("# ") and not title:
|
||||
title = line[2:].strip()
|
||||
else:
|
||||
body_lines.append(line)
|
||||
|
||||
# If no title found, use filename
|
||||
if not title:
|
||||
title = file_path.stem.replace("-", " ").replace("_", " ").title()
|
||||
|
||||
body = "\n".join(body_lines)
|
||||
|
||||
return title, body
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read {file_path}: {e}")
|
||||
return file_path.stem, ""
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch documentation files without embeddings."""
|
||||
docs_root = self._get_docs_root()
|
||||
|
||||
if not docs_root.exists():
|
||||
logger.warning(f"Documentation root not found: {docs_root}")
|
||||
return []
|
||||
|
||||
# Find all .md and .mdx files
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))
|
||||
|
||||
# Get relative paths for content IDs
|
||||
doc_paths = [str(doc.relative_to(docs_root)) for doc in all_docs]
|
||||
|
||||
if not doc_paths:
|
||||
return []
|
||||
|
||||
# Check which ones have embeddings
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(doc_paths))])
|
||||
existing_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT "contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'DOCUMENTATION'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*doc_paths,
|
||||
)
|
||||
|
||||
existing_ids = {row["contentId"] for row in existing_result}
|
||||
missing_docs = [
|
||||
(doc_path, doc_file)
|
||||
for doc_path, doc_file in zip(doc_paths, all_docs)
|
||||
if doc_path not in existing_ids
|
||||
]
|
||||
|
||||
# Convert to ContentItem
|
||||
items = []
|
||||
for doc_path, doc_file in missing_docs[:batch_size]:
|
||||
try:
|
||||
title, content = self._extract_title_and_content(doc_file)
|
||||
|
||||
# Build searchable text
|
||||
searchable_text = f"{title} {content}"
|
||||
|
||||
items.append(
|
||||
ContentItem(
|
||||
content_id=doc_path,
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
searchable_text=searchable_text,
|
||||
metadata={
|
||||
"title": title,
|
||||
"path": doc_path,
|
||||
},
|
||||
user_id=None, # Documentation is public
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process doc {doc_path}: {e}")
|
||||
continue
|
||||
|
||||
return items
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about documentation embedding coverage."""
|
||||
docs_root = self._get_docs_root()
|
||||
|
||||
if not docs_root.exists():
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
# Count all .md and .mdx files
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))
|
||||
total_docs = len(all_docs)
|
||||
|
||||
if total_docs == 0:
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
doc_paths = [str(doc.relative_to(docs_root)) for doc in all_docs]
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(doc_paths))])
|
||||
|
||||
embedded_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'DOCUMENTATION'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*doc_paths,
|
||||
)
|
||||
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_docs,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_docs - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
# Content handler registry
|
||||
CONTENT_HANDLERS: dict[ContentType, ContentHandler] = {
|
||||
ContentType.STORE_AGENT: StoreAgentHandler(),
|
||||
ContentType.BLOCK: BlockHandler(),
|
||||
ContentType.DOCUMENTATION: DocumentationHandler(),
|
||||
}
|
||||
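A minimal sketch of how this registry is meant to be consumed by a backfill job. The shipped backfill_all_content_types in embeddings.py already does this for real, so this only makes the handler contract concrete; the local import mirrors how this module defers embeddings imports elsewhere, and ensure_content_embedding's signature is taken from the tests that follow.

async def _backfill_once_sketch(batch_size: int = 50) -> dict[str, int]:
    # Local import, mirroring the deferred-import pattern used in this module.
    from backend.api.features.store.embeddings import ensure_content_embedding

    processed: dict[str, int] = {}
    for content_type, handler in CONTENT_HANDLERS.items():
        # Each handler decides what "missing" means for its own content source.
        items = await handler.get_missing_items(batch_size)
        for item in items:
            await ensure_content_embedding(
                content_type=item.content_type,
                content_id=item.content_id,
                searchable_text=item.searchable_text,
                metadata=item.metadata,
                user_id=item.user_id,
            )
        processed[content_type.value] = len(items)
    return processed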
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Integration tests for content handlers using real DB.
|
||||
|
||||
Run with: poetry run pytest backend/api/features/store/content_handlers_integration_test.py -xvs
|
||||
|
||||
These tests use the real database but mock OpenAI calls.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.api.features.store.content_handlers import (
|
||||
CONTENT_HANDLERS,
|
||||
BlockHandler,
|
||||
DocumentationHandler,
|
||||
StoreAgentHandler,
|
||||
)
|
||||
from backend.api.features.store.embeddings import (
|
||||
EMBEDDING_DIM,
|
||||
backfill_all_content_types,
|
||||
ensure_content_embedding,
|
||||
get_embedding_stats,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_real_db():
|
||||
"""Test StoreAgentHandler with real database queries."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Get stats from real DB
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list (may be empty if all have embeddings)
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None
|
||||
assert item.content_type.value == "STORE_AGENT"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_real_db():
|
||||
"""Test BlockHandler with real database queries."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Get stats from real DB
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0 # Should have at least some blocks
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None # Should be block UUID
|
||||
assert item.content_type.value == "BLOCK"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_real_fs():
|
||||
"""Test DocumentationHandler with real filesystem."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Get stats from real filesystem
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None # Should be relative path
|
||||
assert item.content_type.value == "DOCUMENTATION"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_stats_all_types():
|
||||
"""Test get_embedding_stats aggregates all content types."""
|
||||
stats = await get_embedding_stats()
|
||||
|
||||
# Should have structure with by_type and totals
|
||||
assert "by_type" in stats
|
||||
assert "totals" in stats
|
||||
|
||||
# Check each content type is present
|
||||
by_type = stats["by_type"]
|
||||
assert "STORE_AGENT" in by_type
|
||||
assert "BLOCK" in by_type
|
||||
assert "DOCUMENTATION" in by_type
|
||||
|
||||
# Check totals are aggregated
|
||||
totals = stats["totals"]
|
||||
assert totals["total"] >= 0
|
||||
assert totals["with_embeddings"] >= 0
|
||||
assert totals["without_embeddings"] >= 0
|
||||
assert "coverage_percent" in totals
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
async def test_ensure_content_embedding_blocks(mock_generate):
|
||||
"""Test creating embeddings for blocks (mocked OpenAI)."""
|
||||
# Mock OpenAI to return fake embedding
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
# Get one block without embedding
|
||||
handler = BlockHandler()
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
if not items:
|
||||
pytest.skip("No blocks without embeddings")
|
||||
|
||||
item = items[0]
|
||||
|
||||
# Try to create embedding (OpenAI mocked)
|
||||
result = await ensure_content_embedding(
|
||||
content_type=item.content_type,
|
||||
content_id=item.content_id,
|
||||
searchable_text=item.searchable_text,
|
||||
metadata=item.metadata,
|
||||
user_id=item.user_id,
|
||||
)
|
||||
|
||||
# Should succeed with mocked OpenAI
|
||||
assert result is True
|
||||
mock_generate.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
async def test_backfill_all_content_types_dry_run(mock_generate):
|
||||
"""Test backfill_all_content_types processes all handlers in order."""
|
||||
# Mock OpenAI to return fake embedding
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
# Run backfill with batch_size=1 to process max 1 per type
|
||||
result = await backfill_all_content_types(batch_size=1)
|
||||
|
||||
# Should have results for all content types
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
|
||||
by_type = result["by_type"]
|
||||
assert "BLOCK" in by_type
|
||||
assert "STORE_AGENT" in by_type
|
||||
assert "DOCUMENTATION" in by_type
|
||||
|
||||
# Each type should have correct structure
|
||||
for content_type, type_result in by_type.items():
|
||||
assert "processed" in type_result
|
||||
assert "success" in type_result
|
||||
assert "failed" in type_result
|
||||
|
||||
# Totals should aggregate
|
||||
totals = result["totals"]
|
||||
assert totals["processed"] >= 0
|
||||
assert totals["success"] >= 0
|
||||
assert totals["failed"] >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_content_handler_registry():
|
||||
"""Test all handlers are registered in correct order."""
|
||||
from prisma.enums import ContentType
|
||||
|
||||
# All three types should be registered
|
||||
assert ContentType.STORE_AGENT in CONTENT_HANDLERS
|
||||
assert ContentType.BLOCK in CONTENT_HANDLERS
|
||||
assert ContentType.DOCUMENTATION in CONTENT_HANDLERS
|
||||
|
||||
# Check handler types
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)
@@ -0,0 +1,324 @@
|
||||
"""
|
||||
E2E tests for content handlers (blocks, store agents, documentation).
|
||||
|
||||
Tests the full flow: discovering content → generating embeddings → storing.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store.content_handlers import (
|
||||
CONTENT_HANDLERS,
|
||||
BlockHandler,
|
||||
DocumentationHandler,
|
||||
StoreAgentHandler,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_get_missing_items(mocker):
|
||||
"""Test StoreAgentHandler fetches approved agents without embeddings."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Mock database query
|
||||
mock_missing = [
|
||||
{
|
||||
"id": "agent-1",
|
||||
"name": "Test Agent",
|
||||
"description": "A test agent",
|
||||
"subHeading": "Test heading",
|
||||
"categories": ["AI", "Testing"],
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_missing,
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "agent-1"
|
||||
assert items[0].content_type == ContentType.STORE_AGENT
|
||||
assert "Test Agent" in items[0].searchable_text
|
||||
assert "A test agent" in items[0].searchable_text
|
||||
assert items[0].metadata["name"] == "Test Agent"
|
||||
assert items[0].user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_get_stats(mocker):
|
||||
"""Test StoreAgentHandler returns correct stats."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Mock approved count query
|
||||
mock_approved = [{"count": 50}]
|
||||
# Mock embedded count query
|
||||
mock_embedded = [{"count": 30}]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
side_effect=[mock_approved, mock_embedded],
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 50
|
||||
assert stats["with_embeddings"] == 30
|
||||
assert stats["without_embeddings"] == 20
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_get_missing_items(mocker):
|
||||
"""Test BlockHandler discovers blocks without embeddings."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock get_blocks to return test blocks
|
||||
mock_block_class = MagicMock()
|
||||
mock_block_instance = MagicMock()
|
||||
mock_block_instance.name = "Calculator Block"
|
||||
mock_block_instance.description = "Performs calculations"
|
||||
mock_block_instance.categories = [MagicMock(value="MATH")]
|
||||
mock_block_instance.input_schema.model_json_schema.return_value = {
|
||||
"properties": {"expression": {"description": "Math expression to evaluate"}}
|
||||
}
|
||||
mock_block_class.return_value = mock_block_instance
|
||||
|
||||
mock_blocks = {"block-uuid-1": mock_block_class}
|
||||
|
||||
# Mock existing embeddings query (no embeddings exist)
|
||||
mock_existing = []
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_existing,
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "block-uuid-1"
|
||||
assert items[0].content_type == ContentType.BLOCK
|
||||
assert "Calculator Block" in items[0].searchable_text
|
||||
assert "Performs calculations" in items[0].searchable_text
|
||||
assert "MATH" in items[0].searchable_text
|
||||
assert "expression: Math expression" in items[0].searchable_text
|
||||
assert items[0].user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_get_stats(mocker):
|
||||
"""Test BlockHandler returns correct stats."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock get_blocks
|
||||
mock_blocks = {
|
||||
"block-1": MagicMock(),
|
||||
"block-2": MagicMock(),
|
||||
"block-3": MagicMock(),
|
||||
}
|
||||
|
||||
# Mock embedded count query (2 blocks have embeddings)
|
||||
mock_embedded = [{"count": 2}]
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_embedded,
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 3
|
||||
assert stats["with_embeddings"] == 2
|
||||
assert stats["without_embeddings"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_get_missing_items(tmp_path, mocker):
|
||||
"""Test DocumentationHandler discovers docs without embeddings."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Create temporary docs directory with test files
|
||||
docs_root = tmp_path / "docs"
|
||||
docs_root.mkdir()
|
||||
|
||||
(docs_root / "guide.md").write_text("# Getting Started\n\nThis is a guide.")
|
||||
(docs_root / "api.mdx").write_text("# API Reference\n\nAPI documentation.")
|
||||
|
||||
# Mock _get_docs_root to return temp dir
|
||||
with patch.object(handler, "_get_docs_root", return_value=docs_root):
|
||||
# Mock existing embeddings query (no embeddings exist)
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 2
|
||||
|
||||
# Check guide.md
|
||||
guide_item = next(
|
||||
(item for item in items if item.content_id == "guide.md"), None
|
||||
)
|
||||
assert guide_item is not None
|
||||
assert guide_item.content_type == ContentType.DOCUMENTATION
|
||||
assert "Getting Started" in guide_item.searchable_text
|
||||
assert "This is a guide" in guide_item.searchable_text
|
||||
assert guide_item.metadata["title"] == "Getting Started"
|
||||
assert guide_item.user_id is None
|
||||
|
||||
# Check api.mdx
|
||||
api_item = next(
|
||||
(item for item in items if item.content_id == "api.mdx"), None
|
||||
)
|
||||
assert api_item is not None
|
||||
assert "API Reference" in api_item.searchable_text
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_get_stats(tmp_path, mocker):
|
||||
"""Test DocumentationHandler returns correct stats."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Create temporary docs directory
|
||||
docs_root = tmp_path / "docs"
|
||||
docs_root.mkdir()
|
||||
(docs_root / "doc1.md").write_text("# Doc 1")
|
||||
(docs_root / "doc2.md").write_text("# Doc 2")
|
||||
(docs_root / "doc3.mdx").write_text("# Doc 3")
|
||||
|
||||
# Mock embedded count query (1 doc has embedding)
|
||||
mock_embedded = [{"count": 1}]
|
||||
|
||||
with patch.object(handler, "_get_docs_root", return_value=docs_root):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_embedded,
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 3
|
||||
assert stats["with_embeddings"] == 1
|
||||
assert stats["without_embeddings"] == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_title_extraction(tmp_path):
|
||||
"""Test DocumentationHandler extracts title from markdown heading."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Test with heading
|
||||
doc_with_heading = tmp_path / "with_heading.md"
|
||||
doc_with_heading.write_text("# My Title\n\nContent here")
|
||||
title, content = handler._extract_title_and_content(doc_with_heading)
|
||||
assert title == "My Title"
|
||||
assert "# My Title" not in content
|
||||
assert "Content here" in content
|
||||
|
||||
# Test without heading
|
||||
doc_without_heading = tmp_path / "no-heading.md"
|
||||
doc_without_heading.write_text("Just content, no heading")
|
||||
title, content = handler._extract_title_and_content(doc_without_heading)
|
||||
assert title == "No Heading" # Uses filename
|
||||
assert "Just content" in content
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_content_handlers_registry():
|
||||
"""Test all content types are registered."""
|
||||
assert ContentType.STORE_AGENT in CONTENT_HANDLERS
|
||||
assert ContentType.BLOCK in CONTENT_HANDLERS
|
||||
assert ContentType.DOCUMENTATION in CONTENT_HANDLERS
|
||||
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_handles_missing_attributes():
|
||||
"""Test BlockHandler gracefully handles blocks with missing attributes."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock block with minimal attributes
|
||||
mock_block_class = MagicMock()
|
||||
mock_block_instance = MagicMock()
|
||||
mock_block_instance.name = "Minimal Block"
|
||||
# No description, categories, or schema
|
||||
del mock_block_instance.description
|
||||
del mock_block_instance.categories
|
||||
del mock_block_instance.input_schema
|
||||
mock_block_class.return_value = mock_block_instance
|
||||
|
||||
mock_blocks = {"block-minimal": mock_block_class}
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].searchable_text == "Minimal Block"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_skips_failed_blocks():
|
||||
"""Test BlockHandler skips blocks that fail to instantiate."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock one good block and one bad block
|
||||
good_block = MagicMock()
|
||||
good_instance = MagicMock()
|
||||
good_instance.name = "Good Block"
|
||||
good_instance.description = "Works fine"
|
||||
good_instance.categories = []
|
||||
good_block.return_value = good_instance
|
||||
|
||||
bad_block = MagicMock()
|
||||
bad_block.side_effect = Exception("Instantiation failed")
|
||||
|
||||
mock_blocks = {"good-block": good_block, "bad-block": bad_block}
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
# Should only get the good block
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "good-block"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_missing_docs_directory():
|
||||
"""Test DocumentationHandler handles missing docs directory gracefully."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Mock _get_docs_root to return non-existent path
|
||||
fake_path = Path("/nonexistent/docs")
|
||||
with patch.object(handler, "_get_docs_root", return_value=fake_path):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
assert items == []
|
||||
|
||||
stats = await handler.get_stats()
|
||||
assert stats["total"] == 0
|
||||
assert stats["with_embeddings"] == 0
|
||||
assert stats["without_embeddings"] == 0
|
||||
@@ -1,8 +1,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import typing
|
||||
from datetime import datetime, timezone
|
||||
from typing import Literal
|
||||
from typing import Any, Literal
|
||||
|
||||
import fastapi
|
||||
import prisma.enums
|
||||
@@ -10,7 +9,7 @@ import prisma.errors
|
||||
import prisma.models
|
||||
import prisma.types
|
||||
|
||||
from backend.data.db import query_raw_with_schema, transaction
|
||||
from backend.data.db import transaction
|
||||
from backend.data.graph import (
|
||||
GraphMeta,
|
||||
GraphModel,
|
||||
@@ -30,6 +29,8 @@ from backend.util.settings import Settings
|
||||
|
||||
from . import exceptions as store_exceptions
|
||||
from . import model as store_model
|
||||
from .embeddings import ensure_embedding
|
||||
from .hybrid_search import hybrid_search
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = Settings()
|
||||
@@ -50,128 +51,77 @@ async def get_store_agents(
|
||||
page_size: int = 20,
|
||||
) -> store_model.StoreAgentsResponse:
|
||||
"""
|
||||
Get PUBLIC store agents from the StoreAgent view
|
||||
Get PUBLIC store agents from the StoreAgent view.
|
||||
|
||||
Search behavior:
|
||||
- With search_query: Uses hybrid search (semantic + lexical)
|
||||
- Fallback: If embeddings unavailable, gracefully degrades to lexical-only
|
||||
- Rationale: User-facing endpoint prioritizes availability over accuracy
|
||||
|
||||
Note: Admin operations (approval) use fail-fast to prevent inconsistent state.
|
||||
"""
|
||||
logger.debug(
|
||||
f"Getting store agents. featured={featured}, creators={creators}, sorted_by={sorted_by}, search={search_query}, category={category}, page={page}"
|
||||
)
|
||||
|
||||
search_used_hybrid = False
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
agents: list[dict[str, Any]] = []
|
||||
total = 0
|
||||
total_pages = 0
|
||||
|
||||
try:
|
||||
# If search_query is provided, use full-text search
|
||||
# If search_query is provided, use hybrid search (embeddings + tsvector)
|
||||
if search_query:
|
||||
offset = (page - 1) * page_size
|
||||
# Try hybrid search combining semantic and lexical signals
|
||||
# Falls back to lexical-only if OpenAI unavailable (user-facing, high SLA)
|
||||
try:
|
||||
agents, total = await hybrid_search(
|
||||
query=search_query,
|
||||
featured=featured,
|
||||
creators=creators,
|
||||
category=category,
|
||||
sorted_by="relevance", # Use hybrid scoring for relevance
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
search_used_hybrid = True
|
||||
except Exception as e:
|
||||
# Log error but fall back to lexical search for better UX
|
||||
logger.error(
|
||||
f"Hybrid search failed (likely OpenAI unavailable), "
|
||||
f"falling back to lexical search: {e}"
|
||||
)
|
||||
# search_used_hybrid remains False, will use fallback path below
|
||||
|
||||
# Whitelist allowed order_by columns
|
||||
ALLOWED_ORDER_BY = {
|
||||
"rating": "rating DESC, rank DESC",
|
||||
"runs": "runs DESC, rank DESC",
|
||||
"name": "agent_name ASC, rank ASC",
|
||||
"updated_at": "updated_at DESC, rank DESC",
|
||||
}
|
||||
# Convert hybrid search results (dict format) if hybrid succeeded
|
||||
if search_used_hybrid:
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in agents:
|
||||
try:
|
||||
store_agent = store_model.StoreAgent(
|
||||
slug=agent["slug"],
|
||||
agent_name=agent["agent_name"],
|
||||
agent_image=(
|
||||
agent["agent_image"][0] if agent["agent_image"] else ""
|
||||
),
|
||||
creator=agent["creator_username"] or "Needs Profile",
|
||||
creator_avatar=agent["creator_avatar"] or "",
|
||||
sub_heading=agent["sub_heading"],
|
||||
description=agent["description"],
|
||||
runs=agent["runs"],
|
||||
rating=agent["rating"],
|
||||
)
|
||||
store_agents.append(store_agent)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error parsing Store agent from hybrid search results: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Validate and get order clause
|
||||
if sorted_by and sorted_by in ALLOWED_ORDER_BY:
|
||||
order_by_clause = ALLOWED_ORDER_BY[sorted_by]
|
||||
else:
|
||||
order_by_clause = "updated_at DESC, rank DESC"
|
||||
|
||||
# Build WHERE conditions and parameters list
|
||||
where_parts: list[str] = []
|
||||
params: list[typing.Any] = [search_query] # $1 - search term
|
||||
param_index = 2 # Start at $2 for next parameter
|
||||
|
||||
# Always filter for available agents
|
||||
where_parts.append("is_available = true")
|
||||
|
||||
if featured:
|
||||
where_parts.append("featured = true")
|
||||
|
||||
if creators and creators:
|
||||
# Use ANY with array parameter
|
||||
where_parts.append(f"creator_username = ANY(${param_index})")
|
||||
params.append(creators)
|
||||
param_index += 1
|
||||
|
||||
if category and category:
|
||||
where_parts.append(f"${param_index} = ANY(categories)")
|
||||
params.append(category)
|
||||
param_index += 1
|
||||
|
||||
sql_where_clause: str = " AND ".join(where_parts) if where_parts else "1=1"
|
||||
|
||||
# Add pagination params
|
||||
params.extend([page_size, offset])
|
||||
limit_param = f"${param_index}"
|
||||
offset_param = f"${param_index + 1}"
|
||||
|
||||
# Execute full-text search query with parameterized values
|
||||
sql_query = f"""
|
||||
SELECT
|
||||
slug,
|
||||
agent_name,
|
||||
agent_image,
|
||||
creator_username,
|
||||
creator_avatar,
|
||||
sub_heading,
|
||||
description,
|
||||
runs,
|
||||
rating,
|
||||
categories,
|
||||
featured,
|
||||
is_available,
|
||||
updated_at,
|
||||
ts_rank_cd(search, query) AS rank
|
||||
FROM {{schema_prefix}}"StoreAgent",
|
||||
plainto_tsquery('english', $1) AS query
|
||||
WHERE {sql_where_clause}
|
||||
AND search @@ query
|
||||
ORDER BY {order_by_clause}
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
# Count query for pagination - only uses search term parameter
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"StoreAgent",
|
||||
plainto_tsquery('english', $1) AS query
|
||||
WHERE {sql_where_clause}
|
||||
AND search @@ query
|
||||
"""
|
||||
|
||||
# Execute both queries with parameters
|
||||
agents = await query_raw_with_schema(sql_query, *params)
|
||||
|
||||
# For count, use params without pagination (last 2 params)
|
||||
count_params = params[:-2]
|
||||
count_result = await query_raw_with_schema(count_query, *count_params)
|
||||
|
||||
total = count_result[0]["count"] if count_result else 0
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
# Convert raw results to StoreAgent models
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in agents:
|
||||
try:
|
||||
store_agent = store_model.StoreAgent(
|
||||
slug=agent["slug"],
|
||||
agent_name=agent["agent_name"],
|
||||
agent_image=(
|
||||
agent["agent_image"][0] if agent["agent_image"] else ""
|
||||
),
|
||||
creator=agent["creator_username"] or "Needs Profile",
|
||||
creator_avatar=agent["creator_avatar"] or "",
|
||||
sub_heading=agent["sub_heading"],
|
||||
description=agent["description"],
|
||||
runs=agent["runs"],
|
||||
rating=agent["rating"],
|
||||
)
|
||||
store_agents.append(store_agent)
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing Store agent from search results: {e}")
|
||||
continue
|
||||
|
||||
else:
|
||||
# Non-search query path (original logic)
|
||||
if not search_used_hybrid:
|
||||
# Fallback path - use basic search or no search
|
||||
where_clause: prisma.types.StoreAgentWhereInput = {"is_available": True}
|
||||
if featured:
|
||||
where_clause["featured"] = featured
|
||||
@@ -180,6 +130,14 @@ async def get_store_agents(
|
||||
if category:
|
||||
where_clause["categories"] = {"has": category}
|
||||
|
||||
# Add basic text search if search_query provided but hybrid failed
|
||||
if search_query:
|
||||
where_clause["OR"] = [
|
||||
{"agent_name": {"contains": search_query, "mode": "insensitive"}},
|
||||
{"sub_heading": {"contains": search_query, "mode": "insensitive"}},
|
||||
{"description": {"contains": search_query, "mode": "insensitive"}},
|
||||
]
|
||||
|
||||
order_by = []
|
||||
if sorted_by == "rating":
|
||||
order_by.append({"rating": "desc"})
|
||||
@@ -188,7 +146,7 @@ async def get_store_agents(
|
||||
elif sorted_by == "name":
|
||||
order_by.append({"agent_name": "asc"})
|
||||
|
||||
agents = await prisma.models.StoreAgent.prisma().find_many(
|
||||
db_agents = await prisma.models.StoreAgent.prisma().find_many(
|
||||
where=where_clause,
|
||||
order=order_by,
|
||||
skip=(page - 1) * page_size,
|
||||
@@ -199,7 +157,7 @@ async def get_store_agents(
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in agents:
|
||||
for agent in db_agents:
|
||||
try:
|
||||
# Create the StoreAgent object safely
|
||||
store_agent = store_model.StoreAgent(
|
||||
@@ -614,6 +572,7 @@ async def get_store_submissions(
|
||||
submission_models = []
|
||||
for sub in submissions:
|
||||
submission_model = store_model.StoreSubmission(
|
||||
listing_id=sub.listing_id,
|
||||
agent_id=sub.agent_id,
|
||||
agent_version=sub.agent_version,
|
||||
name=sub.name,
|
||||
@@ -667,35 +626,48 @@ async def delete_store_submission(
|
||||
submission_id: str,
|
||||
) -> bool:
|
||||
"""
|
||||
Delete a store listing submission as the submitting user.
|
||||
Delete a store submission version as the submitting user.
|
||||
|
||||
Args:
|
||||
user_id: ID of the authenticated user
|
||||
submission_id: ID of the submission to be deleted
|
||||
submission_id: StoreListingVersion ID to delete
|
||||
|
||||
Returns:
|
||||
bool: True if the submission was successfully deleted, False otherwise
|
||||
bool: True if successfully deleted
|
||||
"""
|
||||
logger.debug(f"Deleting store submission {submission_id} for user {user_id}")
|
||||
|
||||
try:
|
||||
# Verify the submission belongs to this user
|
||||
submission = await prisma.models.StoreListing.prisma().find_first(
|
||||
where={"agentGraphId": submission_id, "owningUserId": user_id}
|
||||
# Find the submission version with ownership check
|
||||
version = await prisma.models.StoreListingVersion.prisma().find_first(
|
||||
where={"id": submission_id}, include={"StoreListing": True}
|
||||
)
|
||||
|
||||
if not submission:
|
||||
logger.warning(f"Submission not found for user {user_id}: {submission_id}")
|
||||
raise store_exceptions.SubmissionNotFoundError(
|
||||
f"Submission not found for this user. User ID: {user_id}, Submission ID: {submission_id}"
|
||||
if (
|
||||
not version
|
||||
or not version.StoreListing
|
||||
or version.StoreListing.owningUserId != user_id
|
||||
):
|
||||
raise store_exceptions.SubmissionNotFoundError("Submission not found")
|
||||
|
||||
# Prevent deletion of approved submissions
|
||||
if version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
"Cannot delete approved submissions"
|
||||
)
|
||||
|
||||
# Delete the submission
|
||||
await prisma.models.StoreListing.prisma().delete(where={"id": submission.id})
|
||||
|
||||
logger.debug(
|
||||
f"Successfully deleted submission {submission_id} for user {user_id}"
|
||||
# Delete the version
|
||||
await prisma.models.StoreListingVersion.prisma().delete(
|
||||
where={"id": version.id}
|
||||
)
|
||||
|
||||
# Clean up empty listing if this was the last version
|
||||
remaining = await prisma.models.StoreListingVersion.prisma().count(
|
||||
where={"storeListingId": version.storeListingId}
|
||||
)
|
||||
if remaining == 0:
|
||||
await prisma.models.StoreListing.prisma().delete(
|
||||
where={"id": version.storeListingId}
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -759,9 +731,15 @@ async def create_store_submission(
|
||||
logger.warning(
|
||||
f"Agent not found for user {user_id}: {agent_id} v{agent_version}"
|
||||
)
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
# Provide more user-friendly error message when agent_id is empty
|
||||
if not agent_id or agent_id.strip() == "":
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
"No agent selected. Please select an agent before submitting to the store."
|
||||
)
|
||||
else:
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
|
||||
# Check if listing already exists for this agent
|
||||
existing_listing = await prisma.models.StoreListing.prisma().find_first(
|
||||
@@ -833,6 +811,7 @@ async def create_store_submission(
|
||||
logger.debug(f"Created store listing for agent {agent_id}")
|
||||
# Return submission details
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=agent_id,
|
||||
agent_version=agent_version,
|
||||
name=name,
|
||||
@@ -944,81 +923,56 @@ async def edit_store_submission(
|
||||
# Currently we do not allow the user to update the agent associated with a submission.
|
||||
# If we allow that in the future, we will need a check here to verify that the agent belongs to this user.
|
||||
|
||||
# Check if we can edit this submission
|
||||
if current_version.submissionStatus == prisma.enums.SubmissionStatus.REJECTED:
|
||||
# Only allow editing of PENDING submissions
|
||||
if current_version.submissionStatus != prisma.enums.SubmissionStatus.PENDING:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
"Cannot edit a rejected submission"
|
||||
)
|
||||
|
||||
# For APPROVED submissions, we need to create a new version
|
||||
if current_version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
|
||||
# Create a new version for the existing listing
|
||||
return await create_store_version(
|
||||
user_id=user_id,
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
store_listing_id=current_version.storeListingId,
|
||||
name=name,
|
||||
video_url=video_url,
|
||||
agent_output_demo_url=agent_output_demo_url,
|
||||
image_urls=image_urls,
|
||||
description=description,
|
||||
sub_heading=sub_heading,
|
||||
categories=categories,
|
||||
changes_summary=changes_summary,
|
||||
recommended_schedule_cron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
f"Cannot edit a {current_version.submissionStatus.value.lower()} submission. Only pending submissions can be edited."
|
||||
)
|
||||
|
||||
# For PENDING submissions, we can update the existing version
|
||||
elif current_version.submissionStatus == prisma.enums.SubmissionStatus.PENDING:
|
||||
# Update the existing version
|
||||
updated_version = await prisma.models.StoreListingVersion.prisma().update(
|
||||
where={"id": store_listing_version_id},
|
||||
data=prisma.types.StoreListingVersionUpdateInput(
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
|
||||
)
|
||||
|
||||
if not updated_version:
|
||||
raise DatabaseError("Failed to update store listing version")
|
||||
return store_model.StoreSubmission(
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
# Update the existing version
|
||||
updated_version = await prisma.models.StoreListingVersion.prisma().update(
|
||||
where={"id": store_listing_version_id},
|
||||
data=prisma.types.StoreListingVersionUpdateInput(
|
||||
name=name,
|
||||
sub_heading=sub_heading,
|
||||
slug=current_version.StoreListing.slug,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
image_urls=image_urls,
|
||||
date_submitted=updated_version.submittedAt or updated_version.createdAt,
|
||||
status=updated_version.submissionStatus,
|
||||
runs=0,
|
||||
rating=0.0,
|
||||
store_listing_version_id=updated_version.id,
|
||||
changes_summary=changes_summary,
|
||||
video_url=video_url,
|
||||
categories=categories,
|
||||
version=updated_version.version,
|
||||
)
|
||||
subHeading=sub_heading,
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
),
|
||||
)
|
||||
|
||||
else:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
f"Cannot edit submission with status: {current_version.submissionStatus}"
|
||||
)
|
||||
logger.debug(
|
||||
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
|
||||
)
|
||||
|
||||
if not updated_version:
|
||||
raise DatabaseError("Failed to update store listing version")
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=current_version.StoreListing.id,
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
name=name,
|
||||
sub_heading=sub_heading,
|
||||
slug=current_version.StoreListing.slug,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
image_urls=image_urls,
|
||||
date_submitted=updated_version.submittedAt or updated_version.createdAt,
|
||||
status=updated_version.submissionStatus,
|
||||
runs=0,
|
||||
rating=0.0,
|
||||
store_listing_version_id=updated_version.id,
|
||||
changes_summary=changes_summary,
|
||||
video_url=video_url,
|
||||
categories=categories,
|
||||
version=updated_version.version,
|
||||
)
|
||||
|
||||
except (
|
||||
store_exceptions.SubmissionNotFoundError,
|
||||
@@ -1097,38 +1051,78 @@ async def create_store_version(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
|
||||
# Get the latest version number
|
||||
latest_version = listing.Versions[0] if listing.Versions else None
|
||||
|
||||
next_version = (latest_version.version + 1) if latest_version else 1
|
||||
|
||||
# Create a new version for the existing listing
|
||||
new_version = await prisma.models.StoreListingVersion.prisma().create(
|
||||
data=prisma.types.StoreListingVersionCreateInput(
|
||||
version=next_version,
|
||||
agentGraphId=agent_id,
|
||||
agentGraphVersion=agent_version,
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
submittedAt=datetime.now(),
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
storeListingId=store_listing_id,
|
||||
# Check if there's already a PENDING submission for this agent (any version)
|
||||
existing_pending_submission = (
|
||||
await prisma.models.StoreListingVersion.prisma().find_first(
|
||||
where=prisma.types.StoreListingVersionWhereInput(
|
||||
storeListingId=store_listing_id,
|
||||
agentGraphId=agent_id,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
isDeleted=False,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Handle existing pending submission and create new one atomically
|
||||
async with transaction() as tx:
|
||||
# Get the latest version number first
|
||||
latest_listing = await prisma.models.StoreListing.prisma(tx).find_first(
|
||||
where=prisma.types.StoreListingWhereInput(
|
||||
id=store_listing_id, owningUserId=user_id
|
||||
),
|
||||
include={"Versions": {"order_by": {"version": "desc"}, "take": 1}},
|
||||
)
|
||||
|
||||
if not latest_listing:
|
||||
raise store_exceptions.ListingNotFoundError(
|
||||
f"Store listing not found. User ID: {user_id}, Listing ID: {store_listing_id}"
|
||||
)
|
||||
|
||||
latest_version = (
|
||||
latest_listing.Versions[0] if latest_listing.Versions else None
|
||||
)
|
||||
next_version = (latest_version.version + 1) if latest_version else 1
|
||||
|
||||
# If there's an existing pending submission, delete it atomically before creating new one
|
||||
if existing_pending_submission:
|
||||
logger.info(
|
||||
f"Found existing PENDING submission for agent {agent_id} (was v{existing_pending_submission.agentGraphVersion}, now v{agent_version}), replacing existing submission instead of creating duplicate"
|
||||
)
|
||||
await prisma.models.StoreListingVersion.prisma(tx).delete(
|
||||
where={"id": existing_pending_submission.id}
|
||||
)
|
||||
logger.debug(
|
||||
f"Deleted existing pending submission {existing_pending_submission.id}"
|
||||
)
|
||||
|
||||
# Create a new version for the existing listing
|
||||
new_version = await prisma.models.StoreListingVersion.prisma(tx).create(
|
||||
data=prisma.types.StoreListingVersionCreateInput(
|
||||
version=next_version,
|
||||
agentGraphId=agent_id,
|
||||
agentGraphVersion=agent_version,
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
submittedAt=datetime.now(),
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
storeListingId=store_listing_id,
|
||||
)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Created new version for listing {store_listing_id} of agent {agent_id}"
|
||||
)
|
||||
# Return submission details
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=agent_id,
|
||||
agent_version=agent_version,
|
||||
name=name,
|
||||
@@ -1541,7 +1535,7 @@ async def review_store_submission(
|
||||
)
|
||||
|
||||
# Update the AgentGraph with store listing data
|
||||
await prisma.models.AgentGraph.prisma().update(
|
||||
await prisma.models.AgentGraph.prisma(tx).update(
|
||||
where={
|
||||
"graphVersionId": {
|
||||
"id": store_listing_version.agentGraphId,
|
||||
@@ -1556,6 +1550,23 @@ async def review_store_submission(
|
||||
},
|
||||
)
|
||||
|
||||
# Generate embedding for approved listing (blocking - admin operation)
|
||||
# Inside transaction: if embedding fails, entire transaction rolls back
|
||||
embedding_success = await ensure_embedding(
|
||||
version_id=store_listing_version_id,
|
||||
name=store_listing_version.name,
|
||||
description=store_listing_version.description,
|
||||
sub_heading=store_listing_version.subHeading,
|
||||
categories=store_listing_version.categories or [],
|
||||
tx=tx,
|
||||
)
|
||||
if not embedding_success:
|
||||
raise ValueError(
|
||||
f"Failed to generate embedding for listing {store_listing_version_id}. "
|
||||
"This is likely due to OpenAI API being unavailable. "
|
||||
"Please try again later or contact support if the issue persists."
|
||||
)
|
||||
|
||||
await prisma.models.StoreListing.prisma(tx).update(
|
||||
where={"id": store_listing_version.StoreListing.id},
|
||||
data={
|
||||
@@ -1708,15 +1719,12 @@ async def review_store_submission(
|
||||
|
||||
# Convert to Pydantic model for consistency
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=(submission.StoreListing.id if submission.StoreListing else ""),
|
||||
agent_id=submission.agentGraphId,
|
||||
agent_version=submission.agentGraphVersion,
|
||||
name=submission.name,
|
||||
sub_heading=submission.subHeading,
|
||||
slug=(
|
||||
submission.StoreListing.slug
|
||||
if hasattr(submission, "storeListing") and submission.StoreListing
|
||||
else ""
|
||||
),
|
||||
slug=(submission.StoreListing.slug if submission.StoreListing else ""),
|
||||
description=submission.description,
|
||||
instructions=submission.instructions,
|
||||
image_urls=submission.imageUrls or [],
|
||||
@@ -1818,9 +1826,7 @@ async def get_admin_listings_with_versions(
|
||||
where = prisma.types.StoreListingWhereInput(**where_dict)
|
||||
include = prisma.types.StoreListingInclude(
|
||||
Versions=prisma.types.FindManyStoreListingVersionArgsFromStoreListing(
|
||||
order_by=prisma.types._StoreListingVersion_version_OrderByInput(
|
||||
version="desc"
|
||||
)
|
||||
order_by={"version": "desc"}
|
||||
),
|
||||
OwningUser=True,
|
||||
)
|
||||
@@ -1845,6 +1851,7 @@ async def get_admin_listings_with_versions(
|
||||
# If we have versions, turn them into StoreSubmission models
|
||||
for version in listing.Versions or []:
|
||||
version_model = store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=version.agentGraphId,
|
||||
agent_version=version.agentGraphVersion,
|
||||
name=version.name,
|
||||
|
||||
@@ -0,0 +1,962 @@
"""
Unified Content Embeddings Service

Handles generation and storage of OpenAI embeddings for all content types
(store listings, blocks, documentation, library agents) to enable semantic/hybrid search.
"""

import asyncio
import logging
import time
from typing import Any

import prisma
from prisma.enums import ContentType
from tiktoken import encoding_for_model

from backend.api.features.store.content_handlers import CONTENT_HANDLERS
from backend.data.db import execute_raw_with_schema, query_raw_with_schema
from backend.util.clients import get_openai_client
from backend.util.json import dumps

logger = logging.getLogger(__name__)


# OpenAI embedding model configuration
EMBEDDING_MODEL = "text-embedding-3-small"
# Embedding dimension for the model above
# text-embedding-3-small: 1536, text-embedding-3-large: 3072
EMBEDDING_DIM = 1536
# OpenAI embedding token limit (8,191 with 1 token buffer for safety)
EMBEDDING_MAX_TOKENS = 8191


def build_searchable_text(
    name: str,
    description: str,
    sub_heading: str,
    categories: list[str],
) -> str:
    """
    Build searchable text from listing version fields.

    Combines relevant fields into a single string for embedding.
    """
    parts = []

    # Name is important - include it
    if name:
        parts.append(name)

    # Sub-heading provides context
    if sub_heading:
        parts.append(sub_heading)

    # Description is the main content
    if description:
        parts.append(description)

    # Categories help with semantic matching
    if categories:
        parts.append(" ".join(categories))

    return " ".join(parts)


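# Usage sketch (illustrative, with hypothetical values): the helper simply
# space-joins the non-empty fields in the order name, sub-heading, description,
# categories, producing one flat string that is embedded as-is.
_EXAMPLE_SEARCHABLE_TEXT = build_searchable_text(
    name="Email Summarizer",
    description="Summarizes long email threads into action items.",
    sub_heading="Inbox triage assistant",
    categories=["productivity", "email"],
)
assert _EXAMPLE_SEARCHABLE_TEXT == (
    "Email Summarizer Inbox triage assistant "
    "Summarizes long email threads into action items. productivity email"
)
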
async def generate_embedding(text: str) -> list[float] | None:
    """
    Generate embedding for text using OpenAI API.

    Returns None if embedding generation fails.
    Fail-fast: no retries to maintain consistency with approval flow.
    """
    try:
        client = get_openai_client()
        if not client:
            logger.error("openai_internal_api_key not set, cannot generate embedding")
            return None

        # Truncate text to token limit using tiktoken
        # Character-based truncation is insufficient because token ratios vary by content type
        enc = encoding_for_model(EMBEDDING_MODEL)
        tokens = enc.encode(text)
        if len(tokens) > EMBEDDING_MAX_TOKENS:
            tokens = tokens[:EMBEDDING_MAX_TOKENS]
            truncated_text = enc.decode(tokens)
            logger.info(
                f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
            )
        else:
            truncated_text = text

        start_time = time.time()
        response = await client.embeddings.create(
            model=EMBEDDING_MODEL,
            input=truncated_text,
        )
        latency_ms = (time.time() - start_time) * 1000

        embedding = response.data[0].embedding
        logger.info(
            f"Generated embedding: {len(embedding)} dims, "
            f"{len(tokens)} tokens, {latency_ms:.0f}ms"
        )
        return embedding

    except Exception as e:
        logger.error(f"Failed to generate embedding: {e}")
        return None


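# Usage sketch (illustrative): the truncation above is purely token-based, so a
# caller can pass arbitrarily long text and the request still stays under the
# 8,191-token limit. The async wrapper below is hypothetical example code, not
# part of the module itself.
async def _example_embed_long_text() -> None:
    long_text = "agent " * 50_000  # far beyond EMBEDDING_MAX_TOKENS
    vector = await generate_embedding(long_text)
    if vector is not None:
        # text-embedding-3-small returns 1536-dimensional vectors
        assert len(vector) == EMBEDDING_DIM
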
async def store_embedding(
    version_id: str,
    embedding: list[float],
    tx: prisma.Prisma | None = None,
) -> bool:
    """
    Store embedding in the database.

    BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
    DEPRECATED: Use ensure_embedding() instead (includes searchable_text).
    """
    return await store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=version_id,
        embedding=embedding,
        searchable_text="",  # Empty for backward compat; ensure_embedding() populates this
        metadata=None,
        user_id=None,  # Store agents are public
        tx=tx,
    )


async def store_content_embedding(
    content_type: ContentType,
    content_id: str,
    embedding: list[float],
    searchable_text: str,
    metadata: dict | None = None,
    user_id: str | None = None,
    tx: prisma.Prisma | None = None,
) -> bool:
    """
    Store embedding in the unified content embeddings table.

    New function for unified content embedding storage.
    Uses raw SQL since Prisma doesn't natively support pgvector.
    """
    try:
        client = tx if tx else prisma.get_client()

        # Convert embedding to PostgreSQL vector format
        embedding_str = embedding_to_vector_string(embedding)
        metadata_json = dumps(metadata or {})

        # Upsert the embedding
        # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
        await execute_raw_with_schema(
            """
            INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
                "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
            )
            VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
            ON CONFLICT ("contentType", "contentId", "userId")
            DO UPDATE SET
                "embedding" = $4::vector,
                "searchableText" = $5,
                "metadata" = $6::jsonb,
                "updatedAt" = NOW()
            WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
              AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
              AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
            """,
            content_type,
            content_id,
            user_id,
            embedding_str,
            searchable_text,
            metadata_json,
            client=client,
            set_public_search_path=True,
        )

        logger.info(f"Stored embedding for {content_type}:{content_id}")
        return True

    except Exception as e:
        logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
        return False


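# Usage sketch (illustrative, hypothetical IDs): because of the ON CONFLICT
# clause above, calling store_content_embedding twice for the same
# (contentType, contentId, userId) updates the existing row instead of
# inserting a duplicate.
async def _example_upsert_block_embedding(vector: list[float]) -> None:
    await store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id="example-block-id",
        embedding=vector,
        searchable_text="HTTP request block for calling external APIs",
        metadata={"name": "HTTP Request"},
        user_id=None,  # blocks are public
    )
    # Second call with new text overwrites searchableText/metadata in place.
    await store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id="example-block-id",
        embedding=vector,
        searchable_text="HTTP request block (updated description)",
        metadata={"name": "HTTP Request", "version": 2},
        user_id=None,
    )
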
async def get_embedding(version_id: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Returns dict with storeListingVersionId, embedding, timestamps or None if not found.
|
||||
"""
|
||||
result = await get_content_embedding(
|
||||
ContentType.STORE_AGENT, version_id, user_id=None
|
||||
)
|
||||
if result:
|
||||
# Transform to old format for backward compatibility
|
||||
return {
|
||||
"storeListingVersionId": result["contentId"],
|
||||
"embedding": result["embedding"],
|
||||
"createdAt": result["createdAt"],
|
||||
"updatedAt": result["updatedAt"],
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
async def get_content_embedding(
|
||||
content_type: ContentType, content_id: str, user_id: str | None = None
|
||||
) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for any content type.
|
||||
|
||||
New function for unified content embedding retrieval.
|
||||
Returns dict with contentType, contentId, embedding, timestamps or None if not found.
|
||||
"""
|
||||
try:
|
||||
result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT
|
||||
"contentType",
|
||||
"contentId",
|
||||
"userId",
|
||||
"embedding"::text as "embedding",
|
||||
"searchableText",
|
||||
"metadata",
|
||||
"createdAt",
|
||||
"updatedAt"
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
set_public_search_path=True,
|
||||
)
|
||||
|
||||
if result and len(result) > 0:
|
||||
return result[0]
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def ensure_embedding(
|
||||
version_id: str,
|
||||
name: str,
|
||||
description: str,
|
||||
sub_heading: str,
|
||||
categories: list[str],
|
||||
force: bool = False,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Ensure an embedding exists for the listing version.
|
||||
|
||||
Creates embedding if missing. Use force=True to regenerate.
|
||||
Backward-compatible wrapper for store listings.
|
||||
|
||||
Args:
|
||||
version_id: The StoreListingVersion ID
|
||||
name: Agent name
|
||||
description: Agent description
|
||||
sub_heading: Agent sub-heading
|
||||
categories: Agent categories
|
||||
force: Force regeneration even if embedding exists
|
||||
tx: Optional transaction client
|
||||
|
||||
Returns:
|
||||
True if embedding exists/was created, False on failure
|
||||
"""
|
||||
try:
|
||||
# Check if embedding already exists
|
||||
if not force:
|
||||
existing = await get_embedding(version_id)
|
||||
if existing and existing.get("embedding"):
|
||||
logger.debug(f"Embedding for version {version_id} already exists")
|
||||
return True
|
||||
|
||||
# Build searchable text for embedding
|
||||
searchable_text = build_searchable_text(
|
||||
name, description, sub_heading, categories
|
||||
)
|
||||
|
||||
# Generate new embedding
|
||||
embedding = await generate_embedding(searchable_text)
|
||||
if embedding is None:
|
||||
logger.warning(f"Could not generate embedding for version {version_id}")
|
||||
return False
|
||||
|
||||
# Store the embedding with metadata using new function
|
||||
metadata = {
|
||||
"name": name,
|
||||
"subHeading": sub_heading,
|
||||
"categories": categories,
|
||||
}
|
||||
return await store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=version_id,
|
||||
embedding=embedding,
|
||||
searchable_text=searchable_text,
|
||||
metadata=metadata,
|
||||
user_id=None, # Store agents are public
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
|
||||
return False
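
# Usage sketch (illustrative, hypothetical IDs): the admin approval flow calls
# ensure_embedding with the listing version's display fields; pass force=True
# to regenerate an embedding that already exists.
async def _example_ensure_listing_embedding() -> bool:
    return await ensure_embedding(
        version_id="example-store-listing-version-id",
        name="Email Summarizer",
        description="Summarizes long email threads into action items.",
        sub_heading="Inbox triage assistant",
        categories=["productivity"],
        force=False,
    )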
|
||||
|
||||
|
||||
async def delete_embedding(version_id: str) -> bool:
|
||||
"""
|
||||
Delete embedding for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Note: This is usually handled automatically by CASCADE delete,
|
||||
but provided for manual cleanup if needed.
|
||||
"""
|
||||
return await delete_content_embedding(ContentType.STORE_AGENT, version_id)
|
||||
|
||||
|
||||
async def delete_content_embedding(
|
||||
content_type: ContentType, content_id: str, user_id: str | None = None
|
||||
) -> bool:
|
||||
"""
|
||||
Delete embedding for any content type.
|
||||
|
||||
New function for unified content embedding deletion.
|
||||
Note: This is usually handled automatically by CASCADE delete,
|
||||
but provided for manual cleanup if needed.
|
||||
|
||||
Args:
|
||||
content_type: The type of content (STORE_AGENT, LIBRARY_AGENT, etc.)
|
||||
content_id: The unique identifier for the content
|
||||
user_id: Optional user ID. For public content (STORE_AGENT, BLOCK), pass None.
|
||||
For user-scoped content (LIBRARY_AGENT), pass the user's ID to avoid
|
||||
deleting embeddings belonging to other users.
|
||||
|
||||
Returns:
|
||||
True if deletion succeeded, False otherwise
|
||||
"""
|
||||
try:
|
||||
client = prisma.get_client()
|
||||
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
AND "contentId" = $2
|
||||
AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
client=client,
|
||||
)
|
||||
|
||||
user_str = f" (user: {user_id})" if user_id else ""
|
||||
logger.info(f"Deleted embedding for {content_type}:{content_id}{user_str}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete embedding for {content_type}:{content_id}: {e}")
|
||||
return False
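
# Usage sketch (illustrative, hypothetical IDs): public content is deleted with
# user_id=None, while user-scoped content must pass the owner's ID so other
# users' embeddings are left untouched.
async def _example_delete_embeddings() -> None:
    await delete_content_embedding(ContentType.BLOCK, "example-block-id", user_id=None)
    await delete_content_embedding(
        ContentType.LIBRARY_AGENT, "example-library-agent-id", user_id="example-user-id"
    )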
|
||||
|
||||
|
||||
async def get_embedding_stats() -> dict[str, Any]:
|
||||
"""
|
||||
Get statistics about embedding coverage for all content types.
|
||||
|
||||
Returns stats per content type and overall totals.
|
||||
"""
|
||||
try:
|
||||
stats_by_type = {}
|
||||
total_items = 0
|
||||
total_with_embeddings = 0
|
||||
total_without_embeddings = 0
|
||||
|
||||
# Aggregate stats from all handlers
|
||||
for content_type, handler in CONTENT_HANDLERS.items():
|
||||
try:
|
||||
stats = await handler.get_stats()
|
||||
stats_by_type[content_type.value] = {
|
||||
"total": stats["total"],
|
||||
"with_embeddings": stats["with_embeddings"],
|
||||
"without_embeddings": stats["without_embeddings"],
|
||||
"coverage_percent": (
|
||||
round(stats["with_embeddings"] / stats["total"] * 100, 1)
|
||||
if stats["total"] > 0
|
||||
else 0
|
||||
),
|
||||
}
|
||||
|
||||
total_items += stats["total"]
|
||||
total_with_embeddings += stats["with_embeddings"]
|
||||
total_without_embeddings += stats["without_embeddings"]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get stats for {content_type.value}: {e}")
|
||||
stats_by_type[content_type.value] = {
|
||||
"total": 0,
|
||||
"with_embeddings": 0,
|
||||
"without_embeddings": 0,
|
||||
"coverage_percent": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": stats_by_type,
|
||||
"totals": {
|
||||
"total": total_items,
|
||||
"with_embeddings": total_with_embeddings,
|
||||
"without_embeddings": total_without_embeddings,
|
||||
"coverage_percent": (
|
||||
round(total_with_embeddings / total_items * 100, 1)
|
||||
if total_items > 0
|
||||
else 0
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get embedding stats: {e}")
|
||||
return {
|
||||
"by_type": {},
|
||||
"totals": {
|
||||
"total": 0,
|
||||
"with_embeddings": 0,
|
||||
"without_embeddings": 0,
|
||||
"coverage_percent": 0,
|
||||
},
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
async def backfill_missing_embeddings(batch_size: int = 10) -> dict[str, Any]:
|
||||
"""
|
||||
Generate embeddings for approved listings that don't have them.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing usage.
|
||||
This now delegates to backfill_all_content_types() to process all content types.
|
||||
|
||||
Args:
|
||||
batch_size: Number of embeddings to generate per content type
|
||||
|
||||
Returns:
|
||||
Dict with success/failure counts aggregated across all content types
|
||||
"""
|
||||
# Delegate to the new generic backfill system
|
||||
result = await backfill_all_content_types(batch_size)
|
||||
|
||||
# Return in the old format for backward compatibility
|
||||
return result["totals"]
|
||||
|
||||
|
||||
async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
|
||||
"""
|
||||
Generate embeddings for all content types using registered handlers.
|
||||
|
||||
Processes content types in order: BLOCK → STORE_AGENT → DOCUMENTATION.
|
||||
This ensures foundational content (blocks) are searchable first.
|
||||
|
||||
Args:
|
||||
batch_size: Number of embeddings to generate per content type
|
||||
|
||||
Returns:
|
||||
Dict with stats per content type and overall totals
|
||||
"""
|
||||
results_by_type = {}
|
||||
total_processed = 0
|
||||
total_success = 0
|
||||
total_failed = 0
|
||||
|
||||
# Process content types in explicit order
|
||||
processing_order = [
|
||||
ContentType.BLOCK,
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
for content_type in processing_order:
|
||||
handler = CONTENT_HANDLERS.get(content_type)
|
||||
if not handler:
|
||||
logger.warning(f"No handler registered for {content_type.value}")
|
||||
continue
|
||||
try:
|
||||
logger.info(f"Processing {content_type.value} content type...")
|
||||
|
||||
# Get missing items from handler
|
||||
missing_items = await handler.get_missing_items(batch_size)
|
||||
|
||||
if not missing_items:
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": 0,
|
||||
"success": 0,
|
||||
"failed": 0,
|
||||
"message": "No missing embeddings",
|
||||
}
|
||||
continue
|
||||
|
||||
# Process embeddings concurrently for better performance
|
||||
embedding_tasks = [
|
||||
ensure_content_embedding(
|
||||
content_type=item.content_type,
|
||||
content_id=item.content_id,
|
||||
searchable_text=item.searchable_text,
|
||||
metadata=item.metadata,
|
||||
user_id=item.user_id,
|
||||
)
|
||||
for item in missing_items
|
||||
]
|
||||
|
||||
results = await asyncio.gather(*embedding_tasks, return_exceptions=True)
|
||||
|
||||
success = sum(1 for result in results if result is True)
|
||||
failed = len(results) - success
|
||||
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": len(missing_items),
|
||||
"success": success,
|
||||
"failed": failed,
|
||||
"message": f"Backfilled {success} embeddings, {failed} failed",
|
||||
}
|
||||
|
||||
total_processed += len(missing_items)
|
||||
total_success += success
|
||||
total_failed += failed
|
||||
|
||||
logger.info(
|
||||
f"{content_type.value}: processed {len(missing_items)}, "
|
||||
f"success {success}, failed {failed}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process {content_type.value}: {e}")
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": 0,
|
||||
"success": 0,
|
||||
"failed": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": results_by_type,
|
||||
"totals": {
|
||||
"processed": total_processed,
|
||||
"success": total_success,
|
||||
"failed": total_failed,
|
||||
"message": f"Overall: {total_success} succeeded, {total_failed} failed",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
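# Usage sketch (illustrative): a maintenance job can call the backfill and log
# per-type coverage; the dict shape mirrors the return value built above.
async def _example_run_backfill() -> None:
    report = await backfill_all_content_types(batch_size=25)
    for content_type_name, stats in report["by_type"].items():
        logger.info(f"{content_type_name}: {stats}")
    logger.info(report["totals"]["message"])
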
async def embed_query(query: str) -> list[float] | None:
    """
    Generate embedding for a search query.

    Same as generate_embedding but with clearer intent.
    """
    return await generate_embedding(query)


def embedding_to_vector_string(embedding: list[float]) -> str:
    """Convert embedding list to PostgreSQL vector string format."""
    return "[" + ",".join(str(x) for x in embedding) + "]"


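# Illustrative check: pgvector expects the "[x,y,z]" literal produced above.
assert embedding_to_vector_string([0.1, 0.2, 0.3]) == "[0.1,0.2,0.3]"
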
async def ensure_content_embedding(
|
||||
content_type: ContentType,
|
||||
content_id: str,
|
||||
searchable_text: str,
|
||||
metadata: dict | None = None,
|
||||
user_id: str | None = None,
|
||||
force: bool = False,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Ensure an embedding exists for any content type.
|
||||
|
||||
Generic function for creating embeddings for store agents, blocks, docs, etc.
|
||||
|
||||
Args:
|
||||
content_type: ContentType enum value (STORE_AGENT, BLOCK, etc.)
|
||||
content_id: Unique identifier for the content
|
||||
searchable_text: Combined text for embedding generation
|
||||
metadata: Optional metadata to store with embedding
|
||||
force: Force regeneration even if embedding exists
|
||||
tx: Optional transaction client
|
||||
|
||||
Returns:
|
||||
True if embedding exists/was created, False on failure
|
||||
"""
|
||||
try:
|
||||
# Check if embedding already exists
|
||||
if not force:
|
||||
existing = await get_content_embedding(content_type, content_id, user_id)
|
||||
if existing and existing.get("embedding"):
|
||||
logger.debug(
|
||||
f"Embedding for {content_type}:{content_id} already exists"
|
||||
)
|
||||
return True
|
||||
|
||||
# Generate new embedding
|
||||
embedding = await generate_embedding(searchable_text)
|
||||
if embedding is None:
|
||||
logger.warning(
|
||||
f"Could not generate embedding for {content_type}:{content_id}"
|
||||
)
|
||||
return False
|
||||
|
||||
# Store the embedding
|
||||
return await store_content_embedding(
|
||||
content_type=content_type,
|
||||
content_id=content_id,
|
||||
embedding=embedding,
|
||||
searchable_text=searchable_text,
|
||||
metadata=metadata or {},
|
||||
user_id=user_id,
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def cleanup_orphaned_embeddings() -> dict[str, Any]:
|
||||
"""
|
||||
Clean up embeddings for content that no longer exists or is no longer valid.
|
||||
|
||||
Compares current content with embeddings in database and removes orphaned records:
|
||||
- STORE_AGENT: Removes embeddings for rejected/deleted store listings
|
||||
- BLOCK: Removes embeddings for blocks no longer registered
|
||||
- DOCUMENTATION: Removes embeddings for deleted doc files
|
||||
|
||||
Returns:
|
||||
Dict with cleanup statistics per content type
|
||||
"""
|
||||
results_by_type = {}
|
||||
total_deleted = 0
|
||||
|
||||
# Cleanup orphaned embeddings for all content types
|
||||
cleanup_types = [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
for content_type in cleanup_types:
|
||||
try:
|
||||
handler = CONTENT_HANDLERS.get(content_type)
|
||||
if not handler:
|
||||
logger.warning(f"No handler registered for {content_type}")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": "No handler registered",
|
||||
}
|
||||
continue
|
||||
|
||||
# Get all current content IDs from handler
|
||||
if content_type == ContentType.STORE_AGENT:
|
||||
# Get IDs of approved store listing versions from non-deleted listings
|
||||
valid_agents = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT slv.id
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
JOIN {schema_prefix}"StoreListing" sl ON slv."storeListingId" = sl.id
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
AND sl."isDeleted" = false
|
||||
""",
|
||||
)
|
||||
current_ids = {row["id"] for row in valid_agents}
|
||||
elif content_type == ContentType.BLOCK:
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
current_ids = set(get_blocks().keys())
|
||||
elif content_type == ContentType.DOCUMENTATION:
|
||||
from pathlib import Path
|
||||
|
||||
# embeddings.py is at: backend/backend/api/features/store/embeddings.py
|
||||
# Need to go up to project root then into docs/
|
||||
this_file = Path(__file__)
|
||||
project_root = (
|
||||
this_file.parent.parent.parent.parent.parent.parent.parent
|
||||
)
|
||||
docs_root = project_root / "docs"
|
||||
if docs_root.exists():
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(
|
||||
docs_root.rglob("*.mdx")
|
||||
)
|
||||
current_ids = {str(doc.relative_to(docs_root)) for doc in all_docs}
|
||||
else:
|
||||
current_ids = set()
|
||||
else:
|
||||
# Skip unknown content types to avoid accidental deletion
|
||||
logger.warning(
|
||||
f"Skipping cleanup for unknown content type: {content_type}"
|
||||
)
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": "Unknown content type - skipped for safety",
|
||||
}
|
||||
continue
|
||||
|
||||
# Get all embedding IDs from database
|
||||
db_embeddings = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT "contentId"
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
""",
|
||||
content_type,
|
||||
)
|
||||
|
||||
db_ids = {row["contentId"] for row in db_embeddings}
|
||||
|
||||
# Find orphaned embeddings (in DB but not in current content)
|
||||
orphaned_ids = db_ids - current_ids
|
||||
|
||||
if not orphaned_ids:
|
||||
logger.info(f"{content_type.value}: No orphaned embeddings found")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"message": "No orphaned embeddings",
|
||||
}
|
||||
continue
|
||||
|
||||
# Delete orphaned embeddings in batch for better performance
|
||||
orphaned_list = list(orphaned_ids)
|
||||
try:
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
AND "contentId" = ANY($2::text[])
|
||||
""",
|
||||
content_type,
|
||||
orphaned_list,
|
||||
)
|
||||
deleted = len(orphaned_list)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to batch delete orphaned embeddings: {e}")
|
||||
deleted = 0
|
||||
|
||||
logger.info(
|
||||
f"{content_type.value}: Deleted {deleted}/{len(orphaned_ids)} orphaned embeddings"
|
||||
)
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": deleted,
|
||||
"orphaned": len(orphaned_ids),
|
||||
"message": f"Deleted {deleted} orphaned embeddings",
|
||||
}
|
||||
|
||||
total_deleted += deleted
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to cleanup {content_type.value}: {e}")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": results_by_type,
|
||||
"totals": {
|
||||
"deleted": total_deleted,
|
||||
"message": f"Deleted {total_deleted} orphaned embeddings",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async def semantic_search(
|
||||
query: str,
|
||||
content_types: list[ContentType] | None = None,
|
||||
user_id: str | None = None,
|
||||
limit: int = 20,
|
||||
min_similarity: float = 0.5,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Semantic search across content types using embeddings.
|
||||
|
||||
Performs vector similarity search on UnifiedContentEmbedding table.
|
||||
Used directly for blocks/docs/library agents, or as the semantic component
|
||||
within hybrid_search for store agents.
|
||||
|
||||
If embedding generation fails, falls back to lexical search on searchableText.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
content_types: List of ContentType to search. Defaults to [BLOCK, STORE_AGENT, DOCUMENTATION]
|
||||
user_id: Optional user ID for searching private content (library agents)
|
||||
limit: Maximum number of results to return (default: 20)
|
||||
min_similarity: Minimum cosine similarity threshold (0-1, default: 0.5)
|
||||
|
||||
Returns:
|
||||
List of search results with the following structure:
|
||||
[
|
||||
{
|
||||
"content_id": str,
|
||||
"content_type": str, # "BLOCK", "STORE_AGENT", "DOCUMENTATION", or "LIBRARY_AGENT"
|
||||
"searchable_text": str,
|
||||
"metadata": dict,
|
||||
"similarity": float, # Cosine similarity score (0-1)
|
||||
},
|
||||
...
|
||||
]
|
||||
|
||||
Examples:
|
||||
# Search blocks only
|
||||
results = await semantic_search("calculate", content_types=[ContentType.BLOCK])
|
||||
|
||||
# Search blocks and documentation
|
||||
results = await semantic_search(
|
||||
"how to use API",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION]
|
||||
)
|
||||
|
||||
# Search all public content (default)
|
||||
results = await semantic_search("AI agent")
|
||||
|
||||
# Search user's library agents
|
||||
results = await semantic_search(
|
||||
"my custom agent",
|
||||
content_types=[ContentType.LIBRARY_AGENT],
|
||||
user_id="user123"
|
||||
)
|
||||
"""
|
||||
# Default to searching all public content types
|
||||
if content_types is None:
|
||||
content_types = [
|
||||
ContentType.BLOCK,
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
# Validate inputs
|
||||
if not content_types:
|
||||
return [] # Empty content_types would cause invalid SQL (IN ())
|
||||
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return []
|
||||
|
||||
if limit < 1:
|
||||
limit = 1
|
||||
if limit > 100:
|
||||
limit = 100
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
if query_embedding is not None:
|
||||
# Semantic search with embeddings
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
|
||||
# Build params in order: limit, then user_id (if provided), then content types
|
||||
params: list[Any] = [limit]
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
user_filter = 'AND "userId" = ${}'.format(len(params) + 1)
|
||||
params.append(user_id)
|
||||
|
||||
# Add content type parameters and build placeholders dynamically
|
||||
content_type_start_idx = len(params) + 1
|
||||
content_type_placeholders = ", ".join(
|
||||
f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
|
||||
for i in range(len(content_types))
|
||||
)
|
||||
params.extend([ct.value for ct in content_types])
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
"contentId" as content_id,
|
||||
"contentType" as content_type,
|
||||
"searchableText" as searchable_text,
|
||||
metadata,
|
||||
1 - (embedding <=> '{embedding_str}'::vector) as similarity
|
||||
FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" IN ({content_type_placeholders})
|
||||
{user_filter}
|
||||
AND 1 - (embedding <=> '{embedding_str}'::vector) >= ${len(params) + 1}
|
||||
ORDER BY similarity DESC
|
||||
LIMIT $1
|
||||
"""
|
||||
params.append(min_similarity)
|
||||
|
||||
try:
|
||||
results = await query_raw_with_schema(
|
||||
sql, *params, set_public_search_path=True
|
||||
)
|
||||
return [
|
||||
{
|
||||
"content_id": row["content_id"],
|
||||
"content_type": row["content_type"],
|
||||
"searchable_text": row["searchable_text"],
|
||||
"metadata": row["metadata"],
|
||||
"similarity": float(row["similarity"]),
|
||||
}
|
||||
for row in results
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Semantic search failed: {e}")
|
||||
# Fall through to lexical search below
|
||||
|
||||
# Fallback to lexical search if embeddings unavailable
|
||||
logger.warning("Falling back to lexical search (embeddings unavailable)")
|
||||
|
||||
params_lexical: list[Any] = [limit]
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
user_filter = 'AND "userId" = ${}'.format(len(params_lexical) + 1)
|
||||
params_lexical.append(user_id)
|
||||
|
||||
# Add content type parameters and build placeholders dynamically
|
||||
content_type_start_idx = len(params_lexical) + 1
|
||||
content_type_placeholders_lexical = ", ".join(
|
||||
f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
|
||||
for i in range(len(content_types))
|
||||
)
|
||||
params_lexical.extend([ct.value for ct in content_types])
|
||||
|
||||
sql_lexical = f"""
|
||||
SELECT
|
||||
"contentId" as content_id,
|
||||
"contentType" as content_type,
|
||||
"searchableText" as searchable_text,
|
||||
metadata,
|
||||
0.0 as similarity
|
||||
FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" IN ({content_type_placeholders_lexical})
|
||||
{user_filter}
|
||||
AND "searchableText" ILIKE ${len(params_lexical) + 1}
|
||||
ORDER BY "updatedAt" DESC
|
||||
LIMIT $1
|
||||
"""
|
||||
params_lexical.append(f"%{query}%")
|
||||
|
||||
try:
|
||||
results = await query_raw_with_schema(
|
||||
sql_lexical, *params_lexical, set_public_search_path=True
|
||||
)
|
||||
return [
|
||||
{
|
||||
"content_id": row["content_id"],
|
||||
"content_type": row["content_type"],
|
||||
"searchable_text": row["searchable_text"],
|
||||
"metadata": row["metadata"],
|
||||
"similarity": 0.0, # Lexical search doesn't provide similarity
|
||||
}
|
||||
for row in results
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Lexical search failed: {e}")
|
||||
return []
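
# Illustrative note (not part of the module above): placeholders in the two
# queries are numbered in the order limit, optional user_id, content types,
# then the trailing threshold or ILIKE pattern, so with a user filter and two
# content types the semantic query binds $1=limit, $2=user_id, $3/$4=content
# types, $5=min_similarity. A minimal sketch of that ordering:
def _example_placeholder_order(has_user_filter: bool, n_content_types: int) -> list[str]:
    order = ["limit"]  # always bound as $1
    if has_user_filter:
        order.append("user_id")
    order.extend(f"content_type_{i}" for i in range(1, n_content_types + 1))
    order.append("min_similarity or ILIKE pattern")
    return order  # entry at index N binds to SQL placeholder $(N + 1)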
|
||||
@@ -0,0 +1,666 @@
"""
End-to-end database tests for embeddings and hybrid search.

These tests hit the actual database to verify SQL queries work correctly.
Tests cover:
1. Embedding storage (store_content_embedding)
2. Embedding retrieval (get_content_embedding)
3. Embedding deletion (delete_content_embedding)
4. Unified hybrid search across content types
5. Store agent hybrid search
"""

import uuid
from typing import AsyncGenerator

import pytest
from prisma.enums import ContentType

from backend.api.features.store import embeddings
from backend.api.features.store.embeddings import EMBEDDING_DIM
from backend.api.features.store.hybrid_search import (
    hybrid_search,
    unified_hybrid_search,
)
|
||||
# ============================================================================
|
||||
# Test Fixtures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_content_id() -> str:
|
||||
"""Generate unique content ID for test isolation."""
|
||||
return f"test-content-{uuid.uuid4()}"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_user_id() -> str:
|
||||
"""Generate unique user ID for test isolation."""
|
||||
return f"test-user-{uuid.uuid4()}"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_embedding() -> list[float]:
|
||||
"""Generate a mock embedding vector."""
|
||||
# Create a normalized embedding vector
|
||||
import math
|
||||
|
||||
raw = [float(i % 10) / 10.0 for i in range(EMBEDDING_DIM)]
|
||||
# Normalize to unit length (required for cosine similarity)
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def similar_embedding() -> list[float]:
|
||||
"""Generate an embedding similar to mock_embedding."""
|
||||
import math
|
||||
|
||||
# Similar but slightly different values
|
||||
raw = [float(i % 10) / 10.0 + 0.01 for i in range(EMBEDDING_DIM)]
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def different_embedding() -> list[float]:
|
||||
"""Generate an embedding very different from mock_embedding."""
|
||||
import math
|
||||
|
||||
# Reversed pattern to be maximally different
|
||||
raw = [float((EMBEDDING_DIM - i) % 10) / 10.0 for i in range(EMBEDDING_DIM)]
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def cleanup_embeddings(
|
||||
server,
|
||||
) -> AsyncGenerator[list[tuple[ContentType, str, str | None]], None]:
|
||||
"""
|
||||
Fixture that tracks created embeddings and cleans them up after tests.
|
||||
|
||||
Yields a list to which tests can append (content_type, content_id, user_id) tuples.
|
||||
"""
|
||||
created_embeddings: list[tuple[ContentType, str, str | None]] = []
|
||||
yield created_embeddings
|
||||
|
||||
# Cleanup all created embeddings
|
||||
for content_type, content_id, user_id in created_embeddings:
|
||||
try:
|
||||
await embeddings.delete_content_embedding(content_type, content_id, user_id)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# store_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_store_agent(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for STORE_AGENT content type."""
|
||||
# Track for cleanup
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="AI assistant for productivity tasks",
|
||||
metadata={"name": "Test Agent", "categories": ["productivity"]},
|
||||
user_id=None, # Store agents are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
# Verify it was stored
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentId"] == test_content_id
|
||||
assert stored["contentType"] == "STORE_AGENT"
|
||||
assert stored["searchableText"] == "AI assistant for productivity tasks"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_block(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for BLOCK content type."""
|
||||
cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="HTTP request block for API calls",
|
||||
metadata={"name": "HTTP Request Block"},
|
||||
user_id=None, # Blocks are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentType"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_documentation(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for DOCUMENTATION content type."""
|
||||
cleanup_embeddings.append((ContentType.DOCUMENTATION, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Getting started guide for AutoGPT platform",
|
||||
metadata={"title": "Getting Started", "url": "/docs/getting-started"},
|
||||
user_id=None, # Docs are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.DOCUMENTATION, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentType"] == "DOCUMENTATION"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_upsert(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test that storing embedding twice updates instead of duplicates."""
|
||||
cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))
|
||||
|
||||
# Store first time
|
||||
result1 = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Original text",
|
||||
metadata={"version": 1},
|
||||
user_id=None,
|
||||
)
|
||||
assert result1 is True
|
||||
|
||||
# Store again with different text (upsert)
|
||||
result2 = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Updated text",
|
||||
metadata={"version": 2},
|
||||
user_id=None,
|
||||
)
|
||||
assert result2 is True
|
||||
|
||||
# Verify only one record with updated text
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["searchableText"] == "Updated text"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# get_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_content_embedding_not_found(server):
|
||||
"""Test retrieving non-existent embedding returns None."""
|
||||
result = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, "non-existent-id", user_id=None
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_content_embedding_with_metadata(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test that metadata is correctly stored and retrieved."""
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))
|
||||
|
||||
metadata = {
|
||||
"name": "Test Agent",
|
||||
"subHeading": "A test agent",
|
||||
"categories": ["ai", "productivity"],
|
||||
"customField": 123,
|
||||
}
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="test",
|
||||
metadata=metadata,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, test_content_id, user_id=None
|
||||
)
|
||||
|
||||
assert stored is not None
|
||||
assert stored["metadata"]["name"] == "Test Agent"
|
||||
assert stored["metadata"]["categories"] == ["ai", "productivity"]
|
||||
assert stored["metadata"]["customField"] == 123
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# delete_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_delete_content_embedding(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
):
|
||||
"""Test deleting embedding removes it from database."""
|
||||
# Store embedding
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="To be deleted",
|
||||
metadata=None,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify it exists
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
|
||||
# Delete it
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert result is True
|
||||
|
||||
# Verify it's gone
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_delete_content_embedding_not_found(server):
|
||||
"""Test deleting non-existent embedding doesn't error."""
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.BLOCK, "non-existent-id", user_id=None
|
||||
)
|
||||
# Should succeed even if nothing to delete
|
||||
assert result is True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# unified_hybrid_search Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_finds_matching_content(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search finds content matching the query."""
|
||||
# Create unique content IDs
|
||||
agent_id = f"test-agent-{uuid.uuid4()}"
|
||||
block_id = f"test-block-{uuid.uuid4()}"
|
||||
doc_id = f"test-doc-{uuid.uuid4()}"
|
||||
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
|
||||
cleanup_embeddings.append((ContentType.BLOCK, block_id, None))
|
||||
cleanup_embeddings.append((ContentType.DOCUMENTATION, doc_id, None))
|
||||
|
||||
# Store embeddings for different content types
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=agent_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="AI writing assistant for blog posts",
|
||||
metadata={"name": "Writing Assistant"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=block_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Text generation block for creative writing",
|
||||
metadata={"name": "Text Generator"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
content_id=doc_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="How to use writing blocks in AutoGPT",
|
||||
metadata={"title": "Writing Guide"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search for "writing" - should find all three
|
||||
results, total = await unified_hybrid_search(
|
||||
query="writing",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should find at least our test content (may find others too)
|
||||
content_ids = [r["content_id"] for r in results]
|
||||
assert agent_id in content_ids or total >= 1 # Lexical search should find it
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_filter_by_content_type(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search can filter by content type."""
|
||||
agent_id = f"test-agent-{uuid.uuid4()}"
|
||||
block_id = f"test-block-{uuid.uuid4()}"
|
||||
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
|
||||
cleanup_embeddings.append((ContentType.BLOCK, block_id, None))
|
||||
|
||||
# Store both types with same searchable text
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=agent_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="unique_search_term_xyz123",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=block_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="unique_search_term_xyz123",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search only for BLOCK type
|
||||
results, total = await unified_hybrid_search(
|
||||
query="unique_search_term_xyz123",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# All results should be BLOCK type
|
||||
for r in results:
|
||||
assert r["content_type"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_empty_query(server):
|
||||
"""Test unified search with empty query returns empty results."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_pagination(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search pagination works correctly."""
|
||||
# Create multiple items
|
||||
content_ids = []
|
||||
for i in range(5):
|
||||
content_id = f"test-pagination-{uuid.uuid4()}"
|
||||
content_ids.append(content_id)
|
||||
cleanup_embeddings.append((ContentType.BLOCK, content_id, None))
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text=f"pagination test item number {i}",
|
||||
metadata={"index": i},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Get first page
|
||||
page1_results, total1 = await unified_hybrid_search(
|
||||
query="pagination test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=2,
|
||||
)
|
||||
|
||||
# Get second page
|
||||
page2_results, total2 = await unified_hybrid_search(
|
||||
query="pagination test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=2,
|
||||
page_size=2,
|
||||
)
|
||||
|
||||
# Total should be consistent
|
||||
assert total1 == total2
|
||||
|
||||
# Pages should have different content (if we have enough results)
|
||||
if len(page1_results) > 0 and len(page2_results) > 0:
|
||||
page1_ids = {r["content_id"] for r in page1_results}
|
||||
page2_ids = {r["content_id"] for r in page2_results}
|
||||
# No overlap between pages
|
||||
assert page1_ids.isdisjoint(page2_ids)
|
||||
|
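# Sketch of the pagination arithmetic the search functions are expected to use
# (page is 1-indexed, so page N skips (N - 1) * page_size rows). This mirrors
# the OFFSET/LIMIT behaviour exercised above; the helper is illustrative only.
def _example_page_offset(page: int, page_size: int) -> int:
    if page < 1:
        page = 1
    return (page - 1) * page_size


assert _example_page_offset(1, 2) == 0
assert _example_page_offset(2, 2) == 2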
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_min_score_filtering(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search respects min_score threshold."""
|
||||
content_id = f"test-minscore-{uuid.uuid4()}"
|
||||
cleanup_embeddings.append((ContentType.BLOCK, content_id, None))
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="completely unrelated content about bananas",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search with very high min_score - should filter out low relevance
|
||||
results_high, _ = await unified_hybrid_search(
|
||||
query="quantum computing algorithms",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_score=0.9, # Very high threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Search with low min_score
|
||||
results_low, _ = await unified_hybrid_search(
|
||||
query="quantum computing algorithms",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_score=0.01, # Very low threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# High threshold should have fewer or equal results
|
||||
assert len(results_high) <= len(results_low)
|
||||
|
||||
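# Illustrative sketch: min_score acts as a post-scoring filter, so raising the
# threshold can only shrink the result set. This mirrors the
# "combined_score >= min_score" clause in the SQL; the scores below are made up.
def _example_apply_min_score(scores: list[float], min_score: float) -> list[float]:
    return [s for s in scores if s >= min_score]


_example_scores = [0.05, 0.3, 0.92]
assert len(_example_apply_min_score(_example_scores, 0.9)) <= len(
    _example_apply_min_score(_example_scores, 0.01)
)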
|
||||
# ============================================================================
# hybrid_search (Store Agents) Tests
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_store_agents_sql_valid(server):
|
||||
"""Test that hybrid_search SQL executes without errors."""
|
||||
# This test verifies the SQL is syntactically correct
|
||||
# even if no results are found
|
||||
results, total = await hybrid_search(
|
||||
query="test agent",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise - verifies SQL is valid
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
assert total >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_with_filters(server):
|
||||
"""Test hybrid_search with various filter options."""
|
||||
# Test with all filter types
|
||||
results, total = await hybrid_search(
|
||||
query="productivity",
|
||||
featured=True,
|
||||
creators=["test-creator"],
|
||||
category="productivity",
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Should not raise - verifies filter SQL is valid
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_pagination(server):
|
||||
"""Test hybrid_search pagination."""
|
||||
# Page 1
|
||||
results1, total1 = await hybrid_search(
|
||||
query="agent",
|
||||
page=1,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
# Page 2
|
||||
results2, total2 = await hybrid_search(
|
||||
query="agent",
|
||||
page=2,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
# Verify SQL executes without error
|
||||
assert isinstance(results1, list)
|
||||
assert isinstance(results2, list)
|
||||
assert isinstance(total1, int)
|
||||
assert isinstance(total2, int)
|
||||
|
||||
# If page 1 has results, total should be > 0
|
||||
# Note: total from page 2 may be 0 if no results on that page (COUNT(*) OVER limitation)
|
||||
if results1:
|
||||
assert total1 > 0
|
||||
|
||||
|
||||
# ============================================================================
# SQL Validity Tests (verify queries don't break)
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_all_content_types_searchable(server):
|
||||
"""Test that all content types can be searched without SQL errors."""
|
||||
for content_type in [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]:
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[content_type],
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_multiple_content_types_searchable(server):
|
||||
"""Test searching multiple content types at once."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_search_all_content_types_default(server):
|
||||
"""Test searching all content types (default behavior)."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=None, # Should search all
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
@@ -0,0 +1,315 @@
|
||||
"""
|
||||
Integration tests for embeddings with schema handling.
|
||||
|
||||
These tests verify that embeddings operations work correctly across different database schemas.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
from backend.api.features.store.embeddings import EMBEDDING_DIM
|
||||
|
||||
# Schema prefix tests removed - functionality moved to db.raw_with_schema() helper
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_store_content_embedding_with_schema():
|
||||
"""Test storing embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
searchable_text="test text",
|
||||
metadata={"test": "data"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.execute_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.execute_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is True
|
||||
|
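# Sketch of the unittest.mock call-args convention relied on above:
# call_args[0] is the positional-args tuple, so call_args[0][0] is the first
# positional argument (here, the SQL string passed to execute_raw). The values
# below are illustrative only.
def _example_first_positional_arg() -> str:
    from unittest.mock import MagicMock

    m = MagicMock()
    m("SELECT 1", "param")
    return m.call_args[0][0]


assert _example_first_positional_arg() == "SELECT 1"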
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_get_content_embedding_with_schema():
|
||||
"""Test retrieving embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_client.query_raw.return_value = [
|
||||
{
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-id",
|
||||
"userId": None,
|
||||
"embedding": "[0.1, 0.2]",
|
||||
"searchableText": "test",
|
||||
"metadata": {},
|
||||
"createdAt": "2024-01-01",
|
||||
"updatedAt": "2024-01-01",
|
||||
}
|
||||
]
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT,
|
||||
"test-id",
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.query_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.query_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is not None
|
||||
assert result["contentId"] == "test-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_delete_content_embedding_with_schema():
|
||||
"""Test deleting embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.STORE_AGENT,
|
||||
"test-id",
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.execute_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.execute_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_get_embedding_stats_with_schema():
|
||||
"""Test embedding statistics with proper schema handling via content handlers."""
|
||||
# Mock handler to return stats
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_stats = AsyncMock(
|
||||
return_value={
|
||||
"total": 100,
|
||||
"with_embeddings": 80,
|
||||
"without_embeddings": 20,
|
||||
}
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.get_embedding_stats()
|
||||
|
||||
# Verify handler was called
|
||||
mock_handler.get_stats.assert_called_once()
|
||||
|
||||
# Verify new result structure
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
assert result["totals"]["total"] == 100
|
||||
assert result["totals"]["with_embeddings"] == 80
|
||||
assert result["totals"]["without_embeddings"] == 20
|
||||
assert result["totals"]["coverage_percent"] == 80.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backfill_missing_embeddings_with_schema():
|
||||
"""Test backfilling embeddings via content handlers."""
|
||||
from backend.api.features.store.content_handlers import ContentItem
|
||||
|
||||
# Create mock content item
|
||||
mock_item = ContentItem(
|
||||
content_id="version-1",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Test Agent Test description",
|
||||
metadata={"name": "Test Agent"},
|
||||
)
|
||||
|
||||
# Mock handler
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=[mock_item])
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding",
|
||||
return_value=[0.1] * EMBEDDING_DIM,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding",
|
||||
return_value=True,
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=10)
|
||||
|
||||
# Verify handler was called
|
||||
mock_handler.get_missing_items.assert_called_once_with(10)
|
||||
|
||||
# Verify results
|
||||
assert result["processed"] == 1
|
||||
assert result["success"] == 1
|
||||
assert result["failed"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_ensure_content_embedding_with_schema():
|
||||
"""Test ensuring embeddings exist with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_content_embedding"
|
||||
) as mock_get:
|
||||
# Simulate no existing embedding
|
||||
mock_get.return_value = None
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding"
|
||||
) as mock_generate:
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding"
|
||||
) as mock_store:
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.ensure_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
searchable_text="test text",
|
||||
metadata={"test": "data"},
|
||||
user_id=None,
|
||||
force=False,
|
||||
)
|
||||
|
||||
# Verify the flow
|
||||
assert mock_get.called
|
||||
assert mock_generate.called
|
||||
assert mock_store.called
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backward_compatibility_store_embedding():
|
||||
"""Test backward compatibility wrapper for store_embedding."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding"
|
||||
) as mock_store:
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
tx=None,
|
||||
)
|
||||
|
||||
# Verify it calls the new function with correct parameters
|
||||
assert mock_store.called
|
||||
call_args = mock_store.call_args
|
||||
|
||||
assert call_args[1]["content_type"] == ContentType.STORE_AGENT
|
||||
assert call_args[1]["content_id"] == "test-version-id"
|
||||
assert call_args[1]["user_id"] is None
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backward_compatibility_get_embedding():
|
||||
"""Test backward compatibility wrapper for get_embedding."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_content_embedding"
|
||||
) as mock_get:
|
||||
mock_get.return_value = {
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-version-id",
|
||||
"embedding": "[0.1, 0.2]",
|
||||
"createdAt": "2024-01-01",
|
||||
"updatedAt": "2024-01-01",
|
||||
}
|
||||
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
# Verify it calls the new function
|
||||
assert mock_get.called
|
||||
|
||||
# Verify it transforms to old format
|
||||
assert result is not None
|
||||
assert result["storeListingVersionId"] == "test-version-id"
|
||||
assert "embedding" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_schema_handling_error_cases():
|
||||
"""Test error handling in schema-aware operations."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_client.execute_raw.side_effect = Exception("Database error")
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
searchable_text="test",
|
||||
metadata=None,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Should return False on error, not raise
|
||||
assert result is False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])
@@ -0,0 +1,407 @@
from unittest.mock import AsyncMock, MagicMock, patch

import prisma
import pytest
from prisma import Prisma
from prisma.enums import ContentType

from backend.api.features.store import embeddings


@pytest.fixture(autouse=True)
async def setup_prisma():
    """Setup Prisma client for tests."""
    try:
        Prisma()
    except prisma.errors.ClientAlreadyRegisteredError:
        pass
    yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text():
|
||||
"""Test searchable text building from listing fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="AI Assistant",
|
||||
description="A helpful AI assistant for productivity",
|
||||
sub_heading="Boost your productivity",
|
||||
categories=["AI", "Productivity"],
|
||||
)
|
||||
|
||||
expected = "AI Assistant Boost your productivity A helpful AI assistant for productivity AI Productivity"
|
||||
assert result == expected
|
||||
|
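# Minimal sketch of the concatenation order the test above expects from
# build_searchable_text: name, sub-heading, description, then categories,
# joined with single spaces and with empty fields skipped. This is a local
# illustration, not the production implementation.
def _example_searchable_text(
    name: str, description: str, sub_heading: str, categories: list[str]
) -> str:
    parts = [name, sub_heading, description, *categories]
    return " ".join(p for p in parts if p)


assert (
    _example_searchable_text(
        "AI Assistant",
        "A helpful AI assistant for productivity",
        "Boost your productivity",
        ["AI", "Productivity"],
    )
    == "AI Assistant Boost your productivity A helpful AI assistant for productivity AI Productivity"
)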
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text_empty_fields():
|
||||
"""Test searchable text building with empty fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="", description="Test description", sub_heading="", categories=[]
|
||||
)
|
||||
|
||||
assert result == "Test description"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_success():
|
||||
"""Test successful embedding generation."""
|
||||
# Mock OpenAI response
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.data = [MagicMock()]
|
||||
mock_response.data[0].embedding = [0.1, 0.2, 0.3] * 512 # 1536 dimensions
|
||||
|
||||
# Use AsyncMock for async embeddings.create method
|
||||
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is not None
|
||||
assert len(result) == embeddings.EMBEDDING_DIM
|
||||
assert result[0] == 0.1
|
||||
|
||||
mock_client.embeddings.create.assert_called_once_with(
|
||||
model="text-embedding-3-small", input="test text"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_no_api_key():
|
||||
"""Test embedding generation without API key."""
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = None
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_api_error():
|
||||
"""Test embedding generation with API error."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.embeddings.create = AsyncMock(side_effect=Exception("API Error"))
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_text_truncation():
|
||||
"""Test that long text is properly truncated using tiktoken."""
|
||||
from tiktoken import encoding_for_model
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.data = [MagicMock()]
|
||||
mock_response.data[0].embedding = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Use AsyncMock for async embeddings.create method
|
||||
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
# Create text that will exceed 8191 tokens
|
||||
# Use varied characters to ensure token-heavy text: each word is ~1 token
|
||||
words = [f"word{i}" for i in range(10000)]
|
||||
long_text = " ".join(words) # ~10000 tokens
|
||||
|
||||
await embeddings.generate_embedding(long_text)
|
||||
|
||||
# Verify text was truncated to 8191 tokens
|
||||
call_args = mock_client.embeddings.create.call_args
|
||||
truncated_text = call_args.kwargs["input"]
|
||||
|
||||
# Count actual tokens in truncated text
|
||||
enc = encoding_for_model("text-embedding-3-small")
|
||||
actual_tokens = len(enc.encode(truncated_text))
|
||||
|
||||
# Should be at or just under 8191 tokens
|
||||
assert actual_tokens <= 8191
|
||||
# Should be close to the limit (not over-truncated)
|
||||
assert actual_tokens >= 8100
|
||||
|
||||
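# Sketch of token-based truncation with tiktoken, matching what the test above
# verifies (encode, cut to the token limit, decode back to text). The 8191
# limit comes from the assertions above; the helper itself is illustrative.
def _example_truncate_to_tokens(text: str, max_tokens: int = 8191) -> str:
    from tiktoken import encoding_for_model

    enc = encoding_for_model("text-embedding-3-small")
    tokens = enc.encode(text)
    if len(tokens) <= max_tokens:
        return text
    return enc.decode(tokens[:max_tokens])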
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_embedding_success(mocker):
|
||||
"""Test successful embedding storage."""
|
||||
mock_client = mocker.AsyncMock()
|
||||
mock_client.execute_raw = mocker.AsyncMock()
|
||||
|
||||
embedding = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id", embedding=embedding, tx=mock_client
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# execute_raw is called twice: once for SET search_path, once for INSERT
|
||||
assert mock_client.execute_raw.call_count == 2
|
||||
|
||||
# First call: SET search_path
|
||||
first_call_args = mock_client.execute_raw.call_args_list[0][0]
|
||||
assert "SET search_path" in first_call_args[0]
|
||||
|
||||
# Second call: INSERT query with the actual data
|
||||
second_call_args = mock_client.execute_raw.call_args_list[1][0]
|
||||
assert "test-version-id" in second_call_args
|
||||
assert "[0.1,0.2,0.3]" in second_call_args
|
||||
assert None in second_call_args # userId should be None for store agents
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_embedding_database_error(mocker):
|
||||
"""Test embedding storage with database error."""
|
||||
mock_client = mocker.AsyncMock()
|
||||
mock_client.execute_raw.side_effect = Exception("Database error")
|
||||
|
||||
embedding = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id", embedding=embedding, tx=mock_client
|
||||
)
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_success():
|
||||
"""Test successful embedding retrieval."""
|
||||
mock_result = [
|
||||
{
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-version-id",
|
||||
"userId": None,
|
||||
"embedding": "[0.1,0.2,0.3]",
|
||||
"searchableText": "Test text",
|
||||
"metadata": {},
|
||||
"createdAt": "2024-01-01T00:00:00Z",
|
||||
"updatedAt": "2024-01-01T00:00:00Z",
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_result,
|
||||
):
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
assert result is not None
|
||||
assert result["storeListingVersionId"] == "test-version-id"
|
||||
assert result["embedding"] == "[0.1,0.2,0.3]"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_not_found():
|
||||
"""Test embedding retrieval when not found."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.store_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_already_exists(mock_get, mock_store, mock_generate):
|
||||
"""Test ensure_embedding when embedding already exists."""
|
||||
mock_get.return_value = {"embedding": "[0.1,0.2,0.3]"}
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is True
|
||||
mock_generate.assert_not_called()
|
||||
mock_store.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.store_content_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
|
||||
"""Test ensure_embedding creating new embedding."""
|
||||
mock_get.return_value = None
|
||||
mock_generate.return_value = [0.1, 0.2, 0.3]
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is True
|
||||
mock_generate.assert_called_once_with("Test Test heading Test description test")
|
||||
mock_store.assert_called_once_with(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1, 0.2, 0.3],
|
||||
searchable_text="Test Test heading Test description test",
|
||||
metadata={"name": "Test", "subHeading": "Test heading", "categories": ["test"]},
|
||||
user_id=None,
|
||||
tx=None,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
|
||||
"""Test ensure_embedding when generation fails."""
|
||||
mock_get.return_value = None
|
||||
mock_generate.return_value = None
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_stats():
|
||||
"""Test embedding statistics retrieval."""
|
||||
# Mock handler stats for each content type
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_stats = AsyncMock(
|
||||
return_value={
|
||||
"total": 100,
|
||||
"with_embeddings": 75,
|
||||
"without_embeddings": 25,
|
||||
}
|
||||
)
|
||||
|
||||
# Patch the CONTENT_HANDLERS where it's used (in embeddings module)
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.get_embedding_stats()
|
||||
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
assert result["totals"]["total"] == 100
|
||||
assert result["totals"]["with_embeddings"] == 75
|
||||
assert result["totals"]["without_embeddings"] == 25
|
||||
assert result["totals"]["coverage_percent"] == 75.0
|
||||
|
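# Sketch of the coverage arithmetic asserted above: coverage_percent is simply
# with_embeddings / total * 100 (75 of 100 gives 75.0). Illustrative helper only.
def _example_coverage_percent(total: int, with_embeddings: int) -> float:
    return (with_embeddings / total * 100) if total else 0.0


assert _example_coverage_percent(100, 75) == 75.0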
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.store_content_embedding")
|
||||
async def test_backfill_missing_embeddings_success(mock_store):
|
||||
"""Test backfill with successful embedding generation."""
|
||||
# Mock ContentItem from handlers
|
||||
from backend.api.features.store.content_handlers import ContentItem
|
||||
|
||||
mock_items = [
|
||||
ContentItem(
|
||||
content_id="version-1",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Agent 1 Description 1",
|
||||
metadata={"name": "Agent 1"},
|
||||
),
|
||||
ContentItem(
|
||||
content_id="version-2",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Agent 2 Description 2",
|
||||
metadata={"name": "Agent 2"},
|
||||
),
|
||||
]
|
||||
|
||||
# Mock handler to return missing items
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=mock_items)
|
||||
|
||||
# Mock store_content_embedding to succeed for first, fail for second
|
||||
mock_store.side_effect = [True, False]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding",
|
||||
return_value=[0.1] * embeddings.EMBEDDING_DIM,
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=5)
|
||||
|
||||
assert result["processed"] == 2
|
||||
assert result["success"] == 1
|
||||
assert result["failed"] == 1
|
||||
assert mock_store.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_backfill_missing_embeddings_no_missing():
|
||||
"""Test backfill when no embeddings are missing."""
|
||||
# Mock handler to return no missing items
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=[])
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=5)
|
||||
|
||||
assert result["processed"] == 0
|
||||
assert result["success"] == 0
|
||||
assert result["failed"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_embedding_to_vector_string():
|
||||
"""Test embedding to PostgreSQL vector string conversion."""
|
||||
embedding = [0.1, 0.2, 0.3, -0.4]
|
||||
result = embeddings.embedding_to_vector_string(embedding)
|
||||
assert result == "[0.1,0.2,0.3,-0.4]"
|
||||
|
||||
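# Sketch of the pgvector literal format checked above: a comma-separated list
# of floats inside square brackets, with no spaces. Illustrative only; the
# production conversion lives in embeddings.embedding_to_vector_string.
def _example_vector_literal(values: list[float]) -> str:
    return "[" + ",".join(str(v) for v in values) + "]"


assert _example_vector_literal([0.1, 0.2, 0.3, -0.4]) == "[0.1,0.2,0.3,-0.4]"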
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_embed_query():
|
||||
"""Test embed_query function (alias for generate_embedding)."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding"
|
||||
) as mock_generate:
|
||||
mock_generate.return_value = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.embed_query("test query")
|
||||
|
||||
assert result == [0.1, 0.2, 0.3]
|
||||
mock_generate.assert_called_once_with("test query")
|
||||
@@ -0,0 +1,625 @@
"""
Unified Hybrid Search

Combines semantic (embedding) search with lexical (tsvector) search
for improved relevance across all content types (agents, blocks, docs).
"""

import logging
from dataclasses import dataclass
from typing import Any, Literal

from prisma.enums import ContentType

from backend.api.features.store.embeddings import (
    EMBEDDING_DIM,
    embed_query,
    embedding_to_vector_string,
)
from backend.data.db import query_raw_with_schema

logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class UnifiedSearchWeights:
    """Weights for unified search (no popularity signal)."""

    semantic: float = 0.40  # Embedding cosine similarity
    lexical: float = 0.40  # tsvector ts_rank_cd score
    category: float = 0.10  # Category match boost (for types that have categories)
    recency: float = 0.10  # Newer content ranked higher

    def __post_init__(self):
        """Validate weights are non-negative and sum to approximately 1.0."""
        total = self.semantic + self.lexical + self.category + self.recency

        if any(
            w < 0 for w in [self.semantic, self.lexical, self.category, self.recency]
        ):
            raise ValueError("All weights must be non-negative")

        if not (0.99 <= total <= 1.01):
            raise ValueError(f"Weights must sum to ~1.0, got {total:.3f}")


# Default weights for unified search
DEFAULT_UNIFIED_WEIGHTS = UnifiedSearchWeights()

# Minimum relevance score thresholds
DEFAULT_MIN_SCORE = 0.15  # For unified search (more permissive)
DEFAULT_STORE_AGENT_MIN_SCORE = 0.20  # For store agent search (original threshold)
|
||||
|
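# Example of the validation behaviour enforced by __post_init__ above: weights
# that are negative or that do not sum to ~1.0 raise ValueError at construction
# time. The literal values below are illustrative.
#
#   UnifiedSearchWeights(semantic=0.5, lexical=0.5, category=0.0, recency=0.0)  # ok
#   UnifiedSearchWeights(semantic=0.9, lexical=0.5, category=0.0, recency=0.0)  # raises ValueError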
||||
|
||||
async def unified_hybrid_search(
    query: str,
    content_types: list[ContentType] | None = None,
    category: str | None = None,
    page: int = 1,
    page_size: int = 20,
    weights: UnifiedSearchWeights | None = None,
    min_score: float | None = None,
    user_id: str | None = None,
) -> tuple[list[dict[str, Any]], int]:
    """
    Unified hybrid search across all content types.

    Searches UnifiedContentEmbedding using both semantic (vector) and lexical (tsvector) signals.

    Args:
        query: Search query string
        content_types: List of content types to search. Defaults to all public types.
        category: Filter by category (for content types that support it)
        page: Page number (1-indexed)
        page_size: Results per page
        weights: Custom weights for search signals
        min_score: Minimum relevance score threshold (0-1)
        user_id: User ID for searching private content (library agents)

    Returns:
        Tuple of (results list, total count)
    """
|
||||
# Validate inputs
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return [], 0
|
||||
|
||||
if page < 1:
|
||||
page = 1
|
||||
if page_size < 1:
|
||||
page_size = 1
|
||||
if page_size > 100:
|
||||
page_size = 100
|
||||
|
||||
if content_types is None:
|
||||
content_types = [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
if weights is None:
|
||||
weights = DEFAULT_UNIFIED_WEIGHTS
|
||||
if min_score is None:
|
||||
min_score = DEFAULT_MIN_SCORE
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
# Graceful degradation if embedding unavailable
|
||||
if query_embedding is None or not query_embedding:
|
||||
logger.warning(
|
||||
"Failed to generate query embedding - falling back to lexical-only search. "
|
||||
"Check that openai_internal_api_key is configured and OpenAI API is accessible."
|
||||
)
|
||||
query_embedding = [0.0] * EMBEDDING_DIM
|
||||
# Redistribute semantic weight to lexical
|
||||
total_non_semantic = weights.lexical + weights.category + weights.recency
|
||||
if total_non_semantic > 0:
|
||||
factor = 1.0 / total_non_semantic
|
||||
weights = UnifiedSearchWeights(
|
||||
semantic=0.0,
|
||||
lexical=weights.lexical * factor,
|
||||
category=weights.category * factor,
|
||||
recency=weights.recency * factor,
|
||||
)
|
||||
else:
|
||||
weights = UnifiedSearchWeights(
|
||||
semantic=0.0, lexical=1.0, category=0.0, recency=0.0
|
||||
)
|
||||
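    # Worked example of the redistribution above (values illustrative): with the
    # default weights (semantic=0.40, lexical=0.40, category=0.10, recency=0.10),
    # dropping the semantic signal leaves 0.60 of weight, so factor = 1 / 0.60 and
    # the fallback weights become lexical ~0.667, category ~0.167, recency ~0.167,
    # which still sum to ~1.0 and pass __post_init__ validation.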
|
||||
# Build parameters
|
||||
params: list[Any] = []
|
||||
param_idx = 1
|
||||
|
||||
# Query for lexical search
|
||||
params.append(query)
|
||||
query_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Query lowercase for category matching
|
||||
params.append(query.lower())
|
||||
query_lower_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Embedding
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
params.append(embedding_str)
|
||||
embedding_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Content types
|
||||
content_type_values = [ct.value for ct in content_types]
|
||||
params.append(content_type_values)
|
||||
content_types_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# User ID filter (for private content)
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
params.append(user_id)
|
||||
user_filter = f'AND (uce."userId" = ${param_idx} OR uce."userId" IS NULL)'
|
||||
param_idx += 1
|
||||
else:
|
||||
user_filter = 'AND uce."userId" IS NULL'
|
||||
|
||||
# Weights
|
||||
params.append(weights.semantic)
|
||||
w_semantic = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.lexical)
|
||||
w_lexical = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.category)
|
||||
w_category = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.recency)
|
||||
w_recency = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Min score
|
||||
params.append(min_score)
|
||||
min_score_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Pagination
|
||||
params.append(page_size)
|
||||
limit_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(offset)
|
||||
offset_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Unified search query on UnifiedContentEmbedding
|
||||
sql_query = f"""
|
||||
WITH candidates AS (
|
||||
-- Lexical matches (uses GIN index on search column)
|
||||
SELECT uce.id, uce."contentType", uce."contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||
{user_filter}
|
||||
AND uce.search @@ plainto_tsquery('english', {query_param})
|
||||
|
||||
UNION
|
||||
|
||||
-- Semantic matches (uses HNSW index on embedding)
|
||||
(
|
||||
SELECT uce.id, uce."contentType", uce."contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||
{user_filter}
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
)
|
||||
),
|
||||
search_scores AS (
|
||||
SELECT
|
||||
uce."contentType" as content_type,
|
||||
uce."contentId" as content_id,
|
||||
uce."searchableText" as searchable_text,
|
||||
uce.metadata,
|
||||
uce."updatedAt" as updated_at,
|
||||
-- Semantic score: cosine similarity (1 - distance)
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score: ts_rank_cd
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match from metadata
|
||||
CASE
|
||||
WHEN uce.metadata ? 'categories' AND EXISTS (
|
||||
SELECT 1 FROM jsonb_array_elements_text(uce.metadata->'categories') cat
|
||||
WHERE LOWER(cat) LIKE '%' || {query_lower_param} || '%'
|
||||
)
|
||||
THEN 1.0
|
||||
ELSE 0.0
|
||||
END as category_score,
|
||||
-- Recency score: linear decay over 90 days
|
||||
GREATEST(0, 1 - EXTRACT(EPOCH FROM (NOW() - uce."updatedAt")) / (90 * 24 * 3600)) as recency_score
|
||||
FROM candidates c
|
||||
INNER JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce ON c.id = uce.id
|
||||
),
|
||||
max_lexical AS (
|
||||
SELECT GREATEST(MAX(lexical_raw), 0.001) as max_val FROM search_scores
|
||||
),
|
||||
normalized AS (
|
||||
SELECT
|
||||
ss.*,
|
||||
ss.lexical_raw / ml.max_val as lexical_score
|
||||
FROM search_scores ss
|
||||
CROSS JOIN max_lexical ml
|
||||
),
|
||||
scored AS (
|
||||
SELECT
|
||||
content_type,
|
||||
content_id,
|
||||
searchable_text,
|
||||
metadata,
|
||||
updated_at,
|
||||
semantic_score,
|
||||
lexical_score,
|
||||
category_score,
|
||||
recency_score,
|
||||
(
|
||||
{w_semantic} * semantic_score +
|
||||
{w_lexical} * lexical_score +
|
||||
{w_category} * category_score +
|
||||
{w_recency} * recency_score
|
||||
) as combined_score
|
||||
FROM normalized
|
||||
),
|
||||
filtered AS (
|
||||
SELECT
|
||||
*,
|
||||
COUNT(*) OVER () as total_count
|
||||
FROM scored
|
||||
WHERE combined_score >= {min_score_param}
|
||||
)
|
||||
SELECT * FROM filtered
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
results = await query_raw_with_schema(
|
||||
sql_query, *params, set_public_search_path=True
|
||||
)
|
||||
|
||||
total = results[0]["total_count"] if results else 0
|
||||
|
||||
# Clean up results
|
||||
for result in results:
|
||||
result.pop("total_count", None)
|
||||
|
||||
logger.info(f"Unified hybrid search: {len(results)} results, {total} total")
|
||||
|
||||
return results, total
|
||||
|
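# Pure-Python mirror of the combined_score expression in the SQL above, useful
# as a reference for how the four signals are blended. Inputs are assumed to be
# already normalised to [0, 1]; this helper is illustrative and is not used by
# the query itself.
def _combined_score_example(
    semantic: float,
    lexical: float,
    category: float,
    recency: float,
    weights: UnifiedSearchWeights = DEFAULT_UNIFIED_WEIGHTS,
) -> float:
    return (
        weights.semantic * semantic
        + weights.lexical * lexical
        + weights.category * category
        + weights.recency * recency
    )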
||||
|
||||
# ============================================================================
# Store Agent specific search (with full metadata)
# ============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class StoreAgentSearchWeights:
|
||||
"""Weights for store agent search including popularity."""
|
||||
|
||||
semantic: float = 0.30
|
||||
lexical: float = 0.30
|
||||
category: float = 0.20
|
||||
recency: float = 0.10
|
||||
popularity: float = 0.10
|
||||
|
||||
def __post_init__(self):
|
||||
total = (
|
||||
self.semantic
|
||||
+ self.lexical
|
||||
+ self.category
|
||||
+ self.recency
|
||||
+ self.popularity
|
||||
)
|
||||
if any(
|
||||
w < 0
|
||||
for w in [
|
||||
self.semantic,
|
||||
self.lexical,
|
||||
self.category,
|
||||
self.recency,
|
||||
self.popularity,
|
||||
]
|
||||
):
|
||||
raise ValueError("All weights must be non-negative")
|
||||
if not (0.99 <= total <= 1.01):
|
||||
raise ValueError(f"Weights must sum to ~1.0, got {total:.3f}")
|
||||
|
||||
|
||||
DEFAULT_STORE_AGENT_WEIGHTS = StoreAgentSearchWeights()
|
||||
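# Sketch of the popularity normalisation used in the store-agent SQL below:
# run counts are log-scaled relative to the most-run agent in the candidate
# set, so popularity lands in [0, 1] without letting very large run counts
# dominate. The helper is illustrative only.
def _popularity_score_example(runs: int, max_runs: int) -> float:
    from math import log1p

    if runs <= 0:
        return 0.0
    return log1p(runs) / log1p(max(max_runs, 1))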
|
||||
|
||||
async def hybrid_search(
|
||||
query: str,
|
||||
featured: bool = False,
|
||||
creators: list[str] | None = None,
|
||||
category: str | None = None,
|
||||
sorted_by: (
|
||||
Literal["relevance", "rating", "runs", "name", "updated_at"] | None
|
||||
) = None,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
weights: StoreAgentSearchWeights | None = None,
|
||||
min_score: float | None = None,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""
|
||||
Hybrid search for store agents with full metadata.
|
||||
|
||||
Uses UnifiedContentEmbedding for search, joins to StoreAgent for metadata.
|
||||
"""
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return [], 0
|
||||
|
||||
if page < 1:
|
||||
page = 1
|
||||
if page_size < 1:
|
||||
page_size = 1
|
||||
if page_size > 100:
|
||||
page_size = 100
|
||||
|
||||
if weights is None:
|
||||
weights = DEFAULT_STORE_AGENT_WEIGHTS
|
||||
if min_score is None:
|
||||
min_score = (
|
||||
DEFAULT_STORE_AGENT_MIN_SCORE # Use original threshold for store agents
|
||||
)
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
# Graceful degradation
|
||||
if query_embedding is None or not query_embedding:
|
||||
logger.warning(
|
||||
"Failed to generate query embedding - falling back to lexical-only search."
|
||||
)
|
||||
query_embedding = [0.0] * EMBEDDING_DIM
|
||||
total_non_semantic = (
|
||||
weights.lexical + weights.category + weights.recency + weights.popularity
|
||||
)
|
||||
if total_non_semantic > 0:
|
||||
factor = 1.0 / total_non_semantic
|
||||
weights = StoreAgentSearchWeights(
|
||||
semantic=0.0,
|
||||
lexical=weights.lexical * factor,
|
||||
category=weights.category * factor,
|
||||
recency=weights.recency * factor,
|
||||
popularity=weights.popularity * factor,
|
||||
)
|
||||
else:
|
||||
weights = StoreAgentSearchWeights(
|
||||
semantic=0.0, lexical=1.0, category=0.0, recency=0.0, popularity=0.0
|
||||
)
|
||||
|
||||
# Build parameters
|
||||
params: list[Any] = []
|
||||
param_idx = 1
|
||||
|
||||
params.append(query)
|
||||
query_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(query.lower())
|
||||
query_lower_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
params.append(embedding_str)
|
||||
embedding_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Build WHERE clause for StoreAgent filters
|
||||
where_parts = ["sa.is_available = true"]
|
||||
|
||||
if featured:
|
||||
where_parts.append("sa.featured = true")
|
||||
|
||||
if creators:
|
||||
params.append(creators)
|
||||
where_parts.append(f"sa.creator_username = ANY(${param_idx})")
|
||||
param_idx += 1
|
||||
|
||||
if category:
|
||||
params.append(category)
|
||||
where_parts.append(f"${param_idx} = ANY(sa.categories)")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(where_parts)
|
||||
|
||||
# Weights
|
||||
params.append(weights.semantic)
|
||||
w_semantic = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.lexical)
|
||||
w_lexical = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.category)
|
||||
w_category = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.recency)
|
||||
w_recency = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.popularity)
|
||||
w_popularity = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(min_score)
|
||||
min_score_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(page_size)
|
||||
limit_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(offset)
|
||||
offset_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Query using UnifiedContentEmbedding for search, StoreAgent for metadata
|
||||
sql_query = f"""
|
||||
WITH candidates AS (
|
||||
-- Lexical matches via UnifiedContentEmbedding.search
|
||||
SELECT uce."contentId" as "storeListingVersionId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON uce."contentId" = sa."storeListingVersionId"
|
||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
AND uce."userId" IS NULL
|
||||
AND uce.search @@ plainto_tsquery('english', {query_param})
|
||||
AND {where_clause}
|
||||
|
||||
UNION
|
||||
|
||||
-- Semantic matches via UnifiedContentEmbedding.embedding
|
||||
SELECT uce."contentId" as "storeListingVersionId"
|
||||
FROM (
|
||||
SELECT uce."contentId", uce.embedding
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON uce."contentId" = sa."storeListingVersionId"
|
||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
AND uce."userId" IS NULL
|
||||
AND {where_clause}
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
) uce
|
||||
),
|
||||
search_scores AS (
|
||||
SELECT
|
||||
sa.slug,
|
||||
sa.agent_name,
|
||||
sa.agent_image,
|
||||
sa.creator_username,
|
||||
sa.creator_avatar,
|
||||
sa.sub_heading,
|
||||
sa.description,
|
||||
sa.runs,
|
||||
sa.rating,
|
||||
sa.categories,
|
||||
sa.featured,
|
||||
sa.is_available,
|
||||
sa.updated_at,
|
||||
-- Semantic score
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score (raw, will normalize)
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match
|
||||
CASE
|
||||
WHEN EXISTS (
|
||||
SELECT 1 FROM unnest(sa.categories) cat
|
||||
WHERE LOWER(cat) LIKE '%' || {query_lower_param} || '%'
|
||||
)
|
||||
THEN 1.0
|
||||
ELSE 0.0
|
||||
END as category_score,
|
||||
-- Recency
|
||||
GREATEST(0, 1 - EXTRACT(EPOCH FROM (NOW() - sa.updated_at)) / (90 * 24 * 3600)) as recency_score,
|
||||
-- Popularity (raw)
|
||||
sa.runs as popularity_raw
|
||||
FROM candidates c
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON c."storeListingVersionId" = sa."storeListingVersionId"
|
||||
INNER JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
ON sa."storeListingVersionId" = uce."contentId"
|
||||
AND uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
),
|
||||
max_vals AS (
|
||||
SELECT
|
||||
GREATEST(MAX(lexical_raw), 0.001) as max_lexical,
|
||||
GREATEST(MAX(popularity_raw), 1) as max_popularity
|
||||
FROM search_scores
|
||||
),
|
||||
normalized AS (
|
||||
SELECT
|
||||
ss.*,
|
||||
ss.lexical_raw / mv.max_lexical as lexical_score,
|
||||
CASE
|
||||
WHEN ss.popularity_raw > 0
|
||||
THEN LN(1 + ss.popularity_raw) / LN(1 + mv.max_popularity)
|
||||
ELSE 0
|
||||
END as popularity_score
|
||||
FROM search_scores ss
|
||||
CROSS JOIN max_vals mv
|
||||
),
|
||||
scored AS (
|
||||
SELECT
|
||||
slug,
|
||||
agent_name,
|
||||
agent_image,
|
||||
creator_username,
|
||||
creator_avatar,
|
||||
sub_heading,
|
||||
description,
|
||||
runs,
|
||||
rating,
|
||||
categories,
|
||||
featured,
|
||||
is_available,
|
||||
updated_at,
|
||||
semantic_score,
|
||||
lexical_score,
|
||||
category_score,
|
||||
recency_score,
|
||||
popularity_score,
|
||||
(
|
||||
{w_semantic} * semantic_score +
|
||||
{w_lexical} * lexical_score +
|
||||
{w_category} * category_score +
|
||||
{w_recency} * recency_score +
|
||||
{w_popularity} * popularity_score
|
||||
) as combined_score
|
||||
FROM normalized
|
||||
),
|
||||
filtered AS (
|
||||
SELECT *, COUNT(*) OVER () as total_count
|
||||
FROM scored
|
||||
WHERE combined_score >= {min_score_param}
|
||||
)
|
||||
SELECT * FROM filtered
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
results = await query_raw_with_schema(
|
||||
sql_query, *params, set_public_search_path=True
|
||||
)
|
||||
|
||||
total = results[0]["total_count"] if results else 0
|
||||
|
||||
for result in results:
|
||||
result.pop("total_count", None)
|
||||
|
||||
logger.info(f"Hybrid search (store agents): {len(results)} results, {total} total")
|
||||
|
||||
return results, total
|
||||
|
||||
|
||||
async def hybrid_search_simple(
    query: str,
    page: int = 1,
    page_size: int = 20,
) -> tuple[list[dict[str, Any]], int]:
    """Simplified hybrid search for store agents."""
    return await hybrid_search(query=query, page=page, page_size=page_size)


# Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
# for existing code that expects the popularity parameter
HybridSearchWeights = StoreAgentSearchWeights
|
||||
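# Illustrative usage sketch: calling the store-agent search with filters. The
# filter values below are made up; the function returns (rows, total) where
# each row carries StoreAgent columns plus the per-signal scores selected above.
#
#   results, total = await hybrid_search(
#       query="productivity",
#       featured=True,
#       category="productivity",
#       page=1,
#       page_size=10,
#   )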
@@ -0,0 +1,667 @@
"""
Integration tests for hybrid search with schema handling.

These tests verify that hybrid search works correctly across different database schemas.
"""

from unittest.mock import patch

import pytest
from prisma.enums import ContentType

from backend.api.features.store import embeddings
from backend.api.features.store.hybrid_search import (
    HybridSearchWeights,
    UnifiedSearchWeights,
    hybrid_search,
    unified_hybrid_search,
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_schema_handling():
|
||||
"""Test that hybrid search correctly handles database schema prefixes."""
|
||||
# Test with a mock query to ensure schema handling works
|
||||
query = "test agent"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Mock the query result
|
||||
mock_query.return_value = [
|
||||
{
|
||||
"slug": "test/agent",
|
||||
"agent_name": "Test Agent",
|
||||
"agent_image": "test.png",
|
||||
"creator_username": "test",
|
||||
"creator_avatar": "avatar.png",
|
||||
"sub_heading": "Test sub-heading",
|
||||
"description": "Test description",
|
||||
"runs": 10,
|
||||
"rating": 4.5,
|
||||
"categories": ["test"],
|
||||
"featured": False,
|
||||
"is_available": True,
|
||||
"updated_at": "2024-01-01T00:00:00Z",
|
||||
"combined_score": 0.8,
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.6,
|
||||
"category_score": 0.5,
|
||||
"recency_score": 0.4,
|
||||
"total_count": 1,
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM # Mock embedding
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query=query,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_query.called
|
||||
# Verify the SQL template uses schema_prefix placeholder
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
assert "{schema_prefix}" in sql_template
|
||||
|
||||
# Verify results
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
assert results[0]["slug"] == "test/agent"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_public_schema():
|
||||
"""Test hybrid search when using public schema (no prefix needed)."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "public"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the mock was set up correctly
|
||||
assert mock_schema.return_value == "public"
|
||||
|
||||
# Results should work even with empty results
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_custom_schema():
|
||||
"""Test hybrid search when using custom schema (e.g., 'platform')."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the mock was set up correctly
|
||||
assert mock_schema.return_value == "platform"
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_without_embeddings():
|
||||
"""Test hybrid search gracefully degrades when embeddings are unavailable."""
|
||||
# Mock database to return some results
|
||||
mock_results = [
|
||||
{
|
||||
"slug": "test-agent",
|
||||
"agent_name": "Test Agent",
|
||||
"agent_image": "test.png",
|
||||
"creator_username": "creator",
|
||||
"creator_avatar": "avatar.png",
|
||||
"sub_heading": "Test heading",
|
||||
"description": "Test description",
|
||||
"runs": 100,
|
||||
"rating": 4.5,
|
||||
"categories": ["AI"],
|
||||
"featured": False,
|
||||
"is_available": True,
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.0, # Zero because no embedding
|
||||
"lexical_score": 0.5,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.1,
|
||||
"popularity_score": 0.2,
|
||||
"combined_score": 0.3,
|
||||
"total_count": 1,
|
||||
}
|
||||
]
|
||||
|
||||
with patch("backend.api.features.store.hybrid_search.embed_query") as mock_embed:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Simulate embedding failure
|
||||
mock_embed.return_value = None
|
||||
mock_query.return_value = mock_results
|
||||
|
||||
# Should NOT raise - graceful degradation
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify it returns results even without embeddings
|
||||
assert len(results) == 1
|
||||
assert results[0]["slug"] == "test-agent"
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_filters():
|
||||
"""Test hybrid search with various filters."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test with featured filter
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
featured=True,
|
||||
creators=["user1", "user2"],
|
||||
category="productivity",
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Verify filters were applied in the query
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:] # Skip SQL template
|
||||
|
||||
# Should have query, query_lower, creators array, category
|
||||
assert len(params) >= 4
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_weights():
|
||||
"""Test hybrid search with custom weights."""
|
||||
custom_weights = HybridSearchWeights(
|
||||
semantic=0.5,
|
||||
lexical=0.3,
|
||||
category=0.1,
|
||||
recency=0.1,
|
||||
popularity=0.0,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
weights=custom_weights,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify custom weights were used in the query
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:] # Get all parameters passed
|
||||
|
||||
# Check that SQL uses parameterized weights (not f-string interpolation)
|
||||
assert "$" in sql_template # Verify parameterization is used
|
||||
|
||||
# Check that custom weights are in the params
|
||||
assert 0.5 in params # semantic weight
|
||||
assert 0.3 in params # lexical weight
|
||||
assert 0.1 in params # category and recency weights
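
The assertions above only check that the weights travel as bind parameters (the "$n" placeholders) rather than being interpolated into the SQL string. As a rough illustration of why that matters, here is a hypothetical way such a weighted score could be assembled with numbered placeholders; the actual query text in hybrid_search.py is not shown in this hunk and will differ:

# Hypothetical sketch only: build a combined-score expression using $n placeholders
# so weight values are sent as bind parameters, never spliced into the SQL text.
def build_score_sql(first_param_index: int = 1) -> tuple[str, list[str]]:
    components = ["semantic_score", "lexical_score", "category_score", "recency_score"]
    terms = [
        f"${first_param_index + i} * {column}" for i, column in enumerate(components)
    ]
    return " + ".join(terms), components


sql_fragment, order = build_score_sql()
# "$1 * semantic_score + $2 * lexical_score + $3 * category_score + $4 * recency_score"
# The weight values (e.g. 0.5, 0.3, 0.1, 0.1) are then appended to the parameter list
# in the same order, which is exactly what the assertions above look for.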
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_min_score_filtering():
|
||||
"""Test hybrid search minimum score threshold."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Return results with varying scores
|
||||
mock_query.return_value = [
|
||||
{
|
||||
"slug": "high-score/agent",
|
||||
"agent_name": "High Score Agent",
|
||||
"combined_score": 0.8,
|
||||
"total_count": 1,
|
||||
# ... other fields
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test with custom min_score
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
min_score=0.5, # High threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify min_score was applied in query
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:] # Get all parameters
|
||||
|
||||
# Check that SQL uses parameterized min_score
|
||||
assert "combined_score >=" in sql_template
|
||||
assert "$" in sql_template # Verify parameterization
|
||||
|
||||
# Check that custom min_score is in the params
|
||||
assert 0.5 in params
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_pagination():
|
||||
"""Test hybrid search pagination."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test page 2 with page_size 10
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=2,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Verify pagination parameters
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0]
|
||||
|
||||
# Last two params should be LIMIT and OFFSET
|
||||
limit = params[-2]
|
||||
offset = params[-1]
|
||||
|
||||
assert limit == 10 # page_size
|
||||
assert offset == 10 # (page - 1) * page_size = (2 - 1) * 10
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_error_handling():
|
||||
"""Test hybrid search error handling."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Simulate database error
|
||||
mock_query.side_effect = Exception("Database connection error")
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Should raise exception
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert "Database connection error" in str(exc_info.value)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Unified Hybrid Search Tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_basic():
|
||||
"""Test basic unified hybrid search across all content types."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "STORE_AGENT",
|
||||
"content_id": "agent-1",
|
||||
"searchable_text": "Test Agent Description",
|
||||
"metadata": {"name": "Test Agent"},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.5,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.6,
|
||||
"total_count": 2,
|
||||
},
|
||||
{
|
||||
"content_type": "BLOCK",
|
||||
"content_id": "block-1",
|
||||
"searchable_text": "Test Block Description",
|
||||
"metadata": {"name": "Test Block"},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.6,
|
||||
"lexical_score": 0.7,
|
||||
"category_score": 0.4,
|
||||
"recency_score": 0.2,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 2,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert len(results) == 2
|
||||
assert total == 2
|
||||
assert results[0]["content_type"] == "STORE_AGENT"
|
||||
assert results[1]["content_type"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_filter_by_content_type():
|
||||
"""Test unified search filtering by specific content types."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "BLOCK",
|
||||
"content_id": "block-1",
|
||||
"searchable_text": "Test Block",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify content_types parameter was passed correctly
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:]
|
||||
# The content types should be in the params as a list
|
||||
assert ["BLOCK"] in params
|
||||
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_with_user_id():
|
||||
"""Test unified search with user_id for private content."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "STORE_AGENT",
|
||||
"content_id": "agent-1",
|
||||
"searchable_text": "My Private Agent",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.6,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
user_id="user-123",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify SQL contains user_id filter
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:]
|
||||
|
||||
assert 'uce."userId"' in sql_template
|
||||
assert "user-123" in params
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_custom_weights():
|
||||
"""Test unified search with custom weights."""
|
||||
custom_weights = UnifiedSearchWeights(
|
||||
semantic=0.6,
|
||||
lexical=0.2,
|
||||
category=0.1,
|
||||
recency=0.1,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
weights=custom_weights,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify custom weights are in parameters
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:]
|
||||
|
||||
assert 0.6 in params # semantic weight
|
||||
assert 0.2 in params # lexical weight
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_graceful_degradation():
|
||||
"""Test unified search gracefully degrades when embeddings unavailable."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "DOCUMENTATION",
|
||||
"content_id": "doc-1",
|
||||
"searchable_text": "API Documentation",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.0, # Zero because no embedding
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.2,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = None # Embedding failure
|
||||
|
||||
# Should NOT raise - graceful degradation
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_empty_query():
|
||||
"""Test unified search with empty query returns empty results."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_pagination():
|
||||
"""Test unified search pagination."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=3,
|
||||
page_size=15,
|
||||
)
|
||||
|
||||
# Verify pagination parameters (last two params are LIMIT and OFFSET)
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0]
|
||||
|
||||
limit = params[-2]
|
||||
offset = params[-1]
|
||||
|
||||
assert limit == 15 # page_size
|
||||
assert offset == 30 # (page - 1) * page_size = (3 - 1) * 15
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_schema_prefix():
|
||||
"""Test unified search uses schema_prefix placeholder."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
|
||||
# Verify schema_prefix placeholder is used for table references
|
||||
assert "{schema_prefix}" in sql_template
|
||||
assert '"UnifiedContentEmbedding"' in sql_template
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
@@ -110,6 +110,7 @@ class Profile(pydantic.BaseModel):
|
||||
|
||||
|
||||
class StoreSubmission(pydantic.BaseModel):
|
||||
listing_id: str
|
||||
agent_id: str
|
||||
agent_version: int
|
||||
name: str
|
||||
@@ -164,8 +165,12 @@ class StoreListingsWithVersionsResponse(pydantic.BaseModel):
|
||||
|
||||
|
||||
class StoreSubmissionRequest(pydantic.BaseModel):
|
||||
agent_id: str
|
||||
agent_version: int
|
||||
agent_id: str = pydantic.Field(
|
||||
..., min_length=1, description="Agent ID cannot be empty"
|
||||
)
|
||||
agent_version: int = pydantic.Field(
|
||||
..., gt=0, description="Agent version must be greater than 0"
|
||||
)
|
||||
slug: str
|
||||
name: str
|
||||
sub_heading: str
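
With the constrained fields above, invalid submissions now fail at model construction time. A small illustration using a stand-in model that mirrors just the two constrained fields (it is not the real StoreSubmissionRequest, which has more fields than this hunk shows):

# Stand-in model mirroring the two constrained fields above, to show the
# validation behavior; not the real StoreSubmissionRequest.
import pydantic


class _SubmissionIds(pydantic.BaseModel):
    agent_id: str = pydantic.Field(..., min_length=1)
    agent_version: int = pydantic.Field(..., gt=0)


_SubmissionIds(agent_id="agent123", agent_version=1)  # OK

try:
    _SubmissionIds(agent_id="", agent_version=0)
except pydantic.ValidationError as err:
    # Both constraints are reported: empty agent_id and non-positive agent_version.
    assert len(err.errors()) == 2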
|
||||
@@ -216,3 +221,23 @@ class ReviewSubmissionRequest(pydantic.BaseModel):
|
||||
is_approved: bool
|
||||
comments: str # External comments visible to creator
|
||||
internal_comments: str | None = None # Private admin notes
|
||||
|
||||
|
||||
class UnifiedSearchResult(pydantic.BaseModel):
|
||||
"""A single result from unified hybrid search across all content types."""
|
||||
|
||||
content_type: str # STORE_AGENT, BLOCK, DOCUMENTATION
|
||||
content_id: str
|
||||
searchable_text: str
|
||||
metadata: dict | None = None
|
||||
updated_at: datetime.datetime | None = None
|
||||
combined_score: float | None = None
|
||||
semantic_score: float | None = None
|
||||
lexical_score: float | None = None
|
||||
|
||||
|
||||
class UnifiedSearchResponse(pydantic.BaseModel):
|
||||
"""Response model for unified search across all content types."""
|
||||
|
||||
results: list[UnifiedSearchResult]
|
||||
pagination: Pagination
|
||||
|
||||
@@ -138,6 +138,7 @@ def test_creator_details():
|
||||
|
||||
def test_store_submission():
|
||||
submission = store_model.StoreSubmission(
|
||||
listing_id="listing123",
|
||||
agent_id="agent123",
|
||||
agent_version=1,
|
||||
sub_heading="Test subheading",
|
||||
@@ -159,6 +160,7 @@ def test_store_submissions_response():
|
||||
response = store_model.StoreSubmissionsResponse(
|
||||
submissions=[
|
||||
store_model.StoreSubmission(
|
||||
listing_id="listing123",
|
||||
agent_id="agent123",
|
||||
agent_version=1,
|
||||
sub_heading="Test subheading",
|
||||
|
||||
@@ -7,12 +7,15 @@ from typing import Literal
|
||||
import autogpt_libs.auth
|
||||
import fastapi
|
||||
import fastapi.responses
|
||||
import prisma.enums
|
||||
|
||||
import backend.data.graph
|
||||
import backend.util.json
|
||||
from backend.util.models import Pagination
|
||||
|
||||
from . import cache as store_cache
|
||||
from . import db as store_db
|
||||
from . import hybrid_search as store_hybrid_search
|
||||
from . import image_gen as store_image_gen
|
||||
from . import media as store_media
|
||||
from . import model as store_model
|
||||
@@ -146,6 +149,102 @@ async def get_agents(
|
||||
return agents
|
||||
|
||||
|
||||
##############################################
|
||||
############### Search Endpoints #############
|
||||
##############################################
|
||||
|
||||
|
||||
@router.get(
|
||||
"/search",
|
||||
summary="Unified search across all content types",
|
||||
tags=["store", "public"],
|
||||
response_model=store_model.UnifiedSearchResponse,
|
||||
)
|
||||
async def unified_search(
|
||||
query: str,
|
||||
content_types: list[str] | None = fastapi.Query(
|
||||
default=None,
|
||||
description="Content types to search: STORE_AGENT, BLOCK, DOCUMENTATION. If not specified, searches all.",
|
||||
),
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
user_id: str | None = fastapi.Security(
|
||||
autogpt_libs.auth.get_optional_user_id, use_cache=False
|
||||
),
|
||||
):
|
||||
"""
|
||||
Search across all content types (store agents, blocks, documentation) using hybrid search.
|
||||
|
||||
Combines semantic (embedding-based) and lexical (text-based) search for best results.
|
||||
|
||||
Args:
|
||||
query: The search query string
|
||||
content_types: Optional list of content types to filter by (STORE_AGENT, BLOCK, DOCUMENTATION)
|
||||
page: Page number for pagination (default 1)
|
||||
page_size: Number of results per page (default 20)
|
||||
user_id: Optional authenticated user ID (for user-scoped content in future)
|
||||
|
||||
Returns:
|
||||
UnifiedSearchResponse: Paginated list of search results with relevance scores
|
||||
"""
|
||||
if page < 1:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422, detail="Page must be greater than 0"
|
||||
)
|
||||
|
||||
if page_size < 1:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422, detail="Page size must be greater than 0"
|
||||
)
|
||||
|
||||
# Convert string content types to enum
|
||||
content_type_enums: list[prisma.enums.ContentType] | None = None
|
||||
if content_types:
|
||||
try:
|
||||
content_type_enums = [prisma.enums.ContentType(ct) for ct in content_types]
|
||||
except ValueError as e:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Invalid content type. Valid values: STORE_AGENT, BLOCK, DOCUMENTATION. Error: {e}",
|
||||
)
|
||||
|
||||
# Perform unified hybrid search
|
||||
results, total = await store_hybrid_search.unified_hybrid_search(
|
||||
query=query,
|
||||
content_types=content_type_enums,
|
||||
user_id=user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
# Convert results to response model
|
||||
search_results = [
|
||||
store_model.UnifiedSearchResult(
|
||||
content_type=r["content_type"],
|
||||
content_id=r["content_id"],
|
||||
searchable_text=r.get("searchable_text", ""),
|
||||
metadata=r.get("metadata"),
|
||||
updated_at=r.get("updated_at"),
|
||||
combined_score=r.get("combined_score"),
|
||||
semantic_score=r.get("semantic_score"),
|
||||
lexical_score=r.get("lexical_score"),
|
||||
)
|
||||
for r in results
|
||||
]
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size if total > 0 else 0
|
||||
|
||||
return store_model.UnifiedSearchResponse(
|
||||
results=search_results,
|
||||
pagination=Pagination(
|
||||
total_items=total,
|
||||
total_pages=total_pages,
|
||||
current_page=page,
|
||||
page_size=page_size,
|
||||
),
|
||||
)
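
For reference, a call to this endpoint might look roughly like the following. The prefix under which this store router is mounted is not visible in this hunk, so the URL below is an assumption; adjust it to wherever the router is actually included.

# Hedged example client call; the base URL and router prefix are assumptions.
import httpx


def search_store(query: str, content_types: list[str] | None = None) -> dict:
    params: dict = {"query": query, "page": 1, "page_size": 20}
    if content_types:
        # FastAPI list query params repeat the key:
        # ?content_types=BLOCK&content_types=DOCUMENTATION
        params["content_types"] = content_types
    response = httpx.get("http://localhost:8006/api/store/search", params=params)
    response.raise_for_status()
    # Response body follows UnifiedSearchResponse: {"results": [...], "pagination": {...}}
    return response.json()


# Example: restrict the search to blocks and documentation.
# search_store("summarize text", ["BLOCK", "DOCUMENTATION"])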
|
||||
|
||||
|
||||
@router.get(
|
||||
"/agents/{username}/{agent_name}",
|
||||
summary="Get specific agent",
|
||||
@@ -294,6 +393,7 @@ async def get_creators(
|
||||
@router.get(
|
||||
"/creator/{username}",
|
||||
summary="Get creator details",
|
||||
operation_id="getV2GetCreatorDetails",
|
||||
tags=["store", "public"],
|
||||
response_model=store_model.CreatorDetails,
|
||||
)
|
||||
|
||||
@@ -521,6 +521,7 @@ def test_get_submissions_success(
|
||||
mocked_value = store_model.StoreSubmissionsResponse(
|
||||
submissions=[
|
||||
store_model.StoreSubmission(
|
||||
listing_id="test-listing-id",
|
||||
name="Test Agent",
|
||||
description="Test agent description",
|
||||
image_urls=["test.jpg"],
|
||||
|
||||
@@ -0,0 +1,272 @@
|
||||
"""Tests for the semantic_search function."""
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store.embeddings import EMBEDDING_DIM, semantic_search
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_blocks_only(mocker):
|
||||
"""Test searching only BLOCK content type."""
|
||||
# Mock embed_query to return a test embedding
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Mock query_raw_with_schema to return test results
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block - Performs arithmetic operations",
|
||||
"metadata": {"name": "Calculator", "categories": ["Math"]},
|
||||
"similarity": 0.85,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculate numbers",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["content_type"] == "BLOCK"
|
||||
assert results[0]["content_id"] == "block-123"
|
||||
assert results[0]["similarity"] == 0.85
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_multiple_content_types(mocker):
|
||||
"""Test searching multiple content types simultaneously."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block",
|
||||
"metadata": {},
|
||||
"similarity": 0.85,
|
||||
},
|
||||
{
|
||||
"content_id": "doc-456",
|
||||
"content_type": "DOCUMENTATION",
|
||||
"searchable_text": "How to use Calculator",
|
||||
"metadata": {},
|
||||
"similarity": 0.75,
|
||||
},
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculator",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION],
|
||||
)
|
||||
|
||||
assert len(results) == 2
|
||||
assert results[0]["content_type"] == "BLOCK"
|
||||
assert results[1]["content_type"] == "DOCUMENTATION"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_with_min_similarity_threshold(mocker):
|
||||
"""Test that results below min_similarity are filtered out."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Mock the DB returning only rows already above the 0.7 threshold
# (the similarity filter itself is applied in the mocked SQL query)
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block",
|
||||
"metadata": {},
|
||||
"similarity": 0.85,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculate",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_similarity=0.7,
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["similarity"] >= 0.7
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_fallback_to_lexical(mocker):
|
||||
"""Test fallback to lexical search when embeddings fail."""
|
||||
# Mock embed_query to return None (embeddings unavailable)
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=None,
|
||||
)
|
||||
|
||||
mock_lexical_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block performs calculations",
|
||||
"metadata": {},
|
||||
"similarity": 0.0,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_lexical_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculator",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["similarity"] == 0.0 # Lexical search returns 0 similarity
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_empty_query():
|
||||
"""Test that empty query returns no results."""
|
||||
results = await semantic_search(query="")
|
||||
assert results == []
|
||||
|
||||
results = await semantic_search(query=" ")
|
||||
assert results == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_with_user_id_filter(mocker):
|
||||
"""Test searching with user_id filter for private content."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "agent-789",
|
||||
"content_type": "LIBRARY_AGENT",
|
||||
"searchable_text": "My Custom Agent",
|
||||
"metadata": {},
|
||||
"similarity": 0.9,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="custom agent",
|
||||
content_types=[ContentType.LIBRARY_AGENT],
|
||||
user_id="user-123",
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["content_type"] == "LIBRARY_AGENT"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_limit_parameter(mocker):
|
||||
"""Test that limit parameter correctly limits results."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Return 5 results
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": f"block-{i}",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": f"Block {i}",
|
||||
"metadata": {},
|
||||
"similarity": 0.8,
|
||||
}
|
||||
for i in range(5)
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="block",
|
||||
content_types=[ContentType.BLOCK],
|
||||
limit=5,
|
||||
)
|
||||
|
||||
assert len(results) == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_default_content_types(mocker):
|
||||
"""Test that default content_types includes BLOCK, STORE_AGENT, and DOCUMENTATION."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_query_raw = mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=[],
|
||||
)
|
||||
|
||||
await semantic_search(query="test")
|
||||
|
||||
# Check that the SQL query includes all three default content types
|
||||
call_args = mock_query_raw.call_args
|
||||
assert "BLOCK" in str(call_args)
|
||||
assert "STORE_AGENT" in str(call_args)
|
||||
assert "DOCUMENTATION" in str(call_args)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_handles_database_error(mocker):
|
||||
"""Test that database errors are handled gracefully."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Simulate database error
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
side_effect=Exception("Database connection failed"),
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
# Should return empty list on error
|
||||
assert results == []
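
Taken together, these tests pin down the call surface exercised here: query, content_types, min_similarity, user_id, and limit, with an empty list returned on failure. A hedged sketch of a caller, assuming the defaults behave as the tests above describe:

# Minimal usage sketch based only on the parameters exercised in the tests above.
from prisma.enums import ContentType

from backend.api.features.store.embeddings import semantic_search


async def find_relevant_blocks(query: str, user_id: str | None = None) -> list[dict]:
    # Errors and missing embeddings degrade to an empty list / lexical fallback,
    # so callers do not need their own try/except around this call.
    return await semantic_search(
        query=query,
        content_types=[ContentType.BLOCK],
        min_similarity=0.7,
        user_id=user_id,
        limit=10,
    )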
|
||||
@@ -64,7 +64,6 @@ from backend.data.onboarding import (
|
||||
complete_re_run_agent,
|
||||
get_recommended_agents,
|
||||
get_user_onboarding,
|
||||
increment_runs,
|
||||
onboarding_enabled,
|
||||
reset_user_onboarding,
|
||||
update_user_onboarding,
|
||||
@@ -975,7 +974,6 @@ async def execute_graph(
|
||||
# Record successful graph execution
|
||||
record_graph_execution(graph_id=graph_id, status="success", user_id=user_id)
|
||||
record_graph_operation(operation="execute", status="success")
|
||||
await increment_runs(user_id)
|
||||
await complete_re_run_agent(user_id, graph_id)
|
||||
if source == "library":
|
||||
await complete_onboarding_step(
|
||||
|
||||
@@ -18,6 +18,7 @@ from prisma.errors import PrismaError
|
||||
|
||||
import backend.api.features.admin.credit_admin_routes
|
||||
import backend.api.features.admin.execution_analytics_routes
|
||||
import backend.api.features.admin.llm_routes
|
||||
import backend.api.features.admin.store_admin_routes
|
||||
import backend.api.features.builder
|
||||
import backend.api.features.builder.routes
|
||||
@@ -37,9 +38,11 @@ import backend.data.db
|
||||
import backend.data.graph
|
||||
import backend.data.user
|
||||
import backend.integrations.webhooks.utils
|
||||
import backend.server.v2.llm.routes as public_llm_routes
|
||||
import backend.util.service
|
||||
import backend.util.settings
|
||||
from backend.blocks.llm import LlmModel
|
||||
from backend.data import llm_registry
|
||||
from backend.data.block_cost_config import refresh_llm_costs
|
||||
from backend.data.model import Credentials
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.monitoring.instrumentation import instrument_fastapi
|
||||
@@ -109,11 +112,27 @@ async def lifespan_context(app: fastapi.FastAPI):
|
||||
|
||||
AutoRegistry.patch_integrations()
|
||||
|
||||
# Refresh LLM registry before initializing blocks so blocks can use registry data
|
||||
await llm_registry.refresh_llm_registry()
|
||||
refresh_llm_costs()
|
||||
|
||||
# Clear block schema caches so they're regenerated with updated discriminator_mapping
|
||||
from backend.data.block import BlockSchema
|
||||
|
||||
BlockSchema.clear_all_schema_caches()
|
||||
|
||||
await backend.data.block.initialize_blocks()
|
||||
|
||||
await backend.data.user.migrate_and_encrypt_user_integrations()
|
||||
await backend.data.graph.fix_llm_provider_credentials()
|
||||
await backend.data.graph.migrate_llm_models(LlmModel.GPT4O)
|
||||
# migrate_llm_models uses registry default model
|
||||
from backend.blocks.llm import LlmModel
|
||||
|
||||
default_model_slug = llm_registry.get_default_model_slug()
|
||||
if default_model_slug:
|
||||
await backend.data.graph.migrate_llm_models(LlmModel(default_model_slug))
|
||||
else:
|
||||
logger.warning("Skipping LLM model migration: no default model available")
|
||||
await backend.integrations.webhooks.utils.migrate_legacy_triggered_graphs()
|
||||
|
||||
with launch_darkly_context():
|
||||
@@ -298,6 +317,16 @@ app.include_router(
|
||||
tags=["v2", "executions", "review"],
|
||||
prefix="/api/review",
|
||||
)
|
||||
app.include_router(
|
||||
backend.api.features.admin.llm_routes.router,
|
||||
tags=["v2", "admin", "llm"],
|
||||
prefix="/api/llm/admin",
|
||||
)
|
||||
app.include_router(
|
||||
public_llm_routes.router,
|
||||
tags=["v2", "llm"],
|
||||
prefix="/api",
|
||||
)
|
||||
app.include_router(
|
||||
backend.api.features.library.routes.router, tags=["v2"], prefix="/api/library"
|
||||
)
|
||||
|
||||
@@ -77,7 +77,39 @@ async def event_broadcaster(manager: ConnectionManager):
|
||||
payload=notification.payload,
|
||||
)
|
||||
|
||||
await asyncio.gather(execution_worker(), notification_worker())
|
||||
async def registry_refresh_worker():
|
||||
"""Listen for LLM registry refresh notifications and broadcast to all clients."""
|
||||
from backend.data.llm_registry import REGISTRY_REFRESH_CHANNEL
|
||||
from backend.data.redis_client import connect_async
|
||||
|
||||
redis = await connect_async()
|
||||
pubsub = redis.pubsub()
|
||||
await pubsub.subscribe(REGISTRY_REFRESH_CHANNEL)
|
||||
logger.info(
|
||||
"Subscribed to LLM registry refresh notifications for WebSocket broadcast"
|
||||
)
|
||||
|
||||
async for message in pubsub.listen():
|
||||
if (
|
||||
message["type"] == "message"
|
||||
and message["channel"] == REGISTRY_REFRESH_CHANNEL
|
||||
):
|
||||
logger.info(
|
||||
"Broadcasting LLM registry refresh to all WebSocket clients"
|
||||
)
|
||||
await manager.broadcast_to_all(
|
||||
method=WSMethod.NOTIFICATION,
|
||||
data={
|
||||
"type": "LLM_REGISTRY_REFRESH",
|
||||
"event": "registry_updated",
|
||||
},
|
||||
)
|
||||
|
||||
await asyncio.gather(
|
||||
execution_worker(),
|
||||
notification_worker(),
|
||||
registry_refresh_worker(),
|
||||
)
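
Only the subscriber side is shown here. The publishing side presumably lives wherever the registry is updated (for example the admin LLM routes added in this changeset); a rough sketch of what a publisher could look like, reusing the same channel constant and Redis client helper:

# Hypothetical publisher sketch; the real trigger point for this notification
# is not part of this hunk.
from backend.data.llm_registry import REGISTRY_REFRESH_CHANNEL
from backend.data.redis_client import connect_async


async def notify_llm_registry_refresh() -> None:
    redis = await connect_async()
    # Any payload works for this channel; subscribers only react to the message event.
    await redis.publish(REGISTRY_REFRESH_CHANNEL, "refresh")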
|
||||
|
||||
|
||||
async def authenticate_websocket(websocket: WebSocket) -> str:
|
||||
|
||||
@@ -9,6 +9,7 @@ from backend.blocks.llm import (
|
||||
LlmModel,
|
||||
LLMResponse,
|
||||
llm_call,
|
||||
llm_model_schema_extra,
|
||||
)
|
||||
from backend.data.block import (
|
||||
BlockCategory,
|
||||
@@ -49,9 +50,10 @@ class AIConditionBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for evaluating the condition.",
|
||||
advanced=False,
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
credentials: AICredentials = AICredentialsField()
|
||||
|
||||
@@ -81,7 +83,7 @@ class AIConditionBlock(AIBlockBase):
|
||||
"condition": "the input is an email address",
|
||||
"yes_value": "Valid email",
|
||||
"no_value": "Not an email",
|
||||
"model": LlmModel.GPT4O,
|
||||
"model": "gpt-4o", # Using string value - enum accepts any model slug dynamically
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
|
||||
@@ -6,6 +6,9 @@ import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import cast
|
||||
|
||||
from prisma.types import Serializable
|
||||
|
||||
from backend.sdk import (
|
||||
BaseWebhooksManager,
|
||||
@@ -84,7 +87,9 @@ class AirtableWebhookManager(BaseWebhooksManager):
|
||||
# update webhook config
|
||||
await update_webhook(
|
||||
webhook.id,
|
||||
config={"base_id": base_id, "cursor": response.cursor},
|
||||
config=cast(
|
||||
dict[str, Serializable], {"base_id": base_id, "cursor": response.cursor}
|
||||
),
|
||||
)
|
||||
|
||||
event_type = "notification"
|
||||
|
||||
@@ -182,13 +182,10 @@ class DataForSeoRelatedKeywordsBlock(Block):
|
||||
if results and len(results) > 0:
|
||||
# results is a list, get the first element
|
||||
first_result = results[0] if isinstance(results, list) else results
|
||||
items = (
|
||||
first_result.get("items", [])
|
||||
if isinstance(first_result, dict)
|
||||
else []
|
||||
)
|
||||
# Ensure items is never None
|
||||
if items is None:
|
||||
# Handle missing key, null value, or valid list value
|
||||
if isinstance(first_result, dict):
|
||||
items = first_result.get("items") or []
|
||||
else:
|
||||
items = []
|
||||
for item in items:
|
||||
# Extract keyword_data from the item
|
||||
|
||||
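
The replacement above matters because dict.get("items", []) only covers a missing key; when the API returns the key with an explicit null, the default is not used. A quick illustration:

# Why `get("items") or []` is used instead of `get("items", [])`:
payload_missing = {}
payload_null = {"items": None}
payload_present = {"items": [{"keyword": "seo"}]}

assert payload_missing.get("items", []) == []           # default applies
assert payload_null.get("items", []) is None             # default does NOT apply
assert (payload_null.get("items") or []) == []           # null coerced to empty list
assert (payload_present.get("items") or []) == [{"keyword": "seo"}]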
autogpt_platform/backend/backend/blocks/google/docs.py (new file, 2896 lines; diff suppressed because it is too large)

autogpt_platform/backend/backend/blocks/helpers/review.py (new file, 184 lines)
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Shared helpers for Human-In-The-Loop (HITL) review functionality.
|
||||
Used by both the dedicated HumanInTheLoopBlock and blocks that require human review.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
from prisma.enums import ReviewStatus
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.execution import ExecutionContext, ExecutionStatus
|
||||
from backend.data.human_review import ReviewResult
|
||||
from backend.executor.manager import async_update_node_execution_status
|
||||
from backend.util.clients import get_database_manager_async_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReviewDecision(BaseModel):
|
||||
"""Result of a review decision."""
|
||||
|
||||
should_proceed: bool
|
||||
message: str
|
||||
review_result: ReviewResult
|
||||
|
||||
|
||||
class HITLReviewHelper:
|
||||
"""Helper class for Human-In-The-Loop review operations."""
|
||||
|
||||
@staticmethod
|
||||
async def get_or_create_human_review(**kwargs) -> Optional[ReviewResult]:
|
||||
"""Create or retrieve a human review from the database."""
|
||||
return await get_database_manager_async_client().get_or_create_human_review(
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_node_execution_status(**kwargs) -> None:
|
||||
"""Update the execution status of a node."""
|
||||
await async_update_node_execution_status(
|
||||
db_client=get_database_manager_async_client(), **kwargs
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_review_processed_status(
|
||||
node_exec_id: str, processed: bool
|
||||
) -> None:
|
||||
"""Update the processed status of a review."""
|
||||
return await get_database_manager_async_client().update_review_processed_status(
|
||||
node_exec_id, processed
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def _handle_review_request(
|
||||
input_data: Any,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
block_name: str = "Block",
|
||||
editable: bool = False,
|
||||
) -> Optional[ReviewResult]:
|
||||
"""
|
||||
Handle a review request for a block that requires human review.
|
||||
|
||||
Args:
|
||||
input_data: The input data to be reviewed
|
||||
user_id: ID of the user requesting the review
|
||||
node_exec_id: ID of the node execution
|
||||
graph_exec_id: ID of the graph execution
|
||||
graph_id: ID of the graph
|
||||
graph_version: Version of the graph
|
||||
execution_context: Current execution context
|
||||
block_name: Name of the block requesting review
|
||||
editable: Whether the reviewer can edit the data
|
||||
|
||||
Returns:
|
||||
ReviewResult if review is complete, None if waiting for human input
|
||||
|
||||
Raises:
|
||||
Exception: If review creation or status update fails
|
||||
"""
|
||||
# Skip review if safe mode is disabled - return auto-approved result
|
||||
if not execution_context.safe_mode:
|
||||
logger.info(
|
||||
f"Block {block_name} skipping review for node {node_exec_id} - safe mode disabled"
|
||||
)
|
||||
return ReviewResult(
|
||||
data=input_data,
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="Auto-approved (safe mode disabled)",
|
||||
processed=True,
|
||||
node_exec_id=node_exec_id,
|
||||
)
|
||||
|
||||
result = await HITLReviewHelper.get_or_create_human_review(
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
input_data=input_data,
|
||||
message=f"Review required for {block_name} execution",
|
||||
editable=editable,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
logger.info(
|
||||
f"Block {block_name} pausing execution for node {node_exec_id} - awaiting human review"
|
||||
)
|
||||
await HITLReviewHelper.update_node_execution_status(
|
||||
exec_id=node_exec_id,
|
||||
status=ExecutionStatus.REVIEW,
|
||||
)
|
||||
return None # Signal that execution should pause
|
||||
|
||||
# Mark review as processed if not already done
|
||||
if not result.processed:
|
||||
await HITLReviewHelper.update_review_processed_status(
|
||||
node_exec_id=node_exec_id, processed=True
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
async def handle_review_decision(
|
||||
input_data: Any,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
block_name: str = "Block",
|
||||
editable: bool = False,
|
||||
) -> Optional[ReviewDecision]:
|
||||
"""
|
||||
Handle a review request and return the decision in a single call.
|
||||
|
||||
Args:
|
||||
input_data: The input data to be reviewed
|
||||
user_id: ID of the user requesting the review
|
||||
node_exec_id: ID of the node execution
|
||||
graph_exec_id: ID of the graph execution
|
||||
graph_id: ID of the graph
|
||||
graph_version: Version of the graph
|
||||
execution_context: Current execution context
|
||||
block_name: Name of the block requesting review
|
||||
editable: Whether the reviewer can edit the data
|
||||
|
||||
Returns:
|
||||
ReviewDecision if review is complete (approved/rejected),
|
||||
None if execution should pause (awaiting review)
|
||||
"""
|
||||
review_result = await HITLReviewHelper._handle_review_request(
|
||||
input_data=input_data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=block_name,
|
||||
editable=editable,
|
||||
)
|
||||
|
||||
if review_result is None:
|
||||
# Still awaiting review - return None to pause execution
|
||||
return None
|
||||
|
||||
# Review is complete, determine outcome
|
||||
should_proceed = review_result.status == ReviewStatus.APPROVED
|
||||
message = review_result.message or (
|
||||
"Execution approved by reviewer"
|
||||
if should_proceed
|
||||
else "Execution rejected by reviewer"
|
||||
)
|
||||
|
||||
return ReviewDecision(
|
||||
should_proceed=should_proceed, message=message, review_result=review_result
|
||||
)
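
The HumanInTheLoopBlock further down in this changeset is the canonical consumer of this helper; as a condensed, simplified view of the intended call pattern inside a block's run method (not the full block):

# Condensed sketch of the call pattern; HumanInTheLoopBlock below does the same
# thing with its own yields and status handling.
from typing import Any

from backend.blocks.helpers.review import HITLReviewHelper
from backend.data.execution import ExecutionContext


async def review_then_proceed(
    data: Any,
    user_id: str,
    node_exec_id: str,
    graph_exec_id: str,
    graph_id: str,
    graph_version: int,
    execution_context: ExecutionContext,
) -> Any | None:
    decision = await HITLReviewHelper.handle_review_decision(
        input_data=data,
        user_id=user_id,
        node_exec_id=node_exec_id,
        graph_exec_id=graph_exec_id,
        graph_id=graph_id,
        graph_version=graph_version,
        execution_context=execution_context,
        block_name="MyBlock",
    )
    if decision is None:
        return None  # execution is paused, awaiting a human reviewer
    return decision.review_result.data if decision.should_proceed else None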
|
||||
@@ -3,6 +3,7 @@ from typing import Any
|
||||
|
||||
from prisma.enums import ReviewStatus
|
||||
|
||||
from backend.blocks.helpers.review import HITLReviewHelper
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
@@ -11,11 +12,9 @@ from backend.data.block import (
|
||||
BlockSchemaOutput,
|
||||
BlockType,
|
||||
)
|
||||
from backend.data.execution import ExecutionContext, ExecutionStatus
|
||||
from backend.data.execution import ExecutionContext
|
||||
from backend.data.human_review import ReviewResult
|
||||
from backend.data.model import SchemaField
|
||||
from backend.executor.manager import async_update_node_execution_status
|
||||
from backend.util.clients import get_database_manager_async_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -72,32 +71,26 @@ class HumanInTheLoopBlock(Block):
|
||||
("approved_data", {"name": "John Doe", "age": 30}),
|
||||
],
|
||||
test_mock={
|
||||
"get_or_create_human_review": lambda *_args, **_kwargs: ReviewResult(
|
||||
data={"name": "John Doe", "age": 30},
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="",
|
||||
processed=False,
|
||||
node_exec_id="test-node-exec-id",
|
||||
),
|
||||
"update_node_execution_status": lambda *_args, **_kwargs: None,
|
||||
"update_review_processed_status": lambda *_args, **_kwargs: None,
|
||||
"handle_review_decision": lambda **kwargs: type(
|
||||
"ReviewDecision",
|
||||
(),
|
||||
{
|
||||
"should_proceed": True,
|
||||
"message": "Test approval message",
|
||||
"review_result": ReviewResult(
|
||||
data={"name": "John Doe", "age": 30},
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="",
|
||||
processed=False,
|
||||
node_exec_id="test-node-exec-id",
|
||||
),
|
||||
},
|
||||
)(),
|
||||
},
|
||||
)
|
||||
|
||||
async def get_or_create_human_review(self, **kwargs):
|
||||
return await get_database_manager_async_client().get_or_create_human_review(
|
||||
**kwargs
|
||||
)
|
||||
|
||||
async def update_node_execution_status(self, **kwargs):
|
||||
return await async_update_node_execution_status(
|
||||
db_client=get_database_manager_async_client(), **kwargs
|
||||
)
|
||||
|
||||
async def update_review_processed_status(self, node_exec_id: str, processed: bool):
|
||||
return await get_database_manager_async_client().update_review_processed_status(
|
||||
node_exec_id, processed
|
||||
)
|
||||
async def handle_review_decision(self, **kwargs):
|
||||
return await HITLReviewHelper.handle_review_decision(**kwargs)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
@@ -109,7 +102,7 @@ class HumanInTheLoopBlock(Block):
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
**kwargs,
|
||||
**_kwargs,
|
||||
) -> BlockOutput:
|
||||
if not execution_context.safe_mode:
|
||||
logger.info(
|
||||
@@ -119,48 +112,28 @@ class HumanInTheLoopBlock(Block):
|
||||
yield "review_message", "Auto-approved (safe mode disabled)"
|
||||
return
|
||||
|
||||
try:
|
||||
result = await self.get_or_create_human_review(
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
input_data=input_data.data,
|
||||
message=input_data.name,
|
||||
editable=input_data.editable,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in HITL block for node {node_exec_id}: {str(e)}")
|
||||
raise
|
||||
decision = await self.handle_review_decision(
|
||||
input_data=input_data.data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=self.name,
|
||||
editable=input_data.editable,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
logger.info(
|
||||
f"HITL block pausing execution for node {node_exec_id} - awaiting human review"
|
||||
)
|
||||
try:
|
||||
await self.update_node_execution_status(
|
||||
exec_id=node_exec_id,
|
||||
status=ExecutionStatus.REVIEW,
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to update node status for HITL block {node_exec_id}: {str(e)}"
|
||||
)
|
||||
raise
|
||||
if decision is None:
|
||||
return
|
||||
|
||||
if not result.processed:
|
||||
await self.update_review_processed_status(
|
||||
node_exec_id=node_exec_id, processed=True
|
||||
)
|
||||
status = decision.review_result.status
|
||||
if status == ReviewStatus.APPROVED:
|
||||
yield "approved_data", decision.review_result.data
|
||||
elif status == ReviewStatus.REJECTED:
|
||||
yield "rejected_data", decision.review_result.data
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected review status: {status}")
|
||||
|
||||
if result.status == ReviewStatus.APPROVED:
|
||||
yield "approved_data", result.data
|
||||
if result.message:
|
||||
yield "review_message", result.message
|
||||
|
||||
elif result.status == ReviewStatus.REJECTED:
|
||||
yield "rejected_data", result.data
|
||||
if result.message:
|
||||
yield "review_message", result.message
|
||||
if decision.message:
|
||||
yield "review_message", decision.message
|
||||
|
||||
@@ -4,17 +4,19 @@ import logging
|
||||
import re
|
||||
import secrets
|
||||
from abc import ABC
|
||||
from enum import Enum, EnumMeta
|
||||
from enum import Enum
|
||||
from json import JSONDecodeError
|
||||
from typing import Any, Iterable, List, Literal, NamedTuple, Optional
|
||||
from typing import Any, Iterable, List, Literal, Optional
|
||||
|
||||
import anthropic
|
||||
import ollama
|
||||
import openai
|
||||
from anthropic.types import ToolParam
|
||||
from groq import AsyncGroq
|
||||
from pydantic import BaseModel, SecretStr
|
||||
from pydantic import BaseModel, GetCoreSchemaHandler, SecretStr
|
||||
from pydantic_core import CoreSchema, core_schema
|
||||
|
||||
from backend.data import llm_registry
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
@@ -22,6 +24,7 @@ from backend.data.block import (
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.llm_registry import ModelMetadata
|
||||
from backend.data.model import (
|
||||
APIKeyCredentials,
|
||||
CredentialsField,
|
||||
@@ -66,113 +69,117 @@ TEST_CREDENTIALS_INPUT = {
|
||||
|
||||
|
||||
def AICredentialsField() -> AICredentials:
|
||||
"""
|
||||
Returns a CredentialsField for LLM providers.
|
||||
The discriminator_mapping will be refreshed when the schema is generated
|
||||
if it's empty, ensuring the LLM registry is loaded.
|
||||
"""
|
||||
# Get the mapping now - it may be empty initially, but will be refreshed
|
||||
# when the schema is generated via CredentialsMetaInput._add_json_schema_extra
|
||||
mapping = llm_registry.get_llm_discriminator_mapping()
|
||||
|
||||
return CredentialsField(
|
||||
description="API key for the LLM provider.",
|
||||
discriminator="model",
|
||||
discriminator_mapping={
|
||||
model.value: model.metadata.provider for model in LlmModel
|
||||
},
|
||||
discriminator_mapping=mapping, # May be empty initially, refreshed later
|
||||
)
|
||||
|
||||
|
||||
class ModelMetadata(NamedTuple):
|
||||
provider: str
|
||||
context_window: int
|
||||
max_output_tokens: int | None
|
||||
def llm_model_schema_extra() -> dict[str, Any]:
|
||||
return {"options": llm_registry.get_llm_model_schema_options()}
|
||||
|
||||
|
||||
class LlmModelMeta(EnumMeta):
|
||||
pass
|
||||
class LlmModelMeta(type):
|
||||
"""
|
||||
Metaclass for LlmModel that enables attribute-style access to dynamic models.
|
||||
|
||||
This allows code like `LlmModel.GPT4O` to work by converting the attribute
|
||||
name to a slug format:
|
||||
- GPT4O -> gpt-4o
|
||||
- GPT4O_MINI -> gpt-4o-mini
|
||||
- CLAUDE_3_5_SONNET -> claude-3-5-sonnet
|
||||
"""
|
||||
|
||||
def __getattr__(cls, name: str):
|
||||
# Don't intercept private/dunder attributes
|
||||
if name.startswith("_"):
|
||||
raise AttributeError(f"type object 'LlmModel' has no attribute '{name}'")
|
||||
|
||||
# Convert attribute name to slug format:
|
||||
# 1. Lowercase: GPT4O -> gpt4o
|
||||
# 2. Underscores to hyphens: GPT4O_MINI -> gpt4o-mini
|
||||
# 3. Insert hyphen between letter and digit: gpt4o -> gpt-4o
|
||||
slug = name.lower().replace("_", "-")
|
||||
slug = re.sub(r"([a-z])(\d)", r"\1-\2", slug)
|
||||
|
||||
return cls(slug)
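
The same conversion, pulled out as a standalone function for illustration (the regex is the one used above); note it only inserts a hyphen between a letter and the first following digit, so attribute names need to be spelled the way the docstring examples show:

# Standalone restatement of the attribute-name -> slug conversion used above.
import re


def attr_name_to_slug(name: str) -> str:
    slug = name.lower().replace("_", "-")
    return re.sub(r"([a-z])(\d)", r"\1-\2", slug)


assert attr_name_to_slug("GPT4O") == "gpt-4o"
assert attr_name_to_slug("GPT4O_MINI") == "gpt-4o-mini"
assert attr_name_to_slug("CLAUDE_3_5_SONNET") == "claude-3-5-sonnet"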
|
||||
|
||||
|
||||
class LlmModel(str, Enum, metaclass=LlmModelMeta):
|
||||
# OpenAI models
|
||||
O3_MINI = "o3-mini"
|
||||
O3 = "o3-2025-04-16"
|
||||
O1 = "o1"
|
||||
O1_MINI = "o1-mini"
|
||||
# GPT-5 models
|
||||
GPT5 = "gpt-5-2025-08-07"
|
||||
GPT5_1 = "gpt-5.1-2025-11-13"
|
||||
GPT5_MINI = "gpt-5-mini-2025-08-07"
|
||||
GPT5_NANO = "gpt-5-nano-2025-08-07"
|
||||
GPT5_CHAT = "gpt-5-chat-latest"
|
||||
GPT41 = "gpt-4.1-2025-04-14"
|
||||
GPT41_MINI = "gpt-4.1-mini-2025-04-14"
|
||||
GPT4O_MINI = "gpt-4o-mini"
|
||||
GPT4O = "gpt-4o"
|
||||
GPT4_TURBO = "gpt-4-turbo"
|
||||
GPT3_5_TURBO = "gpt-3.5-turbo"
|
||||
# Anthropic models
|
||||
CLAUDE_4_1_OPUS = "claude-opus-4-1-20250805"
|
||||
CLAUDE_4_OPUS = "claude-opus-4-20250514"
|
||||
CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
|
||||
CLAUDE_4_5_OPUS = "claude-opus-4-5-20251101"
|
||||
CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929"
|
||||
CLAUDE_4_5_HAIKU = "claude-haiku-4-5-20251001"
|
||||
CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219"
|
||||
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
|
||||
# AI/ML API models
|
||||
AIML_API_QWEN2_5_72B = "Qwen/Qwen2.5-72B-Instruct-Turbo"
|
||||
AIML_API_LLAMA3_1_70B = "nvidia/llama-3.1-nemotron-70b-instruct"
|
||||
AIML_API_LLAMA3_3_70B = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
|
||||
AIML_API_META_LLAMA_3_1_70B = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
|
||||
AIML_API_LLAMA_3_2_3B = "meta-llama/Llama-3.2-3B-Instruct-Turbo"
|
||||
# Groq models
|
||||
LLAMA3_3_70B = "llama-3.3-70b-versatile"
|
||||
LLAMA3_1_8B = "llama-3.1-8b-instant"
|
||||
# Ollama models
|
||||
OLLAMA_LLAMA3_3 = "llama3.3"
|
||||
OLLAMA_LLAMA3_2 = "llama3.2"
|
||||
OLLAMA_LLAMA3_8B = "llama3"
|
||||
OLLAMA_LLAMA3_405B = "llama3.1:405b"
|
||||
OLLAMA_DOLPHIN = "dolphin-mistral:latest"
|
||||
# OpenRouter models
|
||||
OPENAI_GPT_OSS_120B = "openai/gpt-oss-120b"
|
||||
OPENAI_GPT_OSS_20B = "openai/gpt-oss-20b"
|
||||
GEMINI_2_5_PRO = "google/gemini-2.5-pro-preview-03-25"
|
||||
GEMINI_3_PRO_PREVIEW = "google/gemini-3-pro-preview"
|
||||
GEMINI_2_5_FLASH = "google/gemini-2.5-flash"
|
||||
GEMINI_2_0_FLASH = "google/gemini-2.0-flash-001"
|
||||
GEMINI_2_5_FLASH_LITE_PREVIEW = "google/gemini-2.5-flash-lite-preview-06-17"
|
||||
GEMINI_2_0_FLASH_LITE = "google/gemini-2.0-flash-lite-001"
|
||||
MISTRAL_NEMO = "mistralai/mistral-nemo"
|
||||
COHERE_COMMAND_R_08_2024 = "cohere/command-r-08-2024"
|
||||
COHERE_COMMAND_R_PLUS_08_2024 = "cohere/command-r-plus-08-2024"
|
||||
DEEPSEEK_CHAT = "deepseek/deepseek-chat" # Actually: DeepSeek V3
|
||||
DEEPSEEK_R1_0528 = "deepseek/deepseek-r1-0528"
|
||||
PERPLEXITY_SONAR = "perplexity/sonar"
|
||||
PERPLEXITY_SONAR_PRO = "perplexity/sonar-pro"
|
||||
PERPLEXITY_SONAR_DEEP_RESEARCH = "perplexity/sonar-deep-research"
|
||||
NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B = "nousresearch/hermes-3-llama-3.1-405b"
|
||||
NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B = "nousresearch/hermes-3-llama-3.1-70b"
|
||||
AMAZON_NOVA_LITE_V1 = "amazon/nova-lite-v1"
|
||||
AMAZON_NOVA_MICRO_V1 = "amazon/nova-micro-v1"
|
||||
AMAZON_NOVA_PRO_V1 = "amazon/nova-pro-v1"
|
||||
MICROSOFT_WIZARDLM_2_8X22B = "microsoft/wizardlm-2-8x22b"
|
||||
GRYPHE_MYTHOMAX_L2_13B = "gryphe/mythomax-l2-13b"
|
||||
META_LLAMA_4_SCOUT = "meta-llama/llama-4-scout"
|
||||
META_LLAMA_4_MAVERICK = "meta-llama/llama-4-maverick"
|
||||
GROK_4 = "x-ai/grok-4"
|
||||
GROK_4_FAST = "x-ai/grok-4-fast"
|
||||
GROK_4_1_FAST = "x-ai/grok-4.1-fast"
|
||||
GROK_CODE_FAST_1 = "x-ai/grok-code-fast-1"
|
||||
KIMI_K2 = "moonshotai/kimi-k2"
|
||||
QWEN3_235B_A22B_THINKING = "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
QWEN3_CODER = "qwen/qwen3-coder"
|
||||
# Llama API models
|
||||
LLAMA_API_LLAMA_4_SCOUT = "Llama-4-Scout-17B-16E-Instruct-FP8"
|
||||
LLAMA_API_LLAMA4_MAVERICK = "Llama-4-Maverick-17B-128E-Instruct-FP8"
|
||||
LLAMA_API_LLAMA3_3_8B = "Llama-3.3-8B-Instruct"
|
||||
LLAMA_API_LLAMA3_3_70B = "Llama-3.3-70B-Instruct"
|
||||
# v0 by Vercel models
|
||||
V0_1_5_MD = "v0-1.5-md"
|
||||
V0_1_5_LG = "v0-1.5-lg"
|
||||
V0_1_0_MD = "v0-1.0-md"
|
||||
class LlmModel(str, metaclass=LlmModelMeta):
|
||||
"""
|
||||
Dynamic LLM model type that accepts any model slug from the registry.
|
||||
|
||||
This is a string subclass (not an Enum) that allows any model slug value.
|
||||
All models are managed via the LLM Registry in the database.
|
||||
|
||||
Usage:
|
||||
model = LlmModel("gpt-4o") # Direct construction
|
||||
model = LlmModel.GPT4O # Attribute access (converted to "gpt-4o")
|
||||
model.value # Returns the slug string
|
||||
model.provider # Returns the provider from registry
|
||||
"""
|
||||
|
||||
def __new__(cls, value: str):
|
||||
if isinstance(value, LlmModel):
|
||||
return value
|
||||
return str.__new__(cls, value)
|
||||
|
||||
@classmethod
|
||||
def __get_pydantic_core_schema__(
|
||||
cls, source_type: Any, handler: GetCoreSchemaHandler
|
||||
) -> CoreSchema:
|
||||
"""
|
||||
Tell Pydantic how to validate LlmModel.
|
||||
|
||||
Accepts strings and converts them to LlmModel instances.
|
||||
"""
|
||||
return core_schema.no_info_after_validator_function(
|
||||
cls, # The validator function (LlmModel constructor)
|
||||
core_schema.str_schema(), # Accept string input
|
||||
serialization=core_schema.to_string_ser_schema(), # Serialize as string
|
||||
)
|
||||
|
||||
@property
|
||||
def value(self) -> str:
|
||||
"""Return the model slug (for compatibility with enum-style access)."""
|
||||
return str(self)
|
||||
|
||||
@classmethod
|
||||
def default(cls) -> "LlmModel":
|
||||
"""
|
||||
Get the default model from the registry.
|
||||
|
||||
Returns the recommended model if set, otherwise gpt-4o if available
|
||||
and enabled, otherwise the first enabled model from the registry.
|
||||
Falls back to "gpt-4o" if registry is empty (e.g., at module import time).
|
||||
"""
|
||||
from backend.data.llm_registry import get_default_model_slug
|
||||
|
||||
slug = get_default_model_slug()
|
||||
if slug is None:
|
||||
# Registry is empty (e.g., at module import time before DB connection).
|
||||
# Fall back to gpt-4o for backward compatibility.
|
||||
slug = "gpt-4o"
|
||||
return cls(slug)
|
||||
|
||||
@property
|
||||
def metadata(self) -> ModelMetadata:
|
||||
return MODEL_METADATA[self]
|
||||
metadata = llm_registry.get_llm_model_metadata(self.value)
|
||||
if metadata:
|
||||
return metadata
|
||||
raise ValueError(
|
||||
f"Missing metadata for model: {self.value}. Model not found in LLM registry."
|
||||
)
|
||||
|
||||
@property
|
||||
def provider(self) -> str:
|
||||
@@ -187,125 +194,11 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
|
||||
return self.metadata.max_output_tokens
|
||||
|
||||
|
||||
MODEL_METADATA = {
|
||||
# https://platform.openai.com/docs/models
|
||||
LlmModel.O3: ModelMetadata("openai", 200000, 100000),
|
||||
LlmModel.O3_MINI: ModelMetadata("openai", 200000, 100000), # o3-mini-2025-01-31
|
||||
LlmModel.O1: ModelMetadata("openai", 200000, 100000), # o1-2024-12-17
|
||||
LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536), # o1-mini-2024-09-12
|
||||
# GPT-5 models
|
||||
LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_MINI: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_NANO: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_CHAT: ModelMetadata("openai", 400000, 16384),
|
||||
LlmModel.GPT41: ModelMetadata("openai", 1047576, 32768),
|
||||
LlmModel.GPT41_MINI: ModelMetadata("openai", 1047576, 32768),
|
||||
LlmModel.GPT4O_MINI: ModelMetadata(
|
||||
"openai", 128000, 16384
|
||||
), # gpt-4o-mini-2024-07-18
|
||||
LlmModel.GPT4O: ModelMetadata("openai", 128000, 16384), # gpt-4o-2024-08-06
|
||||
LlmModel.GPT4_TURBO: ModelMetadata(
|
||||
"openai", 128000, 4096
|
||||
), # gpt-4-turbo-2024-04-09
|
||||
LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385, 4096), # gpt-3.5-turbo-0125
|
||||
# https://docs.anthropic.com/en/docs/about-claude/models
|
||||
LlmModel.CLAUDE_4_1_OPUS: ModelMetadata(
|
||||
"anthropic", 200000, 32000
|
||||
), # claude-opus-4-1-20250805
|
||||
LlmModel.CLAUDE_4_OPUS: ModelMetadata(
|
||||
"anthropic", 200000, 32000
|
||||
), # claude-4-opus-20250514
|
||||
LlmModel.CLAUDE_4_SONNET: ModelMetadata(
|
||||
"anthropic", 200000, 64000
|
||||
), # claude-4-sonnet-20250514
|
||||
LlmModel.CLAUDE_4_5_OPUS: ModelMetadata(
|
||||
"anthropic", 200000, 64000
|
||||
), # claude-opus-4-5-20251101
|
||||
LlmModel.CLAUDE_4_5_SONNET: ModelMetadata(
|
||||
"anthropic", 200000, 64000
|
||||
), # claude-sonnet-4-5-20250929
|
||||
LlmModel.CLAUDE_4_5_HAIKU: ModelMetadata(
|
||||
"anthropic", 200000, 64000
|
||||
), # claude-haiku-4-5-20251001
|
||||
LlmModel.CLAUDE_3_7_SONNET: ModelMetadata(
|
||||
"anthropic", 200000, 64000
|
||||
), # claude-3-7-sonnet-20250219
|
||||
LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
|
||||
"anthropic", 200000, 4096
|
||||
), # claude-3-haiku-20240307
|
||||
# https://docs.aimlapi.com/api-overview/model-database/text-models
|
||||
LlmModel.AIML_API_QWEN2_5_72B: ModelMetadata("aiml_api", 32000, 8000),
|
||||
LlmModel.AIML_API_LLAMA3_1_70B: ModelMetadata("aiml_api", 128000, 40000),
|
||||
LlmModel.AIML_API_LLAMA3_3_70B: ModelMetadata("aiml_api", 128000, None),
|
||||
LlmModel.AIML_API_META_LLAMA_3_1_70B: ModelMetadata("aiml_api", 131000, 2000),
|
||||
LlmModel.AIML_API_LLAMA_3_2_3B: ModelMetadata("aiml_api", 128000, None),
|
||||
# https://console.groq.com/docs/models
|
||||
LlmModel.LLAMA3_3_70B: ModelMetadata("groq", 128000, 32768),
|
||||
LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 128000, 8192),
|
||||
# https://ollama.com/library
|
||||
LlmModel.OLLAMA_LLAMA3_3: ModelMetadata("ollama", 8192, None),
|
||||
LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192, None),
|
||||
LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, None),
|
||||
LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, None),
|
||||
LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, None),
|
||||
# https://openrouter.ai/models
|
||||
LlmModel.GEMINI_2_5_PRO: ModelMetadata("open_router", 1050000, 8192),
|
||||
LlmModel.GEMINI_3_PRO_PREVIEW: ModelMetadata("open_router", 1048576, 65535),
|
||||
LlmModel.GEMINI_2_5_FLASH: ModelMetadata("open_router", 1048576, 65535),
|
||||
LlmModel.GEMINI_2_0_FLASH: ModelMetadata("open_router", 1048576, 8192),
|
||||
LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: ModelMetadata(
|
||||
"open_router", 1048576, 65535
|
||||
),
|
||||
LlmModel.GEMINI_2_0_FLASH_LITE: ModelMetadata("open_router", 1048576, 8192),
|
||||
LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 128000, 4096),
|
||||
LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 128000, 4096),
|
||||
LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 128000, 4096),
|
||||
LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 64000, 2048),
|
||||
LlmModel.DEEPSEEK_R1_0528: ModelMetadata("open_router", 163840, 163840),
|
||||
LlmModel.PERPLEXITY_SONAR: ModelMetadata("open_router", 127000, 8000),
|
||||
LlmModel.PERPLEXITY_SONAR_PRO: ModelMetadata("open_router", 200000, 8000),
|
||||
LlmModel.PERPLEXITY_SONAR_DEEP_RESEARCH: ModelMetadata(
|
||||
"open_router",
|
||||
128000,
|
||||
16000,
|
||||
),
|
||||
LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
|
||||
"open_router", 131000, 4096
|
||||
),
|
||||
LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
|
||||
"open_router", 12288, 12288
|
||||
),
|
||||
LlmModel.OPENAI_GPT_OSS_120B: ModelMetadata("open_router", 131072, 131072),
|
||||
LlmModel.OPENAI_GPT_OSS_20B: ModelMetadata("open_router", 131072, 32768),
|
||||
LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 300000, 5120),
|
||||
LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 128000, 5120),
|
||||
LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 300000, 5120),
|
||||
LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 65536, 4096),
|
||||
LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
|
||||
LlmModel.META_LLAMA_4_SCOUT: ModelMetadata("open_router", 131072, 131072),
|
||||
LlmModel.META_LLAMA_4_MAVERICK: ModelMetadata("open_router", 1048576, 1000000),
|
||||
LlmModel.GROK_4: ModelMetadata("open_router", 256000, 256000),
|
||||
LlmModel.GROK_4_FAST: ModelMetadata("open_router", 2000000, 30000),
|
||||
LlmModel.GROK_4_1_FAST: ModelMetadata("open_router", 2000000, 30000),
|
||||
LlmModel.GROK_CODE_FAST_1: ModelMetadata("open_router", 256000, 10000),
|
||||
LlmModel.KIMI_K2: ModelMetadata("open_router", 131000, 131000),
|
||||
LlmModel.QWEN3_235B_A22B_THINKING: ModelMetadata("open_router", 262144, 262144),
|
||||
LlmModel.QWEN3_CODER: ModelMetadata("open_router", 262144, 262144),
|
||||
# Llama API models
|
||||
LlmModel.LLAMA_API_LLAMA_4_SCOUT: ModelMetadata("llama_api", 128000, 4028),
|
||||
LlmModel.LLAMA_API_LLAMA4_MAVERICK: ModelMetadata("llama_api", 128000, 4028),
|
||||
LlmModel.LLAMA_API_LLAMA3_3_8B: ModelMetadata("llama_api", 128000, 4028),
|
||||
LlmModel.LLAMA_API_LLAMA3_3_70B: ModelMetadata("llama_api", 128000, 4028),
|
||||
# v0 by Vercel models
|
||||
LlmModel.V0_1_5_MD: ModelMetadata("v0", 128000, 64000),
|
||||
LlmModel.V0_1_5_LG: ModelMetadata("v0", 512000, 64000),
|
||||
LlmModel.V0_1_0_MD: ModelMetadata("v0", 128000, 64000),
|
||||
}
|
||||
# MODEL_METADATA removed - all models now come from the database via llm_registry
|
||||
|
||||
for model in LlmModel:
|
||||
if model not in MODEL_METADATA:
|
||||
raise ValueError(f"Missing MODEL_METADATA metadata for model: {model}")
|
||||
# Default model constant for backward compatibility
|
||||
# Uses the dynamic registry to get the default model
|
||||
DEFAULT_LLM_MODEL = LlmModel.default()
|
||||
|
||||
|
||||
class ToolCall(BaseModel):
|
||||
@@ -434,19 +327,94 @@ async def llm_call(
|
||||
- prompt_tokens: The number of tokens used in the prompt.
|
||||
- completion_tokens: The number of tokens used in the completion.
|
||||
"""
|
||||
provider = llm_model.metadata.provider
|
||||
context_window = llm_model.context_window
|
||||
# Get model metadata and check if enabled - with fallback support
|
||||
# The model we'll actually use (may differ if original is disabled)
|
||||
model_to_use = llm_model.value
|
||||
|
||||
# Check if model is in registry and if it's enabled
|
||||
from backend.data.llm_registry import (
|
||||
get_fallback_model_for_disabled,
|
||||
get_model_info,
|
||||
)
|
||||
|
||||
model_info = get_model_info(llm_model.value)
|
||||
|
||||
if model_info and not model_info.is_enabled:
|
||||
# Model is disabled - try to find a fallback from the same provider
|
||||
fallback = get_fallback_model_for_disabled(llm_model.value)
|
||||
if fallback:
|
||||
logger.warning(
|
||||
f"Model '{llm_model.value}' is disabled. Using fallback model '{fallback.slug}' from the same provider ({fallback.metadata.provider})."
|
||||
)
|
||||
model_to_use = fallback.slug
|
||||
# Use fallback model's metadata
|
||||
provider = fallback.metadata.provider
|
||||
context_window = fallback.metadata.context_window
|
||||
model_max_output = fallback.metadata.max_output_tokens or int(2**15)
|
||||
else:
|
||||
# No fallback available - raise error
|
||||
raise ValueError(
|
||||
f"LLM model '{llm_model.value}' is disabled and no fallback model "
|
||||
f"from the same provider is available. Please enable the model or "
|
||||
f"select a different model in the block configuration."
|
||||
)
|
||||
else:
|
||||
# Model is enabled or not in registry (legacy/static model)
|
||||
try:
|
||||
provider = llm_model.metadata.provider
|
||||
context_window = llm_model.context_window
|
||||
model_max_output = llm_model.max_output_tokens or int(2**15)
|
||||
except ValueError:
|
||||
# Model not in cache - try refreshing the registry once if we have DB access
|
||||
logger.warning(f"Model {llm_model.value} not found in registry cache")
|
||||
|
||||
# Try refreshing the registry if we have database access
|
||||
from backend.data.db import is_connected
|
||||
|
||||
if is_connected():
|
||||
try:
|
||||
logger.info(
|
||||
f"Refreshing LLM registry and retrying lookup for {llm_model.value}"
|
||||
)
|
||||
await llm_registry.refresh_llm_registry()
|
||||
# Try again after refresh
|
||||
try:
|
||||
provider = llm_model.metadata.provider
|
||||
context_window = llm_model.context_window
|
||||
model_max_output = llm_model.max_output_tokens or int(2**15)
|
||||
logger.info(
|
||||
f"Successfully loaded model {llm_model.value} metadata after registry refresh"
|
||||
)
|
||||
except ValueError:
|
||||
# Still not found after refresh
|
||||
raise ValueError(
|
||||
f"LLM model '{llm_model.value}' not found in registry after refresh. "
|
||||
"Please ensure the model is added and enabled in the LLM registry via the admin UI."
|
||||
)
|
||||
except Exception as refresh_exc:
|
||||
logger.error(f"Failed to refresh LLM registry: {refresh_exc}")
|
||||
raise ValueError(
|
||||
f"LLM model '{llm_model.value}' not found in registry and failed to refresh. "
|
||||
"Please ensure the model is added to the LLM registry via the admin UI."
|
||||
) from refresh_exc
|
||||
else:
|
||||
# No DB access (e.g., in executor without direct DB connection)
|
||||
# The registry should have been loaded on startup
|
||||
raise ValueError(
|
||||
f"LLM model '{llm_model.value}' not found in registry cache. "
|
||||
"The registry may need to be refreshed. Please contact support or try again later."
|
||||
)
|
||||
|
||||
if compress_prompt_to_fit:
|
||||
prompt = compress_prompt(
|
||||
messages=prompt,
|
||||
target_tokens=llm_model.context_window // 2,
|
||||
target_tokens=context_window // 2,
|
||||
lossy_ok=True,
|
||||
)
|
||||
|
||||
# Calculate available tokens based on context window and input length
|
||||
estimated_input_tokens = estimate_token_count(prompt)
|
||||
model_max_output = llm_model.max_output_tokens or int(2**15)
|
||||
# model_max_output already set above
|
||||
user_max = max_tokens or model_max_output
|
||||
available_tokens = max(context_window - estimated_input_tokens, 0)
|
||||
max_tokens = max(min(available_tokens, model_max_output, user_max), 1)
|
||||
@@ -464,7 +432,7 @@ async def llm_call(
|
||||
response_format = {"type": "json_object"}
|
||||
|
||||
response = await oai_client.chat.completions.create(
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
response_format=response_format, # type: ignore
|
||||
max_completion_tokens=max_tokens,
|
||||
@@ -511,7 +479,7 @@ async def llm_call(
|
||||
)
|
||||
try:
|
||||
resp = await client.messages.create(
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
system=sysprompt,
|
||||
messages=messages,
|
||||
max_tokens=max_tokens,
|
||||
@@ -575,7 +543,7 @@ async def llm_call(
|
||||
client = AsyncGroq(api_key=credentials.api_key.get_secret_value())
|
||||
response_format = {"type": "json_object"} if force_json_output else None
|
||||
response = await client.chat.completions.create(
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
response_format=response_format, # type: ignore
|
||||
max_tokens=max_tokens,
|
||||
@@ -597,7 +565,7 @@ async def llm_call(
|
||||
sys_messages = [p["content"] for p in prompt if p["role"] == "system"]
|
||||
usr_messages = [p["content"] for p in prompt if p["role"] != "system"]
|
||||
response = await client.generate(
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
prompt=f"{sys_messages}\n\n{usr_messages}",
|
||||
stream=False,
|
||||
options={"num_ctx": max_tokens},
|
||||
@@ -627,7 +595,7 @@ async def llm_call(
|
||||
"HTTP-Referer": "https://agpt.co",
|
||||
"X-Title": "AutoGPT",
|
||||
},
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
max_tokens=max_tokens,
|
||||
tools=tools_param, # type: ignore
|
||||
@@ -669,7 +637,7 @@ async def llm_call(
|
||||
"HTTP-Referer": "https://agpt.co",
|
||||
"X-Title": "AutoGPT",
|
||||
},
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
max_tokens=max_tokens,
|
||||
tools=tools_param, # type: ignore
|
||||
@@ -696,7 +664,7 @@ async def llm_call(
|
||||
reasoning=reasoning,
|
||||
)
|
||||
elif provider == "aiml_api":
|
||||
client = openai.OpenAI(
|
||||
client = openai.AsyncOpenAI(
|
||||
base_url="https://api.aimlapi.com/v2",
|
||||
api_key=credentials.api_key.get_secret_value(),
|
||||
default_headers={
|
||||
@@ -706,8 +674,8 @@ async def llm_call(
|
||||
},
|
||||
)
|
||||
|
||||
completion = client.chat.completions.create(
|
||||
model=llm_model.value,
|
||||
completion = await client.chat.completions.create(
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
@@ -739,7 +707,7 @@ async def llm_call(
|
||||
)
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model=llm_model.value,
|
||||
model=model_to_use,
|
||||
messages=prompt, # type: ignore
|
||||
response_format=response_format, # type: ignore
|
||||
max_tokens=max_tokens,
|
||||
@@ -790,9 +758,10 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
force_json_output: bool = SchemaField(
|
||||
title="Restrict LLM to pure JSON output",
|
||||
@@ -855,7 +824,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
|
||||
input_schema=AIStructuredResponseGeneratorBlock.Input,
|
||||
output_schema=AIStructuredResponseGeneratorBlock.Output,
|
||||
test_input={
|
||||
"model": LlmModel.GPT4O,
|
||||
"model": "gpt-4o", # Using string value - enum accepts any model slug dynamically
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
"expected_format": {
|
||||
"key1": "value1",
|
||||
@@ -1221,9 +1190,10 @@ class AITextGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
credentials: AICredentials = AICredentialsField()
|
||||
sys_prompt: str = SchemaField(
|
||||
@@ -1317,8 +1287,9 @@ class AITextSummarizerBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for summarizing the text.",
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
focus: str = SchemaField(
|
||||
title="Focus",
|
||||
@@ -1534,8 +1505,9 @@ class AIConversationBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for the conversation.",
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
credentials: AICredentials = AICredentialsField()
|
||||
max_tokens: int | None = SchemaField(
|
||||
@@ -1572,7 +1544,7 @@ class AIConversationBlock(AIBlockBase):
|
||||
},
|
||||
{"role": "user", "content": "Where was it played?"},
|
||||
],
|
||||
"model": LlmModel.GPT4O,
|
||||
"model": "gpt-4o", # Using string value - enum accepts any model slug dynamically
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
@@ -1635,9 +1607,10 @@ class AIListGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=LlmModel.GPT4O,
|
||||
default_factory=LlmModel.default,
|
||||
description="The language model to use for generating the list.",
|
||||
advanced=True,
|
||||
json_schema_extra=llm_model_schema_extra(),
|
||||
)
|
||||
credentials: AICredentials = AICredentialsField()
|
||||
max_retries: int = SchemaField(
|
||||
@@ -1692,7 +1665,7 @@ class AIListGeneratorBlock(AIBlockBase):
|
||||
"drawing explorers to uncover its mysteries. Each planet showcases the limitless possibilities of "
|
||||
"fictional worlds."
|
||||
),
|
||||
"model": LlmModel.GPT4O,
|
||||
"model": "gpt-4o", # Using string value - enum accepts any model slug dynamically
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
"max_retries": 3,
|
||||
"force_json_output": False,
File diff suppressed because it is too large
@@ -18,6 +18,7 @@ from backend.data.model import (
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.request import DEFAULT_USER_AGENT
|
||||
|
||||
|
||||
class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
@@ -39,17 +40,27 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
output_schema=GetWikipediaSummaryBlock.Output,
|
||||
test_input={"topic": "Artificial Intelligence"},
|
||||
test_output=("summary", "summary content"),
|
||||
test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
|
||||
test_mock={
|
||||
"get_request": lambda url, headers, json: {"extract": "summary content"}
|
||||
},
|
||||
)
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
topic = input_data.topic
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
|
||||
# URL-encode the topic to handle spaces and special characters
|
||||
encoded_topic = quote(topic, safe="")
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"
|
||||
|
||||
# Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
|
||||
# - User-Agent: Required, must identify the bot
|
||||
# - Accept-Encoding: gzip recommended to reduce bandwidth
|
||||
headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
}
|
||||
|
||||
# Note: User-Agent is now automatically set by the request library
|
||||
# to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
|
||||
try:
|
||||
response = await self.get_request(url, json=True)
|
||||
response = await self.get_request(url, headers=headers, json=True)
|
||||
if "extract" not in response:
|
||||
raise ValueError(f"Unable to parse Wikipedia response: {response}")
|
||||
yield "summary", response["extract"]
@@ -226,9 +226,10 @@ class SmartDecisionMakerBlock(Block):
|
||||
)
|
||||
model: llm.LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=llm.LlmModel.GPT4O,
|
||||
default_factory=llm.LlmModel.default,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
json_schema_extra=llm.llm_model_schema_extra(),
|
||||
)
|
||||
credentials: llm.AICredentials = llm.AICredentialsField()
|
||||
multiple_tool_calls: bool = SchemaField(
|
||||
@@ -391,8 +392,12 @@ class SmartDecisionMakerBlock(Block):
|
||||
"""
|
||||
block = sink_node.block
|
||||
|
||||
# Use custom name from node metadata if set, otherwise fall back to block.name
|
||||
custom_name = sink_node.metadata.get("customized_name")
|
||||
tool_name = custom_name if custom_name else block.name
|
||||
|
||||
tool_function: dict[str, Any] = {
|
||||
"name": SmartDecisionMakerBlock.cleanup(block.name),
|
||||
"name": SmartDecisionMakerBlock.cleanup(tool_name),
|
||||
"description": block.description,
|
||||
}
|
||||
sink_block_input_schema = block.input_schema
|
||||
@@ -489,14 +494,24 @@ class SmartDecisionMakerBlock(Block):
|
||||
f"Sink graph metadata not found: {graph_id} {graph_version}"
|
||||
)
|
||||
|
||||
# Use custom name from node metadata if set, otherwise fall back to graph name
|
||||
custom_name = sink_node.metadata.get("customized_name")
|
||||
tool_name = custom_name if custom_name else sink_graph_meta.name
|
||||
|
||||
tool_function: dict[str, Any] = {
|
||||
"name": SmartDecisionMakerBlock.cleanup(sink_graph_meta.name),
|
||||
"name": SmartDecisionMakerBlock.cleanup(tool_name),
|
||||
"description": sink_graph_meta.description,
|
||||
}
|
||||
|
||||
properties = {}
|
||||
field_mapping = {}
|
||||
|
||||
for link in links:
|
||||
field_name = link.sink_name
|
||||
|
||||
clean_field_name = SmartDecisionMakerBlock.cleanup(field_name)
|
||||
field_mapping[clean_field_name] = field_name
|
||||
|
||||
sink_block_input_schema = sink_node.input_default["input_schema"]
|
||||
sink_block_properties = sink_block_input_schema.get("properties", {}).get(
|
||||
link.sink_name, {}
|
||||
@@ -506,7 +521,7 @@ class SmartDecisionMakerBlock(Block):
|
||||
if "description" in sink_block_properties
|
||||
else f"The {link.sink_name} of the tool"
|
||||
)
|
||||
properties[link.sink_name] = {
|
||||
properties[clean_field_name] = {
|
||||
"type": "string",
|
||||
"description": description,
|
||||
"default": json.dumps(sink_block_properties.get("default", None)),
|
||||
@@ -519,7 +534,7 @@ class SmartDecisionMakerBlock(Block):
|
||||
"strict": True,
|
||||
}
|
||||
|
||||
# Store node info for later use in output processing
|
||||
tool_function["_field_mapping"] = field_mapping
|
||||
tool_function["_sink_node_id"] = sink_node.id
|
||||
|
||||
return {"type": "function", "function": tool_function}
|
||||
@@ -975,10 +990,28 @@ class SmartDecisionMakerBlock(Block):
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
execution_processor: "ExecutionProcessor",
|
||||
nodes_to_skip: set[str] | None = None,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
|
||||
tool_functions = await self._create_tool_node_signatures(node_id)
|
||||
original_tool_count = len(tool_functions)
|
||||
|
||||
# Filter out tools for nodes that should be skipped (e.g., missing optional credentials)
|
||||
if nodes_to_skip:
|
||||
tool_functions = [
|
||||
tf
|
||||
for tf in tool_functions
|
||||
if tf.get("function", {}).get("_sink_node_id") not in nodes_to_skip
|
||||
]
|
||||
|
||||
# Only raise error if we had tools but they were all filtered out
|
||||
if original_tool_count > 0 and not tool_functions:
|
||||
raise ValueError(
|
||||
"No available tools to execute - all downstream nodes are unavailable "
|
||||
"(possibly due to missing optional credentials)"
|
||||
)
|
||||
|
||||
yield "tool_functions", json.dumps(tool_functions)
|
||||
|
||||
conversation_history = input_data.conversation_history or []
|
||||
@@ -1129,8 +1162,9 @@ class SmartDecisionMakerBlock(Block):
|
||||
original_field_name = field_mapping.get(clean_arg_name, clean_arg_name)
|
||||
arg_value = tool_args.get(clean_arg_name)
|
||||
|
||||
sanitized_arg_name = self.cleanup(original_field_name)
|
||||
emit_key = f"tools_^_{sink_node_id}_~_{sanitized_arg_name}"
|
||||
# Use original_field_name directly (not sanitized) to match link sink_name
|
||||
# The field_mapping already translates from LLM's cleaned names to original names
|
||||
emit_key = f"tools_^_{sink_node_id}_~_{original_field_name}"
|
||||
|
||||
logger.debug(
|
||||
"[SmartDecisionMakerBlock|geid:%s|neid:%s] emit %s",
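Aside (illustrative, not part of the diff): the emit-key change above keys tool outputs by the original sink field name instead of its sanitized form, so the key matches the link's sink_name directly. With the hypothetical values used by the tests later in this diff:

    sink_node_id = "test-sink-node-id"
    original_field_name = "max_keyword_difficulty"
    emit_key = f"tools_^_{sink_node_id}_~_{original_field_name}"
    # -> "tools_^_test-sink-node-id_~_max_keyword_difficulty"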
@@ -10,13 +10,13 @@ import stagehand.main
|
||||
from stagehand import Stagehand
|
||||
|
||||
from backend.blocks.llm import (
|
||||
MODEL_METADATA,
|
||||
AICredentials,
|
||||
AICredentialsField,
|
||||
LlmModel,
|
||||
ModelMetadata,
|
||||
)
|
||||
from backend.blocks.stagehand._config import stagehand as stagehand_provider
|
||||
from backend.data import llm_registry
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
Block,
|
||||
@@ -91,7 +91,7 @@ class StagehandRecommendedLlmModel(str, Enum):
|
||||
Returns the provider name for the model in the required format for Stagehand:
|
||||
provider/model_name
|
||||
"""
|
||||
model_metadata = MODEL_METADATA[LlmModel(self.value)]
|
||||
model_metadata = self.metadata
|
||||
model_name = self.value
|
||||
|
||||
if len(model_name.split("/")) == 1 and not self.value.startswith(
|
||||
@@ -107,19 +107,23 @@ class StagehandRecommendedLlmModel(str, Enum):
|
||||
|
||||
@property
|
||||
def provider(self) -> str:
|
||||
return MODEL_METADATA[LlmModel(self.value)].provider
|
||||
return self.metadata.provider
|
||||
|
||||
@property
|
||||
def metadata(self) -> ModelMetadata:
|
||||
return MODEL_METADATA[LlmModel(self.value)]
|
||||
metadata = llm_registry.get_llm_model_metadata(self.value)
|
||||
if metadata:
|
||||
return metadata
|
||||
# Fallback to LlmModel enum if registry lookup fails
|
||||
return LlmModel(self.value).metadata
|
||||
|
||||
@property
|
||||
def context_window(self) -> int:
|
||||
return MODEL_METADATA[LlmModel(self.value)].context_window
|
||||
return self.metadata.context_window
|
||||
|
||||
@property
|
||||
def max_output_tokens(self) -> int | None:
|
||||
return MODEL_METADATA[LlmModel(self.value)].max_output_tokens
|
||||
return self.metadata.max_output_tokens
|
||||
|
||||
|
||||
class StagehandObserveBlock(Block):
|
||||
|
||||
@@ -196,6 +196,15 @@ class TestXMLParserBlockSecurity:
|
||||
async for _ in block.run(XMLParserBlock.Input(input_xml=large_xml)):
|
||||
pass
|
||||
|
||||
async def test_rejects_text_outside_root(self):
|
||||
"""Ensure parser surfaces readable errors for invalid root text."""
|
||||
block = XMLParserBlock()
|
||||
invalid_xml = "<root><child>value</child></root> trailing"
|
||||
|
||||
with pytest.raises(ValueError, match="text outside the root element"):
|
||||
async for _ in block.run(XMLParserBlock.Input(input_xml=invalid_xml)):
|
||||
pass
|
||||
|
||||
|
||||
class TestStoreMediaFileSecurity:
|
||||
"""Test file storage security limits."""
|
||||
|
||||
@@ -28,7 +28,7 @@ class TestLLMStatsTracking:
|
||||
|
||||
response = await llm.llm_call(
|
||||
credentials=llm.TEST_CREDENTIALS,
|
||||
llm_model=llm.LlmModel.GPT4O,
|
||||
llm_model=llm.DEFAULT_LLM_MODEL,
|
||||
prompt=[{"role": "user", "content": "Hello"}],
|
||||
max_tokens=100,
|
||||
)
|
||||
@@ -65,7 +65,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test prompt",
|
||||
expected_format={"key1": "desc1", "key2": "desc2"},
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
|
||||
)
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestLLMStatsTracking:
|
||||
# Run the block
|
||||
input_data = llm.AITextGeneratorBlock.Input(
|
||||
prompt="Generate text",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -170,7 +170,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test prompt",
|
||||
expected_format={"key1": "desc1", "key2": "desc2"},
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2,
|
||||
)
|
||||
@@ -228,7 +228,7 @@ class TestLLMStatsTracking:
|
||||
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text=long_text,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=100, # Small chunks
|
||||
chunk_overlap=10,
|
||||
@@ -299,7 +299,7 @@ class TestLLMStatsTracking:
|
||||
# Test with very short text (should only need 1 chunk + 1 final summary)
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="This is a short text.",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=1000, # Large enough to avoid chunking
|
||||
)
|
||||
@@ -346,7 +346,7 @@ class TestLLMStatsTracking:
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
{"role": "user", "content": "How are you?"},
|
||||
],
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -387,7 +387,7 @@ class TestLLMStatsTracking:
|
||||
# Run the block
|
||||
input_data = llm.AIListGeneratorBlock.Input(
|
||||
focus="test items",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_retries=3,
|
||||
)
|
||||
@@ -469,7 +469,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test",
|
||||
expected_format={"result": "desc"},
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -513,7 +513,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
style=llm.SummaryStyle.BULLET_POINTS,
|
||||
)
|
||||
@@ -558,7 +558,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
style=llm.SummaryStyle.BULLET_POINTS,
|
||||
max_tokens=1000,
|
||||
@@ -593,7 +593,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -623,7 +623,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=1000,
|
||||
)
|
||||
@@ -654,7 +654,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ async def test_smart_decision_maker_tracks_llm_stats():
|
||||
# Create test input
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Should I continue with this task?",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -335,7 +335,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2, # Set retry to 2 for testing
|
||||
agent_mode_max_iterations=0,
|
||||
@@ -402,7 +402,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -462,7 +462,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -526,7 +526,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -648,7 +648,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test prompt",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2,
|
||||
agent_mode_max_iterations=0,
|
||||
@@ -722,7 +722,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
):
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Simple prompt",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -778,7 +778,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
):
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Another test",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -931,7 +931,7 @@ async def test_smart_decision_maker_agent_mode():
|
||||
# Test agent mode with max_iterations = 3
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Complete this task using tools",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=3, # Enable agent mode with 3 max iterations
|
||||
)
|
||||
@@ -1020,7 +1020,7 @@ async def test_smart_decision_maker_traditional_mode_default():
|
||||
# Test default behavior (traditional mode)
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test prompt",
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0, # Traditional mode
|
||||
)
|
||||
@@ -1057,3 +1057,153 @@ async def test_smart_decision_maker_traditional_mode_default():
|
||||
) # Should yield individual tool parameters
|
||||
assert "tools_^_test-sink-node-id_~_max_keyword_difficulty" in outputs
|
||||
assert "conversations" in outputs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_uses_customized_name_for_blocks():
|
||||
"""Test that SmartDecisionMakerBlock uses customized_name from node metadata for tool names."""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from backend.blocks.basic import StoreValueBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node with customized_name in metadata
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-node-id"
|
||||
mock_node.block_id = StoreValueBlock().id
|
||||
mock_node.metadata = {"customized_name": "My Custom Tool Name"}
|
||||
mock_node.block = StoreValueBlock()
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "input"
|
||||
|
||||
# Call the function directly
|
||||
result = await SmartDecisionMakerBlock._create_block_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the customized name (cleaned up)
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "my_custom_tool_name" # Cleaned version
|
||||
assert result["function"]["_sink_node_id"] == "test-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_falls_back_to_block_name():
|
||||
"""Test that SmartDecisionMakerBlock falls back to block.name when no customized_name."""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from backend.blocks.basic import StoreValueBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node without customized_name
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-node-id"
|
||||
mock_node.block_id = StoreValueBlock().id
|
||||
mock_node.metadata = {} # No customized_name
|
||||
mock_node.block = StoreValueBlock()
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "input"
|
||||
|
||||
# Call the function directly
|
||||
result = await SmartDecisionMakerBlock._create_block_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the block's default name
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "storevalueblock" # Default block name cleaned
|
||||
assert result["function"]["_sink_node_id"] == "test-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_uses_customized_name_for_agents():
|
||||
"""Test that SmartDecisionMakerBlock uses customized_name from metadata for agent nodes."""
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node with customized_name in metadata
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-agent-node-id"
|
||||
mock_node.metadata = {"customized_name": "My Custom Agent"}
|
||||
mock_node.input_default = {
|
||||
"graph_id": "test-graph-id",
|
||||
"graph_version": 1,
|
||||
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
|
||||
}
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "test_input"
|
||||
|
||||
# Mock the database client
|
||||
mock_graph_meta = MagicMock()
|
||||
mock_graph_meta.name = "Original Agent Name"
|
||||
mock_graph_meta.description = "Agent description"
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
|
||||
|
||||
with patch(
|
||||
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
|
||||
return_value=mock_db_client,
|
||||
):
|
||||
result = await SmartDecisionMakerBlock._create_agent_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the customized name (cleaned up)
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "my_custom_agent" # Cleaned version
|
||||
assert result["function"]["_sink_node_id"] == "test-agent-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_agent_falls_back_to_graph_name():
|
||||
"""Test that agent node falls back to graph name when no customized_name."""
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node without customized_name
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-agent-node-id"
|
||||
mock_node.metadata = {} # No customized_name
|
||||
mock_node.input_default = {
|
||||
"graph_id": "test-graph-id",
|
||||
"graph_version": 1,
|
||||
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
|
||||
}
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "test_input"
|
||||
|
||||
# Mock the database client
|
||||
mock_graph_meta = MagicMock()
|
||||
mock_graph_meta.name = "Original Agent Name"
|
||||
mock_graph_meta.description = "Agent description"
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
|
||||
|
||||
with patch(
|
||||
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
|
||||
return_value=mock_db_client,
|
||||
):
|
||||
result = await SmartDecisionMakerBlock._create_agent_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the graph's default name
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "original_agent_name" # Graph name cleaned
|
||||
assert result["function"]["_sink_node_id"] == "test-agent-node-id"
|
||||
|
||||
@@ -15,6 +15,7 @@ async def test_smart_decision_maker_handles_dynamic_dict_fields():
|
||||
mock_node.block = CreateDictionaryBlock()
|
||||
mock_node.block_id = CreateDictionaryBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic dictionary fields
|
||||
mock_links = [
|
||||
@@ -77,6 +78,7 @@ async def test_smart_decision_maker_handles_dynamic_list_fields():
|
||||
mock_node.block = AddToListBlock()
|
||||
mock_node.block_id = AddToListBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic list fields
|
||||
mock_links = [
|
||||
|
||||
@@ -44,6 +44,7 @@ async def test_create_block_function_signature_with_dict_fields():
|
||||
mock_node.block = CreateDictionaryBlock()
|
||||
mock_node.block_id = CreateDictionaryBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic dictionary fields (source sanitized, sink original)
|
||||
mock_links = [
|
||||
@@ -106,6 +107,7 @@ async def test_create_block_function_signature_with_list_fields():
|
||||
mock_node.block = AddToListBlock()
|
||||
mock_node.block_id = AddToListBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic list fields
|
||||
mock_links = [
|
||||
@@ -159,6 +161,7 @@ async def test_create_block_function_signature_with_object_fields():
|
||||
mock_node.block = MatchTextPatternBlock()
|
||||
mock_node.block_id = MatchTextPatternBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic object fields
|
||||
mock_links = [
|
||||
@@ -208,11 +211,13 @@ async def test_create_tool_node_signatures():
|
||||
mock_dict_node.block = CreateDictionaryBlock()
|
||||
mock_dict_node.block_id = CreateDictionaryBlock().id
|
||||
mock_dict_node.input_default = {}
|
||||
mock_dict_node.metadata = {}
|
||||
|
||||
mock_list_node = Mock()
|
||||
mock_list_node.block = AddToListBlock()
|
||||
mock_list_node.block_id = AddToListBlock().id
|
||||
mock_list_node.input_default = {}
|
||||
mock_list_node.metadata = {}
|
||||
|
||||
# Mock links with dynamic fields
|
||||
dict_link1 = Mock(
|
||||
@@ -373,7 +378,7 @@ async def test_output_yielding_with_dynamic_fields():
|
||||
input_data = block.input_schema(
|
||||
prompt="Create a user dictionary",
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
agent_mode_max_iterations=0, # Use traditional mode to test output yielding
|
||||
)
|
||||
|
||||
@@ -423,6 +428,7 @@ async def test_mixed_regular_and_dynamic_fields():
|
||||
mock_node.block.name = "TestBlock"
|
||||
mock_node.block.description = "A test block"
|
||||
mock_node.block.input_schema = Mock()
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Mock the get_field_schema to return a proper schema for regular fields
|
||||
def get_field_schema(field_name):
|
||||
@@ -594,7 +600,7 @@ async def test_validation_errors_dont_pollute_conversation():
|
||||
input_data = block.input_schema(
|
||||
prompt="Test prompt",
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
retry=3, # Allow retries
|
||||
agent_mode_max_iterations=1,
|
||||
)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .blog import WordPressCreatePostBlock
|
||||
from .blog import WordPressCreatePostBlock, WordPressGetAllPostsBlock
|
||||
|
||||
__all__ = ["WordPressCreatePostBlock"]
|
||||
__all__ = ["WordPressCreatePostBlock", "WordPressGetAllPostsBlock"]
|
||||
|
||||
@@ -161,7 +161,7 @@ async def oauth_exchange_code_for_tokens(
|
||||
grant_type="authorization_code",
|
||||
).model_dump(exclude_none=True)
|
||||
|
||||
response = await Requests().post(
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -205,7 +205,7 @@ async def oauth_refresh_tokens(
|
||||
grant_type="refresh_token",
|
||||
).model_dump(exclude_none=True)
|
||||
|
||||
response = await Requests().post(
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -252,7 +252,7 @@ async def validate_token(
|
||||
"token": token,
|
||||
}
|
||||
|
||||
response = await Requests().get(
|
||||
response = await Requests(raise_for_status=False).get(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token-info",
|
||||
params=params,
|
||||
)
|
||||
@@ -296,7 +296,7 @@ async def make_api_request(
|
||||
|
||||
url = f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}"
|
||||
|
||||
request_method = getattr(Requests(), method.lower())
|
||||
request_method = getattr(Requests(raise_for_status=False), method.lower())
|
||||
response = await request_method(
|
||||
url,
|
||||
headers=headers,
|
||||
@@ -476,6 +476,7 @@ async def create_post(
|
||||
data["tags"] = ",".join(str(t) for t in data["tags"])
|
||||
|
||||
# Make the API request
|
||||
site = normalize_site(site)
|
||||
endpoint = f"/rest/v1.1/sites/{site}/posts/new"
|
||||
|
||||
headers = {
|
||||
@@ -483,7 +484,7 @@ async def create_post(
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
}
|
||||
|
||||
response = await Requests().post(
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -499,3 +500,132 @@ async def create_post(
|
||||
)
|
||||
error_message = error_data.get("message", response.text)
|
||||
raise ValueError(f"Failed to create post: {response.status} - {error_message}")
|
||||
|
||||
|
||||
class Post(BaseModel):
|
||||
"""Response model for individual posts in a posts list response.
|
||||
|
||||
This is a simplified version compared to PostResponse, as the list endpoint
|
||||
returns less detailed information than the create/get single post endpoints.
|
||||
"""
|
||||
|
||||
ID: int
|
||||
site_ID: int
|
||||
author: PostAuthor
|
||||
date: datetime
|
||||
modified: datetime
|
||||
title: str
|
||||
URL: str
|
||||
short_URL: str
|
||||
content: str | None = None
|
||||
excerpt: str | None = None
|
||||
slug: str
|
||||
guid: str
|
||||
status: str
|
||||
sticky: bool
|
||||
password: str | None = ""
|
||||
parent: Union[Dict[str, Any], bool, None] = None
|
||||
type: str
|
||||
discussion: Dict[str, Union[str, bool, int]] | None = None
|
||||
likes_enabled: bool | None = None
|
||||
sharing_enabled: bool | None = None
|
||||
like_count: int | None = None
|
||||
i_like: bool | None = None
|
||||
is_reblogged: bool | None = None
|
||||
is_following: bool | None = None
|
||||
global_ID: str | None = None
|
||||
featured_image: str | None = None
|
||||
post_thumbnail: Dict[str, Any] | None = None
|
||||
format: str | None = None
|
||||
geo: Union[Dict[str, Any], bool, None] = None
|
||||
menu_order: int | None = None
|
||||
page_template: str | None = None
|
||||
publicize_URLs: List[str] | None = None
|
||||
terms: Dict[str, Dict[str, Any]] | None = None
|
||||
tags: Dict[str, Dict[str, Any]] | None = None
|
||||
categories: Dict[str, Dict[str, Any]] | None = None
|
||||
attachments: Dict[str, Dict[str, Any]] | None = None
|
||||
attachment_count: int | None = None
|
||||
metadata: List[Dict[str, Any]] | None = None
|
||||
meta: Dict[str, Any] | None = None
|
||||
capabilities: Dict[str, bool] | None = None
|
||||
revisions: List[int] | None = None
|
||||
other_URLs: Dict[str, Any] | None = None
|
||||
|
||||
|
||||
class PostsResponse(BaseModel):
|
||||
"""Response model for WordPress posts list."""
|
||||
|
||||
found: int
|
||||
posts: List[Post]
|
||||
meta: Dict[str, Any]
|
||||
|
||||
|
||||
def normalize_site(site: str) -> str:
|
||||
"""
|
||||
Normalize a site identifier by stripping protocol and trailing slashes.
|
||||
|
||||
Args:
|
||||
site: Site URL, domain, or ID (e.g., "https://myblog.wordpress.com/", "myblog.wordpress.com", "123456789")
|
||||
|
||||
Returns:
|
||||
Normalized site identifier (domain or ID only)
|
||||
"""
|
||||
site = site.strip()
|
||||
if site.startswith("https://"):
|
||||
site = site[8:]
|
||||
elif site.startswith("http://"):
|
||||
site = site[7:]
|
||||
return site.rstrip("/")
|
||||
|
||||
|
||||
async def get_posts(
|
||||
credentials: Credentials,
|
||||
site: str,
|
||||
status: PostStatus | None = None,
|
||||
number: int = 100,
|
||||
offset: int = 0,
|
||||
) -> PostsResponse:
|
||||
"""
|
||||
Get posts from a WordPress site.
|
||||
|
||||
Args:
|
||||
credentials: OAuth credentials
|
||||
site: Site ID or domain (e.g., "myblog.wordpress.com" or "123456789")
|
||||
status: Filter by post status using PostStatus enum, or None for all
|
||||
number: Number of posts to retrieve (max 100)
|
||||
offset: Number of posts to skip (for pagination)
|
||||
|
||||
Returns:
|
||||
PostsResponse with the list of posts
|
||||
"""
|
||||
site = normalize_site(site)
|
||||
endpoint = f"/rest/v1.1/sites/{site}/posts"
|
||||
|
||||
headers = {
|
||||
"Authorization": credentials.auth_header(),
|
||||
}
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"number": max(1, min(number, 100)), # 1–100 posts per request
|
||||
"offset": offset,
|
||||
}
|
||||
|
||||
if status:
|
||||
params["status"] = status.value
|
||||
response = await Requests(raise_for_status=False).get(
|
||||
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
|
||||
headers=headers,
|
||||
params=params,
|
||||
)
|
||||
|
||||
if response.ok:
|
||||
return PostsResponse.model_validate(response.json())
|
||||
|
||||
error_data = (
|
||||
response.json()
|
||||
if response.headers.get("content-type", "").startswith("application/json")
|
||||
else {}
|
||||
)
|
||||
error_message = error_data.get("message", response.text)
|
||||
raise ValueError(f"Failed to get posts: {response.status} - {error_message}")
|
||||
|
||||
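Below is a minimal usage sketch for the new get_posts helper; the fetch_all_published wrapper, the creds object, and the example domain are assumptions for illustration and are not part of this changeset.

# Hypothetical pagination loop over get_posts(); "creds" and the site value
# are placeholders, not part of the diff above.
async def fetch_all_published(creds, site: str = "myblog.wordpress.com"):
    all_posts = []
    offset = 0
    while True:
        page = await get_posts(
            creds, site, status=PostStatus.PUBLISH, number=100, offset=offset
        )
        all_posts.extend(page.posts)
        offset += len(page.posts)
        # Stop once every reported post has been fetched or a page comes back empty.
        if not page.posts or offset >= page.found:
            break
    return all_posts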
@@ -9,7 +9,15 @@ from backend.sdk import (
    SchemaField,
)

from ._api import CreatePostRequest, PostResponse, PostStatus, create_post
from ._api import (
    CreatePostRequest,
    Post,
    PostResponse,
    PostsResponse,
    PostStatus,
    create_post,
    get_posts,
)
from ._config import wordpress

@@ -49,8 +57,15 @@ class WordPressCreatePostBlock(Block):
        media_urls: list[str] = SchemaField(
            description="URLs of images to sideload and attach to the post", default=[]
        )
        publish_as_draft: bool = SchemaField(
            description="If True, publishes the post as a draft. If False, publishes it publicly.",
            default=False,
        )

    class Output(BlockSchemaOutput):
        site: str = SchemaField(
            description="The site ID or domain (pass-through for chaining with other blocks)"
        )
        post_id: int = SchemaField(description="The ID of the created post")
        post_url: str = SchemaField(description="The full URL of the created post")
        short_url: str = SchemaField(description="The shortened wp.me URL")
@@ -78,7 +93,9 @@ class WordPressCreatePostBlock(Block):
            tags=input_data.tags,
            featured_image=input_data.featured_image,
            media_urls=input_data.media_urls,
            status=PostStatus.PUBLISH,
            status=(
                PostStatus.DRAFT if input_data.publish_as_draft else PostStatus.PUBLISH
            ),
        )

        post_response: PostResponse = await create_post(
@@ -87,7 +104,69 @@ class WordPressCreatePostBlock(Block):
            post_data=post_request,
        )

        yield "site", input_data.site
        yield "post_id", post_response.ID
        yield "post_url", post_response.URL
        yield "short_url", post_response.short_URL
        yield "post_data", post_response.model_dump()


class WordPressGetAllPostsBlock(Block):
    """
    Fetches all posts from a WordPress.com site or Jetpack-enabled site.
    Supports filtering by status and pagination.
    """

    class Input(BlockSchemaInput):
        credentials: CredentialsMetaInput = wordpress.credentials_field()
        site: str = SchemaField(
            description="Site ID or domain (e.g., 'myblog.wordpress.com' or '123456789')"
        )
        status: PostStatus | None = SchemaField(
            description="Filter by post status, or None for all",
            default=None,
        )
        number: int = SchemaField(
            description="Number of posts to retrieve (max 100 per request)", default=20
        )
        offset: int = SchemaField(
            description="Number of posts to skip (for pagination)", default=0
        )

    class Output(BlockSchemaOutput):
        site: str = SchemaField(
            description="The site ID or domain (pass-through for chaining with other blocks)"
        )
        found: int = SchemaField(description="Total number of posts found")
        posts: list[Post] = SchemaField(
            description="List of post objects with their details"
        )
        post: Post = SchemaField(
            description="Individual post object (yielded for each post)"
        )

    def __init__(self):
        super().__init__(
            id="97728fa7-7f6f-4789-ba0c-f2c114119536",
            description="Fetch all posts from WordPress.com or Jetpack sites",
            categories={BlockCategory.SOCIAL},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: Credentials, **kwargs
    ) -> BlockOutput:
        posts_response: PostsResponse = await get_posts(
            credentials=credentials,
            site=input_data.site,
            status=input_data.status,
            number=input_data.number,
            offset=input_data.offset,
        )

        yield "site", input_data.site
        yield "found", posts_response.found
        yield "posts", posts_response.posts
        for post in posts_response.posts:
            yield "post", post
@@ -1,5 +1,5 @@
from gravitasml.parser import Parser
from gravitasml.token import tokenize
from gravitasml.token import Token, tokenize

from backend.data.block import Block, BlockOutput, BlockSchemaInput, BlockSchemaOutput
from backend.data.model import SchemaField
@@ -25,6 +25,38 @@ class XMLParserBlock(Block):
            ],
        )

    @staticmethod
    def _validate_tokens(tokens: list[Token]) -> None:
        """Ensure the XML has a single root element and no stray text."""
        if not tokens:
            raise ValueError("XML input is empty.")

        depth = 0
        root_seen = False

        for token in tokens:
            if token.type == "TAG_OPEN":
                if depth == 0 and root_seen:
                    raise ValueError("XML must have a single root element.")
                depth += 1
                if depth == 1:
                    root_seen = True
            elif token.type == "TAG_CLOSE":
                depth -= 1
                if depth < 0:
                    raise SyntaxError("Unexpected closing tag in XML input.")
            elif token.type in {"TEXT", "ESCAPE"}:
                if depth == 0 and token.value:
                    raise ValueError(
                        "XML contains text outside the root element; "
                        "wrap content in a single root tag."
                    )

        if depth != 0:
            raise SyntaxError("Unclosed tag detected in XML input.")
        if not root_seen:
            raise ValueError("XML must include a root element.")

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        # Security fix: Add size limits to prevent XML bomb attacks
        MAX_XML_SIZE = 10 * 1024 * 1024  # 10MB limit for XML input
@@ -35,7 +67,9 @@ class XMLParserBlock(Block):
            )

        try:
            tokens = tokenize(input_data.input_xml)
            tokens = list(tokenize(input_data.input_xml))
            self._validate_tokens(tokens)

            parser = Parser(tokens)
            parsed_result = parser.parse()
            yield "parsed_xml", parsed_result
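A small sketch of what the new _validate_tokens guard accepts and rejects; the sample XML strings are invented, and it assumes gravitasml tokenizes them into the TAG_OPEN/TEXT/TAG_CLOSE tokens the block checks for.

# Illustrative only, run in the context of the block module above.
from gravitasml.token import tokenize

good = "<root><item>ok</item></root>"  # single root -> passes validation
bad = "<a>1</a><b>2</b>"               # two roots -> should raise ValueError

XMLParserBlock._validate_tokens(list(tokenize(good)))  # no exception expected
try:
    XMLParserBlock._validate_tokens(list(tokenize(bad)))
except ValueError as err:
    print(err)  # "XML must have a single root element."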
@@ -111,6 +111,8 @@ class TranscribeYoutubeVideoBlock(Block):
            return parsed_url.path.split("/")[2]
        if parsed_url.path[:3] == "/v/":
            return parsed_url.path.split("/")[2]
        if parsed_url.path.startswith("/shorts/"):
            return parsed_url.path.split("/")[2]
        raise ValueError(f"Invalid YouTube URL: {url}")

    def get_transcript(
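For reference, a standalone check of the newly supported Shorts URLs; it mirrors the branch above without assuming the enclosing method's name, and the URL is a made-up example.

# Hypothetical check of the new /shorts/ branch, independent of the block class.
from urllib.parse import urlparse

url = "https://www.youtube.com/shorts/dQw4w9WgXcQ"  # example URL, not from the diff
parsed_url = urlparse(url)
if parsed_url.path.startswith("/shorts/"):
    # "/shorts/<id>" splits into ["", "shorts", "<id>"], so index 2 is the video ID.
    print(parsed_url.path.split("/")[2])  # -> "dQw4w9WgXcQ"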
@@ -25,6 +25,7 @@ from prisma.models import AgentBlock
|
||||
from prisma.types import AgentBlockCreateInput
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.llm_registry import update_schema_with_llm_registry
|
||||
from backend.data.model import NodeExecutionStats
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util import json
|
||||
@@ -50,6 +51,8 @@ from .model import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
from .graph import Link
|
||||
|
||||
app_config = Config()
|
||||
@@ -141,35 +144,59 @@ class BlockInfo(BaseModel):
|
||||
|
||||
|
||||
class BlockSchema(BaseModel):
|
||||
cached_jsonschema: ClassVar[dict[str, Any]]
|
||||
cached_jsonschema: ClassVar[dict[str, Any] | None] = None
|
||||
|
||||
@classmethod
|
||||
def clear_schema_cache(cls) -> None:
|
||||
"""Clear the cached JSON schema for this class."""
|
||||
# Use None instead of {} because {} is truthy and would prevent regeneration
|
||||
cls.cached_jsonschema = None # type: ignore
|
||||
|
||||
@staticmethod
|
||||
def clear_all_schema_caches() -> None:
|
||||
"""Clear cached JSON schemas for all BlockSchema subclasses."""
|
||||
|
||||
def clear_recursive(cls: type) -> None:
|
||||
"""Recursively clear cache for class and all subclasses."""
|
||||
if hasattr(cls, "clear_schema_cache"):
|
||||
cls.clear_schema_cache()
|
||||
for subclass in cls.__subclasses__():
|
||||
clear_recursive(subclass)
|
||||
|
||||
clear_recursive(BlockSchema)
|
||||
|
||||
@classmethod
|
||||
def jsonschema(cls) -> dict[str, Any]:
|
||||
if cls.cached_jsonschema:
|
||||
return cls.cached_jsonschema
|
||||
# Generate schema if not cached
|
||||
if not cls.cached_jsonschema:
|
||||
model = jsonref.replace_refs(cls.model_json_schema(), merge_props=True)
|
||||
|
||||
model = jsonref.replace_refs(cls.model_json_schema(), merge_props=True)
|
||||
def ref_to_dict(obj):
|
||||
if isinstance(obj, dict):
|
||||
# OpenAPI <3.1 does not support sibling fields that has a $ref key
|
||||
# So sometimes, the schema has an "allOf"/"anyOf"/"oneOf" with 1 item.
|
||||
keys = {"allOf", "anyOf", "oneOf"}
|
||||
one_key = next(
|
||||
(k for k in keys if k in obj and len(obj[k]) == 1), None
|
||||
)
|
||||
if one_key:
|
||||
obj.update(obj[one_key][0])
|
||||
|
||||
def ref_to_dict(obj):
|
||||
if isinstance(obj, dict):
|
||||
# OpenAPI <3.1 does not support sibling fields that has a $ref key
|
||||
# So sometimes, the schema has an "allOf"/"anyOf"/"oneOf" with 1 item.
|
||||
keys = {"allOf", "anyOf", "oneOf"}
|
||||
one_key = next((k for k in keys if k in obj and len(obj[k]) == 1), None)
|
||||
if one_key:
|
||||
obj.update(obj[one_key][0])
|
||||
return {
|
||||
key: ref_to_dict(value)
|
||||
for key, value in obj.items()
|
||||
if not key.startswith("$") and key != one_key
|
||||
}
|
||||
elif isinstance(obj, list):
|
||||
return [ref_to_dict(item) for item in obj]
|
||||
|
||||
return {
|
||||
key: ref_to_dict(value)
|
||||
for key, value in obj.items()
|
||||
if not key.startswith("$") and key != one_key
|
||||
}
|
||||
elif isinstance(obj, list):
|
||||
return [ref_to_dict(item) for item in obj]
|
||||
return obj
|
||||
|
||||
return obj
|
||||
cls.cached_jsonschema = cast(dict[str, Any], ref_to_dict(model))
|
||||
|
||||
cls.cached_jsonschema = cast(dict[str, Any], ref_to_dict(model))
|
||||
# Always post-process to ensure LLM registry data is up-to-date
|
||||
# This refreshes model options and discriminator mappings even if schema was cached
|
||||
update_schema_with_llm_registry(cls.cached_jsonschema, cls)
|
||||
|
||||
return cls.cached_jsonschema
|
||||
|
||||
@@ -472,6 +499,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
self.block_type = block_type
|
||||
self.webhook_config = webhook_config
|
||||
self.execution_stats: NodeExecutionStats = NodeExecutionStats()
|
||||
self.requires_human_review: bool = False
|
||||
|
||||
if self.webhook_config:
|
||||
if isinstance(self.webhook_config, BlockWebhookConfig):
|
||||
@@ -614,7 +642,77 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
block_id=self.id,
|
||||
) from ex
|
||||
|
||||
async def is_block_exec_need_review(
|
||||
self,
|
||||
input_data: BlockInput,
|
||||
*,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: "ExecutionContext",
|
||||
**kwargs,
|
||||
) -> tuple[bool, BlockInput]:
|
||||
"""
|
||||
Check if this block execution needs human review and handle the review process.
|
||||
|
||||
Returns:
|
||||
Tuple of (should_pause, input_data_to_use)
|
||||
- should_pause: True if execution should be paused for review
|
||||
- input_data_to_use: The input data to use (may be modified by reviewer)
|
||||
"""
|
||||
# Skip review if not required or safe mode is disabled
|
||||
if not self.requires_human_review or not execution_context.safe_mode:
|
||||
return False, input_data
|
||||
|
||||
from backend.blocks.helpers.review import HITLReviewHelper
|
||||
|
||||
# Handle the review request and get decision
|
||||
decision = await HITLReviewHelper.handle_review_decision(
|
||||
input_data=input_data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=self.name,
|
||||
editable=True,
|
||||
)
|
||||
|
||||
if decision is None:
|
||||
# We're awaiting review - pause execution
|
||||
return True, input_data
|
||||
|
||||
if not decision.should_proceed:
|
||||
# Review was rejected, raise an error to stop execution
|
||||
raise BlockExecutionError(
|
||||
message=f"Block execution rejected by reviewer: {decision.message}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
|
||||
# Review was approved - use the potentially modified data
|
||||
# ReviewResult.data must be a dict for block inputs
|
||||
reviewed_data = decision.review_result.data
|
||||
if not isinstance(reviewed_data, dict):
|
||||
raise BlockExecutionError(
|
||||
message=f"Review data must be a dict for block input, got {type(reviewed_data).__name__}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
return False, reviewed_data
|
||||
|
||||
async def _execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
|
||||
# Check for review requirement and get potentially modified input data
|
||||
should_pause, input_data = await self.is_block_exec_need_review(
|
||||
input_data, **kwargs
|
||||
)
|
||||
if should_pause:
|
||||
return
|
||||
|
||||
# Validate the input data (original or reviewer-modified) once
|
||||
if error := self.input_schema.validate_data(input_data):
|
||||
raise BlockInputError(
|
||||
message=f"Unable to execute block with invalid input data: {error}",
|
||||
@@ -622,6 +720,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
block_id=self.id,
|
||||
)
|
||||
|
||||
# Use the validated input data
|
||||
async for output_name, output_data in self.run(
|
||||
self.input_schema(**{k: v for k, v in input_data.items() if v is not None}),
|
||||
**kwargs,
|
||||
@@ -785,6 +884,28 @@ def is_block_auth_configured(
|
||||
|
||||
|
||||
async def initialize_blocks() -> None:
|
||||
# Refresh LLM registry before initializing blocks so blocks can use registry data
|
||||
# This ensures the registry cache is populated even in executor context
|
||||
try:
|
||||
from backend.data import llm_registry
|
||||
from backend.data.block_cost_config import refresh_llm_costs
|
||||
|
||||
# Only refresh if we have DB access (check if Prisma is connected)
|
||||
from backend.data.db import is_connected
|
||||
|
||||
if is_connected():
|
||||
await llm_registry.refresh_llm_registry()
|
||||
refresh_llm_costs()
|
||||
logger.info("LLM registry refreshed during block initialization")
|
||||
else:
|
||||
logger.warning(
|
||||
"Prisma not connected, skipping LLM registry refresh during block initialization"
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Failed to refresh LLM registry during block initialization: %s", exc
|
||||
)
|
||||
|
||||
# First, sync all provider costs to blocks
|
||||
# Imported here to avoid circular import
|
||||
from backend.sdk.cost_integration import sync_all_provider_costs
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import logging
|
||||
from typing import Type
|
||||
|
||||
from backend.blocks.ai_image_customizer import AIImageCustomizerBlock, GeminiImageModel
|
||||
@@ -23,19 +24,18 @@ from backend.blocks.ideogram import IdeogramModelBlock
|
||||
from backend.blocks.jina.embeddings import JinaEmbeddingBlock
|
||||
from backend.blocks.jina.search import ExtractWebsiteContentBlock, SearchTheWebBlock
|
||||
from backend.blocks.llm import (
|
||||
MODEL_METADATA,
|
||||
AIConversationBlock,
|
||||
AIListGeneratorBlock,
|
||||
AIStructuredResponseGeneratorBlock,
|
||||
AITextGeneratorBlock,
|
||||
AITextSummarizerBlock,
|
||||
LlmModel,
|
||||
)
|
||||
from backend.blocks.replicate.flux_advanced import ReplicateFluxAdvancedModelBlock
|
||||
from backend.blocks.replicate.replicate_block import ReplicateModelBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
|
||||
from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
|
||||
from backend.data import llm_registry
|
||||
from backend.data.block import Block, BlockCost, BlockCostType
|
||||
from backend.integrations.credentials_store import (
|
||||
aiml_api_credentials,
|
||||
@@ -55,209 +55,63 @@ from backend.integrations.credentials_store import (
|
||||
v0_credentials,
|
||||
)
|
||||
|
||||
# =============== Configure the cost for each LLM Model call =============== #
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MODEL_COST: dict[LlmModel, int] = {
|
||||
LlmModel.O3: 4,
|
||||
LlmModel.O3_MINI: 2, # $1.10 / $4.40
|
||||
LlmModel.O1: 16, # $15 / $60
|
||||
LlmModel.O1_MINI: 4,
|
||||
# GPT-5 models
|
||||
LlmModel.GPT5: 2,
|
||||
LlmModel.GPT5_1: 5,
|
||||
LlmModel.GPT5_MINI: 1,
|
||||
LlmModel.GPT5_NANO: 1,
|
||||
LlmModel.GPT5_CHAT: 5,
|
||||
LlmModel.GPT41: 2,
|
||||
LlmModel.GPT41_MINI: 1,
|
||||
LlmModel.GPT4O_MINI: 1,
|
||||
LlmModel.GPT4O: 3,
|
||||
LlmModel.GPT4_TURBO: 10,
|
||||
LlmModel.GPT3_5_TURBO: 1,
|
||||
LlmModel.CLAUDE_4_1_OPUS: 21,
|
||||
LlmModel.CLAUDE_4_OPUS: 21,
|
||||
LlmModel.CLAUDE_4_SONNET: 5,
|
||||
LlmModel.CLAUDE_4_5_HAIKU: 4,
|
||||
LlmModel.CLAUDE_4_5_OPUS: 14,
|
||||
LlmModel.CLAUDE_4_5_SONNET: 9,
|
||||
LlmModel.CLAUDE_3_7_SONNET: 5,
|
||||
LlmModel.CLAUDE_3_HAIKU: 1,
|
||||
LlmModel.AIML_API_QWEN2_5_72B: 1,
|
||||
LlmModel.AIML_API_LLAMA3_1_70B: 1,
|
||||
LlmModel.AIML_API_LLAMA3_3_70B: 1,
|
||||
LlmModel.AIML_API_META_LLAMA_3_1_70B: 1,
|
||||
LlmModel.AIML_API_LLAMA_3_2_3B: 1,
|
||||
LlmModel.LLAMA3_3_70B: 1, # $0.59 / $0.79
|
||||
LlmModel.LLAMA3_1_8B: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_3: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_2: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_8B: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_405B: 1,
|
||||
LlmModel.OLLAMA_DOLPHIN: 1,
|
||||
LlmModel.OPENAI_GPT_OSS_120B: 1,
|
||||
LlmModel.OPENAI_GPT_OSS_20B: 1,
|
||||
LlmModel.GEMINI_2_5_PRO: 4,
|
||||
LlmModel.GEMINI_3_PRO_PREVIEW: 5,
|
||||
LlmModel.MISTRAL_NEMO: 1,
|
||||
LlmModel.COHERE_COMMAND_R_08_2024: 1,
|
||||
LlmModel.COHERE_COMMAND_R_PLUS_08_2024: 3,
|
||||
LlmModel.DEEPSEEK_CHAT: 2,
|
||||
LlmModel.PERPLEXITY_SONAR: 1,
|
||||
LlmModel.PERPLEXITY_SONAR_PRO: 5,
|
||||
LlmModel.PERPLEXITY_SONAR_DEEP_RESEARCH: 10,
|
||||
LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: 1,
|
||||
LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: 1,
|
||||
LlmModel.AMAZON_NOVA_LITE_V1: 1,
|
||||
LlmModel.AMAZON_NOVA_MICRO_V1: 1,
|
||||
LlmModel.AMAZON_NOVA_PRO_V1: 1,
|
||||
LlmModel.MICROSOFT_WIZARDLM_2_8X22B: 1,
|
||||
LlmModel.GRYPHE_MYTHOMAX_L2_13B: 1,
|
||||
LlmModel.META_LLAMA_4_SCOUT: 1,
|
||||
LlmModel.META_LLAMA_4_MAVERICK: 1,
|
||||
LlmModel.LLAMA_API_LLAMA_4_SCOUT: 1,
|
||||
LlmModel.LLAMA_API_LLAMA4_MAVERICK: 1,
|
||||
LlmModel.LLAMA_API_LLAMA3_3_8B: 1,
|
||||
LlmModel.LLAMA_API_LLAMA3_3_70B: 1,
|
||||
LlmModel.GROK_4: 9,
|
||||
LlmModel.GROK_4_FAST: 1,
|
||||
LlmModel.GROK_4_1_FAST: 1,
|
||||
LlmModel.GROK_CODE_FAST_1: 1,
|
||||
LlmModel.KIMI_K2: 1,
|
||||
LlmModel.QWEN3_235B_A22B_THINKING: 1,
|
||||
LlmModel.QWEN3_CODER: 9,
|
||||
LlmModel.GEMINI_2_5_FLASH: 1,
|
||||
LlmModel.GEMINI_2_0_FLASH: 1,
|
||||
LlmModel.GEMINI_2_5_FLASH_LITE_PREVIEW: 1,
|
||||
LlmModel.GEMINI_2_0_FLASH_LITE: 1,
|
||||
LlmModel.DEEPSEEK_R1_0528: 1,
|
||||
# v0 by Vercel models
|
||||
LlmModel.V0_1_5_MD: 1,
|
||||
LlmModel.V0_1_5_LG: 2,
|
||||
LlmModel.V0_1_0_MD: 1,
|
||||
PROVIDER_CREDENTIALS = {
|
||||
"openai": openai_credentials,
|
||||
"anthropic": anthropic_credentials,
|
||||
"groq": groq_credentials,
|
||||
"open_router": open_router_credentials,
|
||||
"llama_api": llama_api_credentials,
|
||||
"aiml_api": aiml_api_credentials,
|
||||
"v0": v0_credentials,
|
||||
}
|
||||
|
||||
for model in LlmModel:
|
||||
if model not in MODEL_COST:
|
||||
raise ValueError(f"Missing MODEL_COST for model: {model}")
|
||||
# =============== Configure the cost for each LLM Model call =============== #
|
||||
# All LLM costs now come from the database via llm_registry
|
||||
|
||||
LLM_COST: list[BlockCost] = []
|
||||
|
||||
|
||||
LLM_COST = (
|
||||
# Anthropic Models
|
||||
[
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
def _build_llm_costs_from_registry() -> list[BlockCost]:
|
||||
"""Build BlockCost list from all models in the LLM registry."""
|
||||
costs: list[BlockCost] = []
|
||||
for model in llm_registry.iter_dynamic_models():
|
||||
for cost in model.costs:
|
||||
credentials = PROVIDER_CREDENTIALS.get(cost.credential_provider)
|
||||
if not credentials:
|
||||
logger.warning(
|
||||
"Skipping cost entry for %s due to unknown credentials provider %s",
|
||||
model.slug,
|
||||
cost.credential_provider,
|
||||
)
|
||||
continue
|
||||
cost_filter = {
|
||||
"model": model.slug,
|
||||
"credentials": {
|
||||
"id": anthropic_credentials.id,
|
||||
"provider": anthropic_credentials.provider,
|
||||
"type": anthropic_credentials.type,
|
||||
"id": credentials.id,
|
||||
"provider": credentials.provider,
|
||||
"type": credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "anthropic"
|
||||
]
|
||||
# OpenAI Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "openai"
|
||||
]
|
||||
# Groq Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {"id": groq_credentials.id},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "groq"
|
||||
]
|
||||
# Open Router Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {
|
||||
"id": open_router_credentials.id,
|
||||
"provider": open_router_credentials.provider,
|
||||
"type": open_router_credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "open_router"
|
||||
]
|
||||
# Llama API Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {
|
||||
"id": llama_api_credentials.id,
|
||||
"provider": llama_api_credentials.provider,
|
||||
"type": llama_api_credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "llama_api"
|
||||
]
|
||||
# v0 by Vercel Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {
|
||||
"id": v0_credentials.id,
|
||||
"provider": v0_credentials.provider,
|
||||
"type": v0_credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "v0"
|
||||
]
|
||||
# AI/ML Api Models
|
||||
+ [
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter={
|
||||
"model": model,
|
||||
"credentials": {
|
||||
"id": aiml_api_credentials.id,
|
||||
"provider": aiml_api_credentials.provider,
|
||||
"type": aiml_api_credentials.type,
|
||||
},
|
||||
},
|
||||
cost_amount=cost,
|
||||
)
|
||||
for model, cost in MODEL_COST.items()
|
||||
if MODEL_METADATA[model].provider == "aiml_api"
|
||||
]
|
||||
)
|
||||
}
|
||||
costs.append(
|
||||
BlockCost(
|
||||
cost_type=BlockCostType.RUN,
|
||||
cost_filter=cost_filter,
|
||||
cost_amount=cost.credit_cost,
|
||||
)
|
||||
)
|
||||
return costs
|
||||
|
||||
|
||||
def refresh_llm_costs() -> None:
|
||||
"""Refresh LLM costs from the registry. All costs now come from the database."""
|
||||
LLM_COST.clear()
|
||||
LLM_COST.extend(_build_llm_costs_from_registry())
|
||||
|
||||
|
||||
# Initial load will happen after registry is refreshed at startup
|
||||
# Don't call refresh_llm_costs() here - it will be called after registry refresh
|
||||
|
||||
# =============== This is the exhaustive list of cost for each Block =============== #
|
||||
|
||||
|
||||
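To make the registry-driven costing concrete, here is a hedged sketch of a single BlockCost entry as _build_llm_costs_from_registry above would emit it; the model slug and credit amount are invented, not real registry data, and the imports are the ones already present in block_cost_config.py.

# Hypothetical example of one generated entry (illustrative values only).
example_cost = BlockCost(
    cost_type=BlockCostType.RUN,
    cost_filter={
        "model": "gpt-4o",  # assumed slug for illustration
        "credentials": {
            "id": openai_credentials.id,
            "provider": openai_credentials.provider,
            "type": openai_credentials.type,
        },
    },
    cost_amount=3,  # assumed credit cost; real values come from the database
)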
@@ -341,6 +341,19 @@ class UserCreditBase(ABC):

        if result:
            # UserBalance is already updated by the CTE

            # Clear insufficient funds notification flags when credits are added
            # so user can receive alerts again if they run out in the future.
            if transaction.amount > 0 and transaction.type in [
                CreditTransactionType.GRANT,
                CreditTransactionType.TOP_UP,
            ]:
                from backend.executor.manager import (
                    clear_insufficient_funds_notifications,
                )

                await clear_insufficient_funds_notifications(user_id)

            return result[0]["balance"]

    async def _add_transaction(
@@ -530,6 +543,22 @@ class UserCreditBase(ABC):
        if result:
            new_balance, tx_key = result[0]["balance"], result[0]["transactionKey"]
            # UserBalance is already updated by the CTE

            # Clear insufficient funds notification flags when credits are added
            # so user can receive alerts again if they run out in the future.
            if (
                amount > 0
                and is_active
                and transaction_type
                in [CreditTransactionType.GRANT, CreditTransactionType.TOP_UP]
            ):
                # Lazy import to avoid circular dependency with executor.manager
                from backend.executor.manager import (
                    clear_insufficient_funds_notifications,
                )

                await clear_insufficient_funds_notifications(user_id)

            return new_balance, tx_key

        # If no result, either user doesn't exist or insufficient balance
@@ -38,6 +38,20 @@ POOL_TIMEOUT = os.getenv("DB_POOL_TIMEOUT")
if POOL_TIMEOUT:
    DATABASE_URL = add_param(DATABASE_URL, "pool_timeout", POOL_TIMEOUT)

# Add public schema to search_path for pgvector type access
# The vector extension is in public schema, but search_path is determined by schema parameter
# Extract the schema from DATABASE_URL or default to 'public' (matching get_database_schema())
parsed_url = urlparse(DATABASE_URL)
url_params = dict(parse_qsl(parsed_url.query))
db_schema = url_params.get("schema", "public")
# Build search_path, avoiding duplicates if db_schema is already 'public'
search_path_schemas = list(
    dict.fromkeys([db_schema, "public"])
)  # Preserves order, removes duplicates
search_path = ",".join(search_path_schemas)
# This allows using ::vector without schema qualification
DATABASE_URL = add_param(DATABASE_URL, "options", f"-c search_path={search_path}")

HTTP_TIMEOUT = int(POOL_TIMEOUT) if POOL_TIMEOUT else None

prisma = Prisma(
@@ -108,21 +122,102 @@ def get_database_schema() -> str:
    return query_params.get("schema", "public")


async def query_raw_with_schema(query_template: str, *args) -> list[dict]:
    """Execute raw SQL query with proper schema handling."""
async def _raw_with_schema(
    query_template: str,
    *args,
    execute: bool = False,
    client: Prisma | None = None,
    set_public_search_path: bool = False,
) -> list[dict] | int:
    """Internal: Execute raw SQL with proper schema handling.

    Use query_raw_with_schema() or execute_raw_with_schema() instead.

    Args:
        query_template: SQL query with {schema_prefix} placeholder
        *args: Query parameters
        execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
        client: Optional Prisma client for transactions (only used when execute=True).
        set_public_search_path: If True, sets search_path to include public schema.
            Needed for pgvector types and other public schema objects.

    Returns:
        - list[dict] if execute=False (query results)
        - int if execute=True (number of affected rows)
    """
    schema = get_database_schema()
    schema_prefix = f'"{schema}".' if schema != "public" else ""
    formatted_query = query_template.format(schema_prefix=schema_prefix)

    import prisma as prisma_module

    result = await prisma_module.get_client().query_raw(
        formatted_query, *args  # type: ignore
    )
    db_client = client if client else prisma_module.get_client()

    # Set search_path to include public schema if requested
    # Prisma doesn't support the 'options' connection parameter, so we set it per-session
    # This is idempotent and safe to call multiple times
    if set_public_search_path:
        await db_client.execute_raw(f"SET search_path = {schema}, public")  # type: ignore

    if execute:
        result = await db_client.execute_raw(formatted_query, *args)  # type: ignore
    else:
        result = await db_client.query_raw(formatted_query, *args)  # type: ignore

    return result


async def query_raw_with_schema(
    query_template: str, *args, set_public_search_path: bool = False
) -> list[dict]:
    """Execute raw SQL SELECT query with proper schema handling.

    Args:
        query_template: SQL query with {schema_prefix} placeholder
        *args: Query parameters
        set_public_search_path: If True, sets search_path to include public schema.
            Needed for pgvector types and other public schema objects.

    Returns:
        List of result rows as dictionaries

    Example:
        results = await query_raw_with_schema(
            'SELECT * FROM {schema_prefix}"User" WHERE id = $1',
            user_id
        )
    """
    return await _raw_with_schema(query_template, *args, execute=False, set_public_search_path=set_public_search_path)  # type: ignore


async def execute_raw_with_schema(
    query_template: str,
    *args,
    client: Prisma | None = None,
    set_public_search_path: bool = False,
) -> int:
    """Execute raw SQL command (INSERT/UPDATE/DELETE) with proper schema handling.

    Args:
        query_template: SQL query with {schema_prefix} placeholder
        *args: Query parameters
        client: Optional Prisma client for transactions
        set_public_search_path: If True, sets search_path to include public schema.
            Needed for pgvector types and other public schema objects.

    Returns:
        Number of affected rows

    Example:
        await execute_raw_with_schema(
            'INSERT INTO {schema_prefix}"User" (id, name) VALUES ($1, $2)',
            user_id, name,
            client=tx  # Optional transaction client
        )
    """
    return await _raw_with_schema(query_template, *args, execute=True, client=client, set_public_search_path=set_public_search_path)  # type: ignore


class BaseDbModel(BaseModel):
    id: str = Field(default_factory=lambda: str(uuid4()))
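A small sketch of what the {schema_prefix} substitution in _raw_with_schema produces for a non-default schema; the schema name "platform" is an assumed example, not a value from this changeset.

# Illustrative only: how the {schema_prefix} placeholder gets filled.
schema = "platform"  # assumed; get_database_schema() would normally supply this
schema_prefix = f'"{schema}".' if schema != "public" else ""
query = 'SELECT * FROM {schema_prefix}"User" WHERE id = $1'.format(
    schema_prefix=schema_prefix
)
print(query)  # SELECT * FROM "platform"."User" WHERE id = $1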
@@ -383,6 +383,7 @@ class GraphExecutionWithNodes(GraphExecution):
|
||||
self,
|
||||
execution_context: ExecutionContext,
|
||||
compiled_nodes_input_masks: Optional[NodesInputMasks] = None,
|
||||
nodes_to_skip: Optional[set[str]] = None,
|
||||
):
|
||||
return GraphExecutionEntry(
|
||||
user_id=self.user_id,
|
||||
@@ -390,6 +391,7 @@ class GraphExecutionWithNodes(GraphExecution):
|
||||
graph_version=self.graph_version or 0,
|
||||
graph_exec_id=self.id,
|
||||
nodes_input_masks=compiled_nodes_input_masks,
|
||||
nodes_to_skip=nodes_to_skip or set(),
|
||||
execution_context=execution_context,
|
||||
)
|
||||
|
||||
@@ -1145,6 +1147,8 @@ class GraphExecutionEntry(BaseModel):
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
nodes_input_masks: Optional[NodesInputMasks] = None
|
||||
nodes_to_skip: set[str] = Field(default_factory=set)
|
||||
"""Node IDs that should be skipped due to optional credentials not being configured."""
|
||||
execution_context: ExecutionContext = Field(default_factory=ExecutionContext)
|
||||
|
||||
|
||||
|
||||
@@ -94,6 +94,15 @@ class Node(BaseDbModel):
|
||||
input_links: list[Link] = []
|
||||
output_links: list[Link] = []
|
||||
|
||||
@property
|
||||
def credentials_optional(self) -> bool:
|
||||
"""
|
||||
Whether credentials are optional for this node.
|
||||
When True and credentials are not configured, the node will be skipped
|
||||
during execution rather than causing a validation error.
|
||||
"""
|
||||
return self.metadata.get("credentials_optional", False)
|
||||
|
||||
@property
|
||||
def block(self) -> AnyBlockSchema | "_UnknownBlockBase":
|
||||
"""Get the block for this node. Returns UnknownBlock if block is deleted/missing."""
|
||||
@@ -235,7 +244,10 @@ class BaseGraph(BaseDbModel):
|
||||
return any(
|
||||
node.block_id
|
||||
for node in self.nodes
|
||||
if node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
|
||||
if (
|
||||
node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
|
||||
or node.block.requires_human_review
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
@@ -326,7 +338,35 @@ class Graph(BaseGraph):
|
||||
@computed_field
|
||||
@property
|
||||
def credentials_input_schema(self) -> dict[str, Any]:
|
||||
return self._credentials_input_schema.jsonschema()
|
||||
schema = self._credentials_input_schema.jsonschema()
|
||||
|
||||
# Determine which credential fields are required based on credentials_optional metadata
|
||||
graph_credentials_inputs = self.aggregate_credentials_inputs()
|
||||
required_fields = []
|
||||
|
||||
# Build a map of node_id -> node for quick lookup
|
||||
all_nodes = {node.id: node for node in self.nodes}
|
||||
for sub_graph in self.sub_graphs:
|
||||
for node in sub_graph.nodes:
|
||||
all_nodes[node.id] = node
|
||||
|
||||
for field_key, (
|
||||
_field_info,
|
||||
node_field_pairs,
|
||||
) in graph_credentials_inputs.items():
|
||||
# A field is required if ANY node using it has credentials_optional=False
|
||||
is_required = False
|
||||
for node_id, _field_name in node_field_pairs:
|
||||
node = all_nodes.get(node_id)
|
||||
if node and not node.credentials_optional:
|
||||
is_required = True
|
||||
break
|
||||
|
||||
if is_required:
|
||||
required_fields.append(field_key)
|
||||
|
||||
schema["required"] = required_fields
|
||||
return schema
|
||||
|
||||
@property
|
||||
def _credentials_input_schema(self) -> type[BlockSchema]:
|
||||
@@ -1443,8 +1483,10 @@ async def migrate_llm_models(migrate_to: LlmModel):
|
||||
if field.annotation == LlmModel:
|
||||
llm_model_fields[block.id] = field_name
|
||||
|
||||
# Convert enum values to a list of strings for the SQL query
|
||||
enum_values = [v.value for v in LlmModel]
|
||||
# Get all model slugs from the registry (dynamic, not hardcoded enum)
|
||||
from backend.data import llm_registry
|
||||
|
||||
enum_values = list(llm_registry.get_all_model_slugs_for_validation())
|
||||
escaped_enum_values = repr(tuple(enum_values)) # hack but works
|
||||
|
||||
# Update each block
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from uuid import UUID
|
||||
|
||||
import fastapi.exceptions
|
||||
@@ -18,6 +19,17 @@ from backend.usecases.sample import create_test_user
|
||||
from backend.util.test import SpinTestServer
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def mock_embedding_functions():
|
||||
"""Mock embedding functions for all tests to avoid database/API dependencies."""
|
||||
with patch(
|
||||
"backend.api.features.store.db.ensure_embedding",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_graph_creation(server: SpinTestServer, snapshot: Snapshot):
|
||||
"""
|
||||
@@ -396,3 +408,58 @@ async def test_access_store_listing_graph(server: SpinTestServer):
|
||||
created_graph.id, created_graph.version, "3e53486c-cf57-477e-ba2a-cb02dc828e1b"
|
||||
)
|
||||
assert got_graph is not None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests for Optional Credentials Feature
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_node_credentials_optional_default():
|
||||
"""Test that credentials_optional defaults to False when not set in metadata."""
|
||||
node = Node(
|
||||
id="test_node",
|
||||
block_id=StoreValueBlock().id,
|
||||
input_default={},
|
||||
metadata={},
|
||||
)
|
||||
assert node.credentials_optional is False
|
||||
|
||||
|
||||
def test_node_credentials_optional_true():
|
||||
"""Test that credentials_optional returns True when explicitly set."""
|
||||
node = Node(
|
||||
id="test_node",
|
||||
block_id=StoreValueBlock().id,
|
||||
input_default={},
|
||||
metadata={"credentials_optional": True},
|
||||
)
|
||||
assert node.credentials_optional is True
|
||||
|
||||
|
||||
def test_node_credentials_optional_false():
|
||||
"""Test that credentials_optional returns False when explicitly set to False."""
|
||||
node = Node(
|
||||
id="test_node",
|
||||
block_id=StoreValueBlock().id,
|
||||
input_default={},
|
||||
metadata={"credentials_optional": False},
|
||||
)
|
||||
assert node.credentials_optional is False
|
||||
|
||||
|
||||
def test_node_credentials_optional_with_other_metadata():
|
||||
"""Test that credentials_optional works correctly with other metadata present."""
|
||||
node = Node(
|
||||
id="test_node",
|
||||
block_id=StoreValueBlock().id,
|
||||
input_default={},
|
||||
metadata={
|
||||
"position": {"x": 100, "y": 200},
|
||||
"customized_name": "My Custom Node",
|
||||
"credentials_optional": True,
|
||||
},
|
||||
)
|
||||
assert node.credentials_optional is True
|
||||
assert node.metadata["position"] == {"x": 100, "y": 200}
|
||||
assert node.metadata["customized_name"] == "My Custom Node"
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
"""
|
||||
LLM Registry module for managing LLM models, providers, and costs dynamically.
|
||||
|
||||
This module provides a database-driven registry system for LLM models,
|
||||
replacing hardcoded model configurations with a flexible admin-managed system.
|
||||
"""
|
||||
|
||||
from backend.data.llm_registry.model_types import ModelMetadata
|
||||
|
||||
# Re-export for backwards compatibility
|
||||
from backend.data.llm_registry.notifications import (
|
||||
REGISTRY_REFRESH_CHANNEL,
|
||||
publish_registry_refresh_notification,
|
||||
subscribe_to_registry_refresh,
|
||||
)
|
||||
from backend.data.llm_registry.registry import (
|
||||
RegistryModel,
|
||||
RegistryModelCost,
|
||||
RegistryModelCreator,
|
||||
get_all_model_slugs_for_validation,
|
||||
get_default_model_slug,
|
||||
get_dynamic_model_slugs,
|
||||
get_fallback_model_for_disabled,
|
||||
get_llm_discriminator_mapping,
|
||||
get_llm_model_cost,
|
||||
get_llm_model_metadata,
|
||||
get_llm_model_schema_options,
|
||||
get_model_info,
|
||||
is_model_enabled,
|
||||
iter_dynamic_models,
|
||||
refresh_llm_registry,
|
||||
register_static_costs,
|
||||
register_static_metadata,
|
||||
)
|
||||
from backend.data.llm_registry.schema_utils import (
|
||||
is_llm_model_field,
|
||||
refresh_llm_discriminator_mapping,
|
||||
refresh_llm_model_options,
|
||||
update_schema_with_llm_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Types
|
||||
"ModelMetadata",
|
||||
"RegistryModel",
|
||||
"RegistryModelCost",
|
||||
"RegistryModelCreator",
|
||||
# Registry functions
|
||||
"get_all_model_slugs_for_validation",
|
||||
"get_default_model_slug",
|
||||
"get_dynamic_model_slugs",
|
||||
"get_fallback_model_for_disabled",
|
||||
"get_llm_discriminator_mapping",
|
||||
"get_llm_model_cost",
|
||||
"get_llm_model_metadata",
|
||||
"get_llm_model_schema_options",
|
||||
"get_model_info",
|
||||
"is_model_enabled",
|
||||
"iter_dynamic_models",
|
||||
"refresh_llm_registry",
|
||||
"register_static_costs",
|
||||
"register_static_metadata",
|
||||
# Notifications
|
||||
"REGISTRY_REFRESH_CHANNEL",
|
||||
"publish_registry_refresh_notification",
|
||||
"subscribe_to_registry_refresh",
|
||||
# Schema utilities
|
||||
"is_llm_model_field",
|
||||
"refresh_llm_discriminator_mapping",
|
||||
"refresh_llm_model_options",
|
||||
"update_schema_with_llm_registry",
|
||||
]
|
||||
@@ -0,0 +1,11 @@
|
||||
"""Type definitions for LLM model metadata."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
class ModelMetadata(NamedTuple):
|
||||
"""Metadata for an LLM model."""
|
||||
|
||||
provider: str
|
||||
context_window: int
|
||||
max_output_tokens: int | None
|
||||
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
Redis pub/sub notifications for LLM registry updates.
|
||||
|
||||
When models are added/updated/removed via the admin UI, this module
|
||||
publishes notifications to Redis that all executor services subscribe to,
|
||||
ensuring they refresh their registry cache in real-time.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from backend.data.redis_client import connect_async
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis channel name for LLM registry refresh notifications
|
||||
REGISTRY_REFRESH_CHANNEL = "llm_registry:refresh"
|
||||
|
||||
|
||||
async def publish_registry_refresh_notification() -> None:
|
||||
"""
|
||||
Publish a notification to Redis that the LLM registry has been updated.
|
||||
All executor services subscribed to this channel will refresh their registry.
|
||||
"""
|
||||
try:
|
||||
redis = await connect_async()
|
||||
await redis.publish(REGISTRY_REFRESH_CHANNEL, "refresh")
|
||||
logger.info("Published LLM registry refresh notification to Redis")
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Failed to publish LLM registry refresh notification: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
async def subscribe_to_registry_refresh(
|
||||
on_refresh: Any, # Async callable that takes no args
|
||||
) -> None:
|
||||
"""
|
||||
Subscribe to Redis notifications for LLM registry updates.
|
||||
This runs in a loop and processes messages as they arrive.
|
||||
|
||||
Args:
|
||||
on_refresh: Async callable to execute when a refresh notification is received
|
||||
"""
|
||||
try:
|
||||
redis = await connect_async()
|
||||
pubsub = redis.pubsub()
|
||||
await pubsub.subscribe(REGISTRY_REFRESH_CHANNEL)
|
||||
logger.info(
|
||||
"Subscribed to LLM registry refresh notifications on channel: %s",
|
||||
REGISTRY_REFRESH_CHANNEL,
|
||||
)
|
||||
|
||||
# Process messages in a loop
|
||||
while True:
|
||||
try:
|
||||
message = await pubsub.get_message(
|
||||
ignore_subscribe_messages=True, timeout=1.0
|
||||
)
|
||||
if (
|
||||
message
|
||||
and message["type"] == "message"
|
||||
and message["channel"] == REGISTRY_REFRESH_CHANNEL
|
||||
):
|
||||
logger.info("Received LLM registry refresh notification")
|
||||
try:
|
||||
await on_refresh()
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"Error refreshing LLM registry from notification: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Error processing registry refresh message: %s", exc, exc_info=True
|
||||
)
|
||||
# Continue listening even if one message fails
|
||||
await asyncio.sleep(1)
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"Failed to subscribe to LLM registry refresh notifications: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
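A hedged sketch of how an executor service might wire this subscription up; the background-task wrapper below is an assumption, while subscribe_to_registry_refresh, refresh_llm_registry, and refresh_llm_costs come from this changeset.

# Assumed wiring: run the subscriber as a background task and refresh the
# registry (plus the derived block costs) whenever a notification arrives.
import asyncio

from backend.data import llm_registry
from backend.data.block_cost_config import refresh_llm_costs


async def _on_registry_refresh() -> None:
    await llm_registry.refresh_llm_registry()
    refresh_llm_costs()


def start_registry_listener() -> asyncio.Task:
    # Requires a running event loop (e.g. called from service startup).
    return asyncio.create_task(
        llm_registry.subscribe_to_registry_refresh(_on_registry_refresh)
    )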
autogpt_platform/backend/backend/data/llm_registry/registry.py (new file, 372 lines)
@@ -0,0 +1,372 @@
|
||||
"""Core LLM registry implementation for managing models dynamically."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Iterable
|
||||
|
||||
import prisma.models
|
||||
|
||||
from backend.data.llm_registry.model_types import ModelMetadata
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _json_to_dict(value: Any) -> dict[str, Any]:
|
||||
"""Convert Prisma Json type to dict, with fallback to empty dict."""
|
||||
if value is None:
|
||||
return {}
|
||||
if isinstance(value, dict):
|
||||
return value
|
||||
# Prisma Json type should always be a dict at runtime
|
||||
return dict(value) if value else {}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryModelCost:
|
||||
"""Cost configuration for an LLM model."""
|
||||
|
||||
credit_cost: int
|
||||
credential_provider: str
|
||||
credential_id: str | None
|
||||
credential_type: str | None
|
||||
currency: str | None
|
||||
metadata: dict[str, Any]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryModelCreator:
|
||||
"""Creator information for an LLM model."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
display_name: str
|
||||
description: str | None
|
||||
website_url: str | None
|
||||
logo_url: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegistryModel:
|
||||
"""Represents a model in the LLM registry."""
|
||||
|
||||
slug: str
|
||||
display_name: str
|
||||
description: str | None
|
||||
metadata: ModelMetadata
|
||||
capabilities: dict[str, Any]
|
||||
extra_metadata: dict[str, Any]
|
||||
provider_display_name: str
|
||||
is_enabled: bool
|
||||
is_recommended: bool = False
|
||||
costs: tuple[RegistryModelCost, ...] = field(default_factory=tuple)
|
||||
creator: RegistryModelCreator | None = None
|
||||
|
||||
|
||||
_static_metadata: dict[str, ModelMetadata] = {}
|
||||
_static_costs: dict[str, int] = {}
|
||||
_dynamic_models: dict[str, RegistryModel] = {}
|
||||
_schema_options: list[dict[str, str]] = []
|
||||
_discriminator_mapping: dict[str, str] = {}
|
||||
_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def register_static_metadata(metadata: dict[Any, ModelMetadata]) -> None:
|
||||
"""Register static metadata for legacy models (deprecated)."""
|
||||
_static_metadata.update({str(key): value for key, value in metadata.items()})
|
||||
_refresh_cached_schema()
|
||||
|
||||
|
||||
def register_static_costs(costs: dict[Any, int]) -> None:
|
||||
"""Register static costs for legacy models (deprecated)."""
|
||||
_static_costs.update({str(key): value for key, value in costs.items()})
|
||||
|
||||
|
||||
def _build_schema_options() -> list[dict[str, str]]:
|
||||
"""Build schema options for model selection dropdown. Only includes enabled models."""
|
||||
options: list[dict[str, str]] = []
|
||||
# Only include enabled models in the dropdown options
|
||||
for model in sorted(_dynamic_models.values(), key=lambda m: m.display_name.lower()):
|
||||
if model.is_enabled:
|
||||
options.append(
|
||||
{
|
||||
"label": model.display_name,
|
||||
"value": model.slug,
|
||||
"group": model.metadata.provider,
|
||||
"description": model.description or "",
|
||||
}
|
||||
)
|
||||
|
||||
for slug, metadata in _static_metadata.items():
|
||||
if slug in _dynamic_models:
|
||||
continue
|
||||
options.append(
|
||||
{
|
||||
"label": slug,
|
||||
"value": slug,
|
||||
"group": metadata.provider,
|
||||
"description": "",
|
||||
}
|
||||
)
|
||||
return options
|
||||
|
||||
|
||||
async def refresh_llm_registry() -> None:
|
||||
"""Refresh the LLM registry from the database. Loads all models (enabled and disabled)."""
|
||||
async with _lock:
|
||||
try:
|
||||
records = await prisma.models.LlmModel.prisma().find_many(
|
||||
include={
|
||||
"Provider": True,
|
||||
"Costs": True,
|
||||
"Creator": True,
|
||||
}
|
||||
)
|
||||
logger.debug("Found %d LLM model records in database", len(records))
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"Failed to refresh LLM registry from DB: %s", exc, exc_info=True
|
||||
)
|
||||
return
|
||||
|
||||
dynamic: dict[str, RegistryModel] = {}
|
||||
for record in records:
|
||||
provider_name = (
|
||||
record.Provider.name if record.Provider else record.providerId
|
||||
)
|
||||
metadata = ModelMetadata(
|
||||
provider=provider_name,
|
||||
context_window=record.contextWindow,
|
||||
max_output_tokens=record.maxOutputTokens,
|
||||
)
|
||||
costs = tuple(
|
||||
RegistryModelCost(
|
||||
credit_cost=cost.creditCost,
|
||||
credential_provider=cost.credentialProvider,
|
||||
credential_id=cost.credentialId,
|
||||
credential_type=cost.credentialType,
|
||||
currency=cost.currency,
|
||||
metadata=_json_to_dict(cost.metadata),
|
||||
)
|
||||
for cost in (record.Costs or [])
|
||||
)
|
||||
|
||||
# Map creator if present
|
||||
creator = None
|
||||
if record.Creator:
|
||||
creator = RegistryModelCreator(
|
||||
id=record.Creator.id,
|
||||
name=record.Creator.name,
|
||||
display_name=record.Creator.displayName,
|
||||
description=record.Creator.description,
|
||||
website_url=record.Creator.websiteUrl,
|
||||
logo_url=record.Creator.logoUrl,
|
||||
)
|
||||
|
||||
dynamic[record.slug] = RegistryModel(
|
||||
slug=record.slug,
|
||||
display_name=record.displayName,
|
||||
description=record.description,
|
||||
metadata=metadata,
|
||||
capabilities=_json_to_dict(record.capabilities),
|
||||
extra_metadata=_json_to_dict(record.metadata),
|
||||
provider_display_name=(
|
||||
record.Provider.displayName
|
||||
if record.Provider
|
||||
else record.providerId
|
||||
),
|
||||
is_enabled=record.isEnabled,
|
||||
is_recommended=record.isRecommended,
|
||||
costs=costs,
|
||||
creator=creator,
|
||||
)
|
||||
|
||||
# Atomic swap - build new structures then replace references
|
||||
# This ensures readers never see partially updated state
|
||||
global _dynamic_models
|
||||
_dynamic_models = dynamic
|
||||
_refresh_cached_schema()
|
||||
logger.info(
|
||||
"LLM registry refreshed with %s dynamic models (enabled: %s, disabled: %s)",
|
||||
len(dynamic),
|
||||
sum(1 for m in dynamic.values() if m.is_enabled),
|
||||
sum(1 for m in dynamic.values() if not m.is_enabled),
|
||||
)
|
||||
|
||||
|
||||
def _refresh_cached_schema() -> None:
|
||||
"""Refresh cached schema options and discriminator mapping."""
|
||||
global _schema_options, _discriminator_mapping
|
||||
|
||||
# Build new structures
|
||||
new_options = _build_schema_options()
|
||||
new_mapping = {
|
||||
slug: entry.metadata.provider for slug, entry in _dynamic_models.items()
|
||||
}
|
||||
for slug, metadata in _static_metadata.items():
|
||||
new_mapping.setdefault(slug, metadata.provider)
|
||||
|
||||
# Atomic swap - replace references to ensure readers see consistent state
|
||||
_schema_options = new_options
|
||||
_discriminator_mapping = new_mapping
|
||||
|
||||
|
||||
def get_llm_model_metadata(slug: str) -> ModelMetadata | None:
|
||||
"""Get model metadata by slug. Checks dynamic models first, then static metadata."""
|
||||
if slug in _dynamic_models:
|
||||
return _dynamic_models[slug].metadata
|
||||
return _static_metadata.get(slug)
|
||||
|
||||
|
||||
def get_llm_model_cost(slug: str) -> tuple[RegistryModelCost, ...]:
|
||||
"""Get model cost configuration by slug."""
|
||||
if slug in _dynamic_models:
|
||||
return _dynamic_models[slug].costs
|
||||
cost_value = _static_costs.get(slug)
|
||||
if cost_value is None:
|
||||
return tuple()
|
||||
return (
|
||||
RegistryModelCost(
|
||||
credit_cost=cost_value,
|
||||
credential_provider="static",
|
||||
credential_id=None,
|
||||
credential_type=None,
|
||||
currency=None,
|
||||
metadata={},
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def get_llm_model_schema_options() -> list[dict[str, str]]:
|
||||
"""
|
||||
Get schema options for LLM model selection dropdown.
|
||||
|
||||
Returns a copy of cached schema options that are refreshed when the registry is
|
||||
updated via refresh_llm_registry() (called on startup and via Redis pub/sub).
|
||||
"""
|
||||
# Return a copy to prevent external mutation
|
||||
return list(_schema_options)
|
||||
|
||||
|
||||
def get_llm_discriminator_mapping() -> dict[str, str]:
|
||||
"""
|
||||
Get discriminator mapping for LLM models.
|
||||
|
||||
Returns a copy of cached discriminator mapping that is refreshed when the registry
|
||||
is updated via refresh_llm_registry() (called on startup and via Redis pub/sub).
|
||||
"""
|
||||
# Return a copy to prevent external mutation
|
||||
return dict(_discriminator_mapping)
|
||||
|
||||
|
||||
def get_dynamic_model_slugs() -> set[str]:
|
||||
"""Get all dynamic model slugs from the registry."""
|
||||
return set(_dynamic_models.keys())
|
||||
|
||||
|
||||
def get_all_model_slugs_for_validation() -> set[str]:
|
||||
"""
|
||||
Get ALL model slugs (both enabled and disabled) for validation purposes.
|
||||
|
||||
This is used for JSON schema enum validation - we need to accept any known
|
||||
model value (even disabled ones) so that existing graphs don't fail validation.
|
||||
The actual fallback/enforcement happens at runtime in llm_call().
|
||||
"""
|
||||
all_slugs = set(_dynamic_models.keys())
|
||||
all_slugs.update(_static_metadata.keys())
|
||||
return all_slugs
|
||||
|
||||
|
||||
def iter_dynamic_models() -> Iterable[RegistryModel]:
|
||||
"""Iterate over all dynamic models in the registry."""
|
||||
return tuple(_dynamic_models.values())
|
||||
|
||||
|
||||
def get_fallback_model_for_disabled(disabled_model_slug: str) -> RegistryModel | None:
|
||||
"""
|
||||
Find a fallback model when the requested model is disabled.
|
||||
|
||||
Looks for an enabled model from the same provider. Prefers models with
|
||||
similar names or capabilities if possible.
|
||||
|
||||
Args:
|
||||
disabled_model_slug: The slug of the disabled model
|
||||
|
||||
Returns:
|
||||
An enabled RegistryModel from the same provider, or None if no fallback found
|
||||
"""
|
||||
disabled_model = _dynamic_models.get(disabled_model_slug)
|
||||
if not disabled_model:
|
||||
return None
|
||||
|
||||
provider = disabled_model.metadata.provider
|
||||
|
||||
# Find all enabled models from the same provider
|
||||
candidates = [
|
||||
model
|
||||
for model in _dynamic_models.values()
|
||||
if model.is_enabled and model.metadata.provider == provider
|
||||
]
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
# Sort by: prefer models with similar context window, then by name
|
||||
candidates.sort(
|
||||
key=lambda m: (
|
||||
abs(m.metadata.context_window - disabled_model.metadata.context_window),
|
||||
m.display_name.lower(),
|
||||
)
|
||||
)
|
||||
|
||||
return candidates[0]
|
||||
|
||||
|
def is_model_enabled(model_slug: str) -> bool:
    """Check if a model is enabled in the registry."""
    model = _dynamic_models.get(model_slug)
    if not model:
        # Model not in registry - assume it's a static/legacy model and allow it
        return True
    return model.is_enabled


def get_model_info(model_slug: str) -> RegistryModel | None:
    """Get model info from the registry."""
    return _dynamic_models.get(model_slug)


def get_default_model_slug() -> str | None:
    """
    Get the default model slug to use for block defaults.

    Returns the recommended model if one is set (configured via the admin UI),
    otherwise the first enabled model sorted alphabetically by display name.
    Returns None if no models are registered or enabled.
    """
    # Return the recommended model if one is set and enabled
    for model in _dynamic_models.values():
        if model.is_recommended and model.is_enabled:
            return model.slug

    # No recommended model set - find first enabled model alphabetically
    for model in sorted(_dynamic_models.values(), key=lambda m: m.display_name.lower()):
        if model.is_enabled:
            logger.warning(
                "No recommended model set, using '%s' as default",
                model.slug,
            )
            return model.slug

    # No enabled models available
    if _dynamic_models:
        logger.error(
            "No enabled models found in registry (%d models registered but all disabled)",
            len(_dynamic_models),
        )
    else:
        logger.error("No models registered in LLM registry")

    return None
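
How a caller stitches these lookups together at runtime is not shown in this excerpt; the following is a hedged sketch of one plausible resolution order (requested model if enabled, else same-provider fallback, else registry default). The helper name resolve_model_slug is hypothetical:

from backend.data.llm_registry.registry import (
    get_default_model_slug,
    get_fallback_model_for_disabled,
    is_model_enabled,
)


def resolve_model_slug(requested: str | None) -> str | None:
    """Hypothetical helper: map a requested slug to one that is actually usable."""
    if requested and is_model_enabled(requested):
        return requested
    if requested:
        fallback = get_fallback_model_for_disabled(requested)
        if fallback is not None:
            return fallback.slug
    # Nothing requested, or no same-provider fallback: use the registry default.
    return get_default_model_slug()
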
@@ -0,0 +1,130 @@
"""
Helper utilities for LLM registry integration with block schemas.

This module handles the dynamic injection of discriminator mappings
and model options from the LLM registry into block schemas.
"""

import logging
from typing import Any

from backend.data.llm_registry.registry import (
    get_all_model_slugs_for_validation,
    get_default_model_slug,
    get_llm_discriminator_mapping,
    get_llm_model_schema_options,
)

logger = logging.getLogger(__name__)


def is_llm_model_field(field_name: str, field_info: Any) -> bool:
    """
    Check if a field is an LLM model selection field.

    Returns True if the field has 'options' in json_schema_extra
    (set by llm_model_schema_extra() in blocks/llm.py).
    """
    if not hasattr(field_info, "json_schema_extra"):
        return False

    extra = field_info.json_schema_extra
    if isinstance(extra, dict):
        return "options" in extra

    return False
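
For orientation, a hypothetical Pydantic field that this predicate would recognise; the helper-module import path and the shape of the 'options' entries are assumptions, not taken from the diff:

from pydantic import BaseModel, Field

# Module path is illustrative; the diff does not show where this helper file lives.
from backend.data.llm_registry.schema_helpers import is_llm_model_field


class ExampleInput(BaseModel):
    # Hypothetical LLM block field: the helper only checks for an "options" key
    # inside json_schema_extra, which llm_model_schema_extra() is said to set.
    model: str = Field(
        default="gpt-4o",
        json_schema_extra={"options": [{"value": "gpt-4o", "label": "GPT-4o"}]},
    )


assert is_llm_model_field("model", ExampleInput.model_fields["model"])
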
def refresh_llm_model_options(field_schema: dict[str, Any]) -> None:
    """
    Refresh LLM model options from the registry.

    Updates 'options' (for frontend dropdown) to show only enabled models,
    but keeps the 'enum' (for validation) inclusive of ALL known models.

    This is important because:
    - Options: what users see in the dropdown (enabled models only)
    - Enum: what values pass validation (all known models, including disabled)

    Existing graphs may still have disabled models selected - they should pass
    validation, and the fallback logic in llm_call() will substitute an
    alternative model at runtime.
    """
    fresh_options = get_llm_model_schema_options()
    if not fresh_options:
        return

    # Update options array (UI dropdown) - only enabled models
    if "options" in field_schema:
        field_schema["options"] = fresh_options

    all_known_slugs = get_all_model_slugs_for_validation()
    if all_known_slugs and "enum" in field_schema:
        existing_enum = set(field_schema.get("enum", []))
        combined_enum = existing_enum | all_known_slugs
        field_schema["enum"] = sorted(combined_enum)

    # Set the default value from the registry (recommended model if set, else first enabled)
    # This ensures new blocks have a sensible default pre-selected
    default_slug = get_default_model_slug()
    if default_slug:
        field_schema["default"] = default_slug
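
A small before/after sketch of the in-place mutation (values and import path are illustrative; the real options come from get_llm_model_schema_options()):

# Import path is illustrative; the diff does not name this helper module.
from backend.data.llm_registry.schema_helpers import refresh_llm_model_options

field_schema = {
    "type": "string",
    "enum": ["gpt-4o"],                # stale validation list
    "options": [{"value": "gpt-4o"}],  # stale dropdown entries
}
refresh_llm_model_options(field_schema)
# Afterwards (illustrative):
#   field_schema["options"] -> enabled models only, from get_llm_model_schema_options()
#   field_schema["enum"]    -> sorted union of the old enum and ALL known slugs
#   field_schema["default"] -> the registry's recommended (or first enabled) slug
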
def refresh_llm_discriminator_mapping(field_schema: dict[str, Any]) -> None:
    """
    Refresh discriminator_mapping for fields that use model-based discrimination.

    The discriminator is already set when AICredentialsField() creates the field.
    We only need to refresh the mapping when models are added/removed.
    """
    if field_schema.get("discriminator") != "model":
        return

    # Always refresh the mapping to get the latest models
    fresh_mapping = get_llm_discriminator_mapping()
    if fresh_mapping:
        field_schema["discriminator_mapping"] = fresh_mapping
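
Illustrative only: the refreshed mapping pairs model slugs with their provider so the right credentials can be selected; the import path and the concrete keys and values below are hypothetical:

# Import path is illustrative; the diff does not name this helper module.
from backend.data.llm_registry.schema_helpers import refresh_llm_discriminator_mapping

credentials_schema = {"discriminator": "model", "discriminator_mapping": {}}
refresh_llm_discriminator_mapping(credentials_schema)
# Hypothetical result: {"gpt-4o": "openai", "claude-3-5-sonnet-latest": "anthropic", ...}
print(credentials_schema["discriminator_mapping"])
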
def update_schema_with_llm_registry(
    schema: dict[str, Any], model_class: type | None = None
) -> None:
    """
    Update a JSON schema with current LLM registry data.

    Refreshes:
    1. Model options for LLM model selection fields (dropdown choices)
    2. Discriminator mappings for credentials fields (model → provider)

    Args:
        schema: The JSON schema to update (mutated in-place)
        model_class: The Pydantic model class (optional, for field introspection)
    """
    properties = schema.get("properties", {})

    for field_name, field_schema in properties.items():
        if not isinstance(field_schema, dict):
            continue

        # Refresh model options for LLM model fields
        if model_class and hasattr(model_class, "model_fields"):
            field_info = model_class.model_fields.get(field_name)
            if field_info and is_llm_model_field(field_name, field_info):
                try:
                    refresh_llm_model_options(field_schema)
                except Exception as exc:
                    logger.warning(
                        "Failed to refresh LLM options for field %s: %s",
                        field_name,
                        exc,
                    )

        # Refresh discriminator mapping for fields that use model discrimination
        try:
            refresh_llm_discriminator_mapping(field_schema)
        except Exception as exc:
            logger.warning(
                "Failed to refresh discriminator mapping for field %s: %s",
                field_name,
                exc,
            )
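
To close the loop, a hypothetical end-to-end call on a block input model (module path, field shape, and default value are assumptions for illustration):

from pydantic import BaseModel, Field

# Module path is illustrative; the diff does not name this helper module.
from backend.data.llm_registry.schema_helpers import update_schema_with_llm_registry


class ExampleInput(BaseModel):
    # Hypothetical LLM model field, as in the earlier sketch.
    model: str = Field(
        default="gpt-4o",
        json_schema_extra={"options": [{"value": "gpt-4o", "label": "GPT-4o"}]},
    )


schema = ExampleInput.model_json_schema()
update_schema_with_llm_registry(schema, model_class=ExampleInput)
# The "model" property now carries current options/enum/default from the registry,
# and any property with discriminator == "model" gets a refreshed mapping.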