fix(backend): Add diagnostic logging for vector type errors

When 'type vector does not exist' occurs in hybrid search, log search_path, current_schema, user info, and the installed pgvector extension details to help diagnose why the pgvector extension isn't visible. This is a diagnostics-only change (the original error is still re-raised) to help track down an intermittent issue on dev-behave where the vector type occasionally fails to resolve.
fix(backend): filter graph-only blocks from CoPilot's find_block results (#11892)
2026-02-09 14:25:25 -05:00 · 2026-02-09 16:06:29 +00:00 · 2026-02-09 07:19:43 +00:00
6 changed files with 459 additions and 57 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -13,10 +13,32 @@ from backend.api.features.chat.tools.models import (
    NoResultsResponse,
 )
 from backend.api.features.store.hybrid_search import unified_hybrid_search
-from backend.data.block import get_block
+from backend.data.block import BlockType, get_block

 logger = logging.getLogger(__name__)

+_TARGET_RESULTS = 10
+# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
+# 40 is ~2x the number of blocks currently removed by filtering; the query-time difference between 10 and 40 is minimal.
+_OVERFETCH_PAGE_SIZE = 40
+
+# Block types that only work within graphs and cannot run standalone in CoPilot.
+COPILOT_EXCLUDED_BLOCK_TYPES = {
+    BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
+    BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
+    BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
+    BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
+    BlockType.NOTE,  # Visual annotation only - no runtime behavior
+    BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
+    BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
+}
+
+# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
+COPILOT_EXCLUDED_BLOCK_IDS = {
+    # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
+    "3b191d9f-356f-482d-8238-ba04b6d18381",
+}
+

 class FindBlockTool(BaseTool):
    """Tool for searching available blocks."""
@@ -88,7 +110,7 @@ class FindBlockTool(BaseTool):
                query=query,
                content_types=[ContentType.BLOCK],
                page=1,
-                page_size=10,
+                page_size=_OVERFETCH_PAGE_SIZE,
            )

            if not results:
@@ -108,60 +130,90 @@ class FindBlockTool(BaseTool):
                block = get_block(block_id)

                # Skip disabled blocks
-                if block and not block.disabled:
-                    # Get input/output schemas
-                    input_schema = {}
-                    output_schema = {}
-                    try:
-                        input_schema = block.input_schema.jsonschema()
-                    except Exception:
-                        pass
-                    try:
-                        output_schema = block.output_schema.jsonschema()
-                    except Exception:
-                        pass
+                if not block or block.disabled:
+                    continue

-                    # Get categories from block instance
-                    categories = []
-                    if hasattr(block, "categories") and block.categories:
-                        categories = [cat.value for cat in block.categories]
+                # Skip blocks excluded from CoPilot (graph-only blocks)
+                if (
+                    block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+                    or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+                ):
+                    continue

-                    # Extract required inputs for easier use
-                    required_inputs: list[BlockInputFieldInfo] = []
-                    if input_schema:
-                        properties = input_schema.get("properties", {})
-                        required_fields = set(input_schema.get("required", []))
-                        # Get credential field names to exclude from required inputs
-                        credentials_fields = set(
-                            block.input_schema.get_credentials_fields().keys()
-                        )
-
-                        for field_name, field_schema in properties.items():
-                            # Skip credential fields - they're handled separately
-                            if field_name in credentials_fields:
-                                continue
-
-                            required_inputs.append(
-                                BlockInputFieldInfo(
-                                    name=field_name,
-                                    type=field_schema.get("type", "string"),
-                                    description=field_schema.get("description", ""),
-                                    required=field_name in required_fields,
-                                    default=field_schema.get("default"),
-                                )
-                            )
-
-                    blocks.append(
-                        BlockInfoSummary(
-                            id=block_id,
-                            name=block.name,
-                            description=block.description or "",
-                            categories=categories,
-                            input_schema=input_schema,
-                            output_schema=output_schema,
-                            required_inputs=required_inputs,
-                        )
+                # Get input/output schemas
+                input_schema = {}
+                output_schema = {}
+                try:
+                    input_schema = block.input_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate input schema for block %s: %s",
+                        block_id,
+                        e,
                    )
+                try:
+                    output_schema = block.output_schema.jsonschema()
+                except Exception as e:
+                    logger.debug(
+                        "Failed to generate output schema for block %s: %s",
+                        block_id,
+                        e,
+                    )
+
+                # Get categories from block instance
+                categories = []
+                if hasattr(block, "categories") and block.categories:
+                    categories = [cat.value for cat in block.categories]
+
+                # Extract required inputs for easier use
+                required_inputs: list[BlockInputFieldInfo] = []
+                if input_schema:
+                    properties = input_schema.get("properties", {})
+                    required_fields = set(input_schema.get("required", []))
+                    # Get credential field names to exclude from required inputs
+                    credentials_fields = set(
+                        block.input_schema.get_credentials_fields().keys()
+                    )
+
+                    for field_name, field_schema in properties.items():
+                        # Skip credential fields - they're handled separately
+                        if field_name in credentials_fields:
+                            continue
+
+                        required_inputs.append(
+                            BlockInputFieldInfo(
+                                name=field_name,
+                                type=field_schema.get("type", "string"),
+                                description=field_schema.get("description", ""),
+                                required=field_name in required_fields,
+                                default=field_schema.get("default"),
+                            )
+                        )
+
+                blocks.append(
+                    BlockInfoSummary(
+                        id=block_id,
+                        name=block.name,
+                        description=block.description or "",
+                        categories=categories,
+                        input_schema=input_schema,
+                        output_schema=output_schema,
+                        required_inputs=required_inputs,
+                    )
+                )
+
+                if len(blocks) >= _TARGET_RESULTS:
+                    break
+
+            if blocks and len(blocks) < _TARGET_RESULTS:
+                logger.debug(
+                    "find_block returned %d/%d results for query '%s' "
+                    "(filtered %d excluded/disabled blocks)",
+                    len(blocks),
+                    _TARGET_RESULTS,
+                    query,
+                    len(results) - len(blocks),
+                )

            if not blocks:
                return NoResultsResponse(
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
@@ -0,0 +1,139 @@
+"""Tests for block filtering in FindBlockTool."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+    FindBlockTool,
+)
+from backend.api.features.chat.tools.models import BlockListResponse
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-find-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Create a mock block for testing."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.description = f"{name} description"
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields.return_value = {}
+    mock.output_schema = MagicMock()
+    mock.output_schema.jsonschema.return_value = {}
+    mock.categories = []
+    return mock
+
+
+class TestFindBlockFiltering:
+    """Tests for block filtering in FindBlockTool."""
+
+    def test_excluded_block_types_contains_expected_types(self):
+        """Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
+        assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
+
+    def test_excluded_block_ids_contains_smart_decision_maker(self):
+        """Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
+        assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_filtered_from_results(self):
+        """Verify blocks with excluded BlockTypes are filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        # Mock search returns an INPUT block (excluded) and a STANDARD block (included)
+        search_results = [
+            {"content_id": "input-block-id", "score": 0.9},
+            {"content_id": "standard-block-id", "score": 0.8},
+        ]
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+        standard_block = make_mock_block(
+            "standard-block-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                "input-block-id": input_block,
+                "standard-block-id": standard_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.api.features.chat.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="test"
+                )
+
+        # Should only return the standard block, not the INPUT block
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "standard-block-id"
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_filtered_from_results(self):
+        """Verify SmartDecisionMakerBlock is filtered from search results."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        search_results = [
+            {"content_id": smart_decision_id, "score": 0.9},
+            {"content_id": "normal-block-id", "score": 0.8},
+        ]
+
+        # SmartDecisionMakerBlock has STANDARD type but is excluded by ID
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+        normal_block = make_mock_block(
+            "normal-block-id", "Normal Block", BlockType.STANDARD
+        )
+
+        def mock_get_block(block_id):
+            return {
+                smart_decision_id: smart_block,
+                "normal-block-id": normal_block,
+            }.get(block_id)
+
+        with patch(
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
+            new_callable=AsyncMock,
+            return_value=(search_results, 2),
+        ):
+            with patch(
+                "backend.api.features.chat.tools.find_block.get_block",
+                side_effect=mock_get_block,
+            ):
+                tool = FindBlockTool()
+                response = await tool._execute(
+                    user_id=_TEST_USER_ID, session=session, query="decision"
+                )
+
+        # Should only return normal block, not SmartDecisionMakerBlock
+        assert isinstance(response, BlockListResponse)
+        assert len(response.blocks) == 1
+        assert response.blocks[0].id == "normal-block-id"
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -8,6 +8,10 @@ from typing import Any
 from pydantic_core import PydanticUndefined

 from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+)
 from backend.data.block import get_block
 from backend.data.execution import ExecutionContext
 from backend.data.model import CredentialsMetaInput
@@ -212,6 +216,19 @@ class RunBlockTool(BaseTool):
                session_id=session_id,
            )

+        # Check if block is excluded from CoPilot (graph-only blocks)
+        if (
+            block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+            or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+        ):
+            return ErrorResponse(
+                message=(
+                    f"Block '{block.name}' cannot be run directly in CoPilot. "
+                    "This block is designed for use within graphs only."
+                ),
+                session_id=session_id,
+            )
+
        logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")

        creds_manager = IntegrationCredentialsManager()
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
@@ -0,0 +1,106 @@
+"""Tests for block execution guards in RunBlockTool."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.models import ErrorResponse
+from backend.api.features.chat.tools.run_block import RunBlockTool
+from backend.data.block import BlockType
+
+from ._test_data import make_session
+
+_TEST_USER_ID = "test-user-run-block"
+
+
+def make_mock_block(
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
+):
+    """Create a mock block for testing."""
+    mock = MagicMock()
+    mock.id = block_id
+    mock.name = name
+    mock.block_type = block_type
+    mock.disabled = disabled
+    mock.input_schema = MagicMock()
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields_info.return_value = []
+    return mock
+
+
+class TestRunBlockFiltering:
+    """Tests for block execution guards in RunBlockTool."""
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_type_returns_error(self):
+        """Attempting to execute a block with excluded BlockType returns error."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=input_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="input-block-id",
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+        assert "designed for use within graphs only" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_excluded_block_id_returns_error(self):
+        """Attempting to execute SmartDecisionMakerBlock returns error."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
+        smart_block = make_mock_block(
+            smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=smart_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id=smart_decision_id,
+                input_data={},
+            )
+
+        assert isinstance(response, ErrorResponse)
+        assert "cannot be run directly in CoPilot" in response.message
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_non_excluded_block_passes_guard(self):
+        """Non-excluded blocks pass the filtering guard (may fail later for other reasons)."""
+        session = make_session(user_id=_TEST_USER_ID)
+
+        standard_block = make_mock_block(
+            "standard-id", "HTTP Request", BlockType.STANDARD
+        )
+
+        with patch(
+            "backend.api.features.chat.tools.run_block.get_block",
+            return_value=standard_block,
+        ):
+            tool = RunBlockTool()
+            response = await tool._execute(
+                user_id=_TEST_USER_ID,
+                session=session,
+                block_id="standard-id",
+                input_data={},
+            )
+
+        # Should NOT be an ErrorResponse about CoPilot exclusion
+        # (may be other errors like missing credentials, but not the exclusion guard)
+        if isinstance(response, ErrorResponse):
+            assert "cannot be run directly in CoPilot" not in response.message
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.

 import logging
 import re
+import time
 from dataclasses import dataclass
 from typing import Any, Literal

@@ -362,7 +363,11 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking
@@ -686,7 +691,11 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0

@@ -718,6 +727,87 @@ async def hybrid_search_simple(
    return await hybrid_search(query=query, page=page, page_size=page_size)


+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: only log vector error diagnostics once per this interval
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+_last_vector_diag_time: float = 0
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log diagnostic info when 'type vector does not exist' error occurs.
+
+    Note: Diagnostic queries use query_raw_with_schema which may run on a different
+    pooled connection than the one that failed. Session-level search_path can differ,
+    so the results describe the diagnostic connection, not necessarily the failed one.
+
+    Includes rate limiting to avoid log spam - only logs once per minute.
+    Caller should re-raise the error after calling this function.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not (
+        "type" in error_str and "vector" in error_str and "does not exist" in error_str
+    ):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.time()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now
+
+    try:
+        diagnostics: dict[str, object] = {}
+
+        try:
+            search_path_result = await query_raw_with_schema("SHOW search_path")
+            diagnostics["search_path"] = search_path_result
+        except Exception as e:
+            diagnostics["search_path"] = f"Error: {e}"
+
+        try:
+            schema_result = await query_raw_with_schema("SELECT current_schema()")
+            diagnostics["current_schema"] = schema_result
+        except Exception as e:
+            diagnostics["current_schema"] = f"Error: {e}"
+
+        try:
+            user_result = await query_raw_with_schema(
+                "SELECT current_user, session_user, current_database()"
+            )
+            diagnostics["user_info"] = user_result
+        except Exception as e:
+            diagnostics["user_info"] = f"Error: {e}"
+
+        try:
+            # Check pgvector extension installation (per-database, stable info)
+            ext_result = await query_raw_with_schema(
+                "SELECT extname, extversion, nspname as schema "
+                "FROM pg_extension e "
+                "JOIN pg_namespace n ON e.extnamespace = n.oid "
+                "WHERE extname = 'vector'"
+            )
+            diagnostics["pgvector_extension"] = ext_result
+        except Exception as e:
+            diagnostics["pgvector_extension"] = f"Error: {e}"
+
+        logger.error(
+            f"Vector type error diagnostics:\n"
+            f"  Error: {error}\n"
+            f"  search_path: {diagnostics.get('search_path')}\n"
+            f"  current_schema: {diagnostics.get('current_schema')}\n"
+            f"  user_info: {diagnostics.get('user_info')}\n"
+            f"  pgvector_extension: {diagnostics.get('pgvector_extension')}"
+        )
+    except Exception as diag_error:
+        logger.error(f"Failed to collect vector error diagnostics: {diag_error}")
+
+
 # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
 # for existing code that expects the popularity parameter
 HybridSearchWeights = StoreAgentSearchWeights
--- a/autogpt_platform/backend/backend/data/execution_queue_test.py
+++ b/autogpt_platform/backend/backend/data/execution_queue_test.py
@@ -3,8 +3,6 @@
 import queue
 import threading

-import pytest
-
 from backend.data.execution import ExecutionQueue