fix(backend): Add diagnostic logging for vector type errors

When a 'type vector does not exist' error occurs in hybrid search, log the search_path, current_schema, user/database info, and the installed pgvector extension row to help diagnose why the pgvector extension isn't visible. The diagnostics are rate limited to one log entry per minute to avoid log spam. This is a debug-only change to help track down an intermittent issue on dev-behave where the vector type occasionally fails to resolve.
2026-02-09 22:35:54 -05:00 · 2026-02-09 16:06:29 +00:00
5 changed files with 123 additions and 295 deletions
--- a/autogpt_platform/CLAUDE.md
+++ b/autogpt_platform/CLAUDE.md
@@ -45,11 +45,6 @@ AutoGPT Platform is a monorepo containing:
 - Backend/Frontend services use YAML anchors for consistent configuration
 - Supabase services (`db/docker/docker-compose.yml`) follow the same pattern

-### Branching Strategy
-
- **`dev`** is the main development branch. All PRs should target `dev`.
- **`master`** is the production branch. Only used for production releases.
-
 ### Creating Pull Requests

 - Create the PR against the `dev` branch of the repository.
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -54,8 +54,7 @@ class FindBlockTool(BaseTool):
            "Blocks are reusable components that perform specific tasks like "
            "sending emails, making API calls, processing text, etc. "
            "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
-            "The response includes each block's id, required_inputs, "
-            "and output_fields."
+            "The response includes each block's id, required_inputs, and input_schema."
        )

    @property
@@ -124,7 +123,7 @@ class FindBlockTool(BaseTool):
                    session_id=session_id,
                )

-            # Enrich results with block information
+            # Enrich results with full block information
            blocks: list[BlockInfoSummary] = []
            for result in results:
                block_id = result["content_id"]
@@ -142,8 +141,8 @@ class FindBlockTool(BaseTool):
                    continue

                # Get input/output schemas
-                input_schema: dict[str, Any] = {}
-                output_schema: dict[str, Any] = {}
+                input_schema = {}
+                output_schema = {}
                try:
                    input_schema = block.input_schema.jsonschema()
                except Exception as e:
@@ -161,28 +160,26 @@ class FindBlockTool(BaseTool):
                        e,
                    )

-                # Get credential field names to exclude from required inputs
-                credentials_fields: set[str] = set()
-                try:
-                    credentials_fields = set(
-                        block.input_schema.get_credentials_fields().keys()
-                    )
-                except Exception as e:
-                    logger.debug(
-                        "Failed to get credentials fields for block %s: %s",
-                        block_id,
-                        e,
-                    )
+                # Get categories from block instance
+                categories = []
+                if hasattr(block, "categories") and block.categories:
+                    categories = [cat.value for cat in block.categories]

-                # Extract input fields (excluding credentials)
+                # Extract required inputs for easier use
                required_inputs: list[BlockInputFieldInfo] = []
                if input_schema:
                    properties = input_schema.get("properties", {})
                    required_fields = set(input_schema.get("required", []))
+                    # Get credential field names to exclude from required inputs
+                    credentials_fields = set(
+                        block.input_schema.get_credentials_fields().keys()
+                    )

                    for field_name, field_schema in properties.items():
+                        # Skip credential fields - they're handled separately
                        if field_name in credentials_fields:
                            continue
+
                        required_inputs.append(
                            BlockInputFieldInfo(
                                name=field_name,
@@ -193,26 +190,15 @@ class FindBlockTool(BaseTool):
                            )
                        )

-                # Extract output fields
-                output_fields: list[BlockInputFieldInfo] = []
-                if output_schema:
-                    out_props = output_schema.get("properties", {})
-                    for field_name, field_schema in out_props.items():
-                        output_fields.append(
-                            BlockInputFieldInfo(
-                                name=field_name,
-                                type=field_schema.get("type", "string"),
-                                description=field_schema.get("description", ""),
-                            )
-                        )
-
                blocks.append(
                    BlockInfoSummary(
                        id=block_id,
                        name=block.name,
                        description=block.description or "",
+                        categories=categories,
+                        input_schema=input_schema,
+                        output_schema=output_schema,
                        required_inputs=required_inputs,
-                        output_fields=output_fields,
                    )
                )

@@ -241,8 +227,8 @@ class FindBlockTool(BaseTool):
            return BlockListResponse(
                message=(
                    f"Found {len(blocks)} block(s) matching '{query}'. "
-                    "To execute a block, use run_block with the block's 'id' "
-                    "and provide 'input_data' matching required_inputs."
+                    "To execute a block, use run_block with the block's 'id' field "
+                    "and provide 'input_data' matching the block's input_schema."
                ),
                blocks=blocks,
                count=len(blocks),
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
@@ -18,13 +18,7 @@ _TEST_USER_ID = "test-user-find-block"


 def make_mock_block(
-    block_id: str,
-    name: str,
-    block_type: BlockType,
-    disabled: bool = False,
-    input_schema: dict | None = None,
-    output_schema: dict | None = None,
-    credentials_fields: dict | None = None,
+    block_id: str, name: str, block_type: BlockType, disabled: bool = False
 ):
    """Create a mock block for testing."""
    mock = MagicMock()
@@ -34,13 +28,10 @@ def make_mock_block(
    mock.block_type = block_type
    mock.disabled = disabled
    mock.input_schema = MagicMock()
-    mock.input_schema.jsonschema.return_value = input_schema or {
-        "properties": {},
-        "required": [],
-    }
-    mock.input_schema.get_credentials_fields.return_value = credentials_fields or {}
+    mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
+    mock.input_schema.get_credentials_fields.return_value = {}
    mock.output_schema = MagicMock()
-    mock.output_schema.jsonschema.return_value = output_schema or {}
+    mock.output_schema.jsonschema.return_value = {}
    mock.categories = []
    return mock

@@ -146,240 +137,3 @@ class TestFindBlockFiltering:
        assert isinstance(response, BlockListResponse)
        assert len(response.blocks) == 1
        assert response.blocks[0].id == "normal-block-id"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_response_size_average_chars_per_block(self):
-        """Measure average chars per block in the serialized response."""
-        session = make_session(user_id=_TEST_USER_ID)
-
-        # Realistic block definitions modeled after real blocks
-        block_defs = [
-            {
-                "id": "http-block-id",
-                "name": "Send Web Request",
-                "input_schema": {
-                    "properties": {
-                        "url": {
-                            "type": "string",
-                            "description": "The URL to send the request to",
-                        },
-                        "method": {
-                            "type": "string",
-                            "description": "The HTTP method to use",
-                        },
-                        "headers": {
-                            "type": "object",
-                            "description": "Headers to include in the request",
-                        },
-                        "json_format": {
-                            "type": "boolean",
-                            "description": "If true, send the body as JSON",
-                        },
-                        "body": {
-                            "type": "object",
-                            "description": "Form/JSON body payload",
-                        },
-                        "credentials": {
-                            "type": "object",
-                            "description": "HTTP credentials",
-                        },
-                    },
-                    "required": ["url", "method"],
-                },
-                "output_schema": {
-                    "properties": {
-                        "response": {
-                            "type": "object",
-                            "description": "The response from the server",
-                        },
-                        "client_error": {
-                            "type": "object",
-                            "description": "Errors on 4xx status codes",
-                        },
-                        "server_error": {
-                            "type": "object",
-                            "description": "Errors on 5xx status codes",
-                        },
-                        "error": {
-                            "type": "string",
-                            "description": "Errors for all other exceptions",
-                        },
-                    },
-                },
-                "credentials_fields": {"credentials": True},
-            },
-            {
-                "id": "email-block-id",
-                "name": "Send Email",
-                "input_schema": {
-                    "properties": {
-                        "to_email": {
-                            "type": "string",
-                            "description": "Recipient email address",
-                        },
-                        "subject": {
-                            "type": "string",
-                            "description": "Subject of the email",
-                        },
-                        "body": {
-                            "type": "string",
-                            "description": "Body of the email",
-                        },
-                        "config": {
-                            "type": "object",
-                            "description": "SMTP Config",
-                        },
-                        "credentials": {
-                            "type": "object",
-                            "description": "SMTP credentials",
-                        },
-                    },
-                    "required": ["to_email", "subject", "body", "credentials"],
-                },
-                "output_schema": {
-                    "properties": {
-                        "status": {
-                            "type": "string",
-                            "description": "Status of the email sending operation",
-                        },
-                        "error": {
-                            "type": "string",
-                            "description": "Error message if sending failed",
-                        },
-                    },
-                },
-                "credentials_fields": {"credentials": True},
-            },
-            {
-                "id": "claude-code-block-id",
-                "name": "Claude Code",
-                "input_schema": {
-                    "properties": {
-                        "e2b_credentials": {
-                            "type": "object",
-                            "description": "API key for E2B platform",
-                        },
-                        "anthropic_credentials": {
-                            "type": "object",
-                            "description": "API key for Anthropic",
-                        },
-                        "prompt": {
-                            "type": "string",
-                            "description": "Task or instruction for Claude Code",
-                        },
-                        "timeout": {
-                            "type": "integer",
-                            "description": "Sandbox timeout in seconds",
-                        },
-                        "setup_commands": {
-                            "type": "array",
-                            "description": "Shell commands to run before execution",
-                        },
-                        "working_directory": {
-                            "type": "string",
-                            "description": "Working directory for Claude Code",
-                        },
-                        "session_id": {
-                            "type": "string",
-                            "description": "Session ID to resume a conversation",
-                        },
-                        "sandbox_id": {
-                            "type": "string",
-                            "description": "Sandbox ID to reconnect to",
-                        },
-                        "conversation_history": {
-                            "type": "string",
-                            "description": "Previous conversation history",
-                        },
-                        "dispose_sandbox": {
-                            "type": "boolean",
-                            "description": "Whether to dispose sandbox after execution",
-                        },
-                    },
-                    "required": [
-                        "e2b_credentials",
-                        "anthropic_credentials",
-                        "prompt",
-                    ],
-                },
-                "output_schema": {
-                    "properties": {
-                        "response": {
-                            "type": "string",
-                            "description": "Output from Claude Code execution",
-                        },
-                        "files": {
-                            "type": "array",
-                            "description": "Files created/modified by Claude Code",
-                        },
-                        "conversation_history": {
-                            "type": "string",
-                            "description": "Full conversation history",
-                        },
-                        "session_id": {
-                            "type": "string",
-                            "description": "Session ID for this conversation",
-                        },
-                        "sandbox_id": {
-                            "type": "string",
-                            "description": "ID of the sandbox instance",
-                        },
-                        "error": {
-                            "type": "string",
-                            "description": "Error message if execution failed",
-                        },
-                    },
-                },
-                "credentials_fields": {
-                    "e2b_credentials": True,
-                    "anthropic_credentials": True,
-                },
-            },
-        ]
-
-        search_results = [
-            {"content_id": d["id"], "score": 0.9 - i * 0.1}
-            for i, d in enumerate(block_defs)
-        ]
-        mock_blocks = {
-            d["id"]: make_mock_block(
-                block_id=d["id"],
-                name=d["name"],
-                block_type=BlockType.STANDARD,
-                input_schema=d["input_schema"],
-                output_schema=d["output_schema"],
-                credentials_fields=d["credentials_fields"],
-            )
-            for d in block_defs
-        }
-
-        with patch(
-            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
-            new_callable=AsyncMock,
-            return_value=(search_results, len(search_results)),
-        ), patch(
-            "backend.api.features.chat.tools.find_block.get_block",
-            side_effect=lambda bid: mock_blocks.get(bid),
-        ):
-            tool = FindBlockTool()
-            response = await tool._execute(
-                user_id=_TEST_USER_ID, session=session, query="test"
-            )
-
-        assert isinstance(response, BlockListResponse)
-        assert response.count == len(block_defs)
-
-        total_chars = len(response.model_dump_json())
-        avg_chars = total_chars // response.count
-
-        # Print for visibility in test output
-        print(f"\nTotal response size: {total_chars} chars")
-        print(f"Number of blocks: {response.count}")
-        print(f"Average chars per block: {avg_chars}")
-
-        # The old response was ~90K for 10 blocks (~9K per block).
-        # With the optimized format (no raw JSON schemas) we expect ~1.5K per block.
-        assert avg_chars < 2000, (
-            f"Average chars per block ({avg_chars}) exceeds 2000. "
-            f"Total response: {total_chars} chars for {response.count} blocks."
-        )
--- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py
@@ -334,14 +334,13 @@ class BlockInfoSummary(BaseModel):
    id: str
    name: str
    description: str
+    categories: list[str]
+    input_schema: dict[str, Any]
+    output_schema: dict[str, Any]
    required_inputs: list[BlockInputFieldInfo] = Field(
        default_factory=list,
        description="List of required input fields for this block",
    )
-    output_fields: list[BlockInputFieldInfo] = Field(
-        default_factory=list,
-        description="Output fields produced by this block",
-    )


 class BlockListResponse(ToolResponseBase):
@@ -351,6 +350,10 @@ class BlockListResponse(ToolResponseBase):
    blocks: list[BlockInfoSummary]
    count: int
    query: str
+    usage_hint: str = Field(
+        default="To execute a block, call run_block with block_id set to the block's "
+        "'id' field and input_data containing the required fields from input_schema."
+    )


 class BlockOutputResponse(ToolResponseBase):
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.

 import logging
 import re
+import time
 from dataclasses import dataclass
 from typing import Any, Literal

@@ -362,7 +363,11 @@ async def unified_hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0
    # Apply BM25 reranking
@@ -686,7 +691,11 @@ async def hybrid_search(
        LIMIT {limit_param} OFFSET {offset_param}
    """

-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise

    total = results[0]["total_count"] if results else 0

@@ -718,6 +727,87 @@ async def hybrid_search_simple(
    return await hybrid_search(query=query, page=page, page_size=page_size)


+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: minimum seconds between two vector error diagnostic dumps
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+# Monotonic time of the last dump; -inf lets the very first error through
+_last_vector_diag_time: float = float("-inf")
+
+# Read-only diagnostic queries, run in order; each key labels its log line
+_VECTOR_DIAG_QUERIES: dict[str, str] = {
+    "search_path": "SHOW search_path",
+    "current_schema": "SELECT current_schema()",
+    "user_info": "SELECT current_user, session_user, current_database()",
+    "pgvector_extension": (
+        "SELECT extname, extversion, nspname as schema "
+        "FROM pg_extension e "
+        "JOIN pg_namespace n ON e.extnamespace = n.oid "
+        "WHERE extname = 'vector'"
+    ),
+}
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log database diagnostics for a 'type vector does not exist' error.
+
+    Any other error is ignored. Matching errors are rate limited to one dump
+    per ``_VECTOR_DIAG_INTERVAL_SECONDS``, measured on the monotonic clock so
+    wall-clock adjustments cannot suppress or duplicate a dump.
+
+    Note: the diagnostic queries use query_raw_with_schema, which may run on a
+    different pooled connection than the one that failed. Session-level values
+    (search_path, current_schema) are therefore not necessarily those of the
+    failed session.
+
+    Best-effort: a failing diagnostic query is reported inline as "Error: ..."
+    and any other failure while collecting is logged instead of raised. The
+    caller should re-raise ``error`` after calling this function.
+
+    Args:
+        error: The exception raised by the failed search query.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not all(s in error_str for s in ("type", "vector", "does not exist")):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.monotonic()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now
+
+    try:
+        diagnostics: dict[str, object] = {}
+        for label, sql in _VECTOR_DIAG_QUERIES.items():
+            # Isolate each query so one failure does not hide the others
+            try:
+                diagnostics[label] = await query_raw_with_schema(sql)
+            except Exception as e:
+                diagnostics[label] = f"Error: {e}"
+
+        logger.error(
+            "Vector type error diagnostics:\n"
+            "  Error: %s\n"
+            "  search_path: %s\n"
+            "  current_schema: %s\n"
+            "  user_info: %s\n"
+            "  pgvector_extension: %s",
+            error,
+            diagnostics.get("search_path"),
+            diagnostics.get("current_schema"),
+            diagnostics.get("user_info"),
+            diagnostics.get("pgvector_extension"),
+        )
+    except Exception as diag_error:
+        logger.error("Failed to collect vector error diagnostics: %s", diag_error)
+
+
 # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
 # for existing code that expects the popularity parameter
 HybridSearchWeights = StoreAgentSearchWeights