From 81f8290f0176eea29be3428c5e6ed21b82dc5da2 Mon Sep 17 00:00:00 2001
From: Otto
Date: Tue, 10 Feb 2026 07:35:13 +0000
Subject: [PATCH] debug(backend/db): Add diagnostic logging for vector type errors (#12024)

Adds diagnostic logging when the `type vector does not exist` error occurs in raw SQL queries.

## Problem

We're seeing intermittent "type vector does not exist" errors on dev-behave ([Sentry issue](https://significant-gravitas.sentry.io/issues/7205929979/)). The schema that hosts the pgvector extension should be on the connection's search_path, yet queries occasionally fail to resolve the vector type.

## Solution

When a query fails with this specific error, we now log:

- `SHOW search_path` - what schemas are being searched
- `SELECT current_schema()` - the active schema
- `SELECT current_user, session_user, current_database()` - connection context
- pgvector metadata from `pg_extension` - installed version and schema

This diagnostic info will help identify why the vector type isn't visible in certain cases.

## Changes

- Added `_log_vector_error_diagnostics()` helper function in `backend/api/features/store/hybrid_search.py`
- Wrapped the raw SQL execution in both hybrid search paths in try/except to catch and diagnose vector type errors
- Rate-limited the diagnostics to once per minute to avoid log spam
- The original exception is re-raised after logging (no behavior change)

## Testing

This is observational/diagnostic code. It will be validated by waiting for the error to occur naturally on dev and checking the logs.

## Rollout

Once we've captured diagnostic logs and identified the root cause, this logging can be removed or reduced in verbosity.
---
 .../api/features/store/hybrid_search.py | 94 ++++++++++++++++++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
index e1b8f402c8..b10bfbcc06 100644
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance.
 """
 
 import logging
 import re
+import time
 from dataclasses import dataclass
 from typing import Any, Literal
@@ -362,7 +363,11 @@ async def unified_hybrid_search(
         LIMIT {limit_param} OFFSET {offset_param}
     """
 
-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise
 
     total = results[0]["total_count"] if results else 0
     # Apply BM25 reranking
@@ -686,7 +691,11 @@ async def hybrid_search(
         LIMIT {limit_param} OFFSET {offset_param}
     """
 
-    results = await query_raw_with_schema(sql_query, *params)
+    try:
+        results = await query_raw_with_schema(sql_query, *params)
+    except Exception as e:
+        await _log_vector_error_diagnostics(e)
+        raise
 
     total = results[0]["total_count"] if results else 0
 
@@ -718,6 +727,87 @@ async def hybrid_search_simple(
     return await hybrid_search(query=query, page=page, page_size=page_size)
 
 
+# ============================================================================
+# Diagnostics
+# ============================================================================
+
+# Rate limit: only log vector error diagnostics once per this interval
+_VECTOR_DIAG_INTERVAL_SECONDS = 60
+_last_vector_diag_time: float = 0
+
+
+async def _log_vector_error_diagnostics(error: Exception) -> None:
+    """Log diagnostic info when 'type vector does not exist' error occurs.
+
+    Note: Diagnostic queries use query_raw_with_schema which may run on a different
+    pooled connection than the one that failed. Session-level search_path can differ,
+    so these diagnostics show cluster-wide state, not necessarily the failed session.
+
+    Includes rate limiting to avoid log spam - only logs once per minute.
+    Caller should re-raise the error after calling this function.
+    """
+    global _last_vector_diag_time
+
+    # Check if this is the vector type error
+    error_str = str(error).lower()
+    if not (
+        "type" in error_str and "vector" in error_str and "does not exist" in error_str
+    ):
+        return
+
+    # Rate limit: only log once per interval
+    now = time.time()
+    if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS:
+        return
+    _last_vector_diag_time = now
+
+    try:
+        diagnostics: dict[str, object] = {}
+
+        try:
+            search_path_result = await query_raw_with_schema("SHOW search_path")
+            diagnostics["search_path"] = search_path_result
+        except Exception as e:
+            diagnostics["search_path"] = f"Error: {e}"
+
+        try:
+            schema_result = await query_raw_with_schema("SELECT current_schema()")
+            diagnostics["current_schema"] = schema_result
+        except Exception as e:
+            diagnostics["current_schema"] = f"Error: {e}"
+
+        try:
+            user_result = await query_raw_with_schema(
+                "SELECT current_user, session_user, current_database()"
+            )
+            diagnostics["user_info"] = user_result
+        except Exception as e:
+            diagnostics["user_info"] = f"Error: {e}"
+
+        try:
+            # Check pgvector extension installation (cluster-wide, stable info)
+            ext_result = await query_raw_with_schema(
+                "SELECT extname, extversion, nspname as schema "
+                "FROM pg_extension e "
+                "JOIN pg_namespace n ON e.extnamespace = n.oid "
+                "WHERE extname = 'vector'"
+            )
+            diagnostics["pgvector_extension"] = ext_result
+        except Exception as e:
+            diagnostics["pgvector_extension"] = f"Error: {e}"
+
+        logger.error(
+            f"Vector type error diagnostics:\n"
+            f"  Error: {error}\n"
+            f"  search_path: {diagnostics.get('search_path')}\n"
+            f"  current_schema: {diagnostics.get('current_schema')}\n"
+            f"  user_info: {diagnostics.get('user_info')}\n"
+            f"  pgvector_extension: {diagnostics.get('pgvector_extension')}"
+        )
+    except Exception as diag_error:
+        logger.error(f"Failed to collect vector error diagnostics: {diag_error}")
+
+
 # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
 # for existing code that expects the popularity parameter
 HybridSearchWeights = StoreAgentSearchWeights
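
A minimal sketch (not part of the patch) of how the diagnostic path could be exercised locally instead of waiting for the error to recur on dev: pass the helper a synthetic exception whose message matches its `type ... vector ... does not exist` check. The import path `backend.api.features.store.hybrid_search` is assumed from the file layout above, and the probe queries still need a reachable database; without one, each probe records its own `Error: ...` entry instead.

```python
# Hypothetical local check, not part of the patch: trigger the diagnostic
# logging with a synthetic error instead of waiting for it to recur on dev.
# Assumes the module is importable as backend.api.features.store.hybrid_search.
import asyncio

from backend.api.features.store import hybrid_search


async def main() -> None:
    # The message contains "type", "vector", and "does not exist", so the
    # helper's string match treats it as the pgvector resolution failure.
    synthetic_error = Exception('type "vector" does not exist')

    # First call should emit the "Vector type error diagnostics" log block.
    await hybrid_search._log_vector_error_diagnostics(synthetic_error)

    # A second call within _VECTOR_DIAG_INTERVAL_SECONDS is dropped by the
    # rate limiter, so only one diagnostics block appears in the logs.
    await hybrid_search._log_vector_error_diagnostics(synthetic_error)


if __name__ == "__main__":
    asyncio.run(main())
```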