diff --git a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py index e1b8f402c8..b10bfbcc06 100644 --- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py +++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py @@ -8,6 +8,7 @@ Includes BM25 reranking for improved lexical relevance. import logging import re +import time from dataclasses import dataclass from typing import Any, Literal @@ -362,7 +363,11 @@ async def unified_hybrid_search( LIMIT {limit_param} OFFSET {offset_param} """ - results = await query_raw_with_schema(sql_query, *params) + try: + results = await query_raw_with_schema(sql_query, *params) + except Exception as e: + await _log_vector_error_diagnostics(e) + raise total = results[0]["total_count"] if results else 0 # Apply BM25 reranking @@ -686,7 +691,11 @@ async def hybrid_search( LIMIT {limit_param} OFFSET {offset_param} """ - results = await query_raw_with_schema(sql_query, *params) + try: + results = await query_raw_with_schema(sql_query, *params) + except Exception as e: + await _log_vector_error_diagnostics(e) + raise total = results[0]["total_count"] if results else 0 @@ -718,6 +727,87 @@ async def hybrid_search_simple( return await hybrid_search(query=query, page=page, page_size=page_size) +# ============================================================================ +# Diagnostics +# ============================================================================ + +# Rate limit: only log vector error diagnostics once per this interval +_VECTOR_DIAG_INTERVAL_SECONDS = 60 +_last_vector_diag_time: float = 0 + + +async def _log_vector_error_diagnostics(error: Exception) -> None: + """Log diagnostic info when 'type vector does not exist' error occurs. + + Note: Diagnostic queries use query_raw_with_schema which may run on a different + pooled connection than the one that failed. Session-level search_path can differ, + so these diagnostics show cluster-wide state, not necessarily the failed session. + + Includes rate limiting to avoid log spam - only logs once per minute. + Caller should re-raise the error after calling this function. + """ + global _last_vector_diag_time + + # Check if this is the vector type error + error_str = str(error).lower() + if not ( + "type" in error_str and "vector" in error_str and "does not exist" in error_str + ): + return + + # Rate limit: only log once per interval + now = time.time() + if now - _last_vector_diag_time < _VECTOR_DIAG_INTERVAL_SECONDS: + return + _last_vector_diag_time = now + + try: + diagnostics: dict[str, object] = {} + + try: + search_path_result = await query_raw_with_schema("SHOW search_path") + diagnostics["search_path"] = search_path_result + except Exception as e: + diagnostics["search_path"] = f"Error: {e}" + + try: + schema_result = await query_raw_with_schema("SELECT current_schema()") + diagnostics["current_schema"] = schema_result + except Exception as e: + diagnostics["current_schema"] = f"Error: {e}" + + try: + user_result = await query_raw_with_schema( + "SELECT current_user, session_user, current_database()" + ) + diagnostics["user_info"] = user_result + except Exception as e: + diagnostics["user_info"] = f"Error: {e}" + + try: + # Check pgvector extension installation (cluster-wide, stable info) + ext_result = await query_raw_with_schema( + "SELECT extname, extversion, nspname as schema " + "FROM pg_extension e " + "JOIN pg_namespace n ON e.extnamespace = n.oid " + "WHERE extname = 'vector'" + ) + diagnostics["pgvector_extension"] = ext_result + except Exception as e: + diagnostics["pgvector_extension"] = f"Error: {e}" + + logger.error( + f"Vector type error diagnostics:\n" + f" Error: {error}\n" + f" search_path: {diagnostics.get('search_path')}\n" + f" current_schema: {diagnostics.get('current_schema')}\n" + f" user_info: {diagnostics.get('user_info')}\n" + f" pgvector_extension: {diagnostics.get('pgvector_extension')}" + ) + except Exception as diag_error: + logger.error(f"Failed to collect vector error diagnostics: {diag_error}") + + # Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights # for existing code that expects the popularity parameter HybridSearchWeights = StoreAgentSearchWeights