Compare commits


19 Commits

Author SHA1 Message Date
Nicholas Tindle
77453f5f15 Merge branch 'dev' into claude/fix-block-search-vector-error-QMbO4 2026-01-27 10:50:47 -06:00
Nicholas Tindle
9ffff490b5 fix(backend): add pod and pgvector diagnostics to vector query logging
Enhance debug logging for vector query failures to help identify which
database replicas have pgvector installed vs not:

- Add get_pod_info() to capture pod hostname, name, namespace, and IP
- Extend get_connection_debug_info() to check pgvector extension status,
  including which schema it's installed in and list all extensions
- Log connection details BEFORE every vector query (not just on failure)
  to correlate backend_pid with success/failure patterns
- Include server_addr to identify which DB replica is being hit

This helps diagnose the 30% failure rate where identical queries randomly
fail with "type vector does not exist" - likely due to some replicas
missing the pgvector extension.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 15:22:39 -06:00
Nicholas Tindle
097949b3e7 Merge branch 'feat/agent-generator' into claude/fix-block-search-vector-error-QMbO4 2026-01-26 13:10:02 -06:00
Zamil Majdy
7f1a1f636f Merge branch 'dev' into feat/agent-generator 2026-01-26 11:23:42 -06:00
Zamil Majdy
4dc4ca4256 fix(frontend): address PR review comments
- Remove unnecessary useCallback from handleClarificationAnswers
- Remove redundant comment for clarification_needed rendering
- Use Input component from design system instead of raw textarea
2026-01-26 10:38:23 -06:00
Claude
00730496e3 fix(backend): use SET LOCAL search_path for vector queries
The connection-level search_path via options parameter is being ignored,
likely by PgBouncer/Supavisor in transaction pooling mode. Logs showed
connections with wrong search_path ('"$user", public, extensions') and
wrong current_schema ('public' instead of 'platform').

This fix wraps vector-type queries (those using ::vector or <=>) in a
transaction with SET LOCAL search_path to ensure pgvector types are
always resolvable, regardless of PgBouncer configuration.

Also exposed SEARCH_PATH constant for use in other modules if needed.
2026-01-25 23:36:30 +00:00
Claude
21c753b971 fix(backend): ensure pgvector search_path for all pooled connections
Fix random "type 'vector' does not exist" errors during copilot chat
block search. The error occurred because pooled database connections
could have inconsistent search_path configurations that didn't include
the schema where pgvector was installed.

The fix adds search_path to the PostgreSQL connection options to ensure
all connections include both the application schema and common extension
schemas (public, extensions) where pgvector may be installed across
different environments (local, CI, Supabase).

Also adds connection debug logging for "does not exist" errors to help
diagnose the related "CoPilotUnderstanding table does not exist" issue,
which may indicate connections routing to different database instances.

Debug info includes: search_path, current_schema, server_addr, backend_pid.
2026-01-25 22:26:10 +00:00
Zamil Majdy
732dfcbb63 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/agent-generator 2026-01-24 15:45:55 -06:00
Zamil Majdy
eebaf7df14 feat(frontend): implement clarification questions UI for agent generation
Add interactive form to collect user answers when agent-generator service
returns clarifying questions during agent creation/editing.

Changes:
- Add clarification_needed message type to ChatMessageData
- Create ClarificationQuestionsWidget component for collecting answers
- Update parseToolResponse to detect clarification_needed responses
- Integrate widget into ChatMessage rendering

Fixes issue where users had no way to answer clarifying questions,
causing the chat to keep retrying without necessary context.
2026-01-24 15:45:34 -06:00
Zamil Majdy
653aab44b6 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/agent-generator 2026-01-24 15:05:34 -06:00
Zamil Majdy
f0bc3f2a49 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/agent-generator 2026-01-24 11:16:02 -06:00
Zamil Majdy
e702d77cdf Revert "fix(backend): resolve OAuth test event loop issue"
This reverts commit 25d9dbac83.
2026-01-23 21:25:41 -06:00
Zamil Majdy
38741d2465 Merge branch 'dev' into feat/agent-generator 2026-01-23 21:23:00 -05:00
Zamil Majdy
25d9dbac83 fix(backend): resolve OAuth test event loop issue
Fix RuntimeError: Event loop is closed when running oauth_test.py in
full test suite. Changed client fixture from implicit session scope to
explicit function scope to avoid httpx.AsyncClient binding to event
loops that get closed/replaced between tests.

- Set @pytest.fixture(scope="function") for client fixture
- Added explicit timeout=30.0 to AsyncClient
- Added documentation explaining the fix

This resolves the last failing test in the suite (1722 passed, 1 error).
2026-01-23 20:20:16 -06:00
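The fixture change itself lives in the test suite, but the underlying failure mode can be reproduced with plain asyncio, independent of pytest and httpx (a minimal sketch, not the actual fixture code):

```python
import asyncio


async def ping() -> str:
    return "pong"


# A session-scoped async resource is implicitly bound to the event loop that
# created it. Once that loop is closed, every later use of the resource fails.
loop = asyncio.new_event_loop()
assert loop.run_until_complete(ping()) == "pong"
loop.close()

try:
    loop.run_until_complete(ping())
    outcome = "no error"
except RuntimeError as e:
    # "Event loop is closed" -- the same error the commit message describes.
    outcome = str(e)

# Function scope sidesteps this: each test gets a resource on a live loop.
fresh_loop = asyncio.new_event_loop()
assert fresh_loop.run_until_complete(ping()) == "pong"
fresh_loop.close()
```

This mirrors why moving the `client` fixture to `scope="function"` fixes the suite: the `AsyncClient` stops outliving the event loop it was created on.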
Zamil Majdy
fcbecf3502 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/agent-generator 2026-01-22 14:32:39 -05:00
Zamil Majdy
da9c4a4adf fix: wrap generate_agent call in try/except for consistency
Add exception handler for AgentGeneratorNotConfiguredError in generate_agent
call for defensive consistency, even though decompose_goal would typically
catch it first.

Addresses CodeRabbit review suggestion.
2026-01-21 18:48:39 -05:00
Zamil Majdy
0ca73004e5 feat: add clear error when Agent Generator service is not configured
- Add AgentGeneratorNotConfiguredError exception
- Check service configuration before calling external service
- Return helpful error message in create_agent and edit_agent tools
- Update tests to mock is_external_service_configured

Addresses Sentry review comment about unconditional external service calls
2026-01-21 18:38:05 -05:00
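The shape of this guard can be sketched in a few lines. The `create_agent` body and the direct environment read are illustrative simplifications; only `AGENTGENERATOR_HOST` and the exception name come from the commits themselves:

```python
import os


class AgentGeneratorNotConfiguredError(RuntimeError):
    """The external Agent Generator service has no host configured."""


def is_external_service_configured() -> bool:
    # AGENTGENERATOR_HOST is the setting named in the integration commit;
    # reading it straight from the environment here is a simplification.
    return bool(os.getenv("AGENTGENERATOR_HOST"))


def create_agent(goal: str) -> str:
    # Check configuration before calling the external service, so callers get
    # a clear error instead of an opaque connection failure.
    if not is_external_service_configured():
        raise AgentGeneratorNotConfiguredError(
            "Agent Generator service is not configured (set AGENTGENERATOR_HOST)."
        )
    return f"delegating to external service: {goal}"
```

The point of the pattern is that misconfiguration surfaces as a named, actionable error at the tool boundary rather than deep inside an HTTP client.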
Zamil Majdy
9a786ed8d9 refactor: remove redundant local agent generation code
The external Agent Generator service handles fixing and validation
internally, so we no longer need these components in the backend:

- Removed client.py (built-in LLM client)
- Removed prompts.py (built-in prompts)
- Removed fixer.py (local agent fixing)
- Removed validator.py (local agent validation)
- Removed utils.py (utility functions for fixer/validator)

Simplified create_agent.py and edit_agent.py to directly use
the external service results without local post-processing.

Updated tests to match the simplified architecture.

This reduces ~1,800 lines of code that duplicated functionality
already provided by the external Agent Generator service.
2026-01-21 18:13:09 -05:00
Zamil Majdy
0a435e2ffb feat(backend): add external Agent Generator service integration
Add support for delegating agent generation to an external microservice
when AGENTGENERATOR_HOST is configured. Falls back to built-in LLM-based
implementation when not configured.

Changes:
- Add agentgenerator_host, agentgenerator_port, agentgenerator_timeout settings
- Add service.py client for external Agent Generator API
- Update core.py to delegate to external service when configured
- Export is_external_service_configured and check_external_service_health
- Add comprehensive tests for service client and core integration
2026-01-21 17:44:56 -05:00
2 changed files with 166 additions and 8 deletions

View File

@@ -26,6 +26,31 @@ def add_param(url: str, key: str, value: str) -> str:

 DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
+
+# Extract the application schema from DATABASE_URL for use in queries
+_parsed = urlparse(DATABASE_URL)
+_query_params = dict(parse_qsl(_parsed.query))
+_app_schema = _query_params.get("schema", "public")
+
+# Build search_path that includes app schema and extension schemas where pgvector may live.
+# This is used both in connection options (may be ignored by PgBouncer) and in SET LOCAL
+# statements before raw queries (guaranteed to work).
+SEARCH_PATH = (
+    f"{_app_schema},extensions,public"
+    if _app_schema != "public"
+    else "public,extensions"
+)
+
+# Try to set search_path via PostgreSQL options parameter at connection time.
+# NOTE: This may be ignored by PgBouncer in transaction pooling mode.
+# As a fallback, we also SET LOCAL search_path before raw queries.
+if "options" in _query_params:
+    _query_params["options"] = (
+        _query_params["options"] + f" -c search_path={SEARCH_PATH}"
+    )
+else:
+    _query_params["options"] = f"-c search_path={SEARCH_PATH}"
+DATABASE_URL = urlunparse(_parsed._replace(query=urlencode(_query_params)))
+
 CONN_LIMIT = os.getenv("DB_CONNECTION_LIMIT")
 if CONN_LIMIT:
     DATABASE_URL = add_param(DATABASE_URL, "connection_limit", CONN_LIMIT)
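The DSN rewrite above can be traced on a sample URL (the host, credentials, and `platform` schema below are hypothetical, chosen to match the schema name seen in the logs):

```python
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

# Sample DSN with a non-default application schema (hypothetical values).
dsn = "postgresql://user:pw@db:5432/app?schema=platform"

parsed = urlparse(dsn)
params = dict(parse_qsl(parsed.query))
app_schema = params.get("schema", "public")

# Same construction as the hunk above: app schema first, then the common
# extension schemas where pgvector may be installed.
search_path = (
    f"{app_schema},extensions,public" if app_schema != "public" else "public,extensions"
)
params["options"] = f"-c search_path={search_path}"
rewritten = urlunparse(parsed._replace(query=urlencode(params)))

print(search_path)  # platform,extensions,public
print(rewritten)    # options=-c search_path=... is appended (URL-encoded)
```

Note that `urlencode` percent-encodes the `options` value; PostgreSQL client libraries decode it before passing `-c search_path=...` to the server.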
@@ -108,6 +133,70 @@ def get_database_schema() -> str:
     return query_params.get("schema", "public")
+
+
+def get_pod_info() -> dict:
+    """Get information about the current pod/host.
+
+    Returns dict with: hostname, pod_name (from HOSTNAME env var in k8s),
+    pod_namespace, pod_ip if available.
+    """
+    import socket
+
+    return {
+        "hostname": socket.gethostname(),
+        "pod_name": os.getenv("HOSTNAME", "unknown"),
+        "pod_namespace": os.getenv("POD_NAMESPACE", "unknown"),
+        "pod_ip": os.getenv("POD_IP", "unknown"),
+    }
+
+
+async def get_connection_debug_info(tx=None) -> dict:
+    """Get diagnostic info about the current database connection and pod.
+
+    Useful for debugging "table does not exist" or "type does not exist" errors
+    that may indicate connections going to different database instances or pods.
+
+    Args:
+        tx: Optional transaction client to use for the query (ensures same connection)
+
+    Returns dict with: search_path, current_schema, server_version, pg_backend_pid,
+    pgvector_installed, pgvector_schema, plus pod info
+    """
+    import prisma as prisma_module
+
+    pod_info = get_pod_info()
+    db_client = tx if tx else prisma_module.get_client()
+    try:
+        # Get connection info and check for pgvector in a single query
+        result = await db_client.query_raw(
+            """
+            SELECT
+                current_setting('search_path') as search_path,
+                current_schema() as current_schema,
+                current_database() as current_database,
+                inet_server_addr() as server_addr,
+                inet_server_port() as server_port,
+                pg_backend_pid() as backend_pid,
+                version() as server_version,
+                (SELECT EXISTS(
+                    SELECT 1 FROM pg_extension WHERE extname = 'vector'
+                )) as pgvector_installed,
+                (SELECT nspname FROM pg_extension e
+                 JOIN pg_namespace n ON e.extnamespace = n.oid
+                 WHERE e.extname = 'vector'
+                 LIMIT 1) as pgvector_schema,
+                (SELECT string_agg(extname || ' in ' || nspname, ', ')
+                 FROM pg_extension e
+                 JOIN pg_namespace n ON e.extnamespace = n.oid
+                ) as all_extensions
+            """
+        )
+        db_info = result[0] if result else {}
+        return {**pod_info, **db_info}
+    except Exception as e:
+        return {**pod_info, "db_error": str(e)}
+
+
 async def _raw_with_schema(
     query_template: str,
     *args,
@@ -124,8 +213,9 @@ async def _raw_with_schema(
     Note on pgvector types:
         Use unqualified ::vector and <=> operator in queries. PostgreSQL resolves
-        these via search_path, which includes the schema where pgvector is installed
-        on all environments (local, CI, dev).
+        these via search_path. The connection's search_path is configured at module
+        load to include common extension schemas (public, extensions) where pgvector
+        may be installed across different environments (local, CI, Supabase).

     Args:
         query_template: SQL query with {schema_prefix} and/or {schema} placeholders
@@ -155,12 +245,60 @@ async def _raw_with_schema(
     db_client = client if client else prisma_module.get_client()
-    if execute:
-        result = await db_client.execute_raw(formatted_query, *args)  # type: ignore
-    else:
-        result = await db_client.query_raw(formatted_query, *args)  # type: ignore
-    return result
+    # For queries that might use pgvector types (::vector or <=> operator),
+    # we need to ensure search_path includes the schema where pgvector is installed.
+    # PgBouncer in transaction mode may ignore connection-level options, so we
+    # use SET LOCAL within a transaction to guarantee correct search_path.
+    needs_vector_search_path = "::vector" in formatted_query or "<=>" in formatted_query
+
+    try:
+        if needs_vector_search_path and client is None:
+            # Use transaction to set search_path for vector queries
+            async with db_client.tx() as tx:
+                # Log debug info BEFORE the query to capture which backend we're hitting
+                debug_info = await get_connection_debug_info(tx)
+                logger.info(
+                    f"Vector query starting. backend_pid={debug_info.get('backend_pid')}, "
+                    f"server_addr={debug_info.get('server_addr')}, "
+                    f"pgvector_installed={debug_info.get('pgvector_installed')}, "
+                    f"pgvector_schema={debug_info.get('pgvector_schema')}, "
+                    f"search_path={debug_info.get('search_path')}, "
+                    f"pod={debug_info.get('pod_name')}"
+                )
+                await tx.execute_raw(f"SET LOCAL search_path TO {SEARCH_PATH}")
+                if execute:
+                    result = await tx.execute_raw(formatted_query, *args)  # type: ignore
+                else:
+                    result = await tx.query_raw(formatted_query, *args)  # type: ignore
+                logger.info(
+                    f"Vector query SUCCESS. backend_pid={debug_info.get('backend_pid')}"
+                )
+        else:
+            # Regular query without vector types, or already in a transaction
+            if execute:
+                result = await db_client.execute_raw(formatted_query, *args)  # type: ignore
+            else:
+                result = await db_client.query_raw(formatted_query, *args)  # type: ignore
+        return result
+    except Exception as e:
+        error_msg = str(e)
+        # Log connection debug info for "does not exist" errors to help diagnose
+        # whether connections are going to different database instances
+        if "does not exist" in error_msg:
+            try:
+                debug_info = await get_connection_debug_info()
+                logger.error(
+                    f"Vector query FAILED. Connection debug info: {debug_info}. "
+                    f"Query template: {query_template[:200]}... Error: {error_msg}"
+                )
+            except Exception:
+                logger.error(
+                    f"Vector query FAILED (debug info unavailable). "
+                    f"Query template: {query_template[:200]}... Error: {error_msg}"
+                )
+        raise
+
 
 async def query_raw_with_schema(query_template: str, *args) -> list[dict]:
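The routing predicate in the hunk above is simple enough to lift out and check in isolation (a sketch; the standalone helper name is hypothetical, the substring heuristic is exactly what the diff uses):

```python
def needs_vector_search_path(sql: str) -> bool:
    """Heuristic: does this query use pgvector syntax that search_path must resolve?"""
    # Matches the unqualified ::vector cast and the <=> distance operator.
    return "::vector" in sql or "<=>" in sql


print(needs_vector_search_path("SELECT embedding <=> $1 FROM blocks"))   # True
print(needs_vector_search_path("SELECT $1::vector"))                     # True
print(needs_vector_search_path("SELECT * FROM blocks WHERE id = $1"))    # False
```

Only queries matching this predicate pay the cost of the extra transaction and `SET LOCAL`; everything else keeps the fast single-statement path.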

View File

@@ -216,7 +216,27 @@ async def get_business_understanding(
     # Cache miss - load from database
     logger.debug(f"Business understanding cache miss for user {user_id}")
-    record = await CoPilotUnderstanding.prisma().find_unique(where={"userId": user_id})
+    try:
+        record = await CoPilotUnderstanding.prisma().find_unique(where={"userId": user_id})
+    except Exception as e:
+        error_msg = str(e)
+        if "does not exist" in error_msg:
+            # Log connection debug info to diagnose if connections go to different DBs
+            from backend.data.db import get_connection_debug_info
+
+            try:
+                debug_info = await get_connection_debug_info()
+                logger.error(
+                    f"CoPilotUnderstanding table not found. Connection debug: {debug_info}. "
+                    f"Error: {error_msg}"
+                )
+            except Exception:
+                logger.error(
+                    f"CoPilotUnderstanding table not found (debug unavailable). "
+                    f"Error: {error_msg}"
+                )
+        raise

     if record is None:
         return None