Compare commits

...

1 Commit

Author SHA1 Message Date
Zamil Majdy
12690ad0a9 fix(backend): Use explicit {schema}.vector for pgvector types
The unqualified ::vector type fails in ORDER BY context with PgBouncer.
Use explicit schema qualification ({schema}.vector) which resolves to
platform.vector where the pgvector extension is installed.

Changes:
- Add {schema} placeholder to db.py for raw schema name
- Use {schema}.vector instead of unqualified ::vector in embeddings.py
- Use {{schema}}.vector (braces doubled to escape them inside the f-string) instead of unqualified ::vector in hybrid_search.py

Tested on dev: explicit platform.vector works in all contexts.

Fixes: AUTOGPT-SERVER-76B
2026-01-21 10:19:08 -05:00
3 changed files with 15 additions and 26 deletions

View File

@@ -154,16 +154,15 @@ async def store_content_embedding(
# Upsert the embedding # Upsert the embedding
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
# Use {pgvector_schema}.vector for explicit pgvector type qualification
await execute_raw_with_schema( await execute_raw_with_schema(
""" """
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" ( INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt" "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
) )
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{pgvector_schema}.vector, $5, $6::jsonb, NOW(), NOW()) VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{schema}.vector, $5, $6::jsonb, NOW(), NOW())
ON CONFLICT ("contentType", "contentId", "userId") ON CONFLICT ("contentType", "contentId", "userId")
DO UPDATE SET DO UPDATE SET
"embedding" = $4::{pgvector_schema}.vector, "embedding" = $4::{schema}.vector,
"searchableText" = $5, "searchableText" = $5,
"metadata" = $6::jsonb, "metadata" = $6::jsonb,
"updatedAt" = NOW() "updatedAt" = NOW()
@@ -879,8 +878,6 @@ async def semantic_search(
min_similarity_idx = len(params) + 1 min_similarity_idx = len(params) + 1
params.append(min_similarity) params.append(min_similarity)
# Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
# Use OPERATOR({pgvector_schema}.<=>) for explicit operator schema qualification
sql = ( sql = (
""" """
SELECT SELECT
@@ -888,9 +885,9 @@ async def semantic_search(
"contentType" as content_type, "contentType" as content_type,
"searchableText" as searchable_text, "searchableText" as searchable_text,
metadata, metadata,
1 - (embedding OPERATOR({pgvector_schema}.<=>) '""" 1 - (embedding <=> '"""
+ embedding_str + embedding_str
+ """'::{pgvector_schema}.vector) as similarity + """'::{schema}.vector) as similarity
FROM {schema_prefix}"UnifiedContentEmbedding" FROM {schema_prefix}"UnifiedContentEmbedding"
WHERE "contentType" IN (""" WHERE "contentType" IN ("""
+ content_type_placeholders + content_type_placeholders
@@ -898,9 +895,9 @@ async def semantic_search(
""" """
+ user_filter + user_filter
+ """ + """
AND 1 - (embedding OPERATOR({pgvector_schema}.<=>) '""" AND 1 - (embedding <=> '"""
+ embedding_str + embedding_str
+ """'::{pgvector_schema}.vector) >= $""" + """'::{schema}.vector) >= $"""
+ str(min_similarity_idx) + str(min_similarity_idx)
+ """ + """
ORDER BY similarity DESC ORDER BY similarity DESC

View File

@@ -295,7 +295,7 @@ async def unified_hybrid_search(
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[]) WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
{user_filter} {user_filter}
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector ORDER BY uce.embedding <=> {embedding_param}::{{schema}}.vector
LIMIT 200 LIMIT 200
) )
), ),
@@ -307,7 +307,7 @@ async def unified_hybrid_search(
uce.metadata, uce.metadata,
uce."updatedAt" as updated_at, uce."updatedAt" as updated_at,
-- Semantic score: cosine similarity (1 - distance) -- Semantic score: cosine similarity (1 - distance)
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score, COALESCE(1 - (uce.embedding <=> {embedding_param}::{{schema}}.vector), 0) as semantic_score,
-- Lexical score: ts_rank_cd -- Lexical score: ts_rank_cd
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw, COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
-- Category match from metadata -- Category match from metadata
@@ -583,7 +583,7 @@ async def hybrid_search(
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType" WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
AND uce."userId" IS NULL AND uce."userId" IS NULL
AND {where_clause} AND {where_clause}
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector ORDER BY uce.embedding <=> {embedding_param}::{{schema}}.vector
LIMIT 200 LIMIT 200
) uce ) uce
), ),
@@ -605,7 +605,7 @@ async def hybrid_search(
-- Searchable text for BM25 reranking -- Searchable text for BM25 reranking
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text, COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
-- Semantic score -- Semantic score
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score, COALESCE(1 - (uce.embedding <=> {embedding_param}::{{schema}}.vector), 0) as semantic_score,
-- Lexical score (raw, will normalize) -- Lexical score (raw, will normalize)
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw, COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
-- Category match -- Category match

View File

@@ -120,11 +120,10 @@ async def _raw_with_schema(
Supports placeholders: Supports placeholders:
- {schema_prefix}: Table/type prefix (e.g., "platform".) - {schema_prefix}: Table/type prefix (e.g., "platform".)
- {schema}: Raw schema name for application tables (e.g., platform) - {schema}: Raw schema name (e.g., platform) for pgvector types
- {pgvector_schema}: Schema where pgvector is installed (defaults to "public")
Args: Args:
query_template: SQL query with {schema_prefix}, {schema}, and/or {pgvector_schema} placeholders query_template: SQL query with {schema_prefix} and/or {schema} placeholders
*args: Query parameters *args: Query parameters
execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE. execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
client: Optional Prisma client for transactions (only used when execute=True). client: Optional Prisma client for transactions (only used when execute=True).
@@ -133,23 +132,16 @@ async def _raw_with_schema(
- list[dict] if execute=False (query results) - list[dict] if execute=False (query results)
- int if execute=True (number of affected rows) - int if execute=True (number of affected rows)
Example with vector type: Example:
await execute_raw_with_schema( await execute_raw_with_schema(
'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{pgvector_schema}.vector)', 'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{schema}.vector)',
embedding_data embedding_data
) )
""" """
schema = get_database_schema() schema = get_database_schema()
schema_prefix = f'"{schema}".' if schema != "public" else "" schema_prefix = f'"{schema}".' if schema != "public" else ""
# pgvector extension is typically installed in "public" schema
# On Supabase it may be in "extensions" but "public" is the common default
pgvector_schema = "public"
formatted_query = query_template.format( formatted_query = query_template.format(schema_prefix=schema_prefix, schema=schema)
schema_prefix=schema_prefix,
schema=schema,
pgvector_schema=pgvector_schema,
)
import prisma as prisma_module import prisma as prisma_module