fix(backend): Use unqualified vector type for pgvector queries

Remove explicit schema qualification ({schema}.vector and
OPERATOR({schema}.<=>)) from pgvector queries. The vector type
and <=> operator work unqualified because pgvector is installed
in a schema that's part of the search_path on all environments.

This fixes "type 'platform.vector' does not exist" errors in CI
where pgvector is in public schema, and "type 'public.vector'
does not exist" errors on dev where pgvector is in platform schema.

Fixes: AUTOGPT-SERVER-763, AUTOGPT-SERVER-764, AUTOGPT-SERVER-76B
This commit is contained in:
Zamil Majdy
2026-01-21 11:27:03 -05:00
parent 0a48c49902
commit f23c350da1
2 changed files with 39 additions and 20 deletions

View File

@@ -154,16 +154,16 @@ async def store_content_embedding(
# Upsert the embedding
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
# Use {schema}.vector for explicit pgvector type qualification
# Use unqualified ::vector - pgvector is in search_path schema
await execute_raw_with_schema(
"""
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
)
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{schema}.vector, $5, $6::jsonb, NOW(), NOW())
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
ON CONFLICT ("contentType", "contentId", "userId")
DO UPDATE SET
"embedding" = $4::{schema}.vector,
"embedding" = $4::vector,
"searchableText" = $5,
"metadata" = $6::jsonb,
"updatedAt" = NOW()
@@ -870,7 +870,7 @@ async def semantic_search(
# Add content type parameters and build placeholders dynamically
content_type_start_idx = len(params) + 1
content_type_placeholders = ", ".join(
'$' + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
"$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
for i in range(len(content_types))
)
params.extend([ct.value for ct in content_types])
@@ -879,22 +879,33 @@ async def semantic_search(
min_similarity_idx = len(params) + 1
params.append(min_similarity)
# Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
# Use OPERATOR({schema}.<=>) for explicit operator schema qualification
sql = """
# Use unqualified ::vector and <=> operator - pgvector is in search_path schema
sql = (
"""
SELECT
"contentId" as content_id,
"contentType" as content_type,
"searchableText" as searchable_text,
metadata,
1 - (embedding OPERATOR({schema}.<=>) '""" + embedding_str + """'::{schema}.vector) as similarity
1 - (embedding <=> '"""
+ embedding_str
+ """'::vector) as similarity
FROM {schema_prefix}"UnifiedContentEmbedding"
WHERE "contentType" IN (""" + content_type_placeholders + """)
""" + user_filter + """
AND 1 - (embedding OPERATOR({schema}.<=>) '""" + embedding_str + """'::{schema}.vector) >= $""" + str(min_similarity_idx) + """
WHERE "contentType" IN ("""
+ content_type_placeholders
+ """)
"""
+ user_filter
+ """
AND 1 - (embedding <=> '"""
+ embedding_str
+ """'::vector) >= $"""
+ str(min_similarity_idx)
+ """
ORDER BY similarity DESC
LIMIT $1
"""
)
try:
results = await query_raw_with_schema(sql, *params)
@@ -924,7 +935,7 @@ async def semantic_search(
# Add content type parameters and build placeholders dynamically
content_type_start_idx = len(params_lexical) + 1
content_type_placeholders_lexical = ", ".join(
'$' + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
"$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
for i in range(len(content_types))
)
params_lexical.extend([ct.value for ct in content_types])
@@ -934,7 +945,8 @@ async def semantic_search(
params_lexical.append(f"%{query}%")
# Use regular string (not f-string) for template to preserve {schema_prefix} placeholders
sql_lexical = """
sql_lexical = (
"""
SELECT
"contentId" as content_id,
"contentType" as content_type,
@@ -942,12 +954,19 @@ async def semantic_search(
metadata,
0.0 as similarity
FROM {schema_prefix}"UnifiedContentEmbedding"
WHERE "contentType" IN (""" + content_type_placeholders_lexical + """)
""" + user_filter + """
AND "searchableText" ILIKE $""" + str(query_param_idx) + """
WHERE "contentType" IN ("""
+ content_type_placeholders_lexical
+ """)
"""
+ user_filter
+ """
AND "searchableText" ILIKE $"""
+ str(query_param_idx)
+ """
ORDER BY "updatedAt" DESC
LIMIT $1
"""
)
try:
results = await query_raw_with_schema(sql_lexical, *params_lexical)

View File

@@ -295,7 +295,7 @@ async def unified_hybrid_search(
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
{user_filter}
ORDER BY uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector
ORDER BY uce.embedding <=> {embedding_param}::vector
LIMIT 200
)
),
@@ -307,7 +307,7 @@ async def unified_hybrid_search(
uce.metadata,
uce."updatedAt" as updated_at,
-- Semantic score: cosine similarity (1 - distance)
COALESCE(1 - (uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector), 0) as semantic_score,
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
-- Lexical score: ts_rank_cd
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
-- Category match from metadata
@@ -583,7 +583,7 @@ async def hybrid_search(
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
AND uce."userId" IS NULL
AND {where_clause}
ORDER BY uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector
ORDER BY uce.embedding <=> {embedding_param}::vector
LIMIT 200
) uce
),
@@ -605,7 +605,7 @@ async def hybrid_search(
-- Searchable text for BM25 reranking
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
-- Semantic score
COALESCE(1 - (uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector), 0) as semantic_score,
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
-- Lexical score (raw, will normalize)
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
-- Category match