mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend): Use unqualified vector type for pgvector queries (#11818)
## Summary
- Remove explicit schema qualification (`{schema}.vector` and
`OPERATOR({schema}.<=>)`) from pgvector queries in `embeddings.py` and
`hybrid_search.py`
- Use unqualified `::vector` type cast and `<=>` operator which work
because pgvector is in the search_path on all environments
## Problem
The previous approach tried to explicitly qualify the vector type with
schema names, but this failed because:
- **CI environment**: pgvector is in `public` schema → `platform.vector`
doesn't exist
- **Dev (Supabase)**: pgvector is in `platform` schema → `public.vector`
doesn't exist
## Solution
Use unqualified `::vector` and `<=>` operator. PostgreSQL resolves these
via `search_path`, which includes the schema where pgvector is installed
on all environments.
Tested on both local and dev environments with a test script that
verified:
- ✅ Unqualified `::vector` type cast
- ✅ Unqualified `<=>` operator in ORDER BY
- ✅ Unqualified `<=>` in SELECT (similarity calculation)
- ✅ Combined query patterns matching actual usage
## Test plan
- [ ] CI tests pass
- [ ] Marketplace approval works on dev after deployment
Fixes: AUTOGPT-SERVER-763, AUTOGPT-SERVER-764, AUTOGPT-SERVER-76B
This commit is contained in:
@@ -154,16 +154,16 @@ async def store_content_embedding(
|
||||
|
||||
# Upsert the embedding
|
||||
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
|
||||
# Use {pgvector_schema}.vector for explicit pgvector type qualification
|
||||
# Use unqualified ::vector - pgvector is in search_path on all environments
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
|
||||
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
|
||||
)
|
||||
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{pgvector_schema}.vector, $5, $6::jsonb, NOW(), NOW())
|
||||
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
|
||||
ON CONFLICT ("contentType", "contentId", "userId")
|
||||
DO UPDATE SET
|
||||
"embedding" = $4::{pgvector_schema}.vector,
|
||||
"embedding" = $4::vector,
|
||||
"searchableText" = $5,
|
||||
"metadata" = $6::jsonb,
|
||||
"updatedAt" = NOW()
|
||||
@@ -879,8 +879,7 @@ async def semantic_search(
|
||||
min_similarity_idx = len(params) + 1
|
||||
params.append(min_similarity)
|
||||
|
||||
# Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
|
||||
# Use OPERATOR({pgvector_schema}.<=>) for explicit operator schema qualification
|
||||
# Use unqualified ::vector and <=> operator - pgvector is in search_path on all environments
|
||||
sql = (
|
||||
"""
|
||||
SELECT
|
||||
@@ -888,9 +887,9 @@ async def semantic_search(
|
||||
"contentType" as content_type,
|
||||
"searchableText" as searchable_text,
|
||||
metadata,
|
||||
1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
|
||||
1 - (embedding <=> '"""
|
||||
+ embedding_str
|
||||
+ """'::{pgvector_schema}.vector) as similarity
|
||||
+ """'::vector) as similarity
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" IN ("""
|
||||
+ content_type_placeholders
|
||||
@@ -898,9 +897,9 @@ async def semantic_search(
|
||||
"""
|
||||
+ user_filter
|
||||
+ """
|
||||
AND 1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
|
||||
AND 1 - (embedding <=> '"""
|
||||
+ embedding_str
|
||||
+ """'::{pgvector_schema}.vector) >= $"""
|
||||
+ """'::vector) >= $"""
|
||||
+ str(min_similarity_idx)
|
||||
+ """
|
||||
ORDER BY similarity DESC
|
||||
|
||||
@@ -295,7 +295,7 @@ async def unified_hybrid_search(
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||
{user_filter}
|
||||
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
)
|
||||
),
|
||||
@@ -307,7 +307,7 @@ async def unified_hybrid_search(
|
||||
uce.metadata,
|
||||
uce."updatedAt" as updated_at,
|
||||
-- Semantic score: cosine similarity (1 - distance)
|
||||
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score: ts_rank_cd
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match from metadata
|
||||
@@ -583,7 +583,7 @@ async def hybrid_search(
|
||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
AND uce."userId" IS NULL
|
||||
AND {where_clause}
|
||||
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
) uce
|
||||
),
|
||||
@@ -605,7 +605,7 @@ async def hybrid_search(
|
||||
-- Searchable text for BM25 reranking
|
||||
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
|
||||
-- Semantic score
|
||||
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score (raw, will normalize)
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match
|
||||
|
||||
@@ -121,10 +121,14 @@ async def _raw_with_schema(
|
||||
Supports placeholders:
|
||||
- {schema_prefix}: Table/type prefix (e.g., "platform".)
|
||||
- {schema}: Raw schema name for application tables (e.g., platform)
|
||||
- {pgvector_schema}: Schema where pgvector is installed (defaults to "public")
|
||||
|
||||
Note on pgvector types:
|
||||
Use unqualified ::vector and <=> operator in queries. PostgreSQL resolves
|
||||
these via search_path, which includes the schema where pgvector is installed
|
||||
on all environments (local, CI, dev).
|
||||
|
||||
Args:
|
||||
query_template: SQL query with {schema_prefix}, {schema}, and/or {pgvector_schema} placeholders
|
||||
query_template: SQL query with {schema_prefix} and/or {schema} placeholders
|
||||
*args: Query parameters
|
||||
execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
|
||||
client: Optional Prisma client for transactions (only used when execute=True).
|
||||
@@ -135,20 +139,16 @@ async def _raw_with_schema(
|
||||
|
||||
Example with vector type:
|
||||
await execute_raw_with_schema(
|
||||
'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{pgvector_schema}.vector)',
|
||||
'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::vector)',
|
||||
embedding_data
|
||||
)
|
||||
"""
|
||||
schema = get_database_schema()
|
||||
schema_prefix = f'"{schema}".' if schema != "public" else ""
|
||||
# pgvector extension is typically installed in "public" schema
|
||||
# On Supabase it may be in "extensions" but "public" is the common default
|
||||
pgvector_schema = "public"
|
||||
|
||||
formatted_query = query_template.format(
|
||||
schema_prefix=schema_prefix,
|
||||
schema=schema,
|
||||
pgvector_schema=pgvector_schema,
|
||||
)
|
||||
|
||||
import prisma as prisma_module
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
-- CreateExtension
|
||||
-- Supabase: pgvector must be enabled via Dashboard → Database → Extensions first
|
||||
-- Create in public schema so vector type is available across all schemas
|
||||
-- Creates extension in current schema (determined by search_path from DATABASE_URL ?schema= param)
|
||||
-- This ensures vector type is in the same schema as tables, making ::vector work without explicit qualification
|
||||
DO $$
|
||||
BEGIN
|
||||
CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "public";
|
||||
CREATE EXTENSION IF NOT EXISTS "vector";
|
||||
EXCEPTION WHEN OTHERS THEN
|
||||
RAISE NOTICE 'vector extension not available or already exists, skipping';
|
||||
END $$;
|
||||
@@ -19,7 +20,7 @@ CREATE TABLE "UnifiedContentEmbedding" (
|
||||
"contentType" "ContentType" NOT NULL,
|
||||
"contentId" TEXT NOT NULL,
|
||||
"userId" TEXT,
|
||||
"embedding" public.vector(1536) NOT NULL,
|
||||
"embedding" vector(1536) NOT NULL,
|
||||
"searchableText" TEXT NOT NULL,
|
||||
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||
|
||||
@@ -45,4 +46,4 @@ CREATE UNIQUE INDEX "UnifiedContentEmbedding_contentType_contentId_userId_key" O
|
||||
-- Uses cosine distance operator (<=>), which matches the query in hybrid_search.py
|
||||
-- Note: Drop first in case Prisma created a btree index (Prisma doesn't support HNSW)
|
||||
DROP INDEX IF EXISTS "UnifiedContentEmbedding_embedding_idx";
|
||||
CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" public.vector_cosine_ops);
|
||||
CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
@@ -4,6 +4,7 @@ import { LoginPage } from "./pages/login.page";
|
||||
import { MarketplacePage } from "./pages/marketplace.page";
|
||||
import { hasMinCount, hasUrl, isVisible, matchesUrl } from "./utils/assertion";
|
||||
|
||||
// Marketplace tests for store agent search functionality
|
||||
test.describe("Marketplace – Basic Functionality", () => {
|
||||
test("User can access marketplace page when logged out", async ({ page }) => {
|
||||
const marketplacePage = new MarketplacePage(page);
|
||||
|
||||
Reference in New Issue
Block a user