mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-22 05:28:02 -05:00
Compare commits
1 Commits
dev
...
fix/pgvect
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12690ad0a9 |
@@ -154,16 +154,15 @@ async def store_content_embedding(
|
|||||||
|
|
||||||
# Upsert the embedding
|
# Upsert the embedding
|
||||||
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
|
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
|
||||||
# Use {pgvector_schema}.vector for explicit pgvector type qualification
|
|
||||||
await execute_raw_with_schema(
|
await execute_raw_with_schema(
|
||||||
"""
|
"""
|
||||||
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
|
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
|
||||||
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
|
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
|
||||||
)
|
)
|
||||||
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{pgvector_schema}.vector, $5, $6::jsonb, NOW(), NOW())
|
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{schema}.vector, $5, $6::jsonb, NOW(), NOW())
|
||||||
ON CONFLICT ("contentType", "contentId", "userId")
|
ON CONFLICT ("contentType", "contentId", "userId")
|
||||||
DO UPDATE SET
|
DO UPDATE SET
|
||||||
"embedding" = $4::{pgvector_schema}.vector,
|
"embedding" = $4::{schema}.vector,
|
||||||
"searchableText" = $5,
|
"searchableText" = $5,
|
||||||
"metadata" = $6::jsonb,
|
"metadata" = $6::jsonb,
|
||||||
"updatedAt" = NOW()
|
"updatedAt" = NOW()
|
||||||
@@ -879,8 +878,6 @@ async def semantic_search(
|
|||||||
min_similarity_idx = len(params) + 1
|
min_similarity_idx = len(params) + 1
|
||||||
params.append(min_similarity)
|
params.append(min_similarity)
|
||||||
|
|
||||||
# Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
|
|
||||||
# Use OPERATOR({pgvector_schema}.<=>) for explicit operator schema qualification
|
|
||||||
sql = (
|
sql = (
|
||||||
"""
|
"""
|
||||||
SELECT
|
SELECT
|
||||||
@@ -888,9 +885,9 @@ async def semantic_search(
|
|||||||
"contentType" as content_type,
|
"contentType" as content_type,
|
||||||
"searchableText" as searchable_text,
|
"searchableText" as searchable_text,
|
||||||
metadata,
|
metadata,
|
||||||
1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
|
1 - (embedding <=> '"""
|
||||||
+ embedding_str
|
+ embedding_str
|
||||||
+ """'::{pgvector_schema}.vector) as similarity
|
+ """'::{schema}.vector) as similarity
|
||||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||||
WHERE "contentType" IN ("""
|
WHERE "contentType" IN ("""
|
||||||
+ content_type_placeholders
|
+ content_type_placeholders
|
||||||
@@ -898,9 +895,9 @@ async def semantic_search(
|
|||||||
"""
|
"""
|
||||||
+ user_filter
|
+ user_filter
|
||||||
+ """
|
+ """
|
||||||
AND 1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
|
AND 1 - (embedding <=> '"""
|
||||||
+ embedding_str
|
+ embedding_str
|
||||||
+ """'::{pgvector_schema}.vector) >= $"""
|
+ """'::{schema}.vector) >= $"""
|
||||||
+ str(min_similarity_idx)
|
+ str(min_similarity_idx)
|
||||||
+ """
|
+ """
|
||||||
ORDER BY similarity DESC
|
ORDER BY similarity DESC
|
||||||
|
|||||||
@@ -295,7 +295,7 @@ async def unified_hybrid_search(
|
|||||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||||
{user_filter}
|
{user_filter}
|
||||||
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector
|
ORDER BY uce.embedding <=> {embedding_param}::{{schema}}.vector
|
||||||
LIMIT 200
|
LIMIT 200
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
@@ -307,7 +307,7 @@ async def unified_hybrid_search(
|
|||||||
uce.metadata,
|
uce.metadata,
|
||||||
uce."updatedAt" as updated_at,
|
uce."updatedAt" as updated_at,
|
||||||
-- Semantic score: cosine similarity (1 - distance)
|
-- Semantic score: cosine similarity (1 - distance)
|
||||||
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
|
COALESCE(1 - (uce.embedding <=> {embedding_param}::{{schema}}.vector), 0) as semantic_score,
|
||||||
-- Lexical score: ts_rank_cd
|
-- Lexical score: ts_rank_cd
|
||||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||||
-- Category match from metadata
|
-- Category match from metadata
|
||||||
@@ -583,7 +583,7 @@ async def hybrid_search(
|
|||||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||||
AND uce."userId" IS NULL
|
AND uce."userId" IS NULL
|
||||||
AND {where_clause}
|
AND {where_clause}
|
||||||
ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector
|
ORDER BY uce.embedding <=> {embedding_param}::{{schema}}.vector
|
||||||
LIMIT 200
|
LIMIT 200
|
||||||
) uce
|
) uce
|
||||||
),
|
),
|
||||||
@@ -605,7 +605,7 @@ async def hybrid_search(
|
|||||||
-- Searchable text for BM25 reranking
|
-- Searchable text for BM25 reranking
|
||||||
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
|
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
|
||||||
-- Semantic score
|
-- Semantic score
|
||||||
COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>) {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
|
COALESCE(1 - (uce.embedding <=> {embedding_param}::{{schema}}.vector), 0) as semantic_score,
|
||||||
-- Lexical score (raw, will normalize)
|
-- Lexical score (raw, will normalize)
|
||||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||||
-- Category match
|
-- Category match
|
||||||
|
|||||||
@@ -120,11 +120,10 @@ async def _raw_with_schema(
|
|||||||
|
|
||||||
Supports placeholders:
|
Supports placeholders:
|
||||||
- {schema_prefix}: Table/type prefix (e.g., "platform".)
|
- {schema_prefix}: Table/type prefix (e.g., "platform".)
|
||||||
- {schema}: Raw schema name for application tables (e.g., platform)
|
- {schema}: Raw schema name (e.g., platform) for pgvector types
|
||||||
- {pgvector_schema}: Schema where pgvector is installed (defaults to "public")
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query_template: SQL query with {schema_prefix}, {schema}, and/or {pgvector_schema} placeholders
|
query_template: SQL query with {schema_prefix} and/or {schema} placeholders
|
||||||
*args: Query parameters
|
*args: Query parameters
|
||||||
execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
|
execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
|
||||||
client: Optional Prisma client for transactions (only used when execute=True).
|
client: Optional Prisma client for transactions (only used when execute=True).
|
||||||
@@ -133,23 +132,16 @@ async def _raw_with_schema(
|
|||||||
- list[dict] if execute=False (query results)
|
- list[dict] if execute=False (query results)
|
||||||
- int if execute=True (number of affected rows)
|
- int if execute=True (number of affected rows)
|
||||||
|
|
||||||
Example with vector type:
|
Example:
|
||||||
await execute_raw_with_schema(
|
await execute_raw_with_schema(
|
||||||
'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{pgvector_schema}.vector)',
|
'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{schema}.vector)',
|
||||||
embedding_data
|
embedding_data
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
schema = get_database_schema()
|
schema = get_database_schema()
|
||||||
schema_prefix = f'"{schema}".' if schema != "public" else ""
|
schema_prefix = f'"{schema}".' if schema != "public" else ""
|
||||||
# pgvector extension is typically installed in "public" schema
|
|
||||||
# On Supabase it may be in "extensions" but "public" is the common default
|
|
||||||
pgvector_schema = "public"
|
|
||||||
|
|
||||||
formatted_query = query_template.format(
|
formatted_query = query_template.format(schema_prefix=schema_prefix, schema=schema)
|
||||||
schema_prefix=schema_prefix,
|
|
||||||
schema=schema,
|
|
||||||
pgvector_schema=pgvector_schema,
|
|
||||||
)
|
|
||||||
|
|
||||||
import prisma as prisma_module
|
import prisma as prisma_module
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user