Mirror of https://github.com/Significant-Gravitas/AutoGPT.git, synced 2026-01-20 04:28:09 -05:00
Compare commits: dev...fix/undefi (2 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 0a48c49902 | |
| | 1fc1102eb4 | |
```diff
@@ -154,15 +154,16 @@ async def store_content_embedding(
     # Upsert the embedding
     # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
+    # Use {schema}.vector for explicit pgvector type qualification
     await execute_raw_with_schema(
         """
         INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
             "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
         )
-        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
+        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{schema}.vector, $5, $6::jsonb, NOW(), NOW())
         ON CONFLICT ("contentType", "contentId", "userId")
         DO UPDATE SET
-            "embedding" = $4::vector,
+            "embedding" = $4::{schema}.vector,
             "searchableText" = $5,
             "metadata" = $6::jsonb,
             "updatedAt" = NOW()
```
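The `$4::{schema}.vector` cast works because the helper expands both placeholders before the query is sent, so the pgvector type no longer has to be resolvable via `search_path`. A minimal sketch of that expansion, with `platform` as an assumed example schema name:

```python
# Sketch: placeholder expansion as performed by execute_raw_with_schema.
# "platform" is an example; get_database_schema() supplies the real name.
template = 'VALUES ($1::{schema_prefix}"ContentType", $2, $3, $4::{schema}.vector)'
schema = "platform"
schema_prefix = f'"{schema}".' if schema != "public" else ""
print(template.format(schema_prefix=schema_prefix, schema=schema))
# -> VALUES ($1::"platform"."ContentType", $2, $3, $4::platform.vector)
```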
```diff
@@ -177,7 +178,6 @@ async def store_content_embedding(
         searchable_text,
         metadata_json,
         client=client,
-        set_public_search_path=True,
     )

     logger.info(f"Stored embedding for {content_type}:{content_id}")
```
```diff
@@ -236,7 +236,6 @@ async def get_content_embedding(
         content_type,
         content_id,
         user_id,
-        set_public_search_path=True,
     )

     if result and len(result) > 0:
```
```diff
@@ -871,31 +870,34 @@ async def semantic_search(
     # Add content type parameters and build placeholders dynamically
     content_type_start_idx = len(params) + 1
     content_type_placeholders = ", ".join(
-        f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
+        '$' + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
         for i in range(len(content_types))
     )
     params.extend([ct.value for ct in content_types])

-    sql = f"""
+    # Build min_similarity param index before appending
+    min_similarity_idx = len(params) + 1
+    params.append(min_similarity)
+
+    # Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
+    # Use OPERATOR({schema}.<=>) for explicit operator schema qualification
+    sql = """
         SELECT
             "contentId" as content_id,
             "contentType" as content_type,
             "searchableText" as searchable_text,
             metadata,
-            1 - (embedding <=> '{embedding_str}'::vector) as similarity
-        FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
-        WHERE "contentType" IN ({content_type_placeholders})
-            {user_filter}
-            AND 1 - (embedding <=> '{embedding_str}'::vector) >= ${len(params) + 1}
+            1 - (embedding OPERATOR({schema}.<=>) '""" + embedding_str + """'::{schema}.vector) as similarity
+        FROM {schema_prefix}"UnifiedContentEmbedding"
+        WHERE "contentType" IN (""" + content_type_placeholders + """)
+            """ + user_filter + """
+            AND 1 - (embedding OPERATOR({schema}.<=>) '""" + embedding_str + """'::{schema}.vector) >= $""" + str(min_similarity_idx) + """
         ORDER BY similarity DESC
         LIMIT $1
     """
-    params.append(min_similarity)

     try:
-        results = await query_raw_with_schema(
-            sql, *params, set_public_search_path=True
-        )
+        results = await query_raw_with_schema(sql, *params)
         return [
             {
                 "content_id": row["content_id"],
```
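The core of this hunk is the move from `sql = f"""` to a plain string: the template is `.format()`-ed once inside `query_raw_with_schema`, so an f-string forces doubled braces whose two expansion passes leave a literal `{schema_prefix}` in the SQL instead of a substituted prefix; computing `min_similarity_idx` before the append likewise pins the `$n` placeholder to the correct position. A toy demonstration of the brace mechanics (strings are illustrative):

```python
# Pass 1 (f-string): '{{{{schema_prefix}}}}' collapses to '{{schema_prefix}}'.
via_fstring = f'FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"'
assert via_fstring == 'FROM {{schema_prefix}}"UnifiedContentEmbedding"'
# Pass 2 (.format in the helper): '{{...}}' is an escape, so the placeholder
# comes out as literal text instead of being substituted.
assert via_fstring.format(schema_prefix='"platform".') == \
    'FROM {schema_prefix}"UnifiedContentEmbedding"'
# A plain template needs only the single .format() pass the helper performs.
plain = 'FROM {schema_prefix}"UnifiedContentEmbedding"'
assert plain.format(schema_prefix='"platform".') == \
    'FROM "platform"."UnifiedContentEmbedding"'
```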
```diff
@@ -922,31 +924,33 @@ async def semantic_search(
     # Add content type parameters and build placeholders dynamically
     content_type_start_idx = len(params_lexical) + 1
     content_type_placeholders_lexical = ", ".join(
-        f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
+        '$' + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
         for i in range(len(content_types))
     )
     params_lexical.extend([ct.value for ct in content_types])

-    sql_lexical = f"""
+    # Build query param index before appending
+    query_param_idx = len(params_lexical) + 1
+    params_lexical.append(f"%{query}%")
+
+    # Use regular string (not f-string) for template to preserve {schema_prefix} placeholders
+    sql_lexical = """
         SELECT
             "contentId" as content_id,
             "contentType" as content_type,
             "searchableText" as searchable_text,
             metadata,
             0.0 as similarity
-        FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
-        WHERE "contentType" IN ({content_type_placeholders_lexical})
-            {user_filter}
-            AND "searchableText" ILIKE ${len(params_lexical) + 1}
+        FROM {schema_prefix}"UnifiedContentEmbedding"
+        WHERE "contentType" IN (""" + content_type_placeholders_lexical + """)
+            """ + user_filter + """
+            AND "searchableText" ILIKE $""" + str(query_param_idx) + """
         ORDER BY "updatedAt" DESC
         LIMIT $1
     """
-    params_lexical.append(f"%{query}%")

     try:
-        results = await query_raw_with_schema(
-            sql_lexical, *params_lexical, set_public_search_path=True
-        )
+        results = await query_raw_with_schema(sql_lexical, *params_lexical)
         return [
             {
                 "content_id": row["content_id"],
```
```diff
@@ -155,18 +155,14 @@ async def test_store_embedding_success(mocker):
     )

     assert result is True
-    # execute_raw is called twice: once for SET search_path, once for INSERT
-    assert mock_client.execute_raw.call_count == 2
+    # execute_raw is called once for INSERT (no separate SET search_path needed)
+    assert mock_client.execute_raw.call_count == 1

-    # First call: SET search_path
-    first_call_args = mock_client.execute_raw.call_args_list[0][0]
-    assert "SET search_path" in first_call_args[0]
-
-    # Second call: INSERT query with the actual data
-    second_call_args = mock_client.execute_raw.call_args_list[1][0]
-    assert "test-version-id" in second_call_args
-    assert "[0.1,0.2,0.3]" in second_call_args
-    assert None in second_call_args  # userId should be None for store agents
+    # Verify the INSERT query with the actual data
+    call_args = mock_client.execute_raw.call_args_list[0][0]
+    assert "test-version-id" in call_args
+    assert "[0.1,0.2,0.3]" in call_args
+    assert None in call_args  # userId should be None for store agents


 @pytest.mark.asyncio(loop_scope="session")
```
```diff
@@ -295,7 +295,7 @@ async def unified_hybrid_search(
                 FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
                 WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
                 {user_filter}
-                ORDER BY uce.embedding <=> {embedding_param}::vector
+                ORDER BY uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector
                 LIMIT 200
             )
         ),
```
||||||
@@ -307,7 +307,7 @@ async def unified_hybrid_search(
|
|||||||
uce.metadata,
|
uce.metadata,
|
||||||
uce."updatedAt" as updated_at,
|
uce."updatedAt" as updated_at,
|
||||||
-- Semantic score: cosine similarity (1 - distance)
|
-- Semantic score: cosine similarity (1 - distance)
|
||||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
COALESCE(1 - (uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector), 0) as semantic_score,
|
||||||
-- Lexical score: ts_rank_cd
|
-- Lexical score: ts_rank_cd
|
||||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||||
-- Category match from metadata
|
-- Category match from metadata
|
||||||
```diff
@@ -363,9 +363,7 @@ async def unified_hybrid_search(
         LIMIT {limit_param} OFFSET {offset_param}
     """

-    results = await query_raw_with_schema(
-        sql_query, *params, set_public_search_path=True
-    )
+    results = await query_raw_with_schema(sql_query, *params)

     total = results[0]["total_count"] if results else 0
     # Apply BM25 reranking
```
```diff
@@ -585,7 +583,7 @@ async def hybrid_search(
                 WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
                     AND uce."userId" IS NULL
                     AND {where_clause}
-                ORDER BY uce.embedding <=> {embedding_param}::vector
+                ORDER BY uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector
                 LIMIT 200
             ) uce
         ),
```
||||||
@@ -607,7 +605,7 @@ async def hybrid_search(
|
|||||||
-- Searchable text for BM25 reranking
|
-- Searchable text for BM25 reranking
|
||||||
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
|
COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
|
||||||
-- Semantic score
|
-- Semantic score
|
||||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
COALESCE(1 - (uce.embedding OPERATOR({{schema}}.<=>) {embedding_param}::{{schema}}.vector), 0) as semantic_score,
|
||||||
-- Lexical score (raw, will normalize)
|
-- Lexical score (raw, will normalize)
|
||||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||||
-- Category match
|
-- Category match
|
||||||
```diff
@@ -688,9 +686,7 @@ async def hybrid_search(
         LIMIT {limit_param} OFFSET {offset_param}
     """

-    results = await query_raw_with_schema(
-        sql_query, *params, set_public_search_path=True
-    )
+    results = await query_raw_with_schema(sql_query, *params)

     total = results[0]["total_count"] if results else 0
```
```diff
@@ -38,20 +38,6 @@ POOL_TIMEOUT = os.getenv("DB_POOL_TIMEOUT")
 if POOL_TIMEOUT:
     DATABASE_URL = add_param(DATABASE_URL, "pool_timeout", POOL_TIMEOUT)

-# Add public schema to search_path for pgvector type access
-# The vector extension is in public schema, but search_path is determined by schema parameter
-# Extract the schema from DATABASE_URL or default to 'public' (matching get_database_schema())
-parsed_url = urlparse(DATABASE_URL)
-url_params = dict(parse_qsl(parsed_url.query))
-db_schema = url_params.get("schema", "public")
-# Build search_path, avoiding duplicates if db_schema is already 'public'
-search_path_schemas = list(
-    dict.fromkeys([db_schema, "public"])
-)  # Preserves order, removes duplicates
-search_path = ",".join(search_path_schemas)
-# This allows using ::vector without schema qualification
-DATABASE_URL = add_param(DATABASE_URL, "options", f"-c search_path={search_path}")
-
 HTTP_TIMEOUT = int(POOL_TIMEOUT) if POOL_TIMEOUT else None

 prisma = Prisma(
```
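For reference, the deleted block derived a `search_path` from the URL's `schema` query parameter and appended it as a `-c search_path=...` connection option; per-query qualification through the `{schema}` placeholder makes that connection-level rewrite unnecessary. A condensed sketch of what it computed (example URL only):

```python
from urllib.parse import parse_qsl, urlparse

# Example URL; the real value comes from the environment.
url = "postgresql://user:pass@host:5432/db?schema=platform"
db_schema = dict(parse_qsl(urlparse(url).query)).get("schema", "public")
search_path = ",".join(dict.fromkeys([db_schema, "public"]))  # ordered, de-duped
print(search_path)  # -> platform,public
```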
```diff
@@ -127,38 +113,43 @@ async def _raw_with_schema(
     *args,
     execute: bool = False,
     client: Prisma | None = None,
-    set_public_search_path: bool = False,
 ) -> list[dict] | int:
     """Internal: Execute raw SQL with proper schema handling.

     Use query_raw_with_schema() or execute_raw_with_schema() instead.

+    Supports placeholders:
+    - {schema_prefix}: Table/type prefix (e.g., "platform".)
+    - {schema}: Raw schema name (e.g., platform) for pgvector types and operators
+
     Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
         *args: Query parameters
         execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
         client: Optional Prisma client for transactions (only used when execute=True).
-        set_public_search_path: If True, sets search_path to include public schema.
-            Needed for pgvector types and other public schema objects.

     Returns:
         - list[dict] if execute=False (query results)
         - int if execute=True (number of affected rows)

+    Example with vector type:
+        await execute_raw_with_schema(
+            'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{schema}.vector)',
+            embedding_data
+        )
     """
     schema = get_database_schema()
     schema_prefix = f'"{schema}".' if schema != "public" else ""
-    formatted_query = query_template.format(schema_prefix=schema_prefix)
+    formatted_query = query_template.format(
+        schema_prefix=schema_prefix,
+        schema=schema,
+    )

     import prisma as prisma_module

     db_client = client if client else prisma_module.get_client()

-    # Set search_path to include public schema if requested
-    # Prisma doesn't support the 'options' connection parameter, so we set it per-session
-    # This is idempotent and safe to call multiple times
-    if set_public_search_path:
-        await db_client.execute_raw(f"SET search_path = {schema}, public")  # type: ignore
-
     if execute:
         result = await db_client.execute_raw(formatted_query, *args)  # type: ignore
     else:
```
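A subtlety in the new formatting step: when the schema is `public`, `schema_prefix` collapses to an empty string while `{schema}` still expands to `public`, so vector casts stay fully qualified in both cases. A quick sketch mirroring that logic:

```python
def expand(template: str, schema: str) -> str:
    # Mirrors _raw_with_schema's formatting step.
    schema_prefix = f'"{schema}".' if schema != "public" else ""
    return template.format(schema_prefix=schema_prefix, schema=schema)

tmpl = 'SELECT $1::{schema}.vector FROM {schema_prefix}"UnifiedContentEmbedding"'
print(expand(tmpl, "platform"))
# -> SELECT $1::platform.vector FROM "platform"."UnifiedContentEmbedding"
print(expand(tmpl, "public"))
# -> SELECT $1::public.vector FROM "UnifiedContentEmbedding"
```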
```diff
@@ -167,16 +158,12 @@ async def _raw_with_schema(
     return result


-async def query_raw_with_schema(
-    query_template: str, *args, set_public_search_path: bool = False
-) -> list[dict]:
+async def query_raw_with_schema(query_template: str, *args) -> list[dict]:
     """Execute raw SQL SELECT query with proper schema handling.

     Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
         *args: Query parameters
-        set_public_search_path: If True, sets search_path to include public schema.
-            Needed for pgvector types and other public schema objects.

     Returns:
         List of result rows as dictionaries
```
```diff
@@ -187,23 +174,20 @@ async def query_raw_with_schema(
             user_id
         )
     """
-    return await _raw_with_schema(query_template, *args, execute=False, set_public_search_path=set_public_search_path)  # type: ignore
+    return await _raw_with_schema(query_template, *args, execute=False)  # type: ignore


 async def execute_raw_with_schema(
     query_template: str,
     *args,
     client: Prisma | None = None,
-    set_public_search_path: bool = False,
 ) -> int:
     """Execute raw SQL command (INSERT/UPDATE/DELETE) with proper schema handling.

     Args:
-        query_template: SQL query with {schema_prefix} placeholder
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
         *args: Query parameters
         client: Optional Prisma client for transactions
-        set_public_search_path: If True, sets search_path to include public schema.
-            Needed for pgvector types and other public schema objects.

     Returns:
         Number of affected rows
```
```diff
@@ -215,7 +199,7 @@ async def execute_raw_with_schema(
             client=tx  # Optional transaction client
         )
     """
-    return await _raw_with_schema(query_template, *args, execute=True, client=client, set_public_search_path=set_public_search_path)  # type: ignore
+    return await _raw_with_schema(query_template, *args, execute=True, client=client)  # type: ignore


 class BaseDbModel(BaseModel):
```
```diff
@@ -1,9 +1,10 @@
 -- CreateExtension
 -- Supabase: pgvector must be enabled via Dashboard → Database → Extensions first
--- Create in public schema so vector type is available across all schemas
+-- Creates extension in current schema (determined by search_path)
+-- The extension may already exist in a different schema (e.g., Supabase pre-enables it)
 DO $$
 BEGIN
-    CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "public";
+    CREATE EXTENSION IF NOT EXISTS "vector";
 EXCEPTION WHEN OTHERS THEN
     RAISE NOTICE 'vector extension not available or already exists, skipping';
 END $$;
```
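Since the extension is now created in whichever schema `search_path` selects, and Supabase may have pre-installed it elsewhere, it can help to confirm where `vector` actually lives. A sketch using the repo's query helper (import path assumed) and the standard PostgreSQL catalogs:

```python
from backend.data.db import query_raw_with_schema  # import path assumed

async def pgvector_schema() -> str | None:
    """Return the schema holding the pgvector extension, or None if absent."""
    rows = await query_raw_with_schema(
        """
        SELECT n.nspname AS extension_schema
        FROM pg_extension e
        JOIN pg_namespace n ON n.oid = e.extnamespace
        WHERE e.extname = 'vector'
        """
    )
    return rows[0]["extension_schema"] if rows else None
```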
```diff
@@ -12,6 +13,7 @@ END $$;
 CREATE TYPE "ContentType" AS ENUM ('STORE_AGENT', 'BLOCK', 'INTEGRATION', 'DOCUMENTATION', 'LIBRARY_AGENT');

 -- CreateTable
+-- Note: vector type is unqualified - relies on search_path including the schema where pgvector is installed
 CREATE TABLE "UnifiedContentEmbedding" (
     "id" TEXT NOT NULL,
     "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
```
```diff
@@ -19,7 +21,7 @@ CREATE TABLE "UnifiedContentEmbedding" (
     "contentType" "ContentType" NOT NULL,
     "contentId" TEXT NOT NULL,
     "userId" TEXT,
-    "embedding" public.vector(1536) NOT NULL,
+    "embedding" vector(1536) NOT NULL,
     "searchableText" TEXT NOT NULL,
     "metadata" JSONB NOT NULL DEFAULT '{}',
```
```diff
@@ -45,4 +47,4 @@ CREATE UNIQUE INDEX "UnifiedContentEmbedding_contentType_contentId_userId_key" O
 -- Uses cosine distance operator (<=>), which matches the query in hybrid_search.py
 -- Note: Drop first in case Prisma created a btree index (Prisma doesn't support HNSW)
 DROP INDEX IF EXISTS "UnifiedContentEmbedding_embedding_idx";
-CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" public.vector_cosine_ops);
+CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" vector_cosine_ops);
```