mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
add vector search in store
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Dict, Any, Tuple
|
||||
import re
|
||||
@@ -26,11 +27,29 @@ from backend.data.notifications import (
|
||||
NotificationEventModel,
|
||||
)
|
||||
from backend.notifications.notifications import queue_notification_async
|
||||
from backend.server.v2.store.embeddings import SearchFieldType, StoreAgentSearchService
|
||||
from backend.server.v2.store.embeddings import (
|
||||
SubmissionStatus as SearchSubmissionStatus,
|
||||
)
|
||||
from backend.server.v2.store.embeddings import create_embedding
|
||||
from backend.util.settings import Settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = Settings()
|
||||
|
||||
# Initialize the search service with database URL
|
||||
search_service: StoreAgentSearchService | None = None
|
||||
|
||||
|
||||
def get_search_service() -> StoreAgentSearchService:
|
||||
"""Get or create the search service instance"""
|
||||
global search_service
|
||||
if search_service is None:
|
||||
# Get database URL from environment variable (same as Prisma uses)
|
||||
db_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
|
||||
search_service = StoreAgentSearchService(db_url)
|
||||
return search_service
|
||||
|
||||
|
||||
# Constants for default admin values
|
||||
DEFAULT_ADMIN_NAME = "AutoGPT Admin"
|
||||
@@ -56,163 +75,76 @@ def sanitize_query(query: str | None) -> str | None:
|
||||
)
|
||||
|
||||
|
||||
class AdvancedSearchEngine:
|
||||
"""Advanced search engine with multiple search algorithms"""
|
||||
|
||||
@staticmethod
|
||||
def tokenize(text: str) -> List[str]:
|
||||
"""Tokenize text into searchable words"""
|
||||
# Convert to lowercase and split on word boundaries
|
||||
text = text.lower()
|
||||
# Remove special characters but keep spaces, numbers, and letters
|
||||
text = re.sub(r'[^\w\s-]', ' ', text)
|
||||
# Split and filter empty tokens
|
||||
tokens = [token.strip() for token in text.split() if token.strip()]
|
||||
return tokens
|
||||
|
||||
@staticmethod
|
||||
def calculate_fuzzy_score(query: str, target: str, threshold: float = 0.6) -> float:
|
||||
"""Calculate fuzzy matching score using sequence matching"""
|
||||
query_lower = query.lower()
|
||||
target_lower = target.lower()
|
||||
|
||||
# Direct substring match gets highest score
|
||||
if query_lower in target_lower:
|
||||
return 1.0
|
||||
|
||||
# Check word-level matches
|
||||
query_tokens = AdvancedSearchEngine.tokenize(query)
|
||||
target_tokens = AdvancedSearchEngine.tokenize(target)
|
||||
|
||||
# Calculate token overlap score
|
||||
matching_tokens = sum(1 for qt in query_tokens if any(qt in tt for tt in target_tokens))
|
||||
if query_tokens:
|
||||
token_score = matching_tokens / len(query_tokens)
|
||||
else:
|
||||
token_score = 0
|
||||
|
||||
# Use SequenceMatcher for fuzzy matching
|
||||
sequence_score = SequenceMatcher(None, query_lower, target_lower).ratio()
|
||||
|
||||
# Combine scores with weights
|
||||
combined_score = (token_score * 0.6) + (sequence_score * 0.4)
|
||||
|
||||
return combined_score if combined_score >= threshold else 0
|
||||
|
||||
@staticmethod
|
||||
def calculate_relevance_score(
|
||||
agent: prisma.models.StoreAgent,
|
||||
query: str,
|
||||
search_fields: Dict[str, float]
|
||||
) -> float:
|
||||
"""Calculate relevance score for an agent based on search query"""
|
||||
total_score = 0.0
|
||||
query_lower = query.lower()
|
||||
|
||||
for field_name, weight in search_fields.items():
|
||||
field_value = getattr(agent, field_name, None)
|
||||
if field_value is None:
|
||||
continue
|
||||
|
||||
if isinstance(field_value, list):
|
||||
# For categories and other list fields
|
||||
field_text = ' '.join(str(v) for v in field_value)
|
||||
async def search_store_agents(
|
||||
search_query: str,
|
||||
) -> backend.server.v2.store.model.StoreAgentsResponse:
|
||||
"""
|
||||
Search for store agents using embeddings with SQLAlchemy
|
||||
"""
|
||||
query_embedding = create_embedding(search_query)
|
||||
|
||||
# Use SQLAlchemy service for search
|
||||
service = get_search_service()
|
||||
results = await service.search_by_embedding(query_embedding, limit=30)
|
||||
|
||||
# Convert raw results to StoreAgent models
|
||||
agents = []
|
||||
for row in results:
|
||||
try:
|
||||
# Handle agent_image - it could be a list or a single string
|
||||
agent_image = row.get("agent_image", "")
|
||||
if isinstance(agent_image, list) and agent_image:
|
||||
agent_image = str(agent_image[0])
|
||||
elif not agent_image:
|
||||
agent_image = ""
|
||||
else:
|
||||
field_text = str(field_value)
|
||||
|
||||
# Calculate field score
|
||||
field_score = AdvancedSearchEngine.calculate_fuzzy_score(query, field_text)
|
||||
|
||||
# Apply weight
|
||||
total_score += field_score * weight
|
||||
|
||||
# Boost score for exact matches in important fields
|
||||
if query_lower in agent.agent_name.lower():
|
||||
total_score *= 1.5
|
||||
|
||||
# Boost for high-rated agents
|
||||
if agent.rating > 4.0:
|
||||
total_score *= 1.1
|
||||
|
||||
# Boost for popular agents
|
||||
if agent.runs > 100:
|
||||
total_score *= 1.05
|
||||
|
||||
return total_score
|
||||
|
||||
@staticmethod
|
||||
def generate_search_variants(query: str) -> List[str]:
|
||||
"""Generate search variants for typo tolerance"""
|
||||
variants = [query]
|
||||
query_lower = query.lower()
|
||||
|
||||
# Add common typo patterns
|
||||
# Handle missing spaces (e.g., "chatgpt" -> "chat gpt")
|
||||
for i in range(1, len(query_lower)):
|
||||
variant = query_lower[:i] + ' ' + query_lower[i:]
|
||||
variants.append(variant)
|
||||
|
||||
# Handle extra spaces (e.g., "chat gpt" -> "chatgpt")
|
||||
variants.append(query_lower.replace(' ', ''))
|
||||
|
||||
# Handle common letter swaps
|
||||
for i in range(len(query_lower) - 1):
|
||||
chars = list(query_lower)
|
||||
chars[i], chars[i + 1] = chars[i + 1], chars[i]
|
||||
variants.append(''.join(chars))
|
||||
|
||||
return list(set(variants))[:10] # Limit to 10 variants
|
||||
|
||||
@staticmethod
|
||||
def build_advanced_search_query(
|
||||
query: str,
|
||||
use_fuzzy: bool = True
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Build advanced search conditions for Prisma"""
|
||||
conditions = []
|
||||
|
||||
# Generate search variants for typo tolerance
|
||||
if use_fuzzy:
|
||||
search_variants = AdvancedSearchEngine.generate_search_variants(query)
|
||||
else:
|
||||
search_variants = [query]
|
||||
|
||||
for variant in search_variants:
|
||||
sanitized = sanitize_query(variant)
|
||||
if not sanitized:
|
||||
continue
|
||||
|
||||
# Tokenize for better matching
|
||||
tokens = AdvancedSearchEngine.tokenize(sanitized)
|
||||
|
||||
# Build conditions for each token
|
||||
for token in tokens:
|
||||
# Search in agent name with highest priority
|
||||
conditions.append({
|
||||
"agent_name": {"contains": token, "mode": "insensitive"}
|
||||
})
|
||||
|
||||
# Search in description
|
||||
conditions.append({
|
||||
"description": {"contains": token, "mode": "insensitive"}
|
||||
})
|
||||
|
||||
# Search in sub_heading
|
||||
conditions.append({
|
||||
"sub_heading": {"contains": token, "mode": "insensitive"}
|
||||
})
|
||||
|
||||
# Search in categories
|
||||
conditions.append({
|
||||
"categories": {"has": token}
|
||||
})
|
||||
|
||||
# Search in creator username
|
||||
conditions.append({
|
||||
"creator_username": {"contains": token, "mode": "insensitive"}
|
||||
})
|
||||
|
||||
return conditions
|
||||
agent_image = str(agent_image)
|
||||
|
||||
agent = backend.server.v2.store.model.StoreAgent(
|
||||
slug=row.get("slug", ""),
|
||||
agent_name=row.get("agent_name", ""),
|
||||
agent_image=agent_image,
|
||||
creator=row.get("creator_username", "Needs Profile"),
|
||||
creator_avatar=row.get("creator_avatar", ""),
|
||||
sub_heading=row.get("sub_heading", ""),
|
||||
description=row.get("description", ""),
|
||||
runs=row.get("runs", 0),
|
||||
rating=(
|
||||
float(row.get("rating", 0.0))
|
||||
if row.get("rating") is not None
|
||||
else 0.0
|
||||
),
|
||||
)
|
||||
agents.append(agent)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating StoreAgent from search result: {e}")
|
||||
continue
|
||||
|
||||
return backend.server.v2.store.model.StoreAgentsResponse(
|
||||
agents=agents,
|
||||
pagination=backend.server.v2.store.model.Pagination(
|
||||
current_page=1,
|
||||
total_items=len(agents),
|
||||
total_pages=1,
|
||||
page_size=20,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
async def search_agents(
|
||||
search_query: str,
|
||||
featured: bool | None = None,
|
||||
creators: list[str] | None = None,
|
||||
category: str | None = None,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
) -> backend.server.v2.store.model.StoreAgentsResponse:
|
||||
"""
|
||||
Search for store agents using embeddings with optional filters
|
||||
"""
|
||||
# For now, just use the search_store_agents function
|
||||
# In the future, we can add filtering logic here
|
||||
return await search_store_agents(search_query)
|
||||
|
||||
|
||||
async def get_store_agents(
|
||||
@@ -226,19 +158,19 @@ async def get_store_agents(
|
||||
) -> backend.server.v2.store.model.StoreAgentsResponse:
|
||||
"""
|
||||
Get PUBLIC store agents from the StoreAgent view with advanced search capabilities
|
||||
|
||||
|
||||
Features:
|
||||
- Fuzzy matching with typo tolerance
|
||||
- Token-based search across multiple fields
|
||||
- Relevance scoring and ranking
|
||||
- Category and creator filtering
|
||||
- Popularity boosting for high-rated/popular agents
|
||||
|
||||
|
||||
Note: For production deployments with Supabase, you can enable PostgreSQL extensions
|
||||
for even better search performance:
|
||||
- pg_trgm: For trigram-based fuzzy matching (available in Supabase)
|
||||
- Full-text search: Built into PostgreSQL
|
||||
|
||||
|
||||
These can be enabled via Supabase dashboard or raw SQL queries if needed.
|
||||
"""
|
||||
logger.debug(
|
||||
@@ -259,23 +191,23 @@ async def get_store_agents(
|
||||
search_conditions = AdvancedSearchEngine.build_advanced_search_query(
|
||||
search_query, use_fuzzy=True
|
||||
)
|
||||
|
||||
|
||||
if search_conditions:
|
||||
where_clause["OR"] = search_conditions
|
||||
|
||||
|
||||
# Define search relevance fields and their weights
|
||||
search_fields = {
|
||||
"agent_name": 3.0, # Highest weight for name matches
|
||||
"description": 2.0, # High weight for description
|
||||
"sub_heading": 1.5, # Medium weight for sub-heading
|
||||
"categories": 1.0, # Lower weight for categories
|
||||
"creator_username": 0.5 # Lower weight for creator name
|
||||
"agent_name": 3.0, # Highest weight for name matches
|
||||
"description": 2.0, # High weight for description
|
||||
"sub_heading": 1.5, # Medium weight for sub-heading
|
||||
"categories": 1.0, # Lower weight for categories
|
||||
"creator_username": 0.5, # Lower weight for creator name
|
||||
}
|
||||
|
||||
try:
|
||||
# Fetch more results initially for relevance scoring if searching
|
||||
fetch_limit = page_size * 3 if search_query else page_size
|
||||
|
||||
|
||||
# Build order_by clause
|
||||
order_by = []
|
||||
if sorted_by == "rating":
|
||||
@@ -287,7 +219,7 @@ async def get_store_agents(
|
||||
# Default to rating for better UX when no sort specified
|
||||
elif not sorted_by and not search_query:
|
||||
order_by.append({"rating": "desc"})
|
||||
|
||||
|
||||
# Fetch agents from database
|
||||
agents = await prisma.models.StoreAgent.prisma().find_many(
|
||||
where=prisma.types.StoreAgentWhereInput(**where_clause),
|
||||
@@ -306,21 +238,21 @@ async def get_store_agents(
|
||||
)
|
||||
if score > 0: # Only include agents with positive scores
|
||||
scored_agents.append((score, agent))
|
||||
|
||||
|
||||
# Sort by relevance score (highest first)
|
||||
scored_agents.sort(key=lambda x: x[0], reverse=True)
|
||||
|
||||
|
||||
# Apply secondary sorting if specified
|
||||
if sorted_by == "rating":
|
||||
scored_agents.sort(key=lambda x: (x[0], x[1].rating), reverse=True)
|
||||
elif sorted_by == "runs":
|
||||
scored_agents.sort(key=lambda x: (x[0], x[1].runs), reverse=True)
|
||||
|
||||
|
||||
# Extract agents for the requested page
|
||||
start_idx = (page - 1) * page_size
|
||||
end_idx = start_idx + page_size
|
||||
page_agents = [agent for _, agent in scored_agents[start_idx:end_idx]]
|
||||
|
||||
|
||||
# Update total count for pagination
|
||||
total = len(scored_agents)
|
||||
else:
|
||||
@@ -329,7 +261,7 @@ async def get_store_agents(
|
||||
total = await prisma.models.StoreAgent.prisma().count(
|
||||
where=prisma.types.StoreAgentWhereInput(**where_clause)
|
||||
)
|
||||
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
# Convert to response models
|
||||
@@ -1585,6 +1517,40 @@ async def review_store_submission(
|
||||
f"Failed to update store listing version {store_listing_version_id}"
|
||||
)
|
||||
|
||||
# Create embeddings if approved
|
||||
if is_approved and submission.StoreListing:
|
||||
service = get_search_service()
|
||||
|
||||
# Create embeddings for the approved listing
|
||||
fields_to_embed = [
|
||||
("name", submission.name, SearchFieldType.NAME),
|
||||
("description", submission.description, SearchFieldType.DESCRIPTION),
|
||||
]
|
||||
|
||||
if submission.subHeading:
|
||||
fields_to_embed.append(
|
||||
("subHeading", submission.subHeading, SearchFieldType.SUBHEADING)
|
||||
)
|
||||
|
||||
if submission.categories:
|
||||
categories_text = ", ".join(submission.categories)
|
||||
fields_to_embed.append(
|
||||
("categories", categories_text, SearchFieldType.CATEGORIES)
|
||||
)
|
||||
|
||||
for field_name, field_value, field_type in fields_to_embed:
|
||||
embedding = create_embedding(field_value)
|
||||
await service.upsert_search_record(
|
||||
store_listing_version_id=store_listing_version_id,
|
||||
store_listing_id=submission.StoreListing.id,
|
||||
field_name=field_name,
|
||||
field_value=field_value,
|
||||
embedding=embedding,
|
||||
field_type=field_type,
|
||||
submission_status=SearchSubmissionStatus.APPROVED,
|
||||
is_available=submission.isAvailable,
|
||||
)
|
||||
|
||||
# Send email notification to the agent creator
|
||||
if store_listing_version.AgentGraph and store_listing_version.AgentGraph.User:
|
||||
agent_creator = store_listing_version.AgentGraph.User
|
||||
|
||||
390
autogpt_platform/backend/backend/server/v2/store/embeddings.py
Normal file
390
autogpt_platform/backend/backend/server/v2/store/embeddings.py
Normal file
@@ -0,0 +1,390 @@
|
||||
"""Store search functionality with embeddings and pgvector using SQLAlchemy"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
from uuid import uuid4
|
||||
|
||||
from openai import OpenAI
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy import Boolean, Column, DateTime
|
||||
from sqlalchemy import Enum as SQLEnum
|
||||
from sqlalchemy import Index, String, UniqueConstraint, and_, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
# OpenAI client for embeddings
|
||||
client = OpenAI()
|
||||
|
||||
|
||||
def create_embedding(text: str) -> list[float]:
|
||||
response = client.embeddings.create(
|
||||
input=text,
|
||||
model="text-embedding-3-small",
|
||||
)
|
||||
return response.data[0].embedding
|
||||
|
||||
|
||||
# SQLAlchemy models
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
class SubmissionStatus(str, Enum):
|
||||
PENDING = "PENDING"
|
||||
APPROVED = "APPROVED"
|
||||
REJECTED = "REJECTED"
|
||||
|
||||
|
||||
class SearchFieldType(str, Enum):
|
||||
NAME = "NAME"
|
||||
DESCRIPTION = "DESCRIPTION"
|
||||
CATEGORIES = "CATEGORIES"
|
||||
SUBHEADING = "SUBHEADING"
|
||||
|
||||
|
||||
class StoreAgentSearch(Base):
|
||||
__tablename__ = "StoreAgentSearch"
|
||||
|
||||
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
|
||||
createdAt = Column(
|
||||
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
updatedAt = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
# Relations (foreign keys exist in DB but not modeled here)
|
||||
storeListingVersionId = Column(String, nullable=False)
|
||||
storeListingId = Column(String, nullable=False)
|
||||
|
||||
# Searchable fields
|
||||
fieldName = Column(String, nullable=False)
|
||||
fieldValue = Column(String, nullable=False)
|
||||
|
||||
# Vector embedding for similarity search
|
||||
# text-embedding-3-small produces 1536-dimensional embeddings
|
||||
embedding = Column(Vector(1536), nullable=False)
|
||||
|
||||
# Metadata
|
||||
fieldType = Column(
|
||||
SQLEnum(SearchFieldType, name="SearchFieldType", schema="platform"),
|
||||
nullable=False,
|
||||
)
|
||||
submissionStatus = Column(
|
||||
SQLEnum(SubmissionStatus, name="SubmissionStatus", schema="platform"),
|
||||
nullable=False,
|
||||
)
|
||||
isAvailable = Column(Boolean, nullable=False)
|
||||
|
||||
# Constraints and schema
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"storeListingVersionId",
|
||||
"fieldName",
|
||||
name="_store_listing_version_field_unique",
|
||||
),
|
||||
Index("ix_store_agent_search_listing_version", "storeListingVersionId"),
|
||||
Index("ix_store_agent_search_listing", "storeListingId"),
|
||||
Index("ix_store_agent_search_field_name", "fieldName"),
|
||||
Index("ix_store_agent_search_field_type", "fieldType"),
|
||||
Index(
|
||||
"ix_store_agent_search_status_available", "submissionStatus", "isAvailable"
|
||||
),
|
||||
{"schema": "platform"}, # Specify the schema
|
||||
)
|
||||
|
||||
|
||||
class StoreAgentSearchService:
|
||||
"""Service class for Store Agent Search operations using SQLAlchemy"""
|
||||
|
||||
def __init__(self, database_url: str):
|
||||
"""Initialize the search service with async database connection"""
|
||||
# Parse the URL to handle schema and other params separately
|
||||
from urllib.parse import parse_qs, urlparse, urlunparse
|
||||
|
||||
parsed = urlparse(database_url)
|
||||
query_params = parse_qs(parsed.query)
|
||||
|
||||
# Extract schema if present
|
||||
schema = query_params.pop("schema", ["public"])[0]
|
||||
|
||||
# Remove connect_timeout from query params (will be handled in connect_args)
|
||||
connect_timeout = query_params.pop("connect_timeout", [None])[0]
|
||||
|
||||
# Rebuild query string without schema and connect_timeout
|
||||
new_query = "&".join([f"{k}={v[0]}" for k, v in query_params.items()])
|
||||
|
||||
# Rebuild URL without schema and connect_timeout parameters
|
||||
clean_url = urlunparse(
|
||||
(
|
||||
parsed.scheme,
|
||||
parsed.netloc,
|
||||
parsed.path,
|
||||
parsed.params,
|
||||
new_query,
|
||||
parsed.fragment,
|
||||
)
|
||||
)
|
||||
|
||||
# Convert to async URL for asyncpg
|
||||
if clean_url.startswith("postgresql://"):
|
||||
clean_url = clean_url.replace("postgresql://", "postgresql+asyncpg://")
|
||||
elif clean_url.startswith("postgres://"):
|
||||
clean_url = clean_url.replace("postgres://", "postgresql+asyncpg://")
|
||||
|
||||
# Build connect_args
|
||||
connect_args: dict[str, Any] = {"server_settings": {"search_path": schema}}
|
||||
|
||||
# Add timeout if present (asyncpg uses 'timeout' not 'connect_timeout')
|
||||
if connect_timeout:
|
||||
connect_args["timeout"] = float(connect_timeout)
|
||||
|
||||
# Create engine with schema in connect_args
|
||||
self.engine = create_async_engine(
|
||||
clean_url,
|
||||
echo=False, # Set to True for debugging SQL queries
|
||||
future=True,
|
||||
connect_args=connect_args,
|
||||
)
|
||||
|
||||
self.async_session = async_sessionmaker(
|
||||
self.engine, class_=AsyncSession, expire_on_commit=False
|
||||
)
|
||||
|
||||
async def create_search_record(
|
||||
self,
|
||||
store_listing_version_id: str,
|
||||
store_listing_id: str,
|
||||
field_name: str,
|
||||
field_value: str,
|
||||
embedding: list[float],
|
||||
field_type: SearchFieldType,
|
||||
submission_status: SubmissionStatus,
|
||||
is_available: bool,
|
||||
) -> StoreAgentSearch:
|
||||
"""Create a new search record with embedding"""
|
||||
async with self.async_session() as session:
|
||||
search_record = StoreAgentSearch(
|
||||
storeListingVersionId=store_listing_version_id,
|
||||
storeListingId=store_listing_id,
|
||||
fieldName=field_name,
|
||||
fieldValue=field_value,
|
||||
embedding=embedding,
|
||||
fieldType=field_type,
|
||||
submissionStatus=submission_status,
|
||||
isAvailable=is_available,
|
||||
)
|
||||
session.add(search_record)
|
||||
await session.commit()
|
||||
await session.refresh(search_record)
|
||||
return search_record
|
||||
|
||||
async def batch_create_search_records(
|
||||
self, records: list[dict]
|
||||
) -> list[StoreAgentSearch]:
|
||||
"""Batch create multiple search records"""
|
||||
async with self.async_session() as session:
|
||||
search_records = []
|
||||
for record in records:
|
||||
search_record = StoreAgentSearch(
|
||||
storeListingVersionId=record["storeListingVersionId"],
|
||||
storeListingId=record["storeListingId"],
|
||||
fieldName=record["fieldName"],
|
||||
fieldValue=record["fieldValue"],
|
||||
embedding=record["embedding"],
|
||||
fieldType=record.get("fieldType", SearchFieldType.NAME),
|
||||
submissionStatus=record["submissionStatus"],
|
||||
isAvailable=record["isAvailable"],
|
||||
)
|
||||
session.add(search_record)
|
||||
search_records.append(search_record)
|
||||
|
||||
await session.commit()
|
||||
return search_records
|
||||
|
||||
async def search_by_embedding(
|
||||
self, query_embedding: List[float], limit: int = 30
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Search for store agents using vector similarity
|
||||
Returns the best matching store listings based on embedding similarity
|
||||
"""
|
||||
async with self.async_session() as session:
|
||||
# For vector similarity search, we still need to use raw SQL
|
||||
# because SQLAlchemy doesn't yet fully support pgvector operators
|
||||
# However, we can use SQLAlchemy's text() for safer query construction
|
||||
query = text(
|
||||
"""
|
||||
WITH similarity_scores AS (
|
||||
SELECT
|
||||
sas."storeListingId",
|
||||
MIN(sas.embedding <=> CAST(:embedding AS vector)) AS similarity_score
|
||||
FROM platform."StoreAgentSearch" sas
|
||||
WHERE
|
||||
sas."submissionStatus" = 'APPROVED'
|
||||
AND sas."isAvailable" = true
|
||||
GROUP BY sas."storeListingId"
|
||||
ORDER BY similarity_score
|
||||
LIMIT :limit
|
||||
)
|
||||
SELECT
|
||||
sa.listing_id as id,
|
||||
sa.slug,
|
||||
sa.agent_name,
|
||||
sa.agent_image,
|
||||
sa.description,
|
||||
sa.sub_heading,
|
||||
sa.featured,
|
||||
sa.runs,
|
||||
sa.rating,
|
||||
sa.creator_username,
|
||||
sa.creator_avatar,
|
||||
ss.similarity_score
|
||||
FROM similarity_scores ss
|
||||
INNER JOIN platform."StoreAgent" sa
|
||||
ON sa.listing_id = ss."storeListingId"
|
||||
ORDER BY ss.similarity_score;
|
||||
"""
|
||||
)
|
||||
|
||||
# Format embedding as PostgreSQL array
|
||||
embedding_str = "[" + ",".join(map(str, query_embedding)) + "]"
|
||||
|
||||
result = await session.execute(
|
||||
query, {"embedding": embedding_str, "limit": limit}
|
||||
)
|
||||
|
||||
rows = result.fetchall()
|
||||
|
||||
# Convert rows to dictionaries
|
||||
return [dict(row._mapping) for row in rows]
|
||||
|
||||
async def get_search_records(
|
||||
self,
|
||||
store_listing_version_id: Optional[str] = None,
|
||||
field_name: Optional[str] = None,
|
||||
is_available: Optional[bool] = None,
|
||||
) -> Sequence[StoreAgentSearch]:
|
||||
"""
|
||||
Get search records using SQLAlchemy ORM
|
||||
"""
|
||||
async with self.async_session() as session:
|
||||
stmt = select(StoreAgentSearch)
|
||||
|
||||
# Build filters
|
||||
filters = []
|
||||
if store_listing_version_id:
|
||||
filters.append(
|
||||
StoreAgentSearch.storeListingVersionId == store_listing_version_id
|
||||
)
|
||||
if field_name:
|
||||
filters.append(StoreAgentSearch.fieldName == field_name)
|
||||
if is_available is not None:
|
||||
filters.append(StoreAgentSearch.isAvailable == is_available)
|
||||
|
||||
if filters:
|
||||
stmt = stmt.where(and_(*filters))
|
||||
|
||||
result = await session.execute(stmt)
|
||||
return result.scalars().all()
|
||||
|
||||
async def update_search_embeddings(
|
||||
self, store_listing_version_id: str, updates: Dict[str, List[float]]
|
||||
) -> None:
|
||||
"""Update embeddings for existing search records"""
|
||||
async with self.async_session() as session:
|
||||
for field_name, embedding in updates.items():
|
||||
# For vector updates, we still need raw SQL due to pgvector
|
||||
# Use $ parameters for asyncpg
|
||||
query = text(
|
||||
"""
|
||||
UPDATE platform."StoreAgentSearch"
|
||||
SET embedding = CAST(:embedding AS vector),
|
||||
"updatedAt" = CAST(:updated_at AS TIMESTAMPTZ)
|
||||
WHERE "storeListingVersionId" = :version_id
|
||||
AND "fieldName" = :field_name
|
||||
"""
|
||||
)
|
||||
|
||||
embedding_str = "[" + ",".join(map(str, embedding)) + "]"
|
||||
|
||||
await session.execute(
|
||||
query,
|
||||
{
|
||||
"embedding": embedding_str,
|
||||
"updated_at": datetime.now(timezone.utc),
|
||||
"version_id": store_listing_version_id,
|
||||
"field_name": field_name,
|
||||
},
|
||||
)
|
||||
|
||||
await session.commit()
|
||||
|
||||
async def delete_search_records(self, store_listing_version_id: str) -> None:
|
||||
"""Delete all search records for a store listing version using SQLAlchemy ORM"""
|
||||
async with self.async_session() as session:
|
||||
# Use SQLAlchemy ORM for deletion
|
||||
stmt = select(StoreAgentSearch).where(
|
||||
StoreAgentSearch.storeListingVersionId == store_listing_version_id
|
||||
)
|
||||
result = await session.execute(stmt)
|
||||
records = result.scalars().all()
|
||||
|
||||
for record in records:
|
||||
await session.delete(record)
|
||||
|
||||
await session.commit()
|
||||
|
||||
async def upsert_search_record(
|
||||
self,
|
||||
store_listing_version_id: str,
|
||||
store_listing_id: str,
|
||||
field_name: str,
|
||||
field_value: str,
|
||||
embedding: List[float],
|
||||
field_type: SearchFieldType,
|
||||
submission_status: SubmissionStatus,
|
||||
is_available: bool,
|
||||
) -> StoreAgentSearch:
|
||||
"""Upsert a search record (update if exists, create if not)"""
|
||||
async with self.async_session() as session:
|
||||
# Check if record exists
|
||||
stmt = select(StoreAgentSearch).where(
|
||||
and_(
|
||||
StoreAgentSearch.storeListingVersionId == store_listing_version_id,
|
||||
StoreAgentSearch.fieldName == field_name,
|
||||
)
|
||||
)
|
||||
result = await session.execute(stmt)
|
||||
existing_record = result.scalar_one_or_none()
|
||||
|
||||
if existing_record:
|
||||
# Update existing record
|
||||
existing_record.fieldValue = field_value # type: ignore[attr-defined]
|
||||
existing_record.embedding = embedding # type: ignore[attr-defined]
|
||||
existing_record.fieldType = field_type # type: ignore[attr-defined]
|
||||
existing_record.submissionStatus = submission_status # type: ignore[attr-defined]
|
||||
existing_record.isAvailable = is_available # type: ignore[attr-defined]
|
||||
existing_record.updatedAt = datetime.now(timezone.utc) # type: ignore[attr-defined]
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(existing_record)
|
||||
return existing_record
|
||||
else:
|
||||
# Create new record
|
||||
return await self.create_search_record(
|
||||
store_listing_version_id=store_listing_version_id,
|
||||
store_listing_id=store_listing_id,
|
||||
field_name=field_name,
|
||||
field_value=field_value,
|
||||
embedding=embedding,
|
||||
field_type=field_type,
|
||||
submission_status=submission_status,
|
||||
is_available=is_available,
|
||||
)
|
||||
|
||||
async def close(self):
|
||||
"""Close the database connection"""
|
||||
await self.engine.dispose()
|
||||
@@ -154,15 +154,26 @@ async def get_agents(
|
||||
)
|
||||
|
||||
try:
|
||||
agents = await backend.server.v2.store.db.get_store_agents(
|
||||
featured=featured,
|
||||
creators=[creator] if creator else None,
|
||||
sorted_by=sorted_by,
|
||||
search_query=search_query,
|
||||
category=category,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
# Use vector similarity search if we have a search query, otherwise use traditional search
|
||||
if search_query and search_query.strip():
|
||||
agents = await backend.server.v2.store.db.search_agents(
|
||||
search_query=search_query,
|
||||
featured=featured,
|
||||
creators=[creator] if creator else None,
|
||||
category=category,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
else:
|
||||
agents = await backend.server.v2.store.db.get_store_agents(
|
||||
featured=featured,
|
||||
creators=[creator] if creator else None,
|
||||
sorted_by=sorted_by,
|
||||
search_query=search_query,
|
||||
category=category,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
return agents
|
||||
except Exception as e:
|
||||
logger.exception("Failed to retrieve store agents: %s", e)
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
-- Enable pgvector extension
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- CreateEnum
|
||||
CREATE TYPE "SearchFieldType" AS ENUM ('NAME', 'DESCRIPTION', 'CATEGORIES', 'SUBHEADING');
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "StoreAgentSearch" (
|
||||
"id" TEXT NOT NULL,
|
||||
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"updatedAt" TIMESTAMP(3) NOT NULL,
|
||||
"storeListingVersionId" TEXT NOT NULL,
|
||||
"storeListingId" TEXT NOT NULL,
|
||||
"fieldName" TEXT NOT NULL,
|
||||
"fieldValue" TEXT NOT NULL,
|
||||
"embedding" vector(1536),
|
||||
"fieldType" "SearchFieldType" NOT NULL,
|
||||
"submissionStatus" "SubmissionStatus" NOT NULL,
|
||||
"isAvailable" BOOLEAN NOT NULL,
|
||||
|
||||
CONSTRAINT "StoreAgentSearch_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "StoreAgentSearch_storeListingVersionId_idx" ON "StoreAgentSearch"("storeListingVersionId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "StoreAgentSearch_storeListingId_idx" ON "StoreAgentSearch"("storeListingId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "StoreAgentSearch_fieldName_idx" ON "StoreAgentSearch"("fieldName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "StoreAgentSearch_fieldType_idx" ON "StoreAgentSearch"("fieldType");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "StoreAgentSearch_submissionStatus_isAvailable_idx" ON "StoreAgentSearch"("submissionStatus", "isAvailable");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "StoreAgentSearch_storeListingVersionId_fieldName_key" ON "StoreAgentSearch"("storeListingVersionId", "fieldName");
|
||||
|
||||
-- Create HNSW index for vector similarity search (pgvector)
|
||||
CREATE INDEX "StoreAgentSearch_embedding_idx" ON "StoreAgentSearch" USING hnsw (embedding vector_cosine_ops);
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingVersionId_fkey" FOREIGN KEY ("storeListingVersionId") REFERENCES "StoreListingVersion"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingId_fkey" FOREIGN KEY ("storeListingId") REFERENCES "StoreListing"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
84
autogpt_platform/backend/poetry.lock
generated
84
autogpt_platform/backend/poetry.lock
generated
@@ -311,6 +311,73 @@ files = [
|
||||
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asyncpg"
|
||||
version = "0.30.0"
|
||||
description = "An asyncio PostgreSQL driver"
|
||||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"},
|
||||
{file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"},
|
||||
{file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"},
|
||||
{file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"},
|
||||
{file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"},
|
||||
{file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"},
|
||||
{file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.11.0\""}
|
||||
|
||||
[package.extras]
|
||||
docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"]
|
||||
gssauth = ["gssapi ; platform_system != \"Windows\"", "sspilib ; platform_system == \"Windows\""]
|
||||
test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi ; platform_system == \"Linux\"", "k5test ; platform_system == \"Linux\"", "mypy (>=1.8.0,<1.9.0)", "sspilib ; platform_system == \"Windows\"", "uvloop (>=0.15.3) ; platform_system != \"Windows\" and python_version < \"3.14.0\""]
|
||||
|
||||
[[package]]
|
||||
name = "attrs"
|
||||
version = "25.3.0"
|
||||
@@ -3600,6 +3667,21 @@ all = ["pbs-installer[download,install]"]
|
||||
download = ["httpx (>=0.27.0,<1)"]
|
||||
install = ["zstandard (>=0.21.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pgvector"
|
||||
version = "0.4.1"
|
||||
description = "pgvector support for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362"},
|
||||
{file = "pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = "*"
|
||||
|
||||
[[package]]
|
||||
name = "pika"
|
||||
version = "1.3.2"
|
||||
@@ -7132,4 +7214,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.14"
|
||||
content-hash = "892daa57d7126d9a9d5308005b07328a39b8c4cd7fe198f9b5ab10f957787c48"
|
||||
content-hash = "e5e5cd7f11c2e2351c084aec7e4ddec6c154dc7274797af6ee616fa14db5676d"
|
||||
|
||||
@@ -79,6 +79,8 @@ firecrawl-py = "^2.16.3"
|
||||
exa-py = "^1.14.20"
|
||||
croniter = "^6.0.0"
|
||||
stagehand = "^0.5.1"
|
||||
pgvector = "^0.4.1"
|
||||
asyncpg = "^0.30.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
aiohappyeyeballs = "^2.6.1"
|
||||
|
||||
@@ -724,6 +724,7 @@ model StoreListing {
|
||||
|
||||
// Relations
|
||||
Versions StoreListingVersion[] @relation("ListingVersions")
|
||||
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
|
||||
|
||||
// Unique index on agentId to ensure only one listing per agent, regardless of number of versions the agent has.
|
||||
@@unique([agentGraphId])
|
||||
@@ -781,6 +782,9 @@ model StoreListingVersion {
|
||||
|
||||
// Reviews for this specific version
|
||||
Reviews StoreListingReview[]
|
||||
|
||||
// Search index entries for vector search
|
||||
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
|
||||
|
||||
@@unique([storeListingId, version])
|
||||
@@index([storeListingId, submissionStatus, isAvailable])
|
||||
@@ -789,6 +793,55 @@ model StoreListingVersion {
|
||||
@@index([agentGraphId, agentGraphVersion]) // Non-unique index for efficient lookups
|
||||
}
|
||||
|
||||
model StoreAgentSearch {
|
||||
id String @id @default(uuid())
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
// Relation to StoreListingVersion
|
||||
storeListingVersionId String
|
||||
StoreListingVersion StoreListingVersion @relation("SearchIndexes", fields: [storeListingVersionId], references: [id], onDelete: Cascade)
|
||||
|
||||
// Direct relation to StoreListing for easier joins
|
||||
storeListingId String
|
||||
StoreListing StoreListing @relation("SearchIndexes", fields: [storeListingId], references: [id], onDelete: Cascade)
|
||||
|
||||
// Searchable fields that will be embedded
|
||||
fieldName String // The field being indexed (name, description, categories, subheading)
|
||||
fieldValue String // The actual text content for this field
|
||||
|
||||
// Vector embedding for similarity search using pgvector
|
||||
// text-embedding-3-small produces 1536-dimensional embeddings
|
||||
embedding Float[] // pgvector field for storing 1536-dimensional embeddings
|
||||
|
||||
// Metadata for search optimization
|
||||
fieldType SearchFieldType // Type of field for filtering
|
||||
|
||||
// Denormalized fields for filtering without joins
|
||||
submissionStatus SubmissionStatus // Copy from StoreListingVersion for filtering approved content
|
||||
isAvailable Boolean // Copy from StoreListingVersion for filtering available content
|
||||
|
||||
// Unique constraint to prevent duplicate indexing of the same field
|
||||
@@unique([storeListingVersionId, fieldName])
|
||||
|
||||
// Indexes for performance
|
||||
@@index([storeListingVersionId])
|
||||
@@index([storeListingId])
|
||||
@@index([fieldName])
|
||||
@@index([fieldType])
|
||||
@@index([submissionStatus, isAvailable]) // For filtering approved and available content
|
||||
|
||||
// Note: pgvector index will be created manually in migration for vector similarity search
|
||||
}
|
||||
|
||||
// Enum for searchable field types
|
||||
enum SearchFieldType {
|
||||
NAME // Agent name
|
||||
DESCRIPTION // Agent description
|
||||
CATEGORIES // Agent categories
|
||||
SUBHEADING // Agent subheading
|
||||
}
|
||||
|
||||
model StoreListingReview {
|
||||
id String @id @default(uuid())
|
||||
createdAt DateTime @default(now())
|
||||
|
||||
Reference in New Issue
Block a user