Compare commits

...

6 Commits

Author SHA1 Message Date
Swifty 04460e43f3 fixed test 2025-09-02 23:09:59 +02:00
Swifty 0d93315767 Merge branch 'dev' into swiftyos/secrt-1566-enhance-marketplace-search 2025-09-02 23:06:08 +02:00
Swifty 7528920b5d tidied up the implementation 2025-09-02 23:05:30 +02:00
Swifty 911b119296 delete expired test suite 2025-09-02 22:58:49 +02:00
Swifty df50c7352d add vector search in store 2025-09-02 22:56:14 +02:00
Swifty 7db3db6b2e added enhanced search functionality 2025-09-02 15:56:13 +02:00
8 changed files with 864 additions and 16 deletions

View File

@@ -1,4 +1,5 @@
import logging
import os
from datetime import datetime, timezone
import fastapi
@@ -23,11 +24,29 @@ from backend.data.notifications import (
NotificationEventModel,
)
from backend.notifications.notifications import queue_notification_async
from backend.server.v2.store.embeddings import SearchFieldType, StoreAgentSearchService
from backend.server.v2.store.embeddings import (
SubmissionStatus as SearchSubmissionStatus,
)
from backend.server.v2.store.embeddings import create_embedding
from backend.util.settings import Settings
logger = logging.getLogger(__name__)
settings = Settings()
# Search service is created lazily with the database URL (see get_search_service)
search_service: StoreAgentSearchService | None = None
def get_search_service() -> StoreAgentSearchService:
"""Get or create the search service instance"""
global search_service
if search_service is None:
# Get database URL from environment variable (same as Prisma uses)
db_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
search_service = StoreAgentSearchService(db_url)
return search_service
# Constants for default admin values
DEFAULT_ADMIN_NAME = "AutoGPT Admin"
@@ -53,6 +72,113 @@ def sanitize_query(query: str | None) -> str | None:
)
async def search_store_agents(
search_query: str,
) -> backend.server.v2.store.model.StoreAgentsResponse:
"""
Search for store agents using embeddings with SQLAlchemy.
Falls back to text search if embedding creation fails.
"""
try:
# Try to create embedding for semantic search
query_embedding = await create_embedding(search_query)
if query_embedding is None:
# Fallback to text-based search if embedding fails
logger.warning(
f"Failed to create embedding for query: {search_query}. "
"Falling back to text search."
)
return await get_store_agents(
search_query=search_query,
page=1,
page_size=30,
)
# Use SQLAlchemy service for vector search
service = get_search_service()
results = await service.search_by_embedding(query_embedding, limit=30)
except Exception as e:
logger.error(f"Error during vector search: {e}. Falling back to text search.")
# Fallback to regular text search on any error
return await get_store_agents(
search_query=search_query,
page=1,
page_size=30,
)
# Convert raw results to StoreAgent models
agents = []
for row in results:
try:
# Handle agent_image - it could be a list or a single string
agent_image = row.get("agent_image", "")
if isinstance(agent_image, list) and agent_image:
agent_image = str(agent_image[0])
elif not agent_image:
agent_image = ""
else:
agent_image = str(agent_image)
agent = backend.server.v2.store.model.StoreAgent(
slug=row.get("slug", ""),
agent_name=row.get("agent_name", ""),
agent_image=agent_image,
creator=row.get("creator_username", "Needs Profile"),
creator_avatar=row.get("creator_avatar", ""),
sub_heading=row.get("sub_heading", ""),
description=row.get("description", ""),
runs=row.get("runs", 0),
rating=(
float(row.get("rating", 0.0))
if row.get("rating") is not None
else 0.0
),
)
agents.append(agent)
except Exception as e:
logger.error(f"Error creating StoreAgent from search result: {e}")
continue
return backend.server.v2.store.model.StoreAgentsResponse(
agents=agents,
pagination=backend.server.v2.store.model.Pagination(
current_page=1,
total_items=len(agents),
total_pages=1,
page_size=30,  # matches the vector search limit above
),
)
async def search_agents(
search_query: str,
featured: bool | None = None,
creators: list[str] | None = None,
category: str | None = None,
page: int = 1,
page_size: int = 20,
) -> backend.server.v2.store.model.StoreAgentsResponse:
"""
Search for store agents using embeddings with optional filters.
Falls back to text search if embedding service is unavailable.
"""
try:
# Try vector search first
return await search_store_agents(search_query)
except Exception as e:
logger.error(f"Vector search failed: {e}. Using text search fallback.")
# Fallback to text search with filters
return await get_store_agents(
featured=featured or False,
creators=creators,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
async def get_store_agents(
featured: bool = False,
creators: list[str] | None = None,
@@ -124,7 +250,6 @@ async def get_store_agents(
store_agents.append(store_agent)
except Exception as e:
# Skip this agent if there was an error
# You could log the error here if needed
logger.error(
f"Error parsing Store agent when getting store agents from db: {e}"
)
@@ -191,7 +316,7 @@ async def get_store_agent_details(
agent_video=agent.agent_video or "",
agent_image=agent.agent_image,
creator=agent.creator_username,
creator_avatar=agent.creator_avatar,
creator_avatar=agent.creator_avatar or "",
sub_heading=agent.sub_heading,
description=agent.description,
categories=agent.categories,
@@ -264,7 +389,7 @@ async def get_store_agent_by_version_id(
agent_video=agent.agent_video or "",
agent_image=agent.agent_image,
creator=agent.creator_username,
creator_avatar=agent.creator_avatar,
creator_avatar=agent.creator_avatar or "",
sub_heading=agent.sub_heading,
description=agent.description,
categories=agent.categories,
@@ -1358,6 +1483,64 @@ async def review_store_submission(
f"Failed to update store listing version {store_listing_version_id}"
)
# Create embeddings if approved
if is_approved and submission.StoreListing:
try:
service = get_search_service()
# Create embeddings for the approved listing
fields_to_embed = [
("name", submission.name, SearchFieldType.NAME),
(
"description",
submission.description,
SearchFieldType.DESCRIPTION,
),
]
if submission.subHeading:
fields_to_embed.append(
(
"subHeading",
submission.subHeading,
SearchFieldType.SUBHEADING,
)
)
if submission.categories:
categories_text = ", ".join(submission.categories)
fields_to_embed.append(
("categories", categories_text, SearchFieldType.CATEGORIES)
)
for field_name, field_value, field_type in fields_to_embed:
# Create embedding asynchronously
embedding = await create_embedding(field_value)
# Only store if embedding was created successfully
if embedding:
await service.upsert_search_record(
store_listing_version_id=store_listing_version_id,
store_listing_id=submission.StoreListing.id,
field_name=field_name,
field_value=field_value,
embedding=embedding,
field_type=field_type,
submission_status=SearchSubmissionStatus.APPROVED,
is_available=submission.isAvailable,
)
else:
logger.warning(
f"Failed to create embedding for {field_name} in listing {store_listing_version_id}. "
"Search indexing skipped for this field."
)
except Exception as e:
# Log error but don't fail the approval process
logger.error(
f"Error creating search embeddings for listing {store_listing_version_id}: {e}. "
"Approval will continue without search indexing."
)
# Send email notification to the agent creator
if store_listing_version.AgentGraph and store_listing_version.AgentGraph.User:
agent_creator = store_listing_version.AgentGraph.User
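
For context, a minimal sketch of the fallback chain this file introduces (a hypothetical driver script, not part of the diff; module path as patched in the tests below):

import asyncio

from backend.server.v2.store.db import search_store_agents

async def demo() -> None:
    # If create_embedding() returns None (e.g. no OpenAI key) or the vector
    # query raises, this transparently falls back to get_store_agents().
    response = await search_store_agents("summarize long documents")
    for agent in response.agents:
        print(agent.slug, agent.agent_name, agent.rating)

if __name__ == "__main__":
    asyncio.run(demo())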

View File

@@ -0,0 +1,469 @@
"""Store search functionality with embeddings and pgvector using SQLAlchemy"""
import logging
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence
from uuid import uuid4
from openai import AsyncOpenAI, OpenAIError
from pgvector.sqlalchemy import Vector
from sqlalchemy import Boolean, Column, DateTime
from sqlalchemy import Enum as SQLEnum
from sqlalchemy import Index, String, UniqueConstraint, and_, select, text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.ext.declarative import declarative_base
from backend.util.settings import Settings
logger = logging.getLogger(__name__)
settings = Settings()
# Initialize async OpenAI client with proper configuration
_openai_client: Optional[AsyncOpenAI] = None
def get_openai_client() -> AsyncOpenAI:
"""Get or create the async OpenAI client with proper configuration."""
global _openai_client
if _openai_client is None:
api_key = settings.secrets.openai_api_key
if not api_key:
logger.warning(
"OpenAI API key not configured. Vector search will use fallback text search."
)
raise ValueError(
"OpenAI API key is not configured. Please set OPENAI_API_KEY in environment."
)
_openai_client = AsyncOpenAI(api_key=api_key)
return _openai_client
async def create_embedding(text: str) -> Optional[list[float]]:
"""Create an embedding for the given text using OpenAI's API.
Args:
text: The text to create an embedding for
Returns:
A list of floats representing the embedding, or None if creation fails
"""
try:
client = get_openai_client()
response = await client.embeddings.create(
input=text,
model="text-embedding-3-small",
)
return response.data[0].embedding
except ValueError as e:
# API key not configured
logger.error(f"OpenAI configuration error: {e}")
return None
except OpenAIError as e:
# Handle specific OpenAI errors
logger.error(f"OpenAI API error creating embedding: {e}")
return None
except Exception as e:
# Handle unexpected errors
logger.error(f"Unexpected error creating embedding: {e}")
return None
# SQLAlchemy models
Base = declarative_base()
class SubmissionStatus(str, Enum):
PENDING = "PENDING"
APPROVED = "APPROVED"
REJECTED = "REJECTED"
class SearchFieldType(str, Enum):
NAME = "NAME"
DESCRIPTION = "DESCRIPTION"
CATEGORIES = "CATEGORIES"
SUBHEADING = "SUBHEADING"
class StoreAgentSearch(Base):
__tablename__ = "StoreAgentSearch"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
createdAt = Column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updatedAt = Column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)
# Relations (foreign keys exist in DB but not modeled here)
storeListingVersionId = Column(String, nullable=False)
storeListingId = Column(String, nullable=False)
# Searchable fields
fieldName = Column(String, nullable=False)
fieldValue = Column(String, nullable=False)
# Vector embedding for similarity search
# text-embedding-3-small produces 1536-dimensional embeddings
embedding = Column(Vector(1536), nullable=False)
# Metadata
fieldType = Column(
SQLEnum(SearchFieldType, name="SearchFieldType", schema="platform"),
nullable=False,
)
submissionStatus = Column(
SQLEnum(SubmissionStatus, name="SubmissionStatus", schema="platform"),
nullable=False,
)
isAvailable = Column(Boolean, nullable=False)
# Constraints and schema
__table_args__ = (
UniqueConstraint(
"storeListingVersionId",
"fieldName",
name="_store_listing_version_field_unique",
),
Index("ix_store_agent_search_listing_version", "storeListingVersionId"),
Index("ix_store_agent_search_listing", "storeListingId"),
Index("ix_store_agent_search_field_name", "fieldName"),
Index("ix_store_agent_search_field_type", "fieldType"),
Index(
"ix_store_agent_search_status_available", "submissionStatus", "isAvailable"
),
{"schema": "platform"}, # Specify the schema
)
class StoreAgentSearchService:
"""Service class for Store Agent Search operations using SQLAlchemy"""
def __init__(self, database_url: str):
"""Initialize the search service with async database connection"""
# Parse the URL to handle schema and other params separately
from urllib.parse import parse_qs, urlparse, urlunparse
parsed = urlparse(database_url)
query_params = parse_qs(parsed.query)
# Extract schema if present
schema = query_params.pop("schema", ["public"])[0]
# Remove connect_timeout from query params (will be handled in connect_args)
connect_timeout = query_params.pop("connect_timeout", [None])[0]
# Rebuild query string without schema and connect_timeout
new_query = "&".join([f"{k}={v[0]}" for k, v in query_params.items()])
# Rebuild URL without schema and connect_timeout parameters
clean_url = urlunparse(
(
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
new_query,
parsed.fragment,
)
)
# Convert to async URL for asyncpg
if clean_url.startswith("postgresql://"):
clean_url = clean_url.replace("postgresql://", "postgresql+asyncpg://")
elif clean_url.startswith("postgres://"):
clean_url = clean_url.replace("postgres://", "postgresql+asyncpg://")
# Build connect_args
connect_args: dict[str, Any] = {"server_settings": {"search_path": schema}}
# Add timeout if present (asyncpg uses 'timeout' not 'connect_timeout')
if connect_timeout:
connect_args["timeout"] = float(connect_timeout)
# Create engine with schema in connect_args
self.engine = create_async_engine(
clean_url,
echo=False, # Set to True for debugging SQL queries
future=True,
connect_args=connect_args,
)
self.async_session = async_sessionmaker(
self.engine, class_=AsyncSession, expire_on_commit=False
)
async def create_search_record(
self,
store_listing_version_id: str,
store_listing_id: str,
field_name: str,
field_value: str,
embedding: list[float],
field_type: SearchFieldType,
submission_status: SubmissionStatus,
is_available: bool,
) -> Optional[StoreAgentSearch]:
"""Create a new search record with embedding.
Returns:
The created search record or None if creation fails.
"""
try:
async with self.async_session() as session:
search_record = StoreAgentSearch(
storeListingVersionId=store_listing_version_id,
storeListingId=store_listing_id,
fieldName=field_name,
fieldValue=field_value,
embedding=embedding,
fieldType=field_type,
submissionStatus=submission_status,
isAvailable=is_available,
)
session.add(search_record)
await session.commit()
await session.refresh(search_record)
return search_record
except Exception as e:
logger.error(f"Failed to create search record: {e}")
return None
async def batch_create_search_records(
self, records: list[dict]
) -> list[StoreAgentSearch]:
"""Batch create multiple search records"""
async with self.async_session() as session:
search_records = []
for record in records:
search_record = StoreAgentSearch(
storeListingVersionId=record["storeListingVersionId"],
storeListingId=record["storeListingId"],
fieldName=record["fieldName"],
fieldValue=record["fieldValue"],
embedding=record["embedding"],
fieldType=record.get("fieldType", SearchFieldType.NAME),
submissionStatus=record["submissionStatus"],
isAvailable=record["isAvailable"],
)
session.add(search_record)
search_records.append(search_record)
await session.commit()
return search_records
async def search_by_embedding(
self, query_embedding: List[float], limit: int = 30
) -> List[Dict[str, Any]]:
"""
Search for store agents using vector similarity.
Returns the best matching store listings based on embedding similarity.
Args:
query_embedding: The embedding vector to search with
limit: Maximum number of results to return
Returns:
List of matching store agents with similarity scores; an empty
list is returned if the database query fails, so callers can
fall back to text search
"""
if not query_embedding:
logger.warning("Empty embedding provided for search")
return []
try:
async with self.async_session() as session:
# Use parameterized query to prevent SQL injection
query = text(
"""
WITH similarity_scores AS (
SELECT
sas."storeListingId",
MIN(sas.embedding <=> CAST(:embedding AS vector)) AS similarity_score
FROM platform."StoreAgentSearch" sas
WHERE
sas."submissionStatus" = 'APPROVED'
AND sas."isAvailable" = true
GROUP BY sas."storeListingId"
ORDER BY similarity_score
LIMIT :limit
)
SELECT
sa.listing_id as id,
sa.slug,
sa.agent_name,
sa.agent_image,
sa.description,
sa.sub_heading,
sa.featured,
sa.runs,
sa.rating,
sa.creator_username,
sa.creator_avatar,
ss.similarity_score
FROM similarity_scores ss
INNER JOIN platform."StoreAgent" sa
ON sa.listing_id = ss."storeListingId"
ORDER BY ss.similarity_score;
"""
)
# Serialize the embedding as a pgvector text literal for the CAST above
embedding_str = "[" + ",".join(map(str, query_embedding)) + "]"
result = await session.execute(
query, {"embedding": embedding_str, "limit": limit}
)
rows = result.fetchall()
# Convert rows to dictionaries
return [dict(row._mapping) for row in rows]
except Exception as e:
logger.error(f"Vector search query failed: {e}")
# Return empty results instead of propagating error
# This allows fallback to text search
return []
async def get_search_records(
self,
store_listing_version_id: Optional[str] = None,
field_name: Optional[str] = None,
is_available: Optional[bool] = None,
) -> Sequence[StoreAgentSearch]:
"""
Get search records using SQLAlchemy ORM
"""
async with self.async_session() as session:
stmt = select(StoreAgentSearch)
# Build filters
filters = []
if store_listing_version_id:
filters.append(
StoreAgentSearch.storeListingVersionId == store_listing_version_id
)
if field_name:
filters.append(StoreAgentSearch.fieldName == field_name)
if is_available is not None:
filters.append(StoreAgentSearch.isAvailable == is_available)
if filters:
stmt = stmt.where(and_(*filters))
result = await session.execute(stmt)
return result.scalars().all()
async def update_search_embeddings(
self, store_listing_version_id: str, updates: Dict[str, List[float]]
) -> None:
"""Update embeddings for existing search records"""
async with self.async_session() as session:
for field_name, embedding in updates.items():
# For vector updates we still need raw SQL due to pgvector;
# the :named parameters are bound by SQLAlchemy and translated for asyncpg
query = text(
"""
UPDATE platform."StoreAgentSearch"
SET embedding = CAST(:embedding AS vector),
"updatedAt" = CAST(:updated_at AS TIMESTAMPTZ)
WHERE "storeListingVersionId" = :version_id
AND "fieldName" = :field_name
"""
)
embedding_str = "[" + ",".join(map(str, embedding)) + "]"
await session.execute(
query,
{
"embedding": embedding_str,
"updated_at": datetime.now(timezone.utc),
"version_id": store_listing_version_id,
"field_name": field_name,
},
)
await session.commit()
async def delete_search_records(self, store_listing_version_id: str) -> None:
"""Delete all search records for a store listing version using SQLAlchemy ORM"""
async with self.async_session() as session:
# Use SQLAlchemy ORM for deletion
stmt = select(StoreAgentSearch).where(
StoreAgentSearch.storeListingVersionId == store_listing_version_id
)
result = await session.execute(stmt)
records = result.scalars().all()
for record in records:
await session.delete(record)
await session.commit()
async def upsert_search_record(
self,
store_listing_version_id: str,
store_listing_id: str,
field_name: str,
field_value: str,
embedding: List[float],
field_type: SearchFieldType,
submission_status: SubmissionStatus,
is_available: bool,
) -> Optional[StoreAgentSearch]:
"""Upsert a search record (update if exists, create if not).
Returns:
The upserted search record or None if operation fails.
"""
try:
async with self.async_session() as session:
# Check if record exists
stmt = select(StoreAgentSearch).where(
and_(
StoreAgentSearch.storeListingVersionId
== store_listing_version_id,
StoreAgentSearch.fieldName == field_name,
)
)
result = await session.execute(stmt)
existing_record = result.scalar_one_or_none()
if existing_record:
# Update existing record
existing_record.fieldValue = field_value # type: ignore[attr-defined]
existing_record.embedding = embedding # type: ignore[attr-defined]
existing_record.fieldType = field_type # type: ignore[attr-defined]
existing_record.submissionStatus = submission_status # type: ignore[attr-defined]
existing_record.isAvailable = is_available # type: ignore[attr-defined]
existing_record.updatedAt = datetime.now(timezone.utc) # type: ignore[attr-defined]
await session.commit()
await session.refresh(existing_record)
return existing_record
else:
# Create new record
return await self.create_search_record(
store_listing_version_id=store_listing_version_id,
store_listing_id=store_listing_id,
field_name=field_name,
field_value=field_value,
embedding=embedding,
field_type=field_type,
submission_status=submission_status,
is_available=is_available,
)
except Exception as e:
logger.error(f"Failed to upsert search record: {e}")
return None
async def close(self):
"""Close the database connection"""
await self.engine.dispose()
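
A hedged usage sketch of the service above (assumes DATABASE_URL points at a Postgres instance with the pgvector extension and the migration below applied, and that an OpenAI key is configured):

import asyncio
import os

from backend.server.v2.store.embeddings import (
    StoreAgentSearchService,
    create_embedding,
)

async def demo(query: str) -> None:
    service = StoreAgentSearchService(os.environ["DATABASE_URL"])
    try:
        embedding = await create_embedding(query)  # None if OpenAI is unreachable
        if embedding is not None:
            rows = await service.search_by_embedding(embedding, limit=5)
            for row in rows:
                print(row["slug"], row["similarity_score"])
    finally:
        await service.close()

asyncio.run(demo("research assistant"))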

View File

@@ -154,15 +154,26 @@ async def get_agents(
)
try:
agents = await backend.server.v2.store.db.get_store_agents(
featured=featured,
creators=[creator] if creator else None,
sorted_by=sorted_by,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
# Use vector similarity search if we have a search query, otherwise use traditional search
if search_query and search_query.strip():
agents = await backend.server.v2.store.db.search_agents(
search_query=search_query,
featured=featured,
creators=[creator] if creator else None,
category=category,
page=page,
page_size=page_size,
)
else:
agents = await backend.server.v2.store.db.get_store_agents(
featured=featured,
creators=[creator] if creator else None,
sorted_by=sorted_by,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
return agents
except Exception as e:
logger.exception("Failed to retrieve store agents: %s", e)

View File

@@ -234,7 +234,7 @@ def test_get_agents_search(
page_size=20,
),
)
mock_db_call = mocker.patch("backend.server.v2.store.db.get_store_agents")
mock_db_call = mocker.patch("backend.server.v2.store.db.search_agents")
mock_db_call.return_value = mocked_value
response = client.get("/agents?search_query=specific")
assert response.status_code == 200
@@ -246,10 +246,9 @@ def test_get_agents_search(
snapshot.snapshot_dir = "snapshots"
snapshot.assert_match(json.dumps(response.json(), indent=2), "agts_search")
mock_db_call.assert_called_once_with(
search_query="specific",
featured=False,
creators=None,
sorted_by=None,
search_query="specific",
category=None,
page=1,
page_size=20,

View File

@@ -0,0 +1,49 @@
-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- CreateEnum
CREATE TYPE "SearchFieldType" AS ENUM ('NAME', 'DESCRIPTION', 'CATEGORIES', 'SUBHEADING');
-- CreateTable
CREATE TABLE "StoreAgentSearch" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"storeListingVersionId" TEXT NOT NULL,
"storeListingId" TEXT NOT NULL,
"fieldName" TEXT NOT NULL,
"fieldValue" TEXT NOT NULL,
"embedding" vector(1536),
"fieldType" "SearchFieldType" NOT NULL,
"submissionStatus" "SubmissionStatus" NOT NULL,
"isAvailable" BOOLEAN NOT NULL,
CONSTRAINT "StoreAgentSearch_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "StoreAgentSearch_storeListingVersionId_idx" ON "StoreAgentSearch"("storeListingVersionId");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_storeListingId_idx" ON "StoreAgentSearch"("storeListingId");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_fieldName_idx" ON "StoreAgentSearch"("fieldName");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_fieldType_idx" ON "StoreAgentSearch"("fieldType");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_submissionStatus_isAvailable_idx" ON "StoreAgentSearch"("submissionStatus", "isAvailable");
-- CreateIndex
CREATE UNIQUE INDEX "StoreAgentSearch_storeListingVersionId_fieldName_key" ON "StoreAgentSearch"("storeListingVersionId", "fieldName");
-- Create HNSW index for vector similarity search (pgvector)
CREATE INDEX "StoreAgentSearch_embedding_idx" ON "StoreAgentSearch" USING hnsw (embedding vector_cosine_ops);
-- AddForeignKey
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingVersionId_fkey" FOREIGN KEY ("storeListingVersionId") REFERENCES "StoreListingVersion"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingId_fkey" FOREIGN KEY ("storeListingId") REFERENCES "StoreListing"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -311,6 +311,73 @@ files = [
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
]
[[package]]
name = "asyncpg"
version = "0.30.0"
description = "An asyncio PostgreSQL driver"
optional = false
python-versions = ">=3.8.0"
groups = ["main"]
files = [
{file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
{file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"},
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"},
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"},
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"},
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"},
{file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"},
{file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"},
{file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"},
{file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"},
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"},
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"},
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"},
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"},
{file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"},
{file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"},
{file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"},
{file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"},
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"},
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"},
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"},
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"},
{file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"},
{file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"},
{file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"},
{file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"},
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"},
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"},
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"},
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"},
{file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"},
{file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"},
{file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"},
{file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"},
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"},
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"},
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"},
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"},
{file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"},
{file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"},
{file = "asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"},
{file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"},
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"},
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"},
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"},
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"},
{file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"},
{file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"},
{file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"},
]
[package.dependencies]
async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.11.0\""}
[package.extras]
docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"]
gssauth = ["gssapi ; platform_system != \"Windows\"", "sspilib ; platform_system == \"Windows\""]
test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi ; platform_system == \"Linux\"", "k5test ; platform_system == \"Linux\"", "mypy (>=1.8.0,<1.9.0)", "sspilib ; platform_system == \"Windows\"", "uvloop (>=0.15.3) ; platform_system != \"Windows\" and python_version < \"3.14.0\""]
[[package]]
name = "attrs"
version = "25.3.0"
@@ -3600,6 +3667,21 @@ all = ["pbs-installer[download,install]"]
download = ["httpx (>=0.27.0,<1)"]
install = ["zstandard (>=0.21.0)"]
[[package]]
name = "pgvector"
version = "0.4.1"
description = "pgvector support for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362"},
{file = "pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003"},
]
[package.dependencies]
numpy = "*"
[[package]]
name = "pika"
version = "1.3.2"
@@ -7132,4 +7214,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "892daa57d7126d9a9d5308005b07328a39b8c4cd7fe198f9b5ab10f957787c48"
content-hash = "e5e5cd7f11c2e2351c084aec7e4ddec6c154dc7274797af6ee616fa14db5676d"

View File

@@ -79,6 +79,8 @@ firecrawl-py = "^2.16.3"
exa-py = "^1.14.20"
croniter = "^6.0.0"
stagehand = "^0.5.1"
pgvector = "^0.4.1"
asyncpg = "^0.30.0"
[tool.poetry.group.dev.dependencies]
aiohappyeyeballs = "^2.6.1"

View File

@@ -729,6 +729,7 @@ model StoreListing {
// Relations
Versions StoreListingVersion[] @relation("ListingVersions")
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
// Unique index on agentId to ensure only one listing per agent, regardless of number of versions the agent has.
@@unique([agentGraphId])
@@ -786,6 +787,9 @@ model StoreListingVersion {
// Reviews for this specific version
Reviews StoreListingReview[]
// Search index entries for vector search
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
@@unique([storeListingId, version])
@@index([storeListingId, submissionStatus, isAvailable])
@@ -794,6 +798,55 @@ model StoreListingVersion {
@@index([agentGraphId, agentGraphVersion]) // Non-unique index for efficient lookups
}
model StoreAgentSearch {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
// Relation to StoreListingVersion
storeListingVersionId String
StoreListingVersion StoreListingVersion @relation("SearchIndexes", fields: [storeListingVersionId], references: [id], onDelete: Cascade)
// Direct relation to StoreListing for easier joins
storeListingId String
StoreListing StoreListing @relation("SearchIndexes", fields: [storeListingId], references: [id], onDelete: Cascade)
// Searchable fields that will be embedded
fieldName String // The field being indexed (name, description, categories, subheading)
fieldValue String // The actual text content for this field
// Vector embedding for similarity search using pgvector
// text-embedding-3-small produces 1536-dimensional embeddings
embedding Float[] // Stored as vector(1536) via pgvector in the migration; Prisma models it as Float[]
// Metadata for search optimization
fieldType SearchFieldType // Type of field for filtering
// Denormalized fields for filtering without joins
submissionStatus SubmissionStatus // Copy from StoreListingVersion for filtering approved content
isAvailable Boolean // Copy from StoreListingVersion for filtering available content
// Unique constraint to prevent duplicate indexing of the same field
@@unique([storeListingVersionId, fieldName])
// Indexes for performance
@@index([storeListingVersionId])
@@index([storeListingId])
@@index([fieldName])
@@index([fieldType])
@@index([submissionStatus, isAvailable]) // For filtering approved and available content
// Note: pgvector index will be created manually in migration for vector similarity search
}
// Enum for searchable field types
enum SearchFieldType {
NAME // Agent name
DESCRIPTION // Agent description
CATEGORIES // Agent categories
SUBHEADING // Agent subheading
}
model StoreListingReview {
id String @id @default(uuid())
createdAt DateTime @default(now())