Compare commits

...

6 Commits

Author SHA1 Message Date
Swifty 04460e43f3 fixed test 2025-09-02 23:09:59 +02:00
Swifty 0d93315767 Merge branch 'dev' into swiftyos/secrt-1566-enhance-marketplace-search 2025-09-02 23:06:08 +02:00
Swifty 7528920b5d tidied up the implementation 2025-09-02 23:05:30 +02:00
Swifty 911b119296 delete expired test suite 2025-09-02 22:58:49 +02:00
Swifty df50c7352d add vector search in store 2025-09-02 22:56:14 +02:00
Swifty 7db3db6b2e added enhanced search functionality 2025-09-02 15:56:13 +02:00
8 changed files with 864 additions and 16 deletions

View File

@@ -1,4 +1,5 @@
import logging
import os
from datetime import datetime, timezone
import fastapi
@@ -23,11 +24,29 @@ from backend.data.notifications import (
NotificationEventModel,
)
from backend.notifications.notifications import queue_notification_async
from backend.server.v2.store.embeddings import SearchFieldType, StoreAgentSearchService
from backend.server.v2.store.embeddings import (
SubmissionStatus as SearchSubmissionStatus,
)
from backend.server.v2.store.embeddings import create_embedding
from backend.util.settings import Settings
logger = logging.getLogger(__name__)
settings = Settings()
# Search service is created lazily with the database URL (see get_search_service)
search_service: StoreAgentSearchService | None = None
def get_search_service() -> StoreAgentSearchService:
"""Get or create the search service instance"""
global search_service
if search_service is None:
# Get database URL from environment variable (same as Prisma uses)
db_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
search_service = StoreAgentSearchService(db_url)
return search_service
# Constants for default admin values
DEFAULT_ADMIN_NAME = "AutoGPT Admin"
@@ -53,6 +72,113 @@ def sanitize_query(query: str | None) -> str | None:
)
async def search_store_agents(
search_query: str,
) -> backend.server.v2.store.model.StoreAgentsResponse:
"""
Search for store agents using embeddings with SQLAlchemy.
Falls back to text search if embedding creation fails.
"""
try:
# Try to create embedding for semantic search
query_embedding = await create_embedding(search_query)
if query_embedding is None:
# Fallback to text-based search if embedding fails
logger.warning(
f"Failed to create embedding for query: {search_query}. "
"Falling back to text search."
)
return await get_store_agents(
search_query=search_query,
page=1,
page_size=30,
)
# Use SQLAlchemy service for vector search
service = get_search_service()
results = await service.search_by_embedding(query_embedding, limit=30)
except Exception as e:
logger.error(f"Error during vector search: {e}. Falling back to text search.")
# Fallback to regular text search on any error
return await get_store_agents(
search_query=search_query,
page=1,
page_size=30,
)
# Convert raw results to StoreAgent models
agents = []
for row in results:
try:
# Handle agent_image - it could be a list or a single string
agent_image = row.get("agent_image", "")
if isinstance(agent_image, list) and agent_image:
agent_image = str(agent_image[0])
elif not agent_image:
agent_image = ""
else:
agent_image = str(agent_image)
agent = backend.server.v2.store.model.StoreAgent(
slug=row.get("slug", ""),
agent_name=row.get("agent_name", ""),
agent_image=agent_image,
creator=row.get("creator_username", "Needs Profile"),
creator_avatar=row.get("creator_avatar", ""),
sub_heading=row.get("sub_heading", ""),
description=row.get("description", ""),
runs=row.get("runs", 0),
rating=(
float(row.get("rating", 0.0))
if row.get("rating") is not None
else 0.0
),
)
agents.append(agent)
except Exception as e:
logger.error(f"Error creating StoreAgent from search result: {e}")
continue
return backend.server.v2.store.model.StoreAgentsResponse(
agents=agents,
pagination=backend.server.v2.store.model.Pagination(
current_page=1,
total_items=len(agents),
total_pages=1,
page_size=30,  # matches the vector search limit above
),
)
async def search_agents(
search_query: str,
featured: bool | None = None,
creators: list[str] | None = None,
category: str | None = None,
page: int = 1,
page_size: int = 20,
) -> backend.server.v2.store.model.StoreAgentsResponse:
"""
Search for store agents using embeddings with optional filters.
Falls back to text search if embedding service is unavailable.
"""
try:
# Try vector search first
return await search_store_agents(search_query)
except Exception as e:
logger.error(f"Vector search failed: {e}. Using text search fallback.")
# Fallback to text search with filters
return await get_store_agents(
featured=featured or False,
creators=creators,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
async def get_store_agents(
featured: bool = False,
creators: list[str] | None = None,
@@ -124,7 +250,6 @@ async def get_store_agents(
store_agents.append(store_agent)
except Exception as e:
# Skip this agent if there was an error
# You could log the error here if needed
logger.error(
f"Error parsing Store agent when getting store agents from db: {e}"
)
@@ -191,7 +316,7 @@ async def get_store_agent_details(
agent_video=agent.agent_video or "",
agent_image=agent.agent_image,
creator=agent.creator_username,
creator_avatar=agent.creator_avatar,
creator_avatar=agent.creator_avatar or "",
sub_heading=agent.sub_heading,
description=agent.description,
categories=agent.categories,
@@ -264,7 +389,7 @@ async def get_store_agent_by_version_id(
agent_video=agent.agent_video or "",
agent_image=agent.agent_image,
creator=agent.creator_username,
creator_avatar=agent.creator_avatar,
creator_avatar=agent.creator_avatar or "",
sub_heading=agent.sub_heading,
description=agent.description,
categories=agent.categories,
@@ -1358,6 +1483,64 @@ async def review_store_submission(
f"Failed to update store listing version {store_listing_version_id}"
)
# Create embeddings if approved
if is_approved and submission.StoreListing:
try:
service = get_search_service()
# Create embeddings for the approved listing
fields_to_embed = [
("name", submission.name, SearchFieldType.NAME),
(
"description",
submission.description,
SearchFieldType.DESCRIPTION,
),
]
if submission.subHeading:
fields_to_embed.append(
(
"subHeading",
submission.subHeading,
SearchFieldType.SUBHEADING,
)
)
if submission.categories:
categories_text = ", ".join(submission.categories)
fields_to_embed.append(
("categories", categories_text, SearchFieldType.CATEGORIES)
)
for field_name, field_value, field_type in fields_to_embed:
# Create embedding asynchronously
embedding = await create_embedding(field_value)
# Only store if embedding was created successfully
if embedding:
await service.upsert_search_record(
store_listing_version_id=store_listing_version_id,
store_listing_id=submission.StoreListing.id,
field_name=field_name,
field_value=field_value,
embedding=embedding,
field_type=field_type,
submission_status=SearchSubmissionStatus.APPROVED,
is_available=submission.isAvailable,
)
else:
logger.warning(
f"Failed to create embedding for {field_name} in listing {store_listing_version_id}. "
"Search indexing skipped for this field."
)
except Exception as e:
# Log error but don't fail the approval process
logger.error(
f"Error creating search embeddings for listing {store_listing_version_id}: {e}. "
"Approval will continue without search indexing."
)
# Send email notification to the agent creator
if store_listing_version.AgentGraph and store_listing_version.AgentGraph.User:
agent_creator = store_listing_version.AgentGraph.User
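
For context, a minimal sketch of the fallback chain this file introduces (a hypothetical driver script, not part of the diff; module path as patched in the tests below):

import asyncio

from backend.server.v2.store.db import search_store_agents

async def demo() -> None:
    # If create_embedding() returns None (e.g. no OpenAI key) or the vector
    # query raises, this transparently falls back to get_store_agents().
    response = await search_store_agents("summarize long documents")
    for agent in response.agents:
        print(agent.slug, agent.agent_name, agent.rating)

if __name__ == "__main__":
    asyncio.run(demo())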

View File

@@ -0,0 +1,469 @@
"""Store search functionality with embeddings and pgvector using SQLAlchemy"""
import logging
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence
from uuid import uuid4
from openai import AsyncOpenAI, OpenAIError
from pgvector.sqlalchemy import Vector
from sqlalchemy import Boolean, Column, DateTime
from sqlalchemy import Enum as SQLEnum
from sqlalchemy import Index, String, UniqueConstraint, and_, select, text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.ext.declarative import declarative_base
from backend.util.settings import Settings
logger = logging.getLogger(__name__)
settings = Settings()
# Initialize async OpenAI client with proper configuration
_openai_client: Optional[AsyncOpenAI] = None
def get_openai_client() -> AsyncOpenAI:
"""Get or create the async OpenAI client with proper configuration."""
global _openai_client
if _openai_client is None:
api_key = settings.secrets.openai_api_key
if not api_key:
logger.warning(
"OpenAI API key not configured. Vector search will use fallback text search."
)
raise ValueError(
"OpenAI API key is not configured. Please set OPENAI_API_KEY in environment."
)
_openai_client = AsyncOpenAI(api_key=api_key)
return _openai_client
async def create_embedding(text: str) -> Optional[list[float]]:
"""Create an embedding for the given text using OpenAI's API.
Args:
text: The text to create an embedding for
Returns:
A list of floats representing the embedding, or None if creation fails
"""
try:
client = get_openai_client()
response = await client.embeddings.create(
input=text,
model="text-embedding-3-small",
)
return response.data[0].embedding
except ValueError as e:
# API key not configured
logger.error(f"OpenAI configuration error: {e}")
return None
except OpenAIError as e:
# Handle specific OpenAI errors
logger.error(f"OpenAI API error creating embedding: {e}")
return None
except Exception as e:
# Handle unexpected errors
logger.error(f"Unexpected error creating embedding: {e}")
return None
# SQLAlchemy models
Base = declarative_base()
class SubmissionStatus(str, Enum):
PENDING = "PENDING"
APPROVED = "APPROVED"
REJECTED = "REJECTED"
class SearchFieldType(str, Enum):
NAME = "NAME"
DESCRIPTION = "DESCRIPTION"
CATEGORIES = "CATEGORIES"
SUBHEADING = "SUBHEADING"
class StoreAgentSearch(Base):
__tablename__ = "StoreAgentSearch"
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
createdAt = Column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updatedAt = Column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)
# Relations (foreign keys exist in DB but not modeled here)
storeListingVersionId = Column(String, nullable=False)
storeListingId = Column(String, nullable=False)
# Searchable fields
fieldName = Column(String, nullable=False)
fieldValue = Column(String, nullable=False)
# Vector embedding for similarity search
# text-embedding-3-small produces 1536-dimensional embeddings
embedding = Column(Vector(1536), nullable=False)
# Metadata
fieldType = Column(
SQLEnum(SearchFieldType, name="SearchFieldType", schema="platform"),
nullable=False,
)
submissionStatus = Column(
SQLEnum(SubmissionStatus, name="SubmissionStatus", schema="platform"),
nullable=False,
)
isAvailable = Column(Boolean, nullable=False)
# Constraints and schema
__table_args__ = (
UniqueConstraint(
"storeListingVersionId",
"fieldName",
name="_store_listing_version_field_unique",
),
Index("ix_store_agent_search_listing_version", "storeListingVersionId"),
Index("ix_store_agent_search_listing", "storeListingId"),
Index("ix_store_agent_search_field_name", "fieldName"),
Index("ix_store_agent_search_field_type", "fieldType"),
Index(
"ix_store_agent_search_status_available", "submissionStatus", "isAvailable"
),
{"schema": "platform"}, # Specify the schema
)
class StoreAgentSearchService:
"""Service class for Store Agent Search operations using SQLAlchemy"""
def __init__(self, database_url: str):
"""Initialize the search service with async database connection"""
# Parse the URL to handle schema and other params separately
from urllib.parse import parse_qs, urlparse, urlunparse
parsed = urlparse(database_url)
query_params = parse_qs(parsed.query)
# Extract schema if present
schema = query_params.pop("schema", ["public"])[0]
# Remove connect_timeout from query params (will be handled in connect_args)
connect_timeout = query_params.pop("connect_timeout", [None])[0]
# Rebuild query string without schema and connect_timeout
new_query = "&".join([f"{k}={v[0]}" for k, v in query_params.items()])
# Rebuild URL without schema and connect_timeout parameters
clean_url = urlunparse(
(
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
new_query,
parsed.fragment,
)
)
# Convert to async URL for asyncpg
if clean_url.startswith("postgresql://"):
clean_url = clean_url.replace("postgresql://", "postgresql+asyncpg://")
elif clean_url.startswith("postgres://"):
clean_url = clean_url.replace("postgres://", "postgresql+asyncpg://")
# Build connect_args
connect_args: dict[str, Any] = {"server_settings": {"search_path": schema}}
# Add timeout if present (asyncpg uses 'timeout' not 'connect_timeout')
if connect_timeout:
connect_args["timeout"] = float(connect_timeout)
# Create engine with schema in connect_args
self.engine = create_async_engine(
clean_url,
echo=False, # Set to True for debugging SQL queries
future=True,
connect_args=connect_args,
)
self.async_session = async_sessionmaker(
self.engine, class_=AsyncSession, expire_on_commit=False
)
async def create_search_record(
self,
store_listing_version_id: str,
store_listing_id: str,
field_name: str,
field_value: str,
embedding: list[float],
field_type: SearchFieldType,
submission_status: SubmissionStatus,
is_available: bool,
) -> Optional[StoreAgentSearch]:
"""Create a new search record with embedding.
Returns:
The created search record or None if creation fails.
"""
try:
async with self.async_session() as session:
search_record = StoreAgentSearch(
storeListingVersionId=store_listing_version_id,
storeListingId=store_listing_id,
fieldName=field_name,
fieldValue=field_value,
embedding=embedding,
fieldType=field_type,
submissionStatus=submission_status,
isAvailable=is_available,
)
session.add(search_record)
await session.commit()
await session.refresh(search_record)
return search_record
except Exception as e:
logger.error(f"Failed to create search record: {e}")
return None
async def batch_create_search_records(
self, records: list[dict]
) -> list[StoreAgentSearch]:
"""Batch create multiple search records"""
async with self.async_session() as session:
search_records = []
for record in records:
search_record = StoreAgentSearch(
storeListingVersionId=record["storeListingVersionId"],
storeListingId=record["storeListingId"],
fieldName=record["fieldName"],
fieldValue=record["fieldValue"],
embedding=record["embedding"],
fieldType=record.get("fieldType", SearchFieldType.NAME),
submissionStatus=record["submissionStatus"],
isAvailable=record["isAvailable"],
)
session.add(search_record)
search_records.append(search_record)
await session.commit()
return search_records
async def search_by_embedding(
self, query_embedding: List[float], limit: int = 30
) -> List[Dict[str, Any]]:
"""
Search for store agents using vector similarity.
Returns the best matching store listings based on embedding similarity.
Args:
query_embedding: The embedding vector to search with
limit: Maximum number of results to return
Returns:
List of matching store agents with similarity scores; an empty
list is returned if the database query fails, so callers can
fall back to text search
"""
if not query_embedding:
logger.warning("Empty embedding provided for search")
return []
try:
async with self.async_session() as session:
# Use parameterized query to prevent SQL injection
query = text(
"""
WITH similarity_scores AS (
SELECT
sas."storeListingId",
MIN(sas.embedding <=> CAST(:embedding AS vector)) AS similarity_score
FROM platform."StoreAgentSearch" sas
WHERE
sas."submissionStatus" = 'APPROVED'
AND sas."isAvailable" = true
GROUP BY sas."storeListingId"
ORDER BY similarity_score
LIMIT :limit
)
SELECT
sa.listing_id as id,
sa.slug,
sa.agent_name,
sa.agent_image,
sa.description,
sa.sub_heading,
sa.featured,
sa.runs,
sa.rating,
sa.creator_username,
sa.creator_avatar,
ss.similarity_score
FROM similarity_scores ss
INNER JOIN platform."StoreAgent" sa
ON sa.listing_id = ss."storeListingId"
ORDER BY ss.similarity_score;
"""
)
# Serialize the embedding as a pgvector text literal for the CAST above
embedding_str = "[" + ",".join(map(str, query_embedding)) + "]"
result = await session.execute(
query, {"embedding": embedding_str, "limit": limit}
)
rows = result.fetchall()
# Convert rows to dictionaries
return [dict(row._mapping) for row in rows]
except Exception as e:
logger.error(f"Vector search query failed: {e}")
# Return empty results instead of propagating error
# This allows fallback to text search
return []
async def get_search_records(
self,
store_listing_version_id: Optional[str] = None,
field_name: Optional[str] = None,
is_available: Optional[bool] = None,
) -> Sequence[StoreAgentSearch]:
"""
Get search records using SQLAlchemy ORM
"""
async with self.async_session() as session:
stmt = select(StoreAgentSearch)
# Build filters
filters = []
if store_listing_version_id:
filters.append(
StoreAgentSearch.storeListingVersionId == store_listing_version_id
)
if field_name:
filters.append(StoreAgentSearch.fieldName == field_name)
if is_available is not None:
filters.append(StoreAgentSearch.isAvailable == is_available)
if filters:
stmt = stmt.where(and_(*filters))
result = await session.execute(stmt)
return result.scalars().all()
async def update_search_embeddings(
self, store_listing_version_id: str, updates: Dict[str, List[float]]
) -> None:
"""Update embeddings for existing search records"""
async with self.async_session() as session:
for field_name, embedding in updates.items():
# For vector updates we still need raw SQL due to pgvector;
# the :named parameters are bound by SQLAlchemy and translated for asyncpg
query = text(
"""
UPDATE platform."StoreAgentSearch"
SET embedding = CAST(:embedding AS vector),
"updatedAt" = CAST(:updated_at AS TIMESTAMPTZ)
WHERE "storeListingVersionId" = :version_id
AND "fieldName" = :field_name
"""
)
embedding_str = "[" + ",".join(map(str, embedding)) + "]"
await session.execute(
query,
{
"embedding": embedding_str,
"updated_at": datetime.now(timezone.utc),
"version_id": store_listing_version_id,
"field_name": field_name,
},
)
await session.commit()
async def delete_search_records(self, store_listing_version_id: str) -> None:
"""Delete all search records for a store listing version using SQLAlchemy ORM"""
async with self.async_session() as session:
# Use SQLAlchemy ORM for deletion
stmt = select(StoreAgentSearch).where(
StoreAgentSearch.storeListingVersionId == store_listing_version_id
)
result = await session.execute(stmt)
records = result.scalars().all()
for record in records:
await session.delete(record)
await session.commit()
async def upsert_search_record(
self,
store_listing_version_id: str,
store_listing_id: str,
field_name: str,
field_value: str,
embedding: List[float],
field_type: SearchFieldType,
submission_status: SubmissionStatus,
is_available: bool,
) -> Optional[StoreAgentSearch]:
"""Upsert a search record (update if exists, create if not).
Returns:
The upserted search record or None if operation fails.
"""
try:
async with self.async_session() as session:
# Check if record exists
stmt = select(StoreAgentSearch).where(
and_(
StoreAgentSearch.storeListingVersionId
== store_listing_version_id,
StoreAgentSearch.fieldName == field_name,
)
)
result = await session.execute(stmt)
existing_record = result.scalar_one_or_none()
if existing_record:
# Update existing record
existing_record.fieldValue = field_value # type: ignore[attr-defined]
existing_record.embedding = embedding # type: ignore[attr-defined]
existing_record.fieldType = field_type # type: ignore[attr-defined]
existing_record.submissionStatus = submission_status # type: ignore[attr-defined]
existing_record.isAvailable = is_available # type: ignore[attr-defined]
existing_record.updatedAt = datetime.now(timezone.utc) # type: ignore[attr-defined]
await session.commit()
await session.refresh(existing_record)
return existing_record
else:
# Create new record
return await self.create_search_record(
store_listing_version_id=store_listing_version_id,
store_listing_id=store_listing_id,
field_name=field_name,
field_value=field_value,
embedding=embedding,
field_type=field_type,
submission_status=submission_status,
is_available=is_available,
)
except Exception as e:
logger.error(f"Failed to upsert search record: {e}")
return None
async def close(self):
"""Close the database connection"""
await self.engine.dispose()
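
A hedged usage sketch of the service above (assumes DATABASE_URL points at a Postgres instance with the pgvector extension and the migration below applied, and that an OpenAI key is configured):

import asyncio
import os

from backend.server.v2.store.embeddings import (
    StoreAgentSearchService,
    create_embedding,
)

async def demo(query: str) -> None:
    service = StoreAgentSearchService(os.environ["DATABASE_URL"])
    try:
        embedding = await create_embedding(query)  # None if OpenAI is unreachable
        if embedding is not None:
            rows = await service.search_by_embedding(embedding, limit=5)
            for row in rows:
                print(row["slug"], row["similarity_score"])
    finally:
        await service.close()

asyncio.run(demo("research assistant"))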

View File

@@ -154,15 +154,26 @@ async def get_agents(
)
try:
agents = await backend.server.v2.store.db.get_store_agents(
featured=featured,
creators=[creator] if creator else None,
sorted_by=sorted_by,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
# Use vector similarity search if we have a search query, otherwise use traditional search
if search_query and search_query.strip():
agents = await backend.server.v2.store.db.search_agents(
search_query=search_query,
featured=featured,
creators=[creator] if creator else None,
category=category,
page=page,
page_size=page_size,
)
else:
agents = await backend.server.v2.store.db.get_store_agents(
featured=featured,
creators=[creator] if creator else None,
sorted_by=sorted_by,
search_query=search_query,
category=category,
page=page,
page_size=page_size,
)
return agents
except Exception as e:
logger.exception("Failed to retrieve store agents: %s", e)

View File

@@ -234,7 +234,7 @@ def test_get_agents_search(
page_size=20,
),
)
mock_db_call = mocker.patch("backend.server.v2.store.db.get_store_agents")
mock_db_call = mocker.patch("backend.server.v2.store.db.search_agents")
mock_db_call.return_value = mocked_value
response = client.get("/agents?search_query=specific")
assert response.status_code == 200
@@ -246,10 +246,9 @@ def test_get_agents_search(
snapshot.snapshot_dir = "snapshots"
snapshot.assert_match(json.dumps(response.json(), indent=2), "agts_search")
mock_db_call.assert_called_once_with(
search_query="specific",
featured=False,
creators=None,
sorted_by=None,
search_query="specific",
category=None,
page=1,
page_size=20,

View File

@@ -0,0 +1,49 @@
-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- CreateEnum
CREATE TYPE "SearchFieldType" AS ENUM ('NAME', 'DESCRIPTION', 'CATEGORIES', 'SUBHEADING');
-- CreateTable
CREATE TABLE "StoreAgentSearch" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"storeListingVersionId" TEXT NOT NULL,
"storeListingId" TEXT NOT NULL,
"fieldName" TEXT NOT NULL,
"fieldValue" TEXT NOT NULL,
"embedding" vector(1536),
"fieldType" "SearchFieldType" NOT NULL,
"submissionStatus" "SubmissionStatus" NOT NULL,
"isAvailable" BOOLEAN NOT NULL,
CONSTRAINT "StoreAgentSearch_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "StoreAgentSearch_storeListingVersionId_idx" ON "StoreAgentSearch"("storeListingVersionId");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_storeListingId_idx" ON "StoreAgentSearch"("storeListingId");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_fieldName_idx" ON "StoreAgentSearch"("fieldName");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_fieldType_idx" ON "StoreAgentSearch"("fieldType");
-- CreateIndex
CREATE INDEX "StoreAgentSearch_submissionStatus_isAvailable_idx" ON "StoreAgentSearch"("submissionStatus", "isAvailable");
-- CreateIndex
CREATE UNIQUE INDEX "StoreAgentSearch_storeListingVersionId_fieldName_key" ON "StoreAgentSearch"("storeListingVersionId", "fieldName");
-- Create HNSW index for vector similarity search (pgvector)
CREATE INDEX "StoreAgentSearch_embedding_idx" ON "StoreAgentSearch" USING hnsw (embedding vector_cosine_ops);
-- AddForeignKey
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingVersionId_fkey" FOREIGN KEY ("storeListingVersionId") REFERENCES "StoreListingVersion"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "StoreAgentSearch" ADD CONSTRAINT "StoreAgentSearch_storeListingId_fkey" FOREIGN KEY ("storeListingId") REFERENCES "StoreListing"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -311,6 +311,73 @@ files = [
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
]
[[package]]
name = "asyncpg"
version = "0.30.0"
description = "An asyncio PostgreSQL driver"
optional = false
python-versions = ">=3.8.0"
groups = ["main"]
files = [
{file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
{file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"},
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"},
{file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"},
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"},
{file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"},
{file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"},
{file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"},
{file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"},
{file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"},
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"},
{file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"},
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"},
{file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"},
{file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"},
{file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"},
{file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"},
{file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"},
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"},
{file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"},
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"},
{file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"},
{file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"},
{file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"},
{file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"},
{file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"},
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"},
{file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"},
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"},
{file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"},
{file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"},
{file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"},
{file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"},
{file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"},
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"},
{file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"},
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"},
{file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"},
{file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"},
{file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"},
{file = "asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"},
{file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"},
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"},
{file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"},
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"},
{file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"},
{file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"},
{file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"},
{file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"},
]
[package.dependencies]
async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.11.0\""}
[package.extras]
docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"]
gssauth = ["gssapi ; platform_system != \"Windows\"", "sspilib ; platform_system == \"Windows\""]
test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi ; platform_system == \"Linux\"", "k5test ; platform_system == \"Linux\"", "mypy (>=1.8.0,<1.9.0)", "sspilib ; platform_system == \"Windows\"", "uvloop (>=0.15.3) ; platform_system != \"Windows\" and python_version < \"3.14.0\""]
[[package]]
name = "attrs"
version = "25.3.0"
@@ -3600,6 +3667,21 @@ all = ["pbs-installer[download,install]"]
download = ["httpx (>=0.27.0,<1)"]
install = ["zstandard (>=0.21.0)"]
[[package]]
name = "pgvector"
version = "0.4.1"
description = "pgvector support for Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362"},
{file = "pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003"},
]
[package.dependencies]
numpy = "*"
[[package]]
name = "pika"
version = "1.3.2"
@@ -7132,4 +7214,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "892daa57d7126d9a9d5308005b07328a39b8c4cd7fe198f9b5ab10f957787c48"
content-hash = "e5e5cd7f11c2e2351c084aec7e4ddec6c154dc7274797af6ee616fa14db5676d"

View File

@@ -79,6 +79,8 @@ firecrawl-py = "^2.16.3"
exa-py = "^1.14.20"
croniter = "^6.0.0"
stagehand = "^0.5.1"
pgvector = "^0.4.1"
asyncpg = "^0.30.0"
[tool.poetry.group.dev.dependencies]
aiohappyeyeballs = "^2.6.1"

View File

@@ -729,6 +729,7 @@ model StoreListing {
// Relations
Versions StoreListingVersion[] @relation("ListingVersions")
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
// Unique index on agentId to ensure only one listing per agent, regardless of number of versions the agent has.
@@unique([agentGraphId])
@@ -786,6 +787,9 @@ model StoreListingVersion {
// Reviews for this specific version
Reviews StoreListingReview[]
// Search index entries for vector search
SearchIndexes StoreAgentSearch[] @relation("SearchIndexes")
@@unique([storeListingId, version])
@@index([storeListingId, submissionStatus, isAvailable])
@@ -794,6 +798,55 @@ model StoreListingVersion {
@@index([agentGraphId, agentGraphVersion]) // Non-unique index for efficient lookups
}
model StoreAgentSearch {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
// Relation to StoreListingVersion
storeListingVersionId String
StoreListingVersion StoreListingVersion @relation("SearchIndexes", fields: [storeListingVersionId], references: [id], onDelete: Cascade)
// Direct relation to StoreListing for easier joins
storeListingId String
StoreListing StoreListing @relation("SearchIndexes", fields: [storeListingId], references: [id], onDelete: Cascade)
// Searchable fields that will be embedded
fieldName String // The field being indexed (name, description, categories, subheading)
fieldValue String // The actual text content for this field
// Vector embedding for similarity search using pgvector
// text-embedding-3-small produces 1536-dimensional embeddings
embedding Float[] // Stored as vector(1536) via pgvector in the migration; Prisma models it as Float[]
// Metadata for search optimization
fieldType SearchFieldType // Type of field for filtering
// Denormalized fields for filtering without joins
submissionStatus SubmissionStatus // Copy from StoreListingVersion for filtering approved content
isAvailable Boolean // Copy from StoreListingVersion for filtering available content
// Unique constraint to prevent duplicate indexing of the same field
@@unique([storeListingVersionId, fieldName])
// Indexes for performance
@@index([storeListingVersionId])
@@index([storeListingId])
@@index([fieldName])
@@index([fieldType])
@@index([submissionStatus, isAvailable]) // For filtering approved and available content
// Note: pgvector index will be created manually in migration for vector similarity search
}
// Enum for searchable field types
enum SearchFieldType {
NAME // Agent name
DESCRIPTION // Agent description
CATEGORIES // Agent categories
SUBHEADING // Agent subheading
}
model StoreListingReview {
id String @id @default(uuid())
createdAt DateTime @default(now())