Compare commits

..

1 Commits

Author SHA1 Message Date
Swifty
0cbf7db58c extracted frontend changes out of the hackathon/copilot branch 2026-01-07 09:28:39 +01:00
479 changed files with 14717 additions and 23900 deletions

View File

@@ -1,37 +0,0 @@
{
"worktreeCopyPatterns": [
".env*",
".vscode/**",
".auth/**",
".claude/**",
"autogpt_platform/.env*",
"autogpt_platform/backend/.env*",
"autogpt_platform/frontend/.env*",
"autogpt_platform/frontend/.auth/**",
"autogpt_platform/db/docker/.env*"
],
"worktreeCopyIgnores": [
"**/node_modules/**",
"**/dist/**",
"**/.git/**",
"**/Thumbs.db",
"**/.DS_Store",
"**/.next/**",
"**/__pycache__/**",
"**/.ruff_cache/**",
"**/.pytest_cache/**",
"**/*.pyc",
"**/playwright-report/**",
"**/logs/**",
"**/site/**"
],
"worktreePathTemplate": "$BASE_PATH.worktree",
"postCreateCmd": [
"cd autogpt_platform/autogpt_libs && poetry install",
"cd autogpt_platform/backend && poetry install && poetry run prisma generate",
"cd autogpt_platform/frontend && pnpm install",
"cd docs && pip install -r requirements.txt"
],
"terminalCommand": "code .",
"deleteBranchWithWorktree": false
}

View File

@@ -16,7 +16,6 @@
!autogpt_platform/backend/poetry.lock
!autogpt_platform/backend/README.md
!autogpt_platform/backend/.env
!autogpt_platform/backend/gen_prisma_types_stub.py
# Platform - Market
!autogpt_platform/market/market/

View File

@@ -74,7 +74,7 @@ jobs:
- name: Generate Prisma Client
working-directory: autogpt_platform/backend
run: poetry run prisma generate && poetry run gen-prisma-stub
run: poetry run prisma generate
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js

View File

@@ -90,7 +90,7 @@ jobs:
- name: Generate Prisma Client
working-directory: autogpt_platform/backend
run: poetry run prisma generate && poetry run gen-prisma-stub
run: poetry run prisma generate
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js

View File

@@ -72,7 +72,7 @@ jobs:
- name: Generate Prisma Client
working-directory: autogpt_platform/backend
run: poetry run prisma generate && poetry run gen-prisma-stub
run: poetry run prisma generate
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js
@@ -108,16 +108,6 @@ jobs:
# run: pnpm playwright install --with-deps chromium
# Docker setup for development environment
- name: Free up disk space
run: |
# Remove large unused tools to free disk space for Docker builds
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker system prune -af
df -h
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

View File

@@ -134,7 +134,7 @@ jobs:
run: poetry install
- name: Generate Prisma Client
run: poetry run prisma generate && poetry run gen-prisma-stub
run: poetry run prisma generate
- id: supabase
name: Start Supabase
@@ -176,7 +176,7 @@ jobs:
}
- name: Run Database Migrations
run: poetry run prisma migrate deploy
run: poetry run prisma migrate dev --name updates
env:
DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}

View File

@@ -11,7 +11,6 @@ on:
- ".github/workflows/platform-frontend-ci.yml"
- "autogpt_platform/frontend/**"
merge_group:
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'merge_group' && format('merge-queue-{0}', github.ref) || format('{0}-{1}', github.ref, github.event.pull_request.number || github.sha) }}
@@ -152,14 +151,6 @@ jobs:
run: |
cp ../.env.default ../.env
- name: Copy backend .env and set OpenAI API key
run: |
cp ../backend/.env.default ../backend/.env
echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
env:
# Used by E2E test data script to generate embeddings for approved store agents
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -235,25 +226,13 @@ jobs:
- name: Run Playwright tests
run: pnpm test:no-build
continue-on-error: false
- name: Upload Playwright report
if: always()
- name: Upload Playwright artifacts
if: failure()
uses: actions/upload-artifact@v4
with:
name: playwright-report
path: playwright-report
if-no-files-found: ignore
retention-days: 3
- name: Upload Playwright test results
if: always()
uses: actions/upload-artifact@v4
with:
name: playwright-test-results
path: test-results
if-no-files-found: ignore
retention-days: 3
- name: Print Final Docker Compose logs
if: always()

View File

@@ -6,14 +6,12 @@ start-core:
# Stop core services
stop-core:
docker compose stop
docker compose stop deps
reset-db:
docker compose stop db
rm -rf db/docker/volumes/db/data
cd backend && poetry run prisma migrate deploy
cd backend && poetry run prisma generate
cd backend && poetry run gen-prisma-stub
# View logs for core services
logs-core:
@@ -35,7 +33,6 @@ init-env:
migrate:
cd backend && poetry run prisma migrate deploy
cd backend && poetry run prisma generate
cd backend && poetry run gen-prisma-stub
run-backend:
cd backend && poetry run app
@@ -61,4 +58,4 @@ help:
@echo " run-backend - Run the backend FastAPI server"
@echo " run-frontend - Run the frontend Next.js development server"
@echo " test-data - Run the test data creator"
@echo " load-store-agents - Load store agents from agents/ folder into test database"
@echo " load-store-agents - Load store agents from agents/ folder into test database"

View File

@@ -58,13 +58,6 @@ V0_API_KEY=
OPEN_ROUTER_API_KEY=
NVIDIA_API_KEY=
# Langfuse Prompt Management
# Used for managing the CoPilot system prompt externally
# Get credentials from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_PUBLIC_KEY=
LANGFUSE_SECRET_KEY=
LANGFUSE_HOST=https://cloud.langfuse.com
# OAuth Credentials
# For the OAuth callback URL, use <your_frontend_url>/auth/integrations/oauth_callback,
# e.g. http://localhost:3000/auth/integrations/oauth_callback

View File

@@ -18,4 +18,3 @@ load-tests/results/
load-tests/*.json
load-tests/*.log
load-tests/node_modules/*
migrations/*/rollback*.sql

View File

@@ -48,8 +48,7 @@ RUN poetry install --no-ansi --no-root
# Generate Prisma client
COPY autogpt_platform/backend/schema.prisma ./
COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/partial_types.py
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
RUN poetry run prisma generate && poetry run gen-prisma-stub
RUN poetry run prisma generate
FROM debian:13-slim AS server_dependencies

View File

@@ -70,7 +70,7 @@ class RunAgentRequest(BaseModel):
)
def _create_ephemeral_session(user_id: str) -> ChatSession:
def _create_ephemeral_session(user_id: str | None) -> ChatSession:
"""Create an ephemeral session for stateless API requests."""
return ChatSession.new(user_id)

View File

@@ -1,6 +1,7 @@
"""Configuration management for chat system."""
import os
from pathlib import Path
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings
@@ -11,11 +12,7 @@ class ChatConfig(BaseSettings):
# OpenAI API Configuration
model: str = Field(
default="anthropic/claude-opus-4.5", description="Default model to use"
)
title_model: str = Field(
default="openai/gpt-4o-mini",
description="Model to use for generating session titles (should be fast/cheap)",
default="qwen/qwen3-235b-a22b-2507", description="Default model to use"
)
api_key: str | None = Field(default=None, description="OpenAI API key")
base_url: str | None = Field(
@@ -26,6 +23,12 @@ class ChatConfig(BaseSettings):
# Session TTL Configuration - 12 hours
session_ttl: int = Field(default=43200, description="Session TTL in seconds")
# System Prompt Configuration
system_prompt_path: str = Field(
default="prompts/chat_system.md",
description="Path to system prompt file relative to chat module",
)
# Streaming Configuration
max_context_messages: int = Field(
default=50, ge=1, le=200, description="Maximum context messages"
@@ -38,13 +41,6 @@ class ChatConfig(BaseSettings):
default=3, description="Maximum number of agent schedules"
)
# Langfuse Prompt Management Configuration
# Note: Langfuse credentials are in Settings().secrets (settings.py)
langfuse_prompt_name: str = Field(
default="CoPilot Prompt",
description="Name of the prompt in Langfuse to fetch",
)
@field_validator("api_key", mode="before")
@classmethod
def get_api_key(cls, v):
@@ -76,11 +72,43 @@ class ChatConfig(BaseSettings):
v = "https://openrouter.ai/api/v1"
return v
# Prompt paths for different contexts
PROMPT_PATHS: dict[str, str] = {
"default": "prompts/chat_system.md",
"onboarding": "prompts/onboarding_system.md",
}
def get_system_prompt(self, **template_vars) -> str:
"""Load and render the system prompt from file.
Args:
**template_vars: Variables to substitute in the template
Returns:
Rendered system prompt string
"""
# Get the path relative to this module
module_dir = Path(__file__).parent
prompt_path = module_dir / self.system_prompt_path
# Check for .j2 extension first (Jinja2 template)
j2_path = Path(str(prompt_path) + ".j2")
if j2_path.exists():
try:
from jinja2 import Template
template = Template(j2_path.read_text())
return template.render(**template_vars)
except ImportError:
# Jinja2 not installed, fall back to reading as plain text
return j2_path.read_text()
# Check for markdown file
if prompt_path.exists():
content = prompt_path.read_text()
# Simple variable substitution if Jinja2 is not available
for key, value in template_vars.items():
placeholder = f"{{{key}}}"
content = content.replace(placeholder, str(value))
return content
raise FileNotFoundError(f"System prompt file not found: {prompt_path}")
class Config:
"""Pydantic config."""

View File

@@ -1,249 +0,0 @@
"""Database operations for chat sessions."""
import asyncio
import logging
from datetime import UTC, datetime
from typing import Any, cast
from prisma.models import ChatMessage as PrismaChatMessage
from prisma.models import ChatSession as PrismaChatSession
from prisma.types import (
ChatMessageCreateInput,
ChatSessionCreateInput,
ChatSessionUpdateInput,
ChatSessionWhereInput,
)
from backend.data.db import transaction
from backend.util.json import SafeJson
logger = logging.getLogger(__name__)
async def get_chat_session(session_id: str) -> PrismaChatSession | None:
"""Get a chat session by ID from the database."""
session = await PrismaChatSession.prisma().find_unique(
where={"id": session_id},
include={"Messages": True},
)
if session and session.Messages:
# Sort messages by sequence in Python - Prisma Python client doesn't support
# order_by in include clauses (unlike Prisma JS), so we sort after fetching
session.Messages.sort(key=lambda m: m.sequence)
return session
async def create_chat_session(
session_id: str,
user_id: str,
) -> PrismaChatSession:
"""Create a new chat session in the database."""
data = ChatSessionCreateInput(
id=session_id,
userId=user_id,
credentials=SafeJson({}),
successfulAgentRuns=SafeJson({}),
successfulAgentSchedules=SafeJson({}),
)
return await PrismaChatSession.prisma().create(
data=data,
include={"Messages": True},
)
async def update_chat_session(
session_id: str,
credentials: dict[str, Any] | None = None,
successful_agent_runs: dict[str, Any] | None = None,
successful_agent_schedules: dict[str, Any] | None = None,
total_prompt_tokens: int | None = None,
total_completion_tokens: int | None = None,
title: str | None = None,
) -> PrismaChatSession | None:
"""Update a chat session's metadata."""
data: ChatSessionUpdateInput = {"updatedAt": datetime.now(UTC)}
if credentials is not None:
data["credentials"] = SafeJson(credentials)
if successful_agent_runs is not None:
data["successfulAgentRuns"] = SafeJson(successful_agent_runs)
if successful_agent_schedules is not None:
data["successfulAgentSchedules"] = SafeJson(successful_agent_schedules)
if total_prompt_tokens is not None:
data["totalPromptTokens"] = total_prompt_tokens
if total_completion_tokens is not None:
data["totalCompletionTokens"] = total_completion_tokens
if title is not None:
data["title"] = title
session = await PrismaChatSession.prisma().update(
where={"id": session_id},
data=data,
include={"Messages": True},
)
if session and session.Messages:
# Sort in Python - Prisma Python doesn't support order_by in include clauses
session.Messages.sort(key=lambda m: m.sequence)
return session
async def add_chat_message(
session_id: str,
role: str,
sequence: int,
content: str | None = None,
name: str | None = None,
tool_call_id: str | None = None,
refusal: str | None = None,
tool_calls: list[dict[str, Any]] | None = None,
function_call: dict[str, Any] | None = None,
) -> PrismaChatMessage:
"""Add a message to a chat session."""
# Build input dict dynamically rather than using ChatMessageCreateInput directly
# because Prisma's TypedDict validation rejects optional fields set to None.
# We only include fields that have values, then cast at the end.
data: dict[str, Any] = {
"Session": {"connect": {"id": session_id}},
"role": role,
"sequence": sequence,
}
# Add optional string fields
if content is not None:
data["content"] = content
if name is not None:
data["name"] = name
if tool_call_id is not None:
data["toolCallId"] = tool_call_id
if refusal is not None:
data["refusal"] = refusal
# Add optional JSON fields only when they have values
if tool_calls is not None:
data["toolCalls"] = SafeJson(tool_calls)
if function_call is not None:
data["functionCall"] = SafeJson(function_call)
# Run message create and session timestamp update in parallel for lower latency
_, message = await asyncio.gather(
PrismaChatSession.prisma().update(
where={"id": session_id},
data={"updatedAt": datetime.now(UTC)},
),
PrismaChatMessage.prisma().create(data=cast(ChatMessageCreateInput, data)),
)
return message
async def add_chat_messages_batch(
session_id: str,
messages: list[dict[str, Any]],
start_sequence: int,
) -> list[PrismaChatMessage]:
"""Add multiple messages to a chat session in a batch.
Uses a transaction for atomicity - if any message creation fails,
the entire batch is rolled back.
"""
if not messages:
return []
created_messages = []
async with transaction() as tx:
for i, msg in enumerate(messages):
# Build input dict dynamically rather than using ChatMessageCreateInput
# directly because Prisma's TypedDict validation rejects optional fields
# set to None. We only include fields that have values, then cast.
data: dict[str, Any] = {
"Session": {"connect": {"id": session_id}},
"role": msg["role"],
"sequence": start_sequence + i,
}
# Add optional string fields
if msg.get("content") is not None:
data["content"] = msg["content"]
if msg.get("name") is not None:
data["name"] = msg["name"]
if msg.get("tool_call_id") is not None:
data["toolCallId"] = msg["tool_call_id"]
if msg.get("refusal") is not None:
data["refusal"] = msg["refusal"]
# Add optional JSON fields only when they have values
if msg.get("tool_calls") is not None:
data["toolCalls"] = SafeJson(msg["tool_calls"])
if msg.get("function_call") is not None:
data["functionCall"] = SafeJson(msg["function_call"])
created = await PrismaChatMessage.prisma(tx).create(
data=cast(ChatMessageCreateInput, data)
)
created_messages.append(created)
# Update session's updatedAt timestamp within the same transaction.
# Note: Token usage (total_prompt_tokens, total_completion_tokens) is updated
# separately via update_chat_session() after streaming completes.
await PrismaChatSession.prisma(tx).update(
where={"id": session_id},
data={"updatedAt": datetime.now(UTC)},
)
return created_messages
async def get_user_chat_sessions(
user_id: str,
limit: int = 50,
offset: int = 0,
) -> list[PrismaChatSession]:
"""Get chat sessions for a user, ordered by most recent."""
return await PrismaChatSession.prisma().find_many(
where={"userId": user_id},
order={"updatedAt": "desc"},
take=limit,
skip=offset,
)
async def get_user_session_count(user_id: str) -> int:
"""Get the total number of chat sessions for a user."""
return await PrismaChatSession.prisma().count(where={"userId": user_id})
async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
"""Delete a chat session and all its messages.
Args:
session_id: The session ID to delete.
user_id: If provided, validates that the session belongs to this user
before deletion. This prevents unauthorized deletion of other
users' sessions.
Returns:
True if deleted successfully, False otherwise.
"""
try:
# Build typed where clause with optional user_id validation
where_clause: ChatSessionWhereInput = {"id": session_id}
if user_id is not None:
where_clause["userId"] = user_id
result = await PrismaChatSession.prisma().delete_many(where=where_clause)
if result == 0:
logger.warning(
f"No session deleted for {session_id} "
f"(user_id validation: {user_id is not None})"
)
return False
return True
except Exception as e:
logger.error(f"Failed to delete chat session {session_id}: {e}")
return False
async def get_chat_session_message_count(session_id: str) -> int:
"""Get the number of messages in a chat session."""
count = await PrismaChatMessage.prisma().count(where={"sessionId": session_id})
return count

View File

@@ -1,9 +1,6 @@
import asyncio
import logging
import uuid
from datetime import UTC, datetime
from typing import Any
from weakref import WeakValueDictionary
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
@@ -19,63 +16,17 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
ChatCompletionMessageToolCallParam,
Function,
)
from prisma.models import ChatMessage as PrismaChatMessage
from prisma.models import ChatSession as PrismaChatSession
from pydantic import BaseModel
from backend.data.redis_client import get_redis_async
from backend.util import json
from backend.util.exceptions import DatabaseError, RedisError
from backend.util.exceptions import RedisError
from . import db as chat_db
from .config import ChatConfig
logger = logging.getLogger(__name__)
config = ChatConfig()
def _parse_json_field(value: str | dict | list | None, default: Any = None) -> Any:
"""Parse a JSON field that may be stored as string or already parsed."""
if value is None:
return default
if isinstance(value, str):
return json.loads(value)
return value
# Redis cache key prefix for chat sessions
CHAT_SESSION_CACHE_PREFIX = "chat:session:"
def _get_session_cache_key(session_id: str) -> str:
"""Get the Redis cache key for a chat session."""
return f"{CHAT_SESSION_CACHE_PREFIX}{session_id}"
# Session-level locks to prevent race conditions during concurrent upserts.
# Uses WeakValueDictionary to automatically garbage collect locks when no longer referenced,
# preventing unbounded memory growth while maintaining lock semantics for active sessions.
# Invalidation: Locks are auto-removed by GC when no coroutine holds a reference (after
# async with lock: completes). Explicit cleanup also occurs in delete_chat_session().
_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
_session_locks_mutex = asyncio.Lock()
async def _get_session_lock(session_id: str) -> asyncio.Lock:
"""Get or create a lock for a specific session to prevent concurrent upserts.
Uses WeakValueDictionary for automatic cleanup: locks are garbage collected
when no coroutine holds a reference to them, preventing memory leaks from
unbounded growth of session locks.
"""
async with _session_locks_mutex:
lock = _session_locks.get(session_id)
if lock is None:
lock = asyncio.Lock()
_session_locks[session_id] = lock
return lock
class ChatMessage(BaseModel):
role: str
content: str | None = None
@@ -94,8 +45,7 @@ class Usage(BaseModel):
class ChatSession(BaseModel):
session_id: str
user_id: str
title: str | None = None
user_id: str | None
messages: list[ChatMessage]
usage: list[Usage]
credentials: dict[str, dict] = {} # Map of provider -> credential metadata
@@ -105,11 +55,10 @@ class ChatSession(BaseModel):
successful_agent_schedules: dict[str, int] = {}
@staticmethod
def new(user_id: str) -> "ChatSession":
def new(user_id: str | None) -> "ChatSession":
return ChatSession(
session_id=str(uuid.uuid4()),
user_id=user_id,
title=None,
messages=[],
usage=[],
credentials={},
@@ -117,61 +66,6 @@ class ChatSession(BaseModel):
updated_at=datetime.now(UTC),
)
@staticmethod
def from_db(
prisma_session: PrismaChatSession,
prisma_messages: list[PrismaChatMessage] | None = None,
) -> "ChatSession":
"""Convert Prisma models to Pydantic ChatSession."""
messages = []
if prisma_messages:
for msg in prisma_messages:
messages.append(
ChatMessage(
role=msg.role,
content=msg.content,
name=msg.name,
tool_call_id=msg.toolCallId,
refusal=msg.refusal,
tool_calls=_parse_json_field(msg.toolCalls),
function_call=_parse_json_field(msg.functionCall),
)
)
# Parse JSON fields from Prisma
credentials = _parse_json_field(prisma_session.credentials, default={})
successful_agent_runs = _parse_json_field(
prisma_session.successfulAgentRuns, default={}
)
successful_agent_schedules = _parse_json_field(
prisma_session.successfulAgentSchedules, default={}
)
# Calculate usage from token counts
usage = []
if prisma_session.totalPromptTokens or prisma_session.totalCompletionTokens:
usage.append(
Usage(
prompt_tokens=prisma_session.totalPromptTokens or 0,
completion_tokens=prisma_session.totalCompletionTokens or 0,
total_tokens=(prisma_session.totalPromptTokens or 0)
+ (prisma_session.totalCompletionTokens or 0),
)
)
return ChatSession(
session_id=prisma_session.id,
user_id=prisma_session.userId,
title=prisma_session.title,
messages=messages,
usage=usage,
credentials=credentials,
started_at=prisma_session.createdAt,
updated_at=prisma_session.updatedAt,
successful_agent_runs=successful_agent_runs,
successful_agent_schedules=successful_agent_schedules,
)
def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
messages = []
for message in self.messages:
@@ -261,337 +155,50 @@ class ChatSession(BaseModel):
return messages
async def _get_session_from_cache(session_id: str) -> ChatSession | None:
"""Get a chat session from Redis cache."""
redis_key = _get_session_cache_key(session_id)
async def get_chat_session(
session_id: str,
user_id: str | None,
) -> ChatSession | None:
"""Get a chat session by ID."""
redis_key = f"chat:session:{session_id}"
async_redis = await get_redis_async()
raw_session: bytes | None = await async_redis.get(redis_key)
if raw_session is None:
logger.warning(f"Session {session_id} not found in Redis")
return None
try:
session = ChatSession.model_validate_json(raw_session)
logger.info(
f"Loading session {session_id} from cache: "
f"message_count={len(session.messages)}, "
f"roles={[m.role for m in session.messages]}"
)
return session
except Exception as e:
logger.error(f"Failed to deserialize session {session_id}: {e}", exc_info=True)
raise RedisError(f"Corrupted session data for {session_id}") from e
async def _cache_session(session: ChatSession) -> None:
"""Cache a chat session in Redis."""
redis_key = _get_session_cache_key(session.session_id)
async_redis = await get_redis_async()
await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())
async def _get_session_from_db(session_id: str) -> ChatSession | None:
"""Get a chat session from the database."""
prisma_session = await chat_db.get_chat_session(session_id)
if not prisma_session:
return None
messages = prisma_session.Messages
logger.info(
f"Loading session {session_id} from DB: "
f"has_messages={messages is not None}, "
f"message_count={len(messages) if messages else 0}, "
f"roles={[m.role for m in messages] if messages else []}"
)
return ChatSession.from_db(prisma_session, messages)
async def _save_session_to_db(
session: ChatSession, existing_message_count: int
) -> None:
"""Save or update a chat session in the database."""
# Check if session exists in DB
existing = await chat_db.get_chat_session(session.session_id)
if not existing:
# Create new session
await chat_db.create_chat_session(
session_id=session.session_id,
user_id=session.user_id,
)
existing_message_count = 0
# Calculate total tokens from usage
total_prompt = sum(u.prompt_tokens for u in session.usage)
total_completion = sum(u.completion_tokens for u in session.usage)
# Update session metadata
await chat_db.update_chat_session(
session_id=session.session_id,
credentials=session.credentials,
successful_agent_runs=session.successful_agent_runs,
successful_agent_schedules=session.successful_agent_schedules,
total_prompt_tokens=total_prompt,
total_completion_tokens=total_completion,
)
# Add new messages (only those after existing count)
new_messages = session.messages[existing_message_count:]
if new_messages:
messages_data = []
for msg in new_messages:
messages_data.append(
{
"role": msg.role,
"content": msg.content,
"name": msg.name,
"tool_call_id": msg.tool_call_id,
"refusal": msg.refusal,
"tool_calls": msg.tool_calls,
"function_call": msg.function_call,
}
)
logger.info(
f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
f"roles={[m['role'] for m in messages_data]}, "
f"start_sequence={existing_message_count}"
)
await chat_db.add_chat_messages_batch(
session_id=session.session_id,
messages=messages_data,
start_sequence=existing_message_count,
)
async def get_chat_session(
session_id: str,
user_id: str | None = None,
) -> ChatSession | None:
"""Get a chat session by ID.
Checks Redis cache first, falls back to database if not found.
Caches database results back to Redis.
Args:
session_id: The session ID to fetch.
user_id: If provided, validates that the session belongs to this user.
If None, ownership is not validated (admin/system access).
"""
# Try cache first
try:
session = await _get_session_from_cache(session_id)
if session:
# Verify user ownership if user_id was provided for validation
if user_id is not None and session.user_id != user_id:
logger.warning(
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
)
return None
return session
except RedisError:
logger.warning(f"Cache error for session {session_id}, trying database")
except Exception as e:
logger.warning(f"Unexpected cache error for session {session_id}: {e}")
# Fall back to database
logger.info(f"Session {session_id} not in cache, checking database")
session = await _get_session_from_db(session_id)
if session is None:
logger.warning(f"Session {session_id} not found in cache or database")
return None
# Verify user ownership if user_id was provided for validation
if user_id is not None and session.user_id != user_id:
if session.user_id is not None and session.user_id != user_id:
logger.warning(
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
)
return None
# Cache the session from DB
try:
await _cache_session(session)
logger.info(f"Cached session {session_id} from database")
except Exception as e:
logger.warning(f"Failed to cache session {session_id}: {e}")
return session
async def upsert_chat_session(
session: ChatSession,
) -> ChatSession:
"""Update a chat session in both cache and database.
"""Update a chat session with the given messages."""
Uses session-level locking to prevent race conditions when concurrent
operations (e.g., background title update and main stream handler)
attempt to upsert the same session simultaneously.
redis_key = f"chat:session:{session.session_id}"
Raises:
DatabaseError: If the database write fails. The cache is still updated
as a best-effort optimization, but the error is propagated to ensure
callers are aware of the persistence failure.
RedisError: If the cache write fails (after successful DB write).
"""
# Acquire session-specific lock to prevent concurrent upserts
lock = await _get_session_lock(session.session_id)
async_redis = await get_redis_async()
resp = await async_redis.setex(
redis_key, config.session_ttl, session.model_dump_json()
)
async with lock:
# Get existing message count from DB for incremental saves
existing_message_count = await chat_db.get_chat_session_message_count(
session.session_id
if not resp:
raise RedisError(
f"Failed to persist chat session {session.session_id} to Redis: {resp}"
)
db_error: Exception | None = None
# Save to database (primary storage)
try:
await _save_session_to_db(session, existing_message_count)
except Exception as e:
logger.error(
f"Failed to save session {session.session_id} to database: {e}"
)
db_error = e
# Save to cache (best-effort, even if DB failed)
try:
await _cache_session(session)
except Exception as e:
# If DB succeeded but cache failed, raise cache error
if db_error is None:
raise RedisError(
f"Failed to persist chat session {session.session_id} to Redis: {e}"
) from e
# If both failed, log cache error but raise DB error (more critical)
logger.warning(
f"Cache write also failed for session {session.session_id}: {e}"
)
# Propagate DB error after attempting cache (prevents data loss)
if db_error is not None:
raise DatabaseError(
f"Failed to persist chat session {session.session_id} to database"
) from db_error
return session
async def create_chat_session(user_id: str) -> ChatSession:
"""Create a new chat session and persist it.
Raises:
DatabaseError: If the database write fails. We fail fast to ensure
callers never receive a non-persisted session that only exists
in cache (which would be lost when the cache expires).
"""
session = ChatSession.new(user_id)
# Create in database first - fail fast if this fails
try:
await chat_db.create_chat_session(
session_id=session.session_id,
user_id=user_id,
)
except Exception as e:
logger.error(f"Failed to create session {session.session_id} in database: {e}")
raise DatabaseError(
f"Failed to create chat session {session.session_id} in database"
) from e
# Cache the session (best-effort optimization, DB is source of truth)
try:
await _cache_session(session)
except Exception as e:
logger.warning(f"Failed to cache new session {session.session_id}: {e}")
return session
async def get_user_sessions(
user_id: str,
limit: int = 50,
offset: int = 0,
) -> tuple[list[ChatSession], int]:
"""Get chat sessions for a user from the database with total count.
Returns:
A tuple of (sessions, total_count) where total_count is the overall
number of sessions for the user (not just the current page).
"""
prisma_sessions = await chat_db.get_user_chat_sessions(user_id, limit, offset)
total_count = await chat_db.get_user_session_count(user_id)
sessions = []
for prisma_session in prisma_sessions:
# Convert without messages for listing (lighter weight)
sessions.append(ChatSession.from_db(prisma_session, None))
return sessions, total_count
async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
"""Delete a chat session from both cache and database.
Args:
session_id: The session ID to delete.
user_id: If provided, validates that the session belongs to this user
before deletion. This prevents unauthorized deletion.
Returns:
True if deleted successfully, False otherwise.
"""
# Delete from database first (with optional user_id validation)
# This confirms ownership before invalidating cache
deleted = await chat_db.delete_chat_session(session_id, user_id)
if not deleted:
return False
# Only invalidate cache and clean up lock after DB confirms deletion
try:
redis_key = _get_session_cache_key(session_id)
async_redis = await get_redis_async()
await async_redis.delete(redis_key)
except Exception as e:
logger.warning(f"Failed to delete session {session_id} from cache: {e}")
# Clean up session lock (belt-and-suspenders with WeakValueDictionary)
async with _session_locks_mutex:
_session_locks.pop(session_id, None)
return True
async def update_session_title(session_id: str, title: str) -> bool:
"""Update only the title of a chat session.
This is a lightweight operation that doesn't touch messages, avoiding
race conditions with concurrent message updates. Use this for background
title generation instead of upsert_chat_session.
Args:
session_id: The session ID to update.
title: The new title to set.
Returns:
True if updated successfully, False otherwise.
"""
try:
result = await chat_db.update_chat_session(session_id=session_id, title=title)
if result is None:
logger.warning(f"Session {session_id} not found for title update")
return False
# Invalidate cache so next fetch gets updated title
try:
redis_key = _get_session_cache_key(session_id)
async_redis = await get_redis_async()
await async_redis.delete(redis_key)
except Exception as e:
logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
return True
except Exception as e:
logger.error(f"Failed to update title for session {session_id}: {e}")
return False

View File

@@ -43,9 +43,9 @@ async def test_chatsession_serialization_deserialization():
@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_redis_storage(setup_test_user, test_user_id):
async def test_chatsession_redis_storage():
s = ChatSession.new(user_id=test_user_id)
s = ChatSession.new(user_id=None)
s.messages = messages
s = await upsert_chat_session(s)
@@ -59,61 +59,12 @@ async def test_chatsession_redis_storage(setup_test_user, test_user_id):
@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_redis_storage_user_id_mismatch(
setup_test_user, test_user_id
):
async def test_chatsession_redis_storage_user_id_mismatch():
s = ChatSession.new(user_id=test_user_id)
s = ChatSession.new(user_id="abc123")
s.messages = messages
s = await upsert_chat_session(s)
s2 = await get_chat_session(s.session_id, "different_user_id")
s2 = await get_chat_session(s.session_id, None)
assert s2 is None
@pytest.mark.asyncio(loop_scope="session")
async def test_chatsession_db_storage(setup_test_user, test_user_id):
"""Test that messages are correctly saved to and loaded from DB (not cache)."""
from backend.data.redis_client import get_redis_async
# Create session with messages including assistant message
s = ChatSession.new(user_id=test_user_id)
s.messages = messages # Contains user, assistant, and tool messages
assert s.session_id is not None, "Session id is not set"
# Upsert to save to both cache and DB
s = await upsert_chat_session(s)
# Clear the Redis cache to force DB load
redis_key = f"chat:session:{s.session_id}"
async_redis = await get_redis_async()
await async_redis.delete(redis_key)
# Load from DB (cache was cleared)
s2 = await get_chat_session(
session_id=s.session_id,
user_id=s.user_id,
)
assert s2 is not None, "Session not found after loading from DB"
assert len(s2.messages) == len(
s.messages
), f"Message count mismatch: expected {len(s.messages)}, got {len(s2.messages)}"
# Verify all roles are present
roles = [m.role for m in s2.messages]
assert "user" in roles, f"User message missing. Roles found: {roles}"
assert "assistant" in roles, f"Assistant message missing. Roles found: {roles}"
assert "tool" in roles, f"Tool message missing. Roles found: {roles}"
# Verify message content
for orig, loaded in zip(s.messages, s2.messages):
assert orig.role == loaded.role, f"Role mismatch: {orig.role} != {loaded.role}"
assert (
orig.content == loaded.content
), f"Content mismatch for {orig.role}: {orig.content} != {loaded.content}"
if orig.tool_calls:
assert (
loaded.tool_calls is not None
), f"Tool calls missing for {orig.role} message"
assert len(orig.tool_calls) == len(loaded.tool_calls)

View File

@@ -0,0 +1,104 @@
You are Otto, an AI Co-Pilot and Forward Deployed Engineer for AutoGPT, an AI Business Automation tool. Your mission is to help users quickly find and set up AutoGPT agents to solve their business problems.
Here are the functions available to you:
<functions>
1. **find_agent** - Search for agents that solve the user's problem
2. **run_agent** - Run or schedule an agent (automatically handles setup)
</functions>
## HOW run_agent WORKS
The `run_agent` tool automatically handles the entire setup flow:
1. **First call** (no inputs) → Returns available inputs so user can decide what values to use
2. **Credentials check** → If missing, UI automatically prompts user to add them (you don't need to mention this)
3. **Execution** → Runs when you provide `inputs` OR set `use_defaults=true`
Parameters:
- `username_agent_slug` (required): Agent identifier like "creator/agent-name"
- `inputs`: Object with input values for the agent
- `use_defaults`: Set to `true` to run with default values (only after user confirms)
- `schedule_name` + `cron`: For scheduled execution
## WORKFLOW
1. **find_agent** - Search for agents that solve the user's problem
2. **run_agent** (first call, no inputs) - Get available inputs for the agent
3. **Ask user** what values they want to use OR if they want to use defaults
4. **run_agent** (second call) - Either with `inputs={...}` or `use_defaults=true`
## YOUR APPROACH
**Step 1: Understand the Problem**
- Ask maximum 1-2 targeted questions
- Focus on: What business problem are they solving?
- Move quickly to searching for solutions
**Step 2: Find Agents**
- Use `find_agent` immediately with relevant keywords
- Suggest the best option from search results
- Explain briefly how it solves their problem
**Step 3: Get Agent Inputs**
- Call `run_agent(username_agent_slug="creator/agent-name")` without inputs
- This returns the available inputs (required and optional)
- Present these to the user and ask what values they want
**Step 4: Run with User's Choice**
- If user provides values: `run_agent(username_agent_slug="...", inputs={...})`
- If user says "use defaults": `run_agent(username_agent_slug="...", use_defaults=true)`
- On success, share the agent link with the user
**For Scheduled Execution:**
- Add `schedule_name` and `cron` parameters
- Example: `run_agent(username_agent_slug="...", inputs={...}, schedule_name="Daily Report", cron="0 9 * * *")`
## FUNCTION CALL FORMAT
To call a function, use this exact format:
`<function_call>function_name(parameter="value")</function_call>`
Examples:
- `<function_call>find_agent(query="social media automation")</function_call>`
- `<function_call>run_agent(username_agent_slug="creator/agent-name")</function_call>` (get inputs)
- `<function_call>run_agent(username_agent_slug="creator/agent-name", inputs={"topic": "AI news"})</function_call>`
- `<function_call>run_agent(username_agent_slug="creator/agent-name", use_defaults=true)</function_call>`
## KEY RULES
**What You DON'T Do:**
- Don't help with login (frontend handles this)
- Don't mention or explain credentials to the user (frontend handles this automatically)
- Don't run agents without first showing available inputs to the user
- Don't use `use_defaults=true` without user explicitly confirming
- Don't write responses longer than 3 sentences
**What You DO:**
- Always call run_agent first without inputs to see what's available
- Ask user what values they want OR if they want to use defaults
- Keep all responses to maximum 3 sentences
- Include the agent link in your response after successful execution
**Error Handling:**
- Authentication needed → "Please sign in via the interface"
- Credentials missing → The UI handles this automatically. Focus on asking the user about input values instead.
## RESPONSE STRUCTURE
Before responding, wrap your analysis in <thinking> tags to systematically plan your approach:
- Extract the key business problem or request from the user's message
- Determine what function call (if any) you need to make next
- Plan your response to stay under the 3-sentence maximum
Example interaction:
```
User: "Run the AI news agent for me"
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news")</function_call>
[Tool returns: Agent accepts inputs - Required: topic. Optional: num_articles (default: 5)]
Otto: The AI News agent needs a topic. What topic would you like news about, or should I use the defaults?
User: "Use defaults"
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news", use_defaults=true)</function_call>
```
KEEP ANSWERS TO 3 SENTENCES

View File

@@ -1,10 +1,3 @@
"""
Response models for Vercel AI SDK UI Stream Protocol.
This module implements the AI SDK UI Stream Protocol (v1) for streaming chat responses.
See: https://ai-sdk.dev/docs/ai-sdk-ui/stream-protocol
"""
from enum import Enum
from typing import Any
@@ -12,133 +5,97 @@ from pydantic import BaseModel, Field
class ResponseType(str, Enum):
"""Types of streaming responses following AI SDK protocol."""
"""Types of streaming responses."""
# Message lifecycle
START = "start"
FINISH = "finish"
# Text streaming
TEXT_START = "text-start"
TEXT_DELTA = "text-delta"
TEXT_END = "text-end"
# Tool interaction
TOOL_INPUT_START = "tool-input-start"
TOOL_INPUT_AVAILABLE = "tool-input-available"
TOOL_OUTPUT_AVAILABLE = "tool-output-available"
# Other
TEXT_CHUNK = "text_chunk"
TEXT_ENDED = "text_ended"
TOOL_CALL = "tool_call"
TOOL_CALL_START = "tool_call_start"
TOOL_RESPONSE = "tool_response"
ERROR = "error"
USAGE = "usage"
STREAM_END = "stream_end"
class StreamBaseResponse(BaseModel):
"""Base response model for all streaming responses."""
type: ResponseType
timestamp: str | None = None
def to_sse(self) -> str:
"""Convert to SSE format."""
return f"data: {self.model_dump_json()}\n\n"
# ========== Message Lifecycle ==========
class StreamTextChunk(StreamBaseResponse):
"""Streaming text content from the assistant."""
type: ResponseType = ResponseType.TEXT_CHUNK
content: str = Field(..., description="Text content chunk")
class StreamStart(StreamBaseResponse):
"""Start of a new message."""
type: ResponseType = ResponseType.START
messageId: str = Field(..., description="Unique message ID")
class StreamFinish(StreamBaseResponse):
"""End of message/stream."""
type: ResponseType = ResponseType.FINISH
# ========== Text Streaming ==========
class StreamTextStart(StreamBaseResponse):
"""Start of a text block."""
type: ResponseType = ResponseType.TEXT_START
id: str = Field(..., description="Text block ID")
class StreamTextDelta(StreamBaseResponse):
"""Streaming text content delta."""
type: ResponseType = ResponseType.TEXT_DELTA
id: str = Field(..., description="Text block ID")
delta: str = Field(..., description="Text content delta")
class StreamTextEnd(StreamBaseResponse):
"""End of a text block."""
type: ResponseType = ResponseType.TEXT_END
id: str = Field(..., description="Text block ID")
# ========== Tool Interaction ==========
class StreamToolInputStart(StreamBaseResponse):
class StreamToolCallStart(StreamBaseResponse):
"""Tool call started notification."""
type: ResponseType = ResponseType.TOOL_INPUT_START
toolCallId: str = Field(..., description="Unique tool call ID")
toolName: str = Field(..., description="Name of the tool being called")
type: ResponseType = ResponseType.TOOL_CALL_START
tool_name: str = Field(..., description="Name of the tool that was executed")
tool_id: str = Field(..., description="Unique tool call ID")
class StreamToolInputAvailable(StreamBaseResponse):
"""Tool input is ready for execution."""
class StreamToolCall(StreamBaseResponse):
"""Tool invocation notification."""
type: ResponseType = ResponseType.TOOL_INPUT_AVAILABLE
toolCallId: str = Field(..., description="Unique tool call ID")
toolName: str = Field(..., description="Name of the tool being called")
input: dict[str, Any] = Field(
default_factory=dict, description="Tool input arguments"
type: ResponseType = ResponseType.TOOL_CALL
tool_id: str = Field(..., description="Unique tool call ID")
tool_name: str = Field(..., description="Name of the tool being called")
arguments: dict[str, Any] = Field(
default_factory=dict, description="Tool arguments"
)
class StreamToolOutputAvailable(StreamBaseResponse):
class StreamToolExecutionResult(StreamBaseResponse):
"""Tool execution result."""
type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
toolCallId: str = Field(..., description="Tool call ID this responds to")
output: str | dict[str, Any] = Field(..., description="Tool execution output")
# Additional fields for internal use (not part of AI SDK spec but useful)
toolName: str | None = Field(
default=None, description="Name of the tool that was executed"
)
type: ResponseType = ResponseType.TOOL_RESPONSE
tool_id: str = Field(..., description="Tool call ID this responds to")
tool_name: str = Field(..., description="Name of the tool that was executed")
result: str | dict[str, Any] = Field(..., description="Tool execution result")
success: bool = Field(
default=True, description="Whether the tool execution succeeded"
)
# ========== Other ==========
class StreamUsage(StreamBaseResponse):
"""Token usage statistics."""
type: ResponseType = ResponseType.USAGE
promptTokens: int = Field(..., description="Number of prompt tokens")
completionTokens: int = Field(..., description="Number of completion tokens")
totalTokens: int = Field(..., description="Total number of tokens")
prompt_tokens: int
completion_tokens: int
total_tokens: int
class StreamError(StreamBaseResponse):
"""Error response."""
type: ResponseType = ResponseType.ERROR
errorText: str = Field(..., description="Error message text")
message: str = Field(..., description="Error message")
code: str | None = Field(default=None, description="Error code")
details: dict[str, Any] | None = Field(
default=None, description="Additional error details"
)
class StreamTextEnded(StreamBaseResponse):
"""Text streaming completed marker."""
type: ResponseType = ResponseType.TEXT_ENDED
class StreamEnd(StreamBaseResponse):
"""End of stream marker."""
type: ResponseType = ResponseType.STREAM_END
summary: dict[str, Any] | None = Field(
default=None, description="Stream summary statistics"
)

View File

@@ -13,25 +13,12 @@ from backend.util.exceptions import NotFoundError
from . import service as chat_service
from .config import ChatConfig
from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
config = ChatConfig()
logger = logging.getLogger(__name__)
async def _validate_and_get_session(
session_id: str,
user_id: str | None,
) -> ChatSession:
"""Validate session exists and belongs to user."""
session = await get_chat_session(session_id, user_id)
if not session:
raise NotFoundError(f"Session {session_id} not found.")
return session
router = APIRouter(
tags=["chat"],
)
@@ -39,14 +26,6 @@ router = APIRouter(
# ========== Request/Response Models ==========
class StreamChatRequest(BaseModel):
"""Request model for streaming chat with optional context."""
message: str
is_user_message: bool = True
context: dict[str, str] | None = None # {url: str, content: str}
class CreateSessionResponse(BaseModel):
"""Response model containing information on a newly created chat session."""
@@ -65,77 +44,22 @@ class SessionDetailResponse(BaseModel):
messages: list[dict]
class SessionSummaryResponse(BaseModel):
"""Response model for a session summary (without messages)."""
id: str
created_at: str
updated_at: str
title: str | None = None
class ListSessionsResponse(BaseModel):
"""Response model for listing chat sessions."""
sessions: list[SessionSummaryResponse]
total: int
# ========== Routes ==========
@router.get(
"/sessions",
dependencies=[Security(auth.requires_user)],
)
async def list_sessions(
user_id: Annotated[str, Security(auth.get_user_id)],
limit: int = Query(default=50, ge=1, le=100),
offset: int = Query(default=0, ge=0),
) -> ListSessionsResponse:
"""
List chat sessions for the authenticated user.
Returns a paginated list of chat sessions belonging to the current user,
ordered by most recently updated.
Args:
user_id: The authenticated user's ID.
limit: Maximum number of sessions to return (1-100).
offset: Number of sessions to skip for pagination.
Returns:
ListSessionsResponse: List of session summaries and total count.
"""
sessions, total_count = await get_user_sessions(user_id, limit, offset)
return ListSessionsResponse(
sessions=[
SessionSummaryResponse(
id=session.session_id,
created_at=session.started_at.isoformat(),
updated_at=session.updated_at.isoformat(),
title=session.title,
)
for session in sessions
],
total=total_count,
)
@router.post(
"/sessions",
)
async def create_session(
user_id: Annotated[str, Depends(auth.get_user_id)],
user_id: Annotated[str | None, Depends(auth.get_user_id)],
) -> CreateSessionResponse:
"""
Create a new chat session.
Initiates a new chat session for the authenticated user.
Initiates a new chat session for either an authenticated or anonymous user.
Args:
user_id: The authenticated user ID parsed from the JWT (required).
user_id: The optional authenticated user ID parsed from the JWT. If missing, creates an anonymous session.
Returns:
CreateSessionResponse: Details of the created session.
@@ -143,15 +67,15 @@ async def create_session(
"""
logger.info(
f"Creating session with user_id: "
f"...{user_id[-8:] if len(user_id) > 8 else '<redacted>'}"
f"...{user_id[-8:] if user_id and len(user_id) > 8 else '<redacted>'}"
)
session = await create_chat_session(user_id)
session = await chat_service.create_chat_session(user_id)
return CreateSessionResponse(
id=session.session_id,
created_at=session.started_at.isoformat(),
user_id=session.user_id,
user_id=session.user_id or None,
)
@@ -175,88 +99,29 @@ async def get_session(
SessionDetailResponse: Details for the requested session; raises NotFoundError if not found.
"""
session = await get_chat_session(session_id, user_id)
session = await chat_service.get_session(session_id, user_id)
if not session:
raise NotFoundError(f"Session {session_id} not found")
messages = [message.model_dump() for message in session.messages]
logger.info(
f"Returning session {session_id}: "
f"message_count={len(messages)}, "
f"roles={[m.get('role') for m in messages]}"
)
return SessionDetailResponse(
id=session.session_id,
created_at=session.started_at.isoformat(),
updated_at=session.updated_at.isoformat(),
user_id=session.user_id or None,
messages=messages,
)
@router.post(
"/sessions/{session_id}/stream",
)
async def stream_chat_post(
session_id: str,
request: StreamChatRequest,
user_id: str | None = Depends(auth.get_user_id),
):
"""
Stream chat responses for a session (POST with context support).
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
- Text fragments as they are generated
- Tool call UI elements (if invoked)
- Tool execution results
Args:
session_id: The chat session identifier to associate with the streamed messages.
request: Request body containing message, is_user_message, and optional context.
user_id: Optional authenticated user ID.
Returns:
StreamingResponse: SSE-formatted response chunks.
"""
session = await _validate_and_get_session(session_id, user_id)
async def event_generator() -> AsyncGenerator[str, None]:
async for chunk in chat_service.stream_chat_completion(
session_id,
request.message,
is_user_message=request.is_user_message,
user_id=user_id,
session=session, # Pass pre-fetched session to avoid double-fetch
context=request.context,
):
yield chunk.to_sse()
# AI SDK protocol termination
yield "data: [DONE]\n\n"
return StreamingResponse(
event_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
},
messages=[message.model_dump() for message in session.messages],
)
@router.get(
"/sessions/{session_id}/stream",
)
async def stream_chat_get(
async def stream_chat(
session_id: str,
message: Annotated[str, Query(min_length=1, max_length=10000)],
user_id: str | None = Depends(auth.get_user_id),
is_user_message: bool = Query(default=True),
):
"""
Stream chat responses for a session (GET - legacy endpoint).
Stream chat responses for a session.
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
- Text fragments as they are generated
@@ -272,7 +137,14 @@ async def stream_chat_get(
StreamingResponse: SSE-formatted response chunks.
"""
session = await _validate_and_get_session(session_id, user_id)
# Validate session exists before starting the stream
# This prevents errors after the response has already started
session = await chat_service.get_session(session_id, user_id)
if not session:
raise NotFoundError(f"Session {session_id} not found. ")
if session.user_id is None and user_id is not None:
session = await chat_service.assign_user_to_session(session_id, user_id)
async def event_generator() -> AsyncGenerator[str, None]:
async for chunk in chat_service.stream_chat_completion(
@@ -283,8 +155,6 @@ async def stream_chat_get(
session=session, # Pass pre-fetched session to avoid double-fetch
):
yield chunk.to_sse()
# AI SDK protocol termination
yield "data: [DONE]\n\n"
return StreamingResponse(
event_generator(),
@@ -293,7 +163,6 @@ async def stream_chat_get(
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
},
)
@@ -332,28 +201,16 @@ async def health_check() -> dict:
"""
Health check endpoint for the chat service.
Performs a full cycle test of session creation and retrieval. Should always return healthy
Performs a full cycle test of session creation, assignment, and retrieval. Should always return healthy
if the service and data layer are operational.
Returns:
dict: A status dictionary indicating health, service name, and API version.
"""
from backend.data.user import get_or_create_user
# Ensure health check user exists (required for FK constraint)
health_check_user_id = "health-check-user"
await get_or_create_user(
{
"sub": health_check_user_id,
"email": "health-check@system.local",
"user_metadata": {"name": "Health Check User"},
}
)
# Create and retrieve session to verify full data layer
session = await create_chat_session(health_check_user_id)
await get_chat_session(session.session_id, health_check_user_id)
session = await chat_service.create_chat_session(None)
await chat_service.assign_user_to_session(session.session_id, "test_user")
await chat_service.get_session(session.session_id, "test_user")
return {
"status": "healthy",

File diff suppressed because it is too large Load Diff

View File

@@ -4,19 +4,18 @@ from os import getenv
import pytest
from . import service as chat_service
from .model import create_chat_session, get_chat_session, upsert_chat_session
from .response_model import (
StreamEnd,
StreamError,
StreamFinish,
StreamTextDelta,
StreamToolOutputAvailable,
StreamTextChunk,
StreamToolExecutionResult,
)
logger = logging.getLogger(__name__)
@pytest.mark.asyncio(loop_scope="session")
async def test_stream_chat_completion(setup_test_user, test_user_id):
async def test_stream_chat_completion():
"""
Test the stream_chat_completion function.
"""
@@ -24,7 +23,7 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
if not api_key:
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
session = await create_chat_session(test_user_id)
session = await chat_service.create_chat_session()
has_errors = False
has_ended = False
@@ -35,9 +34,9 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
logger.info(chunk)
if isinstance(chunk, StreamError):
has_errors = True
if isinstance(chunk, StreamTextDelta):
assistant_message += chunk.delta
if isinstance(chunk, StreamFinish):
if isinstance(chunk, StreamTextChunk):
assistant_message += chunk.content
if isinstance(chunk, StreamEnd):
has_ended = True
assert has_ended, "Chat completion did not end"
@@ -46,7 +45,7 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
@pytest.mark.asyncio(loop_scope="session")
async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user_id):
async def test_stream_chat_completion_with_tool_calls():
"""
Test the stream_chat_completion function.
"""
@@ -54,8 +53,8 @@ async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user
if not api_key:
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
session = await create_chat_session(test_user_id)
session = await upsert_chat_session(session)
session = await chat_service.create_chat_session()
session = await chat_service.upsert_chat_session(session)
has_errors = False
has_ended = False
@@ -69,14 +68,14 @@ async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user
if isinstance(chunk, StreamError):
has_errors = True
if isinstance(chunk, StreamFinish):
if isinstance(chunk, StreamEnd):
has_ended = True
if isinstance(chunk, StreamToolOutputAvailable):
if isinstance(chunk, StreamToolExecutionResult):
had_tool_calls = True
assert has_ended, "Chat completion did not end"
assert not has_errors, "Error occurred while streaming chat completion"
assert had_tool_calls, "Tool calls did not occur"
session = await get_chat_session(session.session_id)
session = await chat_service.get_session(session.session_id)
assert session, "Session not found"
assert session.usage, "Usage is empty"

View File

@@ -4,34 +4,21 @@ from openai.types.chat import ChatCompletionToolParam
from backend.api.features.chat.model import ChatSession
from .add_understanding import AddUnderstandingTool
from .agent_output import AgentOutputTool
from .base import BaseTool
from .find_agent import FindAgentTool
from .find_library_agent import FindLibraryAgentTool
from .run_block import RunBlockTool
from .run_agent import RunAgentTool
if TYPE_CHECKING:
from backend.api.features.chat.response_model import StreamToolOutputAvailable
from backend.api.features.chat.response_model import StreamToolExecutionResult
# Single source of truth for all tools
TOOL_REGISTRY: dict[str, BaseTool] = {
"add_understanding": AddUnderstandingTool(),
"find_agent": FindAgentTool(),
"find_library_agent": FindLibraryAgentTool(),
"run_agent": RunAgentTool(),
"agent_output": AgentOutputTool(),
"run_block": RunBlockTool(),
}
# Initialize tool instances
find_agent_tool = FindAgentTool()
run_agent_tool = RunAgentTool()
# Export individual tool instances for backwards compatibility
find_agent_tool = TOOL_REGISTRY["find_agent"]
run_agent_tool = TOOL_REGISTRY["run_agent"]
# Generated from registry for OpenAI API
# Export tools as OpenAI format
tools: list[ChatCompletionToolParam] = [
tool.as_openai_tool() for tool in TOOL_REGISTRY.values()
find_agent_tool.as_openai_tool(),
run_agent_tool.as_openai_tool(),
]
@@ -41,9 +28,14 @@ async def execute_tool(
user_id: str | None,
session: ChatSession,
tool_call_id: str,
) -> "StreamToolOutputAvailable":
"""Execute a tool by name."""
tool = TOOL_REGISTRY.get(tool_name)
if not tool:
) -> "StreamToolExecutionResult":
tool_map: dict[str, BaseTool] = {
"find_agent": find_agent_tool,
"run_agent": run_agent_tool,
}
if tool_name not in tool_map:
raise ValueError(f"Tool {tool_name} not found")
return await tool.execute(user_id, session, tool_call_id, **parameters)
return await tool_map[tool_name].execute(
user_id, session, tool_call_id, **parameters
)

View File

@@ -3,7 +3,6 @@ from datetime import UTC, datetime
from os import getenv
import pytest
from prisma.types import ProfileCreateInput
from pydantic import SecretStr
from backend.api.features.chat.model import ChatSession
@@ -18,7 +17,7 @@ from backend.data.user import get_or_create_user
from backend.integrations.credentials_store import IntegrationCredentialsStore
def make_session(user_id: str):
def make_session(user_id: str | None = None):
return ChatSession(
session_id=str(uuid.uuid4()),
user_id=user_id,
@@ -50,13 +49,13 @@ async def setup_test_data():
# 1b. Create a profile with username for the user (required for store agent lookup)
username = user.email.split("@")[0]
await prisma.profile.create(
data=ProfileCreateInput(
userId=user.id,
username=username,
name=f"Test User {username}",
description="Test user profile",
links=[], # Required field - empty array for test profiles
)
data={
"userId": user.id,
"username": username,
"name": f"Test User {username}",
"description": "Test user profile",
"links": [], # Required field - empty array for test profiles
}
)
# 2. Create a test graph with agent input -> agent output
@@ -173,13 +172,13 @@ async def setup_llm_test_data():
# 1b. Create a profile with username for the user (required for store agent lookup)
username = user.email.split("@")[0]
await prisma.profile.create(
data=ProfileCreateInput(
userId=user.id,
username=username,
name=f"Test User {username}",
description="Test user profile for LLM tests",
links=[], # Required field - empty array for test profiles
)
data={
"userId": user.id,
"username": username,
"name": f"Test User {username}",
"description": "Test user profile for LLM tests",
"links": [], # Required field - empty array for test profiles
}
)
# 2. Create test OpenAI credentials for the user
@@ -333,13 +332,13 @@ async def setup_firecrawl_test_data():
# 1b. Create a profile with username for the user (required for store agent lookup)
username = user.email.split("@")[0]
await prisma.profile.create(
data=ProfileCreateInput(
userId=user.id,
username=username,
name=f"Test User {username}",
description="Test user profile for Firecrawl tests",
links=[], # Required field - empty array for test profiles
)
data={
"userId": user.id,
"username": username,
"name": f"Test User {username}",
"description": "Test user profile for Firecrawl tests",
"links": [], # Required field - empty array for test profiles
}
)
# NOTE: We deliberately do NOT create Firecrawl credentials for this user

View File

@@ -1,119 +0,0 @@
"""Tool for capturing user business understanding incrementally."""
import logging
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.data.understanding import (
BusinessUnderstandingInput,
upsert_business_understanding,
)
from .base import BaseTool
from .models import ErrorResponse, ToolResponseBase, UnderstandingUpdatedResponse
logger = logging.getLogger(__name__)
class AddUnderstandingTool(BaseTool):
"""Tool for capturing user's business understanding incrementally."""
@property
def name(self) -> str:
return "add_understanding"
@property
def description(self) -> str:
return """Capture and store information about the user's business context,
workflows, pain points, and automation goals. Call this tool whenever the user
shares information about their business. Each call incrementally adds to the
existing understanding - you don't need to provide all fields at once.
Use this to build a comprehensive profile that helps recommend better agents
and automations for the user's specific needs."""
@property
def parameters(self) -> dict[str, Any]:
# Auto-generate from Pydantic model schema
schema = BusinessUnderstandingInput.model_json_schema()
properties = {}
for field_name, field_schema in schema.get("properties", {}).items():
prop: dict[str, Any] = {"description": field_schema.get("description", "")}
# Handle anyOf for Optional types
if "anyOf" in field_schema:
for option in field_schema["anyOf"]:
if option.get("type") != "null":
prop["type"] = option.get("type", "string")
if "items" in option:
prop["items"] = option["items"]
break
else:
prop["type"] = field_schema.get("type", "string")
if "items" in field_schema:
prop["items"] = field_schema["items"]
properties[field_name] = prop
return {"type": "object", "properties": properties, "required": []}
@property
def requires_auth(self) -> bool:
"""Requires authentication to store user-specific data."""
return True
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs,
) -> ToolResponseBase:
"""
Capture and store business understanding incrementally.
Each call merges new data with existing understanding:
- String fields are overwritten if provided
- List fields are appended (with deduplication)
"""
session_id = session.session_id
if not user_id:
return ErrorResponse(
message="Authentication required to save business understanding.",
session_id=session_id,
)
# Check if any data was provided
if not any(v is not None for v in kwargs.values()):
return ErrorResponse(
message="Please provide at least one field to update.",
session_id=session_id,
)
# Build input model from kwargs (only include fields defined in the model)
valid_fields = set(BusinessUnderstandingInput.model_fields.keys())
input_data = BusinessUnderstandingInput(
**{k: v for k, v in kwargs.items() if k in valid_fields}
)
# Track which fields were updated
updated_fields = [
k for k, v in kwargs.items() if k in valid_fields and v is not None
]
# Upsert with merge
understanding = await upsert_business_understanding(user_id, input_data)
# Build current understanding summary (filter out empty values)
current_understanding = {
k: v
for k, v in understanding.model_dump(
exclude={"id", "user_id", "created_at", "updated_at"}
).items()
if v is not None and v != [] and v != ""
}
return UnderstandingUpdatedResponse(
message=f"Updated understanding with: {', '.join(updated_fields)}. "
"I now have a better picture of your business context.",
session_id=session_id,
updated_fields=updated_fields,
current_understanding=current_understanding,
)

View File

@@ -1,446 +0,0 @@
"""Tool for retrieving agent execution outputs from user's library."""
import logging
import re
from datetime import datetime, timedelta, timezone
from typing import Any
from pydantic import BaseModel, field_validator
from backend.api.features.chat.model import ChatSession
from backend.api.features.library import db as library_db
from backend.api.features.library.model import LibraryAgent
from backend.data import execution as execution_db
from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta
from .base import BaseTool
from .models import (
AgentOutputResponse,
ErrorResponse,
ExecutionOutputInfo,
NoResultsResponse,
ToolResponseBase,
)
from .utils import fetch_graph_from_store_slug
logger = logging.getLogger(__name__)
class AgentOutputInput(BaseModel):
"""Input parameters for the agent_output tool."""
agent_name: str = ""
library_agent_id: str = ""
store_slug: str = ""
execution_id: str = ""
run_time: str = "latest"
@field_validator(
"agent_name",
"library_agent_id",
"store_slug",
"execution_id",
"run_time",
mode="before",
)
@classmethod
def strip_strings(cls, v: Any) -> Any:
"""Strip whitespace from string fields."""
return v.strip() if isinstance(v, str) else v
def parse_time_expression(
time_expr: str | None,
) -> tuple[datetime | None, datetime | None]:
"""
Parse time expression into datetime range (start, end).
Supports: "latest", "yesterday", "today", "last week", "last 7 days",
"last month", "last 30 days", ISO date "YYYY-MM-DD", ISO datetime.
"""
if not time_expr or time_expr.lower() == "latest":
return None, None
now = datetime.now(timezone.utc)
today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
expr = time_expr.lower().strip()
# Relative time expressions lookup
relative_times: dict[str, tuple[datetime, datetime]] = {
"yesterday": (today_start - timedelta(days=1), today_start),
"today": (today_start, now),
"last week": (now - timedelta(days=7), now),
"last 7 days": (now - timedelta(days=7), now),
"last month": (now - timedelta(days=30), now),
"last 30 days": (now - timedelta(days=30), now),
}
if expr in relative_times:
return relative_times[expr]
# Try ISO date format (YYYY-MM-DD)
date_match = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", expr)
if date_match:
try:
year, month, day = map(int, date_match.groups())
start = datetime(year, month, day, 0, 0, 0, tzinfo=timezone.utc)
return start, start + timedelta(days=1)
except ValueError:
# Invalid date components (e.g., month=13, day=32)
pass
# Try ISO datetime
try:
parsed = datetime.fromisoformat(expr.replace("Z", "+00:00"))
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed - timedelta(hours=1), parsed + timedelta(hours=1)
except ValueError:
return None, None
class AgentOutputTool(BaseTool):
"""Tool for retrieving execution outputs from user's library agents."""
@property
def name(self) -> str:
return "agent_output"
@property
def description(self) -> str:
return """Retrieve execution outputs from agents in the user's library.
Identify the agent using one of:
- agent_name: Fuzzy search in user's library
- library_agent_id: Exact library agent ID
- store_slug: Marketplace format 'username/agent-name'
Select which run to retrieve using:
- execution_id: Specific execution ID
- run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
"""
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"agent_name": {
"type": "string",
"description": "Agent name to search for in user's library (fuzzy match)",
},
"library_agent_id": {
"type": "string",
"description": "Exact library agent ID",
},
"store_slug": {
"type": "string",
"description": "Marketplace identifier: 'username/agent-slug'",
},
"execution_id": {
"type": "string",
"description": "Specific execution ID to retrieve",
},
"run_time": {
"type": "string",
"description": (
"Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
),
},
},
"required": [],
}
@property
def requires_auth(self) -> bool:
return True
async def _resolve_agent(
self,
user_id: str,
agent_name: str | None,
library_agent_id: str | None,
store_slug: str | None,
) -> tuple[LibraryAgent | None, str | None]:
"""
Resolve agent from provided identifiers.
Returns (library_agent, error_message).
"""
# Priority 1: Exact library agent ID
if library_agent_id:
try:
agent = await library_db.get_library_agent(library_agent_id, user_id)
return agent, None
except Exception as e:
logger.warning(f"Failed to get library agent by ID: {e}")
return None, f"Library agent '{library_agent_id}' not found"
# Priority 2: Store slug (username/agent-name)
if store_slug and "/" in store_slug:
username, agent_slug = store_slug.split("/", 1)
graph, _ = await fetch_graph_from_store_slug(username, agent_slug)
if not graph:
return None, f"Agent '{store_slug}' not found in marketplace"
# Find in user's library by graph_id
agent = await library_db.get_library_agent_by_graph_id(user_id, graph.id)
if not agent:
return (
None,
f"Agent '{store_slug}' is not in your library. "
"Add it first to see outputs.",
)
return agent, None
# Priority 3: Fuzzy name search in library
if agent_name:
try:
response = await library_db.list_library_agents(
user_id=user_id,
search_term=agent_name,
page_size=5,
)
if not response.agents:
return (
None,
f"No agents matching '{agent_name}' found in your library",
)
# Return best match (first result from search)
return response.agents[0], None
except Exception as e:
logger.error(f"Error searching library agents: {e}")
return None, f"Error searching for agent: {e}"
return (
None,
"Please specify an agent name, library_agent_id, or store_slug",
)
async def _get_execution(
self,
user_id: str,
graph_id: str,
execution_id: str | None,
time_start: datetime | None,
time_end: datetime | None,
) -> tuple[GraphExecution | None, list[GraphExecutionMeta], str | None]:
"""
Fetch execution(s) based on filters.
Returns (single_execution, available_executions_meta, error_message).
"""
# If specific execution_id provided, fetch it directly
if execution_id:
execution = await execution_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
if not execution:
return None, [], f"Execution '{execution_id}' not found"
return execution, [], None
# Get completed executions with time filters
executions = await execution_db.get_graph_executions(
graph_id=graph_id,
user_id=user_id,
statuses=[ExecutionStatus.COMPLETED],
created_time_gte=time_start,
created_time_lte=time_end,
limit=10,
)
if not executions:
return None, [], None # No error, just no executions
# If only one execution, fetch full details
if len(executions) == 1:
full_execution = await execution_db.get_graph_execution(
user_id=user_id,
execution_id=executions[0].id,
include_node_executions=False,
)
return full_execution, [], None
# Multiple executions - return latest with full details, plus list of available
full_execution = await execution_db.get_graph_execution(
user_id=user_id,
execution_id=executions[0].id,
include_node_executions=False,
)
return full_execution, executions, None
def _build_response(
self,
agent: LibraryAgent,
execution: GraphExecution | None,
available_executions: list[GraphExecutionMeta],
session_id: str | None,
) -> AgentOutputResponse:
"""Build the response based on execution data."""
library_agent_link = f"/library/agents/{agent.id}"
if not execution:
return AgentOutputResponse(
message=f"No completed executions found for agent '{agent.name}'",
session_id=session_id,
agent_name=agent.name,
agent_id=agent.graph_id,
library_agent_id=agent.id,
library_agent_link=library_agent_link,
total_executions=0,
)
execution_info = ExecutionOutputInfo(
execution_id=execution.id,
status=execution.status.value,
started_at=execution.started_at,
ended_at=execution.ended_at,
outputs=dict(execution.outputs),
inputs_summary=execution.inputs if execution.inputs else None,
)
available_list = None
if len(available_executions) > 1:
available_list = [
{
"id": e.id,
"status": e.status.value,
"started_at": e.started_at.isoformat() if e.started_at else None,
}
for e in available_executions[:5]
]
message = f"Found execution outputs for agent '{agent.name}'"
if len(available_executions) > 1:
message += (
f". Showing latest of {len(available_executions)} matching executions."
)
return AgentOutputResponse(
message=message,
session_id=session_id,
agent_name=agent.name,
agent_id=agent.graph_id,
library_agent_id=agent.id,
library_agent_link=library_agent_link,
execution=execution_info,
available_executions=available_list,
total_executions=len(available_executions) if available_executions else 1,
)
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs,
) -> ToolResponseBase:
"""Execute the agent_output tool."""
session_id = session.session_id
# Parse and validate input
try:
input_data = AgentOutputInput(**kwargs)
except Exception as e:
logger.error(f"Invalid input: {e}")
return ErrorResponse(
message="Invalid input parameters",
error=str(e),
session_id=session_id,
)
# Ensure user_id is present (should be guaranteed by requires_auth)
if not user_id:
return ErrorResponse(
message="User authentication required",
session_id=session_id,
)
# Check if at least one identifier is provided
if not any(
[
input_data.agent_name,
input_data.library_agent_id,
input_data.store_slug,
input_data.execution_id,
]
):
return ErrorResponse(
message=(
"Please specify at least one of: agent_name, "
"library_agent_id, store_slug, or execution_id"
),
session_id=session_id,
)
# If only execution_id provided, we need to find the agent differently
if (
input_data.execution_id
and not input_data.agent_name
and not input_data.library_agent_id
and not input_data.store_slug
):
# Fetch execution directly to get graph_id
execution = await execution_db.get_graph_execution(
user_id=user_id,
execution_id=input_data.execution_id,
include_node_executions=False,
)
if not execution:
return ErrorResponse(
message=f"Execution '{input_data.execution_id}' not found",
session_id=session_id,
)
# Find library agent by graph_id
agent = await library_db.get_library_agent_by_graph_id(
user_id, execution.graph_id
)
if not agent:
return NoResultsResponse(
message=(
f"Execution found but agent not in your library. "
f"Graph ID: {execution.graph_id}"
),
session_id=session_id,
suggestions=["Add the agent to your library to see more details"],
)
return self._build_response(agent, execution, [], session_id)
# Resolve agent from identifiers
agent, error = await self._resolve_agent(
user_id=user_id,
agent_name=input_data.agent_name or None,
library_agent_id=input_data.library_agent_id or None,
store_slug=input_data.store_slug or None,
)
if error or not agent:
return NoResultsResponse(
message=error or "Agent not found",
session_id=session_id,
suggestions=[
"Check the agent name or ID",
"Make sure the agent is in your library",
],
)
# Parse time expression
time_start, time_end = parse_time_expression(input_data.run_time)
# Fetch execution(s)
execution, available_executions, exec_error = await self._get_execution(
user_id=user_id,
graph_id=agent.graph_id,
execution_id=input_data.execution_id or None,
time_start=time_start,
time_end=time_end,
)
if exec_error:
return ErrorResponse(
message=exec_error,
session_id=session_id,
)
return self._build_response(agent, execution, available_executions, session_id)

View File

@@ -1,151 +0,0 @@
"""Shared agent search functionality for find_agent and find_library_agent tools."""
import logging
from typing import Literal
from backend.api.features.library import db as library_db
from backend.api.features.store import db as store_db
from backend.util.exceptions import DatabaseError, NotFoundError
from .models import (
AgentInfo,
AgentsFoundResponse,
ErrorResponse,
NoResultsResponse,
ToolResponseBase,
)
logger = logging.getLogger(__name__)
SearchSource = Literal["marketplace", "library"]
async def search_agents(
query: str,
source: SearchSource,
session_id: str | None,
user_id: str | None = None,
) -> ToolResponseBase:
"""
Search for agents in marketplace or user library.
Args:
query: Search query string
source: "marketplace" or "library"
session_id: Chat session ID
user_id: User ID (required for library search)
Returns:
AgentsFoundResponse, NoResultsResponse, or ErrorResponse
"""
if not query:
return ErrorResponse(
message="Please provide a search query", session_id=session_id
)
if source == "library" and not user_id:
return ErrorResponse(
message="User authentication required to search library",
session_id=session_id,
)
agents: list[AgentInfo] = []
try:
if source == "marketplace":
logger.info(f"Searching marketplace for: {query}")
results = await store_db.get_store_agents(search_query=query, page_size=5)
for agent in results.agents:
agents.append(
AgentInfo(
id=f"{agent.creator}/{agent.slug}",
name=agent.agent_name,
description=agent.description or "",
source="marketplace",
in_library=False,
creator=agent.creator,
category="general",
rating=agent.rating,
runs=agent.runs,
is_featured=False,
)
)
else: # library
logger.info(f"Searching user library for: {query}")
results = await library_db.list_library_agents(
user_id=user_id, # type: ignore[arg-type]
search_term=query,
page_size=10,
)
for agent in results.agents:
agents.append(
AgentInfo(
id=agent.id,
name=agent.name,
description=agent.description or "",
source="library",
in_library=True,
creator=agent.creator_name,
status=agent.status.value,
can_access_graph=agent.can_access_graph,
has_external_trigger=agent.has_external_trigger,
new_output=agent.new_output,
graph_id=agent.graph_id,
)
)
logger.info(f"Found {len(agents)} agents in {source}")
except NotFoundError:
pass
except DatabaseError as e:
logger.error(f"Error searching {source}: {e}", exc_info=True)
return ErrorResponse(
message=f"Failed to search {source}. Please try again.",
error=str(e),
session_id=session_id,
)
if not agents:
suggestions = (
[
"Try more general terms",
"Browse categories in the marketplace",
"Check spelling",
]
if source == "marketplace"
else [
"Try different keywords",
"Use find_agent to search the marketplace",
"Check your library at /library",
]
)
no_results_msg = (
f"No agents found matching '{query}'. Try different keywords or browse the marketplace."
if source == "marketplace"
else f"No agents matching '{query}' found in your library."
)
return NoResultsResponse(
message=no_results_msg, session_id=session_id, suggestions=suggestions
)
title = f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} "
title += (
f"for '{query}'"
if source == "marketplace"
else f"in your library for '{query}'"
)
message = (
"Now you have found some options for the user to choose from. "
"You can add a link to a recommended agent at: /marketplace/agent/agent_id "
"Please ask the user if they would like to use any of these agents."
if source == "marketplace"
else "Found agents in the user's library. You can provide a link to view an agent at: "
"/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute."
)
return AgentsFoundResponse(
message=message,
title=title,
agents=agents,
count=len(agents),
session_id=session_id,
)

View File

@@ -6,7 +6,7 @@ from typing import Any
from openai.types.chat import ChatCompletionToolParam
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.response_model import StreamToolOutputAvailable
from backend.api.features.chat.response_model import StreamToolExecutionResult
from .models import ErrorResponse, NeedLoginResponse, ToolResponseBase
@@ -53,7 +53,7 @@ class BaseTool:
session: ChatSession,
tool_call_id: str,
**kwargs,
) -> StreamToolOutputAvailable:
) -> StreamToolExecutionResult:
"""Execute the tool with authentication check.
Args:
@@ -69,10 +69,10 @@ class BaseTool:
logger.error(
f"Attempted tool call for {self.name} but user not authenticated"
)
return StreamToolOutputAvailable(
toolCallId=tool_call_id,
toolName=self.name,
output=NeedLoginResponse(
return StreamToolExecutionResult(
tool_id=tool_call_id,
tool_name=self.name,
result=NeedLoginResponse(
message=f"Please sign in to use {self.name}",
session_id=session.session_id,
).model_dump_json(),
@@ -81,17 +81,17 @@ class BaseTool:
try:
result = await self._execute(user_id, session, **kwargs)
return StreamToolOutputAvailable(
toolCallId=tool_call_id,
toolName=self.name,
output=result.model_dump_json(),
return StreamToolExecutionResult(
tool_id=tool_call_id,
tool_name=self.name,
result=result.model_dump_json(),
)
except Exception as e:
logger.error(f"Error in {self.name}: {e}", exc_info=True)
return StreamToolOutputAvailable(
toolCallId=tool_call_id,
toolName=self.name,
output=ErrorResponse(
return StreamToolExecutionResult(
tool_id=tool_call_id,
tool_name=self.name,
result=ErrorResponse(
message=f"An error occurred while executing {self.name}",
error=str(e),
session_id=session.session_id,

View File

@@ -1,16 +1,26 @@
"""Tool for discovering agents from marketplace."""
"""Tool for discovering agents from marketplace and user library."""
import logging
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.api.features.store import db as store_db
from backend.util.exceptions import DatabaseError, NotFoundError
from .agent_search import search_agents
from .base import BaseTool
from .models import ToolResponseBase
from .models import (
AgentCarouselResponse,
AgentInfo,
ErrorResponse,
NoResultsResponse,
ToolResponseBase,
)
logger = logging.getLogger(__name__)
class FindAgentTool(BaseTool):
"""Tool for discovering agents from the marketplace."""
"""Tool for discovering agents based on user needs."""
@property
def name(self) -> str:
@@ -36,11 +46,84 @@ class FindAgentTool(BaseTool):
}
async def _execute(
self, user_id: str | None, session: ChatSession, **kwargs
self,
user_id: str | None,
session: ChatSession,
**kwargs,
) -> ToolResponseBase:
return await search_agents(
query=kwargs.get("query", "").strip(),
source="marketplace",
session_id=session.session_id,
user_id=user_id,
"""Search for agents in the marketplace.
Args:
user_id: User ID (may be anonymous)
session_id: Chat session ID
query: Search query
Returns:
AgentCarouselResponse: List of agents found in the marketplace
NoResultsResponse: No agents found in the marketplace
ErrorResponse: Error message
"""
query = kwargs.get("query", "").strip()
session_id = session.session_id
if not query:
return ErrorResponse(
message="Please provide a search query",
session_id=session_id,
)
agents = []
try:
logger.info(f"Searching marketplace for: {query}")
store_results = await store_db.get_store_agents(
search_query=query,
page_size=5,
)
logger.info(f"Find agents tool found {len(store_results.agents)} agents")
for agent in store_results.agents:
agent_id = f"{agent.creator}/{agent.slug}"
logger.info(f"Building agent ID = {agent_id}")
agents.append(
AgentInfo(
id=agent_id,
name=agent.agent_name,
description=agent.description or "",
source="marketplace",
in_library=False,
creator=agent.creator,
category="general",
rating=agent.rating,
runs=agent.runs,
is_featured=False,
),
)
except NotFoundError:
pass
except DatabaseError as e:
logger.error(f"Error searching agents: {e}", exc_info=True)
return ErrorResponse(
message="Failed to search for agents. Please try again.",
error=str(e),
session_id=session_id,
)
if not agents:
return NoResultsResponse(
message=f"No agents found matching '{query}'. Try different keywords or browse the marketplace. If you have 3 consecutive find_agent tool calls results and found no agents. Please stop trying and ask the user if there is anything else you can help with.",
session_id=session_id,
suggestions=[
"Try more general terms",
"Browse categories in the marketplace",
"Check spelling",
],
)
# Return formatted carousel
title = (
f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} for '{query}'"
)
return AgentCarouselResponse(
message="Now you have found some options for the user to choose from. You can add a link to a recommended agent at: /marketplace/agent/agent_id Please ask the user if they would like to use any of these agents. If they do, please call the get_agent_details tool for this agent.",
title=title,
agents=agents,
count=len(agents),
session_id=session_id,
)

View File

@@ -1,52 +0,0 @@
"""Tool for searching agents in the user's library."""
from typing import Any
from backend.api.features.chat.model import ChatSession
from .agent_search import search_agents
from .base import BaseTool
from .models import ToolResponseBase
class FindLibraryAgentTool(BaseTool):
"""Tool for searching agents in the user's library."""
@property
def name(self) -> str:
return "find_library_agent"
@property
def description(self) -> str:
return (
"Search for agents in the user's library. Use this to find agents "
"the user has already added to their library, including agents they "
"created or added from the marketplace."
)
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query to find agents by name or description.",
},
},
"required": ["query"],
}
@property
def requires_auth(self) -> bool:
return True
async def _execute(
self, user_id: str | None, session: ChatSession, **kwargs
) -> ToolResponseBase:
return await search_agents(
query=kwargs.get("query", "").strip(),
source="library",
session_id=session.session_id,
user_id=user_id,
)

View File

@@ -1,28 +1,24 @@
"""Pydantic models for tool responses."""
from datetime import datetime
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field
from backend.data import block
from backend.data.model import CredentialsMetaInput
class ResponseType(str, Enum):
"""Types of tool responses."""
AGENTS_FOUND = "agents_found"
AGENT_CAROUSEL = "agent_carousel"
AGENT_DETAILS = "agent_details"
BLOCK_OUTPUT = "block_output"
SETUP_REQUIREMENTS = "setup_requirements"
EXECUTION_STARTED = "execution_started"
NEED_LOGIN = "need_login"
ERROR = "error"
NO_RESULTS = "no_results"
AGENT_OUTPUT = "agent_output"
UNDERSTANDING_UPDATED = "understanding_updated"
SUCCESS = "success"
# Base response model
@@ -55,22 +51,15 @@ class AgentInfo(BaseModel):
graph_id: str | None = None
class AgentsFoundResponse(ToolResponseBase):
class AgentCarouselResponse(ToolResponseBase):
"""Response for find_agent tool."""
type: ResponseType = ResponseType.AGENTS_FOUND
type: ResponseType = ResponseType.AGENT_CAROUSEL
title: str = "Available Agents"
agents: list[AgentInfo]
count: int
name: str = "agents_found"
name: str = "agent_carousel"
class BlockOutputResponse(ToolResponseBase):
"""Response for find_block tool"""
type: ResponseType = ResponseType.BLOCK_OUTPUT
block_id: str
block_name: str
outputs: dict[str, list[Any]]
success: bool = True
class NoResultsResponse(ToolResponseBase):
"""Response when no agents found."""
@@ -184,37 +173,3 @@ class ErrorResponse(ToolResponseBase):
type: ResponseType = ResponseType.ERROR
error: str | None = None
details: dict[str, Any] | None = None
# Agent output models
class ExecutionOutputInfo(BaseModel):
"""Summary of a single execution's outputs."""
execution_id: str
status: str
started_at: datetime | None = None
ended_at: datetime | None = None
outputs: dict[str, list[Any]]
inputs_summary: dict[str, Any] | None = None
class AgentOutputResponse(ToolResponseBase):
"""Response for agent_output tool."""
type: ResponseType = ResponseType.AGENT_OUTPUT
agent_name: str
agent_id: str
library_agent_id: str | None = None
library_agent_link: str | None = None
execution: ExecutionOutputInfo | None = None
available_executions: list[dict[str, Any]] | None = None
total_executions: int = 0
# Business understanding models
class UnderstandingUpdatedResponse(ToolResponseBase):
"""Response for add_understanding tool."""
type: ResponseType = ResponseType.UNDERSTANDING_UPDATED
updated_fields: list[str] = Field(default_factory=list)
current_understanding: dict[str, Any] = Field(default_factory=dict)

View File

@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field, field_validator
from backend.api.features.chat.config import ChatConfig
from backend.api.features.chat.model import ChatSession
from backend.api.features.library import db as library_db
from backend.data.graph import GraphModel
from backend.data.model import CredentialsMetaInput
from backend.data.user import get_user_by_id
@@ -58,7 +57,6 @@ class RunAgentInput(BaseModel):
"""Input parameters for the run_agent tool."""
username_agent_slug: str = ""
library_agent_id: str = ""
inputs: dict[str, Any] = Field(default_factory=dict)
use_defaults: bool = False
schedule_name: str = ""
@@ -66,12 +64,7 @@ class RunAgentInput(BaseModel):
timezone: str = "UTC"
@field_validator(
"username_agent_slug",
"library_agent_id",
"schedule_name",
"cron",
"timezone",
mode="before",
"username_agent_slug", "schedule_name", "cron", "timezone", mode="before"
)
@classmethod
def strip_strings(cls, v: Any) -> Any:
@@ -97,7 +90,7 @@ class RunAgentTool(BaseTool):
@property
def description(self) -> str:
return """Run or schedule an agent from the marketplace or user's library.
return """Run or schedule an agent from the marketplace.
The tool automatically handles the setup flow:
- Returns missing inputs if required fields are not provided
@@ -105,10 +98,6 @@ class RunAgentTool(BaseTool):
- Executes immediately if all requirements are met
- Schedules execution if cron expression is provided
Identify the agent using either:
- username_agent_slug: Marketplace format 'username/agent-name'
- library_agent_id: ID of an agent in the user's library
For scheduled execution, provide: schedule_name, cron, and optionally timezone."""
@property
@@ -120,10 +109,6 @@ class RunAgentTool(BaseTool):
"type": "string",
"description": "Agent identifier in format 'username/agent-name'",
},
"library_agent_id": {
"type": "string",
"description": "Library agent ID from user's library",
},
"inputs": {
"type": "object",
"description": "Input values for the agent",
@@ -146,7 +131,7 @@ class RunAgentTool(BaseTool):
"description": "IANA timezone for schedule (default: UTC)",
},
},
"required": [],
"required": ["username_agent_slug"],
}
@property
@@ -164,16 +149,10 @@ class RunAgentTool(BaseTool):
params = RunAgentInput(**kwargs)
session_id = session.session_id
# Validate at least one identifier is provided
has_slug = params.username_agent_slug and "/" in params.username_agent_slug
has_library_id = bool(params.library_agent_id)
if not has_slug and not has_library_id:
# Validate agent slug format
if not params.username_agent_slug or "/" not in params.username_agent_slug:
return ErrorResponse(
message=(
"Please provide either a username_agent_slug "
"(format 'username/agent-name') or a library_agent_id"
),
message="Please provide an agent slug in format 'username/agent-name'",
session_id=session_id,
)
@@ -188,41 +167,13 @@ class RunAgentTool(BaseTool):
is_schedule = bool(params.schedule_name or params.cron)
try:
# Step 1: Fetch agent details
graph: GraphModel | None = None
library_agent = None
# Priority: library_agent_id if provided
if has_library_id:
library_agent = await library_db.get_library_agent(
params.library_agent_id, user_id
)
if not library_agent:
return ErrorResponse(
message=f"Library agent '{params.library_agent_id}' not found",
session_id=session_id,
)
# Get the graph from the library agent
from backend.data.graph import get_graph
graph = await get_graph(
library_agent.graph_id,
library_agent.graph_version,
user_id=user_id,
)
else:
# Fetch from marketplace slug
username, agent_name = params.username_agent_slug.split("/", 1)
graph, _ = await fetch_graph_from_store_slug(username, agent_name)
# Step 1: Fetch agent details (always happens first)
username, agent_name = params.username_agent_slug.split("/", 1)
graph, store_agent = await fetch_graph_from_store_slug(username, agent_name)
if not graph:
identifier = (
params.library_agent_id
if has_library_id
else params.username_agent_slug
)
return ErrorResponse(
message=f"Agent '{identifier}' not found",
message=f"Agent '{params.username_agent_slug}' not found in marketplace",
session_id=session_id,
)

View File

@@ -1,5 +1,4 @@
import uuid
from unittest.mock import AsyncMock, patch
import orjson
import pytest
@@ -18,17 +17,6 @@ setup_test_data = setup_test_data
setup_firecrawl_test_data = setup_firecrawl_test_data
@pytest.fixture(scope="session", autouse=True)
def mock_embedding_functions():
"""Mock embedding functions for all tests to avoid database/API dependencies."""
with patch(
"backend.api.features.store.db.ensure_embedding",
new_callable=AsyncMock,
return_value=True,
):
yield
@pytest.mark.asyncio(scope="session")
async def test_run_agent(setup_test_data):
"""Test that the run_agent tool successfully executes an approved agent"""
@@ -58,11 +46,11 @@ async def test_run_agent(setup_test_data):
# Verify the response
assert response is not None
assert hasattr(response, "output")
assert hasattr(response, "result")
# Parse the result JSON to verify the execution started
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
assert "execution_id" in result_data
assert "graph_id" in result_data
assert result_data["graph_id"] == graph.id
@@ -98,11 +86,11 @@ async def test_run_agent_missing_inputs(setup_test_data):
# Verify that we get an error response
assert response is not None
assert hasattr(response, "output")
assert hasattr(response, "result")
# The tool should return an ErrorResponse when setup info indicates not ready
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
assert "message" in result_data
@@ -130,10 +118,10 @@ async def test_run_agent_invalid_agent_id(setup_test_data):
# Verify that we get an error response
assert response is not None
assert hasattr(response, "output")
assert hasattr(response, "result")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
assert "message" in result_data
# Should get an error about failed setup or not found
assert any(
@@ -170,12 +158,12 @@ async def test_run_agent_with_llm_credentials(setup_llm_test_data):
# Verify the response
assert response is not None
assert hasattr(response, "output")
assert hasattr(response, "result")
# Parse the result JSON to verify the execution started
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should successfully start execution since credentials are available
assert "execution_id" in result_data
@@ -207,9 +195,9 @@ async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_da
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should return agent_details type showing available inputs
assert result_data.get("type") == "agent_details"
@@ -242,9 +230,9 @@ async def test_run_agent_with_use_defaults(setup_test_data):
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should execute successfully
assert "execution_id" in result_data
@@ -272,9 +260,9 @@ async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should return setup_requirements type with missing credentials
assert result_data.get("type") == "setup_requirements"
@@ -304,9 +292,9 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should return error
assert result_data.get("type") == "error"
@@ -317,10 +305,9 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
async def test_run_agent_unauthenticated():
"""Test that run_agent returns need_login for unauthenticated users."""
tool = RunAgentTool()
# Session has a user_id (session owner), but we test tool execution without user_id
session = make_session(user_id="test-session-owner")
session = make_session(user_id=None)
# Execute without user_id to test unauthenticated behavior
# Execute without user_id
response = await tool.execute(
user_id=None,
session_id=str(uuid.uuid4()),
@@ -331,9 +318,9 @@ async def test_run_agent_unauthenticated():
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Base tool returns need_login type for unauthenticated users
assert result_data.get("type") == "need_login"
@@ -363,9 +350,9 @@ async def test_run_agent_schedule_without_cron(setup_test_data):
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should return error about missing cron
assert result_data.get("type") == "error"
@@ -395,9 +382,9 @@ async def test_run_agent_schedule_without_name(setup_test_data):
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
assert hasattr(response, "result")
assert isinstance(response.result, str)
result_data = orjson.loads(response.result)
# Should return error about missing schedule_name
assert result_data.get("type") == "error"

View File

@@ -1,287 +0,0 @@
"""Tool for executing blocks directly."""
import logging
from collections import defaultdict
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.data.block import get_block
from backend.data.model import CredentialsMetaInput
from backend.integrations.creds_manager import IntegrationCredentialsManager
from backend.util.exceptions import BlockError
from .base import BaseTool
from .models import (
BlockOutputResponse,
ErrorResponse,
SetupInfo,
SetupRequirementsResponse,
ToolResponseBase,
UserReadiness,
)
logger = logging.getLogger(__name__)
class RunBlockTool(BaseTool):
"""Tool for executing a block and returning its outputs."""
@property
def name(self) -> str:
return "run_block"
@property
def description(self) -> str:
return (
"Execute a specific block with the provided input data. "
"Use find_block to discover available blocks and their input schemas. "
"The block will run and return its outputs once complete."
)
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"block_id": {
"type": "string",
"description": "The UUID of the block to execute",
},
"input_data": {
"type": "object",
"description": (
"Input values for the block. Must match the block's input schema. "
"Check the block's input_schema from find_block for required fields."
),
},
},
"required": ["block_id", "input_data"],
}
@property
def requires_auth(self) -> bool:
return True
async def _check_block_credentials(
self,
user_id: str,
block: Any,
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
"""
Check if user has required credentials for a block.
Returns:
tuple[matched_credentials, missing_credentials]
"""
matched_credentials: dict[str, CredentialsMetaInput] = {}
missing_credentials: list[CredentialsMetaInput] = []
# Get credential field info from block's input schema
credentials_fields_info = block.input_schema.get_credentials_fields_info()
if not credentials_fields_info:
return matched_credentials, missing_credentials
# Get user's available credentials
creds_manager = IntegrationCredentialsManager()
available_creds = await creds_manager.store.get_all_creds(user_id)
for field_name, field_info in credentials_fields_info.items():
# field_info.provider is a frozenset of acceptable providers
# field_info.supported_types is a frozenset of acceptable types
matching_cred = next(
(
cred
for cred in available_creds
if cred.provider in field_info.provider
and cred.type in field_info.supported_types
),
None,
)
if matching_cred:
matched_credentials[field_name] = CredentialsMetaInput(
id=matching_cred.id,
provider=matching_cred.provider, # type: ignore
type=matching_cred.type,
title=matching_cred.title,
)
else:
# Create a placeholder for the missing credential
provider = next(iter(field_info.provider), "unknown")
cred_type = next(iter(field_info.supported_types), "api_key")
missing_credentials.append(
CredentialsMetaInput(
id=field_name,
provider=provider, # type: ignore
type=cred_type, # type: ignore
title=field_name.replace("_", " ").title(),
)
)
return matched_credentials, missing_credentials
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs,
) -> ToolResponseBase:
"""Execute a block with the given input data.
Args:
user_id: User ID (required)
session: Chat session
block_id: Block UUID to execute
input_data: Input values for the block
Returns:
BlockOutputResponse: Block execution outputs
SetupRequirementsResponse: Missing credentials
ErrorResponse: Error message
"""
block_id = kwargs.get("block_id", "").strip()
input_data = kwargs.get("input_data", {})
session_id = session.session_id
if not block_id:
return ErrorResponse(
message="Please provide a block_id",
session_id=session_id,
)
if not isinstance(input_data, dict):
return ErrorResponse(
message="input_data must be an object",
session_id=session_id,
)
if not user_id:
return ErrorResponse(
message="Authentication required",
session_id=session_id,
)
# Get the block
block = get_block(block_id)
if not block:
return ErrorResponse(
message=f"Block '{block_id}' not found",
session_id=session_id,
)
logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")
# Check credentials
creds_manager = IntegrationCredentialsManager()
matched_credentials, missing_credentials = await self._check_block_credentials(
user_id, block
)
if missing_credentials:
# Return setup requirements response with missing credentials
missing_creds_dict = {c.id: c.model_dump() for c in missing_credentials}
return SetupRequirementsResponse(
message=(
f"Block '{block.name}' requires credentials that are not configured. "
"Please set up the required credentials before running this block."
),
session_id=session_id,
setup_info=SetupInfo(
agent_id=block_id,
agent_name=block.name,
user_readiness=UserReadiness(
has_all_credentials=False,
missing_credentials=missing_creds_dict,
ready_to_run=False,
),
requirements={
"credentials": [c.model_dump() for c in missing_credentials],
"inputs": self._get_inputs_list(block),
"execution_modes": ["immediate"],
},
),
graph_id=None,
graph_version=None,
)
try:
# Fetch actual credentials and prepare kwargs for block execution
exec_kwargs: dict[str, Any] = {"user_id": user_id}
for field_name, cred_meta in matched_credentials.items():
# Inject metadata into input_data (for validation)
if field_name not in input_data:
input_data[field_name] = cred_meta.model_dump()
# Fetch actual credentials and pass as kwargs (for execution)
actual_credentials = await creds_manager.get(
user_id, cred_meta.id, lock=False
)
if actual_credentials:
exec_kwargs[field_name] = actual_credentials
else:
return ErrorResponse(
message=f"Failed to retrieve credentials for {field_name}",
session_id=session_id,
)
# Execute the block and collect outputs
outputs: dict[str, list[Any]] = defaultdict(list)
async for output_name, output_data in block.execute(
input_data,
**exec_kwargs,
):
outputs[output_name].append(output_data)
return BlockOutputResponse(
message=f"Block '{block.name}' executed successfully",
block_id=block_id,
block_name=block.name,
outputs=dict(outputs),
success=True,
session_id=session_id,
)
except BlockError as e:
logger.warning(f"Block execution failed: {e}")
return ErrorResponse(
message=f"Block execution failed: {e}",
error=str(e),
session_id=session_id,
)
except Exception as e:
logger.error(f"Unexpected error executing block: {e}", exc_info=True)
return ErrorResponse(
message=f"Failed to execute block: {str(e)}",
error=str(e),
session_id=session_id,
)
def _get_inputs_list(self, block: Any) -> list[dict[str, Any]]:
"""Extract non-credential inputs from block schema."""
inputs_list = []
schema = block.input_schema.jsonschema()
properties = schema.get("properties", {})
required_fields = set(schema.get("required", []))
# Get credential field names to exclude
credentials_fields = set(block.input_schema.get_credentials_fields().keys())
for field_name, field_schema in properties.items():
# Skip credential fields
if field_name in credentials_fields:
continue
inputs_list.append(
{
"name": field_name,
"title": field_schema.get("title", field_name),
"type": field_schema.get("type", "string"),
"description": field_schema.get("description", ""),
"required": field_name in required_fields,
}
)
return inputs_list

View File

@@ -35,7 +35,11 @@ from backend.data.model import (
OAuth2Credentials,
UserIntegrations,
)
from backend.data.onboarding import OnboardingStep, complete_onboarding_step
from backend.data.onboarding import (
OnboardingStep,
complete_onboarding_step,
increment_runs,
)
from backend.data.user import get_user_integrations
from backend.executor.utils import add_graph_execution
from backend.integrations.ayrshare import AyrshareClient, SocialPlatform
@@ -171,7 +175,6 @@ async def callback(
f"Successfully processed OAuth callback for user {user_id} "
f"and provider {provider.value}"
)
return CredentialsMetaResponse(
id=credentials.id,
provider=credentials.provider,
@@ -190,7 +193,6 @@ async def list_credentials(
user_id: Annotated[str, Security(get_user_id)],
) -> list[CredentialsMetaResponse]:
credentials = await creds_manager.store.get_all_creds(user_id)
return [
CredentialsMetaResponse(
id=cred.id,
@@ -213,7 +215,6 @@ async def list_credentials_by_provider(
user_id: Annotated[str, Security(get_user_id)],
) -> list[CredentialsMetaResponse]:
credentials = await creds_manager.store.get_creds_by_provider(user_id, provider)
return [
CredentialsMetaResponse(
id=cred.id,
@@ -377,6 +378,7 @@ async def webhook_ingress_generic(
return
await complete_onboarding_step(user_id, OnboardingStep.TRIGGER_WEBHOOK)
await increment_runs(user_id)
# Execute all triggers concurrently for better performance
tasks = []
@@ -829,18 +831,6 @@ async def list_providers() -> List[str]:
return all_providers
@router.get("/providers/system", response_model=List[str])
async def list_system_providers() -> List[str]:
"""
Get a list of providers that have platform credits (system credentials) available.
These providers can be used without the user providing their own API keys.
"""
from backend.integrations.credentials_store import SYSTEM_PROVIDERS
return list(SYSTEM_PROVIDERS)
@router.get("/providers/names", response_model=ProviderNamesResponse)
async def get_provider_names() -> ProviderNamesResponse:
"""

View File

@@ -489,7 +489,7 @@ async def update_agent_version_in_library(
agent_graph_version: int,
) -> library_model.LibraryAgent:
"""
Updates the agent version in the library for any agent owned by the user.
Updates the agent version in the library if useGraphIsActiveVersion is True.
Args:
user_id: Owner of the LibraryAgent.
@@ -498,31 +498,20 @@ async def update_agent_version_in_library(
Raises:
DatabaseError: If there's an error with the update.
NotFoundError: If no library agent is found for this user and agent.
"""
logger.debug(
f"Updating agent version in library for user #{user_id}, "
f"agent #{agent_graph_id} v{agent_graph_version}"
)
async with transaction() as tx:
library_agent = await prisma.models.LibraryAgent.prisma(tx).find_first_or_raise(
try:
library_agent = await prisma.models.LibraryAgent.prisma().find_first_or_raise(
where={
"userId": user_id,
"agentGraphId": agent_graph_id,
"useGraphIsActiveVersion": True,
},
)
# Delete any conflicting LibraryAgent for the target version
await prisma.models.LibraryAgent.prisma(tx).delete_many(
where={
"userId": user_id,
"agentGraphId": agent_graph_id,
"agentGraphVersion": agent_graph_version,
"id": {"not": library_agent.id},
}
)
lib = await prisma.models.LibraryAgent.prisma(tx).update(
lib = await prisma.models.LibraryAgent.prisma().update(
where={"id": library_agent.id},
data={
"AgentGraph": {
@@ -536,13 +525,13 @@ async def update_agent_version_in_library(
},
include={"AgentGraph": True},
)
if lib is None:
raise NotFoundError(f"Library agent {library_agent.id} not found")
if lib is None:
raise NotFoundError(
f"Failed to update library agent for {agent_graph_id} v{agent_graph_version}"
)
return library_model.LibraryAgent.from_db(lib)
return library_model.LibraryAgent.from_db(lib)
except prisma.errors.PrismaError as e:
logger.error(f"Database error updating agent version in library: {e}")
raise DatabaseError("Failed to update agent version in library") from e
async def update_library_agent(
@@ -836,7 +825,6 @@ async def add_store_agent_to_library(
}
},
"isCreatedByUser": False,
"useGraphIsActiveVersion": False,
"settings": SafeJson(
_initialize_graph_settings(graph_model).model_dump()
),

View File

@@ -48,7 +48,6 @@ class LibraryAgent(pydantic.BaseModel):
id: str
graph_id: str
graph_version: int
owner_user_id: str # ID of user who owns/created this agent graph
image_url: str | None
@@ -164,7 +163,6 @@ class LibraryAgent(pydantic.BaseModel):
id=agent.id,
graph_id=agent.agentGraphId,
graph_version=agent.agentGraphVersion,
owner_user_id=agent.userId,
image_url=agent.imageUrl,
creator_name=creator_name,
creator_image_url=creator_image_url,

View File

@@ -8,6 +8,7 @@ from backend.data.execution import GraphExecutionMeta
from backend.data.graph import get_graph
from backend.data.integrations import get_webhook
from backend.data.model import CredentialsMetaInput
from backend.data.onboarding import increment_runs
from backend.executor.utils import add_graph_execution, make_node_credentials_input_map
from backend.integrations.creds_manager import IntegrationCredentialsManager
from backend.integrations.webhooks import get_webhook_manager
@@ -402,6 +403,8 @@ async def execute_preset(
merged_node_input = preset.inputs | inputs
merged_credential_inputs = preset.credentials | credential_inputs
await increment_runs(user_id)
return await add_graph_execution(
user_id=user_id,
graph_id=preset.graph_id,

View File

@@ -42,7 +42,6 @@ async def test_get_library_agents_success(
id="test-agent-1",
graph_id="test-agent-1",
graph_version=1,
owner_user_id=test_user_id,
name="Test Agent 1",
description="Test Description 1",
image_url=None,
@@ -65,7 +64,6 @@ async def test_get_library_agents_success(
id="test-agent-2",
graph_id="test-agent-2",
graph_version=1,
owner_user_id=test_user_id,
name="Test Agent 2",
description="Test Description 2",
image_url=None,
@@ -140,7 +138,6 @@ async def test_get_favorite_library_agents_success(
id="test-agent-1",
graph_id="test-agent-1",
graph_version=1,
owner_user_id=test_user_id,
name="Favorite Agent 1",
description="Test Favorite Description 1",
image_url=None,
@@ -208,7 +205,6 @@ def test_add_agent_to_library_success(
id="test-library-agent-id",
graph_id="test-agent-1",
graph_version=1,
owner_user_id=test_user_id,
name="Test Agent 1",
description="Test Description 1",
image_url=None,

View File

@@ -1,7 +1,8 @@
import asyncio
import logging
import typing
from datetime import datetime, timezone
from typing import Any, Literal
from typing import Literal
import fastapi
import prisma.enums
@@ -9,7 +10,7 @@ import prisma.errors
import prisma.models
import prisma.types
from backend.data.db import transaction
from backend.data.db import query_raw_with_schema, transaction
from backend.data.graph import (
GraphMeta,
GraphModel,
@@ -29,8 +30,6 @@ from backend.util.settings import Settings
from . import exceptions as store_exceptions
from . import model as store_model
from .embeddings import ensure_embedding
from .hybrid_search import hybrid_search
logger = logging.getLogger(__name__)
settings = Settings()
@@ -51,77 +50,128 @@ async def get_store_agents(
page_size: int = 20,
) -> store_model.StoreAgentsResponse:
"""
Get PUBLIC store agents from the StoreAgent view.
Search behavior:
- With search_query: Uses hybrid search (semantic + lexical)
- Fallback: If embeddings unavailable, gracefully degrades to lexical-only
- Rationale: User-facing endpoint prioritizes availability over accuracy
Note: Admin operations (approval) use fail-fast to prevent inconsistent state.
Get PUBLIC store agents from the StoreAgent view
"""
logger.debug(
f"Getting store agents. featured={featured}, creators={creators}, sorted_by={sorted_by}, search={search_query}, category={category}, page={page}"
)
search_used_hybrid = False
store_agents: list[store_model.StoreAgent] = []
agents: list[dict[str, Any]] = []
total = 0
total_pages = 0
try:
# If search_query is provided, use hybrid search (embeddings + tsvector)
# If search_query is provided, use full-text search
if search_query:
# Try hybrid search combining semantic and lexical signals
# Falls back to lexical-only if OpenAI unavailable (user-facing, high SLA)
try:
agents, total = await hybrid_search(
query=search_query,
featured=featured,
creators=creators,
category=category,
sorted_by="relevance", # Use hybrid scoring for relevance
page=page,
page_size=page_size,
)
search_used_hybrid = True
except Exception as e:
# Log error but fall back to lexical search for better UX
logger.error(
f"Hybrid search failed (likely OpenAI unavailable), "
f"falling back to lexical search: {e}"
)
# search_used_hybrid remains False, will use fallback path below
offset = (page - 1) * page_size
# Convert hybrid search results (dict format) if hybrid succeeded
if search_used_hybrid:
total_pages = (total + page_size - 1) // page_size
store_agents: list[store_model.StoreAgent] = []
for agent in agents:
try:
store_agent = store_model.StoreAgent(
slug=agent["slug"],
agent_name=agent["agent_name"],
agent_image=(
agent["agent_image"][0] if agent["agent_image"] else ""
),
creator=agent["creator_username"] or "Needs Profile",
creator_avatar=agent["creator_avatar"] or "",
sub_heading=agent["sub_heading"],
description=agent["description"],
runs=agent["runs"],
rating=agent["rating"],
)
store_agents.append(store_agent)
except Exception as e:
logger.error(
f"Error parsing Store agent from hybrid search results: {e}"
)
continue
# Whitelist allowed order_by columns
ALLOWED_ORDER_BY = {
"rating": "rating DESC, rank DESC",
"runs": "runs DESC, rank DESC",
"name": "agent_name ASC, rank ASC",
"updated_at": "updated_at DESC, rank DESC",
}
if not search_used_hybrid:
# Fallback path - use basic search or no search
# Validate and get order clause
if sorted_by and sorted_by in ALLOWED_ORDER_BY:
order_by_clause = ALLOWED_ORDER_BY[sorted_by]
else:
order_by_clause = "updated_at DESC, rank DESC"
# Build WHERE conditions and parameters list
where_parts: list[str] = []
params: list[typing.Any] = [search_query] # $1 - search term
param_index = 2 # Start at $2 for next parameter
# Always filter for available agents
where_parts.append("is_available = true")
if featured:
where_parts.append("featured = true")
if creators and creators:
# Use ANY with array parameter
where_parts.append(f"creator_username = ANY(${param_index})")
params.append(creators)
param_index += 1
if category and category:
where_parts.append(f"${param_index} = ANY(categories)")
params.append(category)
param_index += 1
sql_where_clause: str = " AND ".join(where_parts) if where_parts else "1=1"
# Add pagination params
params.extend([page_size, offset])
limit_param = f"${param_index}"
offset_param = f"${param_index + 1}"
# Execute full-text search query with parameterized values
sql_query = f"""
SELECT
slug,
agent_name,
agent_image,
creator_username,
creator_avatar,
sub_heading,
description,
runs,
rating,
categories,
featured,
is_available,
updated_at,
ts_rank_cd(search, query) AS rank
FROM {{schema_prefix}}"StoreAgent",
plainto_tsquery('english', $1) AS query
WHERE {sql_where_clause}
AND search @@ query
ORDER BY {order_by_clause}
LIMIT {limit_param} OFFSET {offset_param}
"""
# Count query for pagination - only uses search term parameter
count_query = f"""
SELECT COUNT(*) as count
FROM {{schema_prefix}}"StoreAgent",
plainto_tsquery('english', $1) AS query
WHERE {sql_where_clause}
AND search @@ query
"""
# Execute both queries with parameters
agents = await query_raw_with_schema(sql_query, *params)
# For count, use params without pagination (last 2 params)
count_params = params[:-2]
count_result = await query_raw_with_schema(count_query, *count_params)
total = count_result[0]["count"] if count_result else 0
total_pages = (total + page_size - 1) // page_size
# Convert raw results to StoreAgent models
store_agents: list[store_model.StoreAgent] = []
for agent in agents:
try:
store_agent = store_model.StoreAgent(
slug=agent["slug"],
agent_name=agent["agent_name"],
agent_image=(
agent["agent_image"][0] if agent["agent_image"] else ""
),
creator=agent["creator_username"] or "Needs Profile",
creator_avatar=agent["creator_avatar"] or "",
sub_heading=agent["sub_heading"],
description=agent["description"],
runs=agent["runs"],
rating=agent["rating"],
)
store_agents.append(store_agent)
except Exception as e:
logger.error(f"Error parsing Store agent from search results: {e}")
continue
else:
# Non-search query path (original logic)
where_clause: prisma.types.StoreAgentWhereInput = {"is_available": True}
if featured:
where_clause["featured"] = featured
@@ -130,14 +180,6 @@ async def get_store_agents(
if category:
where_clause["categories"] = {"has": category}
# Add basic text search if search_query provided but hybrid failed
if search_query:
where_clause["OR"] = [
{"agent_name": {"contains": search_query, "mode": "insensitive"}},
{"sub_heading": {"contains": search_query, "mode": "insensitive"}},
{"description": {"contains": search_query, "mode": "insensitive"}},
]
order_by = []
if sorted_by == "rating":
order_by.append({"rating": "desc"})
@@ -146,7 +188,7 @@ async def get_store_agents(
elif sorted_by == "name":
order_by.append({"agent_name": "asc"})
db_agents = await prisma.models.StoreAgent.prisma().find_many(
agents = await prisma.models.StoreAgent.prisma().find_many(
where=where_clause,
order=order_by,
skip=(page - 1) * page_size,
@@ -157,7 +199,7 @@ async def get_store_agents(
total_pages = (total + page_size - 1) // page_size
store_agents: list[store_model.StoreAgent] = []
for agent in db_agents:
for agent in agents:
try:
# Create the StoreAgent object safely
store_agent = store_model.StoreAgent(
@@ -572,7 +614,6 @@ async def get_store_submissions(
submission_models = []
for sub in submissions:
submission_model = store_model.StoreSubmission(
listing_id=sub.listing_id,
agent_id=sub.agent_id,
agent_version=sub.agent_version,
name=sub.name,
@@ -626,48 +667,35 @@ async def delete_store_submission(
submission_id: str,
) -> bool:
"""
Delete a store submission version as the submitting user.
Delete a store listing submission as the submitting user.
Args:
user_id: ID of the authenticated user
submission_id: StoreListingVersion ID to delete
submission_id: ID of the submission to be deleted
Returns:
bool: True if successfully deleted
bool: True if the submission was successfully deleted, False otherwise
"""
logger.debug(f"Deleting store submission {submission_id} for user {user_id}")
try:
# Find the submission version with ownership check
version = await prisma.models.StoreListingVersion.prisma().find_first(
where={"id": submission_id}, include={"StoreListing": True}
# Verify the submission belongs to this user
submission = await prisma.models.StoreListing.prisma().find_first(
where={"agentGraphId": submission_id, "owningUserId": user_id}
)
if (
not version
or not version.StoreListing
or version.StoreListing.owningUserId != user_id
):
raise store_exceptions.SubmissionNotFoundError("Submission not found")
# Prevent deletion of approved submissions
if version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
raise store_exceptions.InvalidOperationError(
"Cannot delete approved submissions"
if not submission:
logger.warning(f"Submission not found for user {user_id}: {submission_id}")
raise store_exceptions.SubmissionNotFoundError(
f"Submission not found for this user. User ID: {user_id}, Submission ID: {submission_id}"
)
# Delete the version
await prisma.models.StoreListingVersion.prisma().delete(
where={"id": version.id}
)
# Delete the submission
await prisma.models.StoreListing.prisma().delete(where={"id": submission.id})
# Clean up empty listing if this was the last version
remaining = await prisma.models.StoreListingVersion.prisma().count(
where={"storeListingId": version.storeListingId}
logger.debug(
f"Successfully deleted submission {submission_id} for user {user_id}"
)
if remaining == 0:
await prisma.models.StoreListing.prisma().delete(
where={"id": version.storeListingId}
)
return True
except Exception as e:
@@ -731,15 +759,9 @@ async def create_store_submission(
logger.warning(
f"Agent not found for user {user_id}: {agent_id} v{agent_version}"
)
# Provide more user-friendly error message when agent_id is empty
if not agent_id or agent_id.strip() == "":
raise store_exceptions.AgentNotFoundError(
"No agent selected. Please select an agent before submitting to the store."
)
else:
raise store_exceptions.AgentNotFoundError(
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
)
raise store_exceptions.AgentNotFoundError(
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
)
# Check if listing already exists for this agent
existing_listing = await prisma.models.StoreListing.prisma().find_first(
@@ -811,7 +833,6 @@ async def create_store_submission(
logger.debug(f"Created store listing for agent {agent_id}")
# Return submission details
return store_model.StoreSubmission(
listing_id=listing.id,
agent_id=agent_id,
agent_version=agent_version,
name=name,
@@ -923,56 +944,81 @@ async def edit_store_submission(
# Currently we are not allowing user to update the agent associated with a submission
# If we allow it in future, then we need a check here to verify the agent belongs to this user.
# Only allow editing of PENDING submissions
if current_version.submissionStatus != prisma.enums.SubmissionStatus.PENDING:
# Check if we can edit this submission
if current_version.submissionStatus == prisma.enums.SubmissionStatus.REJECTED:
raise store_exceptions.InvalidOperationError(
f"Cannot edit a {current_version.submissionStatus.value.lower()} submission. Only pending submissions can be edited."
"Cannot edit a rejected submission"
)
# For APPROVED submissions, we need to create a new version
if current_version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
# Create a new version for the existing listing
return await create_store_version(
user_id=user_id,
agent_id=current_version.agentGraphId,
agent_version=current_version.agentGraphVersion,
store_listing_id=current_version.storeListingId,
name=name,
video_url=video_url,
agent_output_demo_url=agent_output_demo_url,
image_urls=image_urls,
description=description,
sub_heading=sub_heading,
categories=categories,
changes_summary=changes_summary,
recommended_schedule_cron=recommended_schedule_cron,
instructions=instructions,
)
# For PENDING submissions, we can update the existing version
# Update the existing version
updated_version = await prisma.models.StoreListingVersion.prisma().update(
where={"id": store_listing_version_id},
data=prisma.types.StoreListingVersionUpdateInput(
elif current_version.submissionStatus == prisma.enums.SubmissionStatus.PENDING:
# Update the existing version
updated_version = await prisma.models.StoreListingVersion.prisma().update(
where={"id": store_listing_version_id},
data=prisma.types.StoreListingVersionUpdateInput(
name=name,
videoUrl=video_url,
agentOutputDemoUrl=agent_output_demo_url,
imageUrls=image_urls,
description=description,
categories=categories,
subHeading=sub_heading,
changesSummary=changes_summary,
recommendedScheduleCron=recommended_schedule_cron,
instructions=instructions,
),
)
logger.debug(
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
)
if not updated_version:
raise DatabaseError("Failed to update store listing version")
return store_model.StoreSubmission(
agent_id=current_version.agentGraphId,
agent_version=current_version.agentGraphVersion,
name=name,
videoUrl=video_url,
agentOutputDemoUrl=agent_output_demo_url,
imageUrls=image_urls,
sub_heading=sub_heading,
slug=current_version.StoreListing.slug,
description=description,
categories=categories,
subHeading=sub_heading,
changesSummary=changes_summary,
recommendedScheduleCron=recommended_schedule_cron,
instructions=instructions,
),
)
image_urls=image_urls,
date_submitted=updated_version.submittedAt or updated_version.createdAt,
status=updated_version.submissionStatus,
runs=0,
rating=0.0,
store_listing_version_id=updated_version.id,
changes_summary=changes_summary,
video_url=video_url,
categories=categories,
version=updated_version.version,
)
logger.debug(
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
)
if not updated_version:
raise DatabaseError("Failed to update store listing version")
return store_model.StoreSubmission(
listing_id=current_version.StoreListing.id,
agent_id=current_version.agentGraphId,
agent_version=current_version.agentGraphVersion,
name=name,
sub_heading=sub_heading,
slug=current_version.StoreListing.slug,
description=description,
instructions=instructions,
image_urls=image_urls,
date_submitted=updated_version.submittedAt or updated_version.createdAt,
status=updated_version.submissionStatus,
runs=0,
rating=0.0,
store_listing_version_id=updated_version.id,
changes_summary=changes_summary,
video_url=video_url,
categories=categories,
version=updated_version.version,
)
else:
raise store_exceptions.InvalidOperationError(
f"Cannot edit submission with status: {current_version.submissionStatus}"
)
except (
store_exceptions.SubmissionNotFoundError,
@@ -1051,78 +1097,38 @@ async def create_store_version(
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
)
# Check if there's already a PENDING submission for this agent (any version)
existing_pending_submission = (
await prisma.models.StoreListingVersion.prisma().find_first(
where=prisma.types.StoreListingVersionWhereInput(
storeListingId=store_listing_id,
agentGraphId=agent_id,
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
isDeleted=False,
)
# Get the latest version number
latest_version = listing.Versions[0] if listing.Versions else None
next_version = (latest_version.version + 1) if latest_version else 1
# Create a new version for the existing listing
new_version = await prisma.models.StoreListingVersion.prisma().create(
data=prisma.types.StoreListingVersionCreateInput(
version=next_version,
agentGraphId=agent_id,
agentGraphVersion=agent_version,
name=name,
videoUrl=video_url,
agentOutputDemoUrl=agent_output_demo_url,
imageUrls=image_urls,
description=description,
instructions=instructions,
categories=categories,
subHeading=sub_heading,
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
submittedAt=datetime.now(),
changesSummary=changes_summary,
recommendedScheduleCron=recommended_schedule_cron,
storeListingId=store_listing_id,
)
)
# Handle existing pending submission and create new one atomically
async with transaction() as tx:
# Get the latest version number first
latest_listing = await prisma.models.StoreListing.prisma(tx).find_first(
where=prisma.types.StoreListingWhereInput(
id=store_listing_id, owningUserId=user_id
),
include={"Versions": {"order_by": {"version": "desc"}, "take": 1}},
)
if not latest_listing:
raise store_exceptions.ListingNotFoundError(
f"Store listing not found. User ID: {user_id}, Listing ID: {store_listing_id}"
)
latest_version = (
latest_listing.Versions[0] if latest_listing.Versions else None
)
next_version = (latest_version.version + 1) if latest_version else 1
# If there's an existing pending submission, delete it atomically before creating new one
if existing_pending_submission:
logger.info(
f"Found existing PENDING submission for agent {agent_id} (was v{existing_pending_submission.agentGraphVersion}, now v{agent_version}), replacing existing submission instead of creating duplicate"
)
await prisma.models.StoreListingVersion.prisma(tx).delete(
where={"id": existing_pending_submission.id}
)
logger.debug(
f"Deleted existing pending submission {existing_pending_submission.id}"
)
# Create a new version for the existing listing
new_version = await prisma.models.StoreListingVersion.prisma(tx).create(
data=prisma.types.StoreListingVersionCreateInput(
version=next_version,
agentGraphId=agent_id,
agentGraphVersion=agent_version,
name=name,
videoUrl=video_url,
agentOutputDemoUrl=agent_output_demo_url,
imageUrls=image_urls,
description=description,
instructions=instructions,
categories=categories,
subHeading=sub_heading,
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
submittedAt=datetime.now(),
changesSummary=changes_summary,
recommendedScheduleCron=recommended_schedule_cron,
storeListingId=store_listing_id,
)
)
logger.debug(
f"Created new version for listing {store_listing_id} of agent {agent_id}"
)
# Return submission details
return store_model.StoreSubmission(
listing_id=listing.id,
agent_id=agent_id,
agent_version=agent_version,
name=name,
@@ -1535,7 +1541,7 @@ async def review_store_submission(
)
# Update the AgentGraph with store listing data
await prisma.models.AgentGraph.prisma(tx).update(
await prisma.models.AgentGraph.prisma().update(
where={
"graphVersionId": {
"id": store_listing_version.agentGraphId,
@@ -1550,23 +1556,6 @@ async def review_store_submission(
},
)
# Generate embedding for approved listing (blocking - admin operation)
# Inside transaction: if embedding fails, entire transaction rolls back
embedding_success = await ensure_embedding(
version_id=store_listing_version_id,
name=store_listing_version.name,
description=store_listing_version.description,
sub_heading=store_listing_version.subHeading,
categories=store_listing_version.categories or [],
tx=tx,
)
if not embedding_success:
raise ValueError(
f"Failed to generate embedding for listing {store_listing_version_id}. "
"This is likely due to OpenAI API being unavailable. "
"Please try again later or contact support if the issue persists."
)
await prisma.models.StoreListing.prisma(tx).update(
where={"id": store_listing_version.StoreListing.id},
data={
@@ -1719,12 +1708,15 @@ async def review_store_submission(
# Convert to Pydantic model for consistency
return store_model.StoreSubmission(
listing_id=(submission.StoreListing.id if submission.StoreListing else ""),
agent_id=submission.agentGraphId,
agent_version=submission.agentGraphVersion,
name=submission.name,
sub_heading=submission.subHeading,
slug=(submission.StoreListing.slug if submission.StoreListing else ""),
slug=(
submission.StoreListing.slug
if hasattr(submission, "storeListing") and submission.StoreListing
else ""
),
description=submission.description,
instructions=submission.instructions,
image_urls=submission.imageUrls or [],
@@ -1826,7 +1818,9 @@ async def get_admin_listings_with_versions(
where = prisma.types.StoreListingWhereInput(**where_dict)
include = prisma.types.StoreListingInclude(
Versions=prisma.types.FindManyStoreListingVersionArgsFromStoreListing(
order_by={"version": "desc"}
order_by=prisma.types._StoreListingVersion_version_OrderByInput(
version="desc"
)
),
OwningUser=True,
)
@@ -1851,7 +1845,6 @@ async def get_admin_listings_with_versions(
# If we have versions, turn them into StoreSubmission models
for version in listing.Versions or []:
version_model = store_model.StoreSubmission(
listing_id=listing.id,
agent_id=version.agentGraphId,
agent_version=version.agentGraphVersion,
name=version.name,

View File

@@ -1,568 +0,0 @@
"""
Unified Content Embeddings Service
Handles generation and storage of OpenAI embeddings for all content types
(store listings, blocks, documentation, library agents) to enable semantic/hybrid search.
"""
import asyncio
import logging
import time
from typing import Any
import prisma
from prisma.enums import ContentType
from tiktoken import encoding_for_model
from backend.data.db import execute_raw_with_schema, query_raw_with_schema
from backend.util.clients import get_openai_client
from backend.util.json import dumps
logger = logging.getLogger(__name__)
# OpenAI embedding model configuration
EMBEDDING_MODEL = "text-embedding-3-small"
# OpenAI embedding token limit (8,191 with 1 token buffer for safety)
EMBEDDING_MAX_TOKENS = 8191
def build_searchable_text(
name: str,
description: str,
sub_heading: str,
categories: list[str],
) -> str:
"""
Build searchable text from listing version fields.
Combines relevant fields into a single string for embedding.
"""
parts = []
# Name is important - include it
if name:
parts.append(name)
# Sub-heading provides context
if sub_heading:
parts.append(sub_heading)
# Description is the main content
if description:
parts.append(description)
# Categories help with semantic matching
if categories:
parts.append(" ".join(categories))
return " ".join(parts)
async def generate_embedding(text: str) -> list[float] | None:
"""
Generate embedding for text using OpenAI API.
Returns None if embedding generation fails.
Fail-fast: no retries to maintain consistency with approval flow.
"""
try:
client = get_openai_client()
if not client:
logger.error("openai_internal_api_key not set, cannot generate embedding")
return None
# Truncate text to token limit using tiktoken
# Character-based truncation is insufficient because token ratios vary by content type
enc = encoding_for_model(EMBEDDING_MODEL)
tokens = enc.encode(text)
if len(tokens) > EMBEDDING_MAX_TOKENS:
tokens = tokens[:EMBEDDING_MAX_TOKENS]
truncated_text = enc.decode(tokens)
logger.info(
f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
)
else:
truncated_text = text
start_time = time.time()
response = await client.embeddings.create(
model=EMBEDDING_MODEL,
input=truncated_text,
)
latency_ms = (time.time() - start_time) * 1000
embedding = response.data[0].embedding
logger.info(
f"Generated embedding: {len(embedding)} dims, "
f"{len(tokens)} tokens, {latency_ms:.0f}ms"
)
return embedding
except Exception as e:
logger.error(f"Failed to generate embedding: {e}")
return None
async def store_embedding(
version_id: str,
embedding: list[float],
tx: prisma.Prisma | None = None,
) -> bool:
"""
Store embedding in the database.
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
DEPRECATED: Use ensure_embedding() instead (includes searchable_text).
"""
return await store_content_embedding(
content_type=ContentType.STORE_AGENT,
content_id=version_id,
embedding=embedding,
searchable_text="", # Empty for backward compat; ensure_embedding() populates this
metadata=None,
user_id=None, # Store agents are public
tx=tx,
)
async def store_content_embedding(
content_type: ContentType,
content_id: str,
embedding: list[float],
searchable_text: str,
metadata: dict | None = None,
user_id: str | None = None,
tx: prisma.Prisma | None = None,
) -> bool:
"""
Store embedding in the unified content embeddings table.
New function for unified content embedding storage.
Uses raw SQL since Prisma doesn't natively support pgvector.
"""
try:
client = tx if tx else prisma.get_client()
# Convert embedding to PostgreSQL vector format
embedding_str = embedding_to_vector_string(embedding)
metadata_json = dumps(metadata or {})
# Upsert the embedding
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
await execute_raw_with_schema(
"""
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
)
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
ON CONFLICT ("contentType", "contentId", "userId")
DO UPDATE SET
"embedding" = $4::vector,
"searchableText" = $5,
"metadata" = $6::jsonb,
"updatedAt" = NOW()
WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
""",
content_type,
content_id,
user_id,
embedding_str,
searchable_text,
metadata_json,
client=client,
set_public_search_path=True,
)
logger.info(f"Stored embedding for {content_type}:{content_id}")
return True
except Exception as e:
logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
return False
async def get_embedding(version_id: str) -> dict[str, Any] | None:
"""
Retrieve embedding record for a listing version.
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
Returns dict with storeListingVersionId, embedding, timestamps or None if not found.
"""
result = await get_content_embedding(
ContentType.STORE_AGENT, version_id, user_id=None
)
if result:
# Transform to old format for backward compatibility
return {
"storeListingVersionId": result["contentId"],
"embedding": result["embedding"],
"createdAt": result["createdAt"],
"updatedAt": result["updatedAt"],
}
return None
async def get_content_embedding(
content_type: ContentType, content_id: str, user_id: str | None = None
) -> dict[str, Any] | None:
"""
Retrieve embedding record for any content type.
New function for unified content embedding retrieval.
Returns dict with contentType, contentId, embedding, timestamps or None if not found.
"""
try:
result = await query_raw_with_schema(
"""
SELECT
"contentType",
"contentId",
"userId",
"embedding"::text as "embedding",
"searchableText",
"metadata",
"createdAt",
"updatedAt"
FROM {schema_prefix}"UnifiedContentEmbedding"
WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
""",
content_type,
content_id,
user_id,
set_public_search_path=True,
)
if result and len(result) > 0:
return result[0]
return None
except Exception as e:
logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
return None
async def ensure_embedding(
version_id: str,
name: str,
description: str,
sub_heading: str,
categories: list[str],
force: bool = False,
tx: prisma.Prisma | None = None,
) -> bool:
"""
Ensure an embedding exists for the listing version.
Creates embedding if missing. Use force=True to regenerate.
Backward-compatible wrapper for store listings.
Args:
version_id: The StoreListingVersion ID
name: Agent name
description: Agent description
sub_heading: Agent sub-heading
categories: Agent categories
force: Force regeneration even if embedding exists
tx: Optional transaction client
Returns:
True if embedding exists/was created, False on failure
"""
try:
# Check if embedding already exists
if not force:
existing = await get_embedding(version_id)
if existing and existing.get("embedding"):
logger.debug(f"Embedding for version {version_id} already exists")
return True
# Build searchable text for embedding
searchable_text = build_searchable_text(
name, description, sub_heading, categories
)
# Generate new embedding
embedding = await generate_embedding(searchable_text)
if embedding is None:
logger.warning(f"Could not generate embedding for version {version_id}")
return False
# Store the embedding with metadata using new function
metadata = {
"name": name,
"subHeading": sub_heading,
"categories": categories,
}
return await store_content_embedding(
content_type=ContentType.STORE_AGENT,
content_id=version_id,
embedding=embedding,
searchable_text=searchable_text,
metadata=metadata,
user_id=None, # Store agents are public
tx=tx,
)
except Exception as e:
logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
return False
async def delete_embedding(version_id: str) -> bool:
"""
Delete embedding for a listing version.
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
Note: This is usually handled automatically by CASCADE delete,
but provided for manual cleanup if needed.
"""
return await delete_content_embedding(ContentType.STORE_AGENT, version_id)
async def delete_content_embedding(
content_type: ContentType, content_id: str, user_id: str | None = None
) -> bool:
"""
Delete embedding for any content type.
New function for unified content embedding deletion.
Note: This is usually handled automatically by CASCADE delete,
but provided for manual cleanup if needed.
Args:
content_type: The type of content (STORE_AGENT, LIBRARY_AGENT, etc.)
content_id: The unique identifier for the content
user_id: Optional user ID. For public content (STORE_AGENT, BLOCK), pass None.
For user-scoped content (LIBRARY_AGENT), pass the user's ID to avoid
deleting embeddings belonging to other users.
Returns:
True if deletion succeeded, False otherwise
"""
try:
client = prisma.get_client()
await execute_raw_with_schema(
"""
DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
WHERE "contentType" = $1::{schema_prefix}"ContentType"
AND "contentId" = $2
AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
""",
content_type,
content_id,
user_id,
client=client,
)
user_str = f" (user: {user_id})" if user_id else ""
logger.info(f"Deleted embedding for {content_type}:{content_id}{user_str}")
return True
except Exception as e:
logger.error(f"Failed to delete embedding for {content_type}:{content_id}: {e}")
return False
async def get_embedding_stats() -> dict[str, Any]:
"""
Get statistics about embedding coverage.
Returns counts of:
- Total approved listing versions
- Versions with embeddings
- Versions without embeddings
"""
try:
# Count approved versions
approved_result = await query_raw_with_schema(
"""
SELECT COUNT(*) as count
FROM {schema_prefix}"StoreListingVersion"
WHERE "submissionStatus" = 'APPROVED'
AND "isDeleted" = false
"""
)
total_approved = approved_result[0]["count"] if approved_result else 0
# Count versions with embeddings
embedded_result = await query_raw_with_schema(
"""
SELECT COUNT(*) as count
FROM {schema_prefix}"StoreListingVersion" slv
JOIN {schema_prefix}"UnifiedContentEmbedding" uce ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
WHERE slv."submissionStatus" = 'APPROVED'
AND slv."isDeleted" = false
"""
)
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
return {
"total_approved": total_approved,
"with_embeddings": with_embeddings,
"without_embeddings": total_approved - with_embeddings,
"coverage_percent": (
round(with_embeddings / total_approved * 100, 1)
if total_approved > 0
else 0
),
}
except Exception as e:
logger.error(f"Failed to get embedding stats: {e}")
return {
"total_approved": 0,
"with_embeddings": 0,
"without_embeddings": 0,
"coverage_percent": 0,
"error": str(e),
}
async def backfill_missing_embeddings(batch_size: int = 10) -> dict[str, Any]:
"""
Generate embeddings for approved listings that don't have them.
Args:
batch_size: Number of embeddings to generate in one call
Returns:
Dict with success/failure counts
"""
try:
# Find approved versions without embeddings
missing = await query_raw_with_schema(
"""
SELECT
slv.id,
slv.name,
slv.description,
slv."subHeading",
slv.categories
FROM {schema_prefix}"StoreListingVersion" slv
LEFT JOIN {schema_prefix}"UnifiedContentEmbedding" uce
ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
WHERE slv."submissionStatus" = 'APPROVED'
AND slv."isDeleted" = false
AND uce."contentId" IS NULL
LIMIT $1
""",
batch_size,
)
if not missing:
return {
"processed": 0,
"success": 0,
"failed": 0,
"message": "No missing embeddings",
}
# Process embeddings concurrently for better performance
embedding_tasks = [
ensure_embedding(
version_id=row["id"],
name=row["name"],
description=row["description"],
sub_heading=row["subHeading"],
categories=row["categories"] or [],
)
for row in missing
]
results = await asyncio.gather(*embedding_tasks, return_exceptions=True)
success = sum(1 for result in results if result is True)
failed = len(results) - success
return {
"processed": len(missing),
"success": success,
"failed": failed,
"message": f"Backfilled {success} embeddings, {failed} failed",
}
except Exception as e:
logger.error(f"Failed to backfill embeddings: {e}")
return {
"processed": 0,
"success": 0,
"failed": 0,
"error": str(e),
}
async def embed_query(query: str) -> list[float] | None:
"""
Generate embedding for a search query.
Same as generate_embedding but with clearer intent.
"""
return await generate_embedding(query)
def embedding_to_vector_string(embedding: list[float]) -> str:
"""Convert embedding list to PostgreSQL vector string format."""
return "[" + ",".join(str(x) for x in embedding) + "]"
async def ensure_content_embedding(
content_type: ContentType,
content_id: str,
searchable_text: str,
metadata: dict | None = None,
user_id: str | None = None,
force: bool = False,
tx: prisma.Prisma | None = None,
) -> bool:
"""
Ensure an embedding exists for any content type.
Generic function for creating embeddings for store agents, blocks, docs, etc.
Args:
content_type: ContentType enum value (STORE_AGENT, BLOCK, etc.)
content_id: Unique identifier for the content
searchable_text: Combined text for embedding generation
metadata: Optional metadata to store with embedding
force: Force regeneration even if embedding exists
tx: Optional transaction client
Returns:
True if embedding exists/was created, False on failure
"""
try:
# Check if embedding already exists
if not force:
existing = await get_content_embedding(content_type, content_id, user_id)
if existing and existing.get("embedding"):
logger.debug(
f"Embedding for {content_type}:{content_id} already exists"
)
return True
# Generate new embedding
embedding = await generate_embedding(searchable_text)
if embedding is None:
logger.warning(
f"Could not generate embedding for {content_type}:{content_id}"
)
return False
# Store the embedding
return await store_content_embedding(
content_type=content_type,
content_id=content_id,
embedding=embedding,
searchable_text=searchable_text,
metadata=metadata or {},
user_id=user_id,
tx=tx,
)
except Exception as e:
logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
return False

View File

@@ -1,329 +0,0 @@
"""
Integration tests for embeddings with schema handling.
These tests verify that embeddings operations work correctly across different database schemas.
"""
from unittest.mock import AsyncMock, patch
import pytest
from prisma.enums import ContentType
from backend.api.features.store import embeddings
# Schema prefix tests removed - functionality moved to db.raw_with_schema() helper
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_store_content_embedding_with_schema():
"""Test storing embeddings with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
mock_get_client.return_value = mock_client
result = await embeddings.store_content_embedding(
content_type=ContentType.STORE_AGENT,
content_id="test-id",
embedding=[0.1] * 1536,
searchable_text="test text",
metadata={"test": "data"},
user_id=None,
)
# Verify the query was called
assert mock_client.execute_raw.called
# Get the SQL query that was executed
call_args = mock_client.execute_raw.call_args
sql_query = call_args[0][0]
# Verify schema prefix is in the query
assert '"platform"."UnifiedContentEmbedding"' in sql_query
# Verify result
assert result is True
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_get_content_embedding_with_schema():
"""Test retrieving embeddings with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
mock_client.query_raw.return_value = [
{
"contentType": "STORE_AGENT",
"contentId": "test-id",
"userId": None,
"embedding": "[0.1, 0.2]",
"searchableText": "test",
"metadata": {},
"createdAt": "2024-01-01",
"updatedAt": "2024-01-01",
}
]
mock_get_client.return_value = mock_client
result = await embeddings.get_content_embedding(
ContentType.STORE_AGENT,
"test-id",
user_id=None,
)
# Verify the query was called
assert mock_client.query_raw.called
# Get the SQL query that was executed
call_args = mock_client.query_raw.call_args
sql_query = call_args[0][0]
# Verify schema prefix is in the query
assert '"platform"."UnifiedContentEmbedding"' in sql_query
# Verify result
assert result is not None
assert result["contentId"] == "test-id"
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_delete_content_embedding_with_schema():
"""Test deleting embeddings with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
mock_get_client.return_value = mock_client
result = await embeddings.delete_content_embedding(
ContentType.STORE_AGENT,
"test-id",
)
# Verify the query was called
assert mock_client.execute_raw.called
# Get the SQL query that was executed
call_args = mock_client.execute_raw.call_args
sql_query = call_args[0][0]
# Verify schema prefix is in the query
assert '"platform"."UnifiedContentEmbedding"' in sql_query
# Verify result
assert result is True
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_get_embedding_stats_with_schema():
"""Test embedding statistics with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
# Mock both query results
mock_client.query_raw.side_effect = [
[{"count": 100}], # total_approved
[{"count": 80}], # with_embeddings
]
mock_get_client.return_value = mock_client
result = await embeddings.get_embedding_stats()
# Verify both queries were called
assert mock_client.query_raw.call_count == 2
# Get both SQL queries
first_call = mock_client.query_raw.call_args_list[0]
second_call = mock_client.query_raw.call_args_list[1]
first_sql = first_call[0][0]
second_sql = second_call[0][0]
# Verify schema prefix in both queries
assert '"platform"."StoreListingVersion"' in first_sql
assert '"platform"."StoreListingVersion"' in second_sql
assert '"platform"."UnifiedContentEmbedding"' in second_sql
# Verify results
assert result["total_approved"] == 100
assert result["with_embeddings"] == 80
assert result["without_embeddings"] == 20
assert result["coverage_percent"] == 80.0
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backfill_missing_embeddings_with_schema():
"""Test backfilling embeddings with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
# Mock missing embeddings query
mock_client.query_raw.return_value = [
{
"id": "version-1",
"name": "Test Agent",
"description": "Test description",
"subHeading": "Test heading",
"categories": ["test"],
}
]
mock_get_client.return_value = mock_client
with patch(
"backend.api.features.store.embeddings.ensure_embedding"
) as mock_ensure:
mock_ensure.return_value = True
result = await embeddings.backfill_missing_embeddings(batch_size=10)
# Verify the query was called
assert mock_client.query_raw.called
# Get the SQL query
call_args = mock_client.query_raw.call_args
sql_query = call_args[0][0]
# Verify schema prefix in query
assert '"platform"."StoreListingVersion"' in sql_query
assert '"platform"."UnifiedContentEmbedding"' in sql_query
# Verify ensure_embedding was called
assert mock_ensure.called
# Verify results
assert result["processed"] == 1
assert result["success"] == 1
assert result["failed"] == 0
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_ensure_content_embedding_with_schema():
"""Test ensuring embeddings exist with proper schema handling."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch(
"backend.api.features.store.embeddings.get_content_embedding"
) as mock_get:
# Simulate no existing embedding
mock_get.return_value = None
with patch(
"backend.api.features.store.embeddings.generate_embedding"
) as mock_generate:
mock_generate.return_value = [0.1] * 1536
with patch(
"backend.api.features.store.embeddings.store_content_embedding"
) as mock_store:
mock_store.return_value = True
result = await embeddings.ensure_content_embedding(
content_type=ContentType.STORE_AGENT,
content_id="test-id",
searchable_text="test text",
metadata={"test": "data"},
user_id=None,
force=False,
)
# Verify the flow
assert mock_get.called
assert mock_generate.called
assert mock_store.called
assert result is True
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backward_compatibility_store_embedding():
"""Test backward compatibility wrapper for store_embedding."""
with patch(
"backend.api.features.store.embeddings.store_content_embedding"
) as mock_store:
mock_store.return_value = True
result = await embeddings.store_embedding(
version_id="test-version-id",
embedding=[0.1] * 1536,
tx=None,
)
# Verify it calls the new function with correct parameters
assert mock_store.called
call_args = mock_store.call_args
assert call_args[1]["content_type"] == ContentType.STORE_AGENT
assert call_args[1]["content_id"] == "test-version-id"
assert call_args[1]["user_id"] is None
assert result is True
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backward_compatibility_get_embedding():
"""Test backward compatibility wrapper for get_embedding."""
with patch(
"backend.api.features.store.embeddings.get_content_embedding"
) as mock_get:
mock_get.return_value = {
"contentType": "STORE_AGENT",
"contentId": "test-version-id",
"embedding": "[0.1, 0.2]",
"createdAt": "2024-01-01",
"updatedAt": "2024-01-01",
}
result = await embeddings.get_embedding("test-version-id")
# Verify it calls the new function
assert mock_get.called
# Verify it transforms to old format
assert result is not None
assert result["storeListingVersionId"] == "test-version-id"
assert "embedding" in result
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_schema_handling_error_cases():
"""Test error handling in schema-aware operations."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch("prisma.get_client") as mock_get_client:
mock_client = AsyncMock()
mock_client.execute_raw.side_effect = Exception("Database error")
mock_get_client.return_value = mock_client
result = await embeddings.store_content_embedding(
content_type=ContentType.STORE_AGENT,
content_id="test-id",
embedding=[0.1] * 1536,
searchable_text="test",
metadata=None,
user_id=None,
)
# Should return False on error, not raise
assert result is False
if __name__ == "__main__":
pytest.main([__file__, "-v", "-s"])

View File

@@ -1,387 +0,0 @@
from unittest.mock import AsyncMock, MagicMock, patch
import prisma
import pytest
from prisma import Prisma
from prisma.enums import ContentType
from backend.api.features.store import embeddings
@pytest.fixture(autouse=True)
async def setup_prisma():
"""Setup Prisma client for tests."""
try:
Prisma()
except prisma.errors.ClientAlreadyRegisteredError:
pass
yield
@pytest.mark.asyncio(loop_scope="session")
async def test_build_searchable_text():
"""Test searchable text building from listing fields."""
result = embeddings.build_searchable_text(
name="AI Assistant",
description="A helpful AI assistant for productivity",
sub_heading="Boost your productivity",
categories=["AI", "Productivity"],
)
expected = "AI Assistant Boost your productivity A helpful AI assistant for productivity AI Productivity"
assert result == expected
@pytest.mark.asyncio(loop_scope="session")
async def test_build_searchable_text_empty_fields():
"""Test searchable text building with empty fields."""
result = embeddings.build_searchable_text(
name="", description="Test description", sub_heading="", categories=[]
)
assert result == "Test description"
@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_success():
"""Test successful embedding generation."""
# Mock OpenAI response
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.data = [MagicMock()]
mock_response.data[0].embedding = [0.1, 0.2, 0.3] * 512 # 1536 dimensions
# Use AsyncMock for async embeddings.create method
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
# Patch at the point of use in embeddings.py
with patch(
"backend.api.features.store.embeddings.get_openai_client"
) as mock_get_client:
mock_get_client.return_value = mock_client
result = await embeddings.generate_embedding("test text")
assert result is not None
assert len(result) == 1536
assert result[0] == 0.1
mock_client.embeddings.create.assert_called_once_with(
model="text-embedding-3-small", input="test text"
)
@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_no_api_key():
"""Test embedding generation without API key."""
# Patch at the point of use in embeddings.py
with patch(
"backend.api.features.store.embeddings.get_openai_client"
) as mock_get_client:
mock_get_client.return_value = None
result = await embeddings.generate_embedding("test text")
assert result is None
@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_api_error():
"""Test embedding generation with API error."""
mock_client = MagicMock()
mock_client.embeddings.create = AsyncMock(side_effect=Exception("API Error"))
# Patch at the point of use in embeddings.py
with patch(
"backend.api.features.store.embeddings.get_openai_client"
) as mock_get_client:
mock_get_client.return_value = mock_client
result = await embeddings.generate_embedding("test text")
assert result is None
@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_text_truncation():
"""Test that long text is properly truncated using tiktoken."""
from tiktoken import encoding_for_model
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.data = [MagicMock()]
mock_response.data[0].embedding = [0.1] * 1536
# Use AsyncMock for async embeddings.create method
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
# Patch at the point of use in embeddings.py
with patch(
"backend.api.features.store.embeddings.get_openai_client"
) as mock_get_client:
mock_get_client.return_value = mock_client
# Create text that will exceed 8191 tokens
# Use varied characters to ensure token-heavy text: each word is ~1 token
words = [f"word{i}" for i in range(10000)]
long_text = " ".join(words) # ~10000 tokens
await embeddings.generate_embedding(long_text)
# Verify text was truncated to 8191 tokens
call_args = mock_client.embeddings.create.call_args
truncated_text = call_args.kwargs["input"]
# Count actual tokens in truncated text
enc = encoding_for_model("text-embedding-3-small")
actual_tokens = len(enc.encode(truncated_text))
# Should be at or just under 8191 tokens
assert actual_tokens <= 8191
# Should be close to the limit (not over-truncated)
assert actual_tokens >= 8100
@pytest.mark.asyncio(loop_scope="session")
async def test_store_embedding_success(mocker):
"""Test successful embedding storage."""
mock_client = mocker.AsyncMock()
mock_client.execute_raw = mocker.AsyncMock()
embedding = [0.1, 0.2, 0.3]
result = await embeddings.store_embedding(
version_id="test-version-id", embedding=embedding, tx=mock_client
)
assert result is True
# execute_raw is called twice: once for SET search_path, once for INSERT
assert mock_client.execute_raw.call_count == 2
# First call: SET search_path
first_call_args = mock_client.execute_raw.call_args_list[0][0]
assert "SET search_path" in first_call_args[0]
# Second call: INSERT query with the actual data
second_call_args = mock_client.execute_raw.call_args_list[1][0]
assert "test-version-id" in second_call_args
assert "[0.1,0.2,0.3]" in second_call_args
assert None in second_call_args # userId should be None for store agents
@pytest.mark.asyncio(loop_scope="session")
async def test_store_embedding_database_error(mocker):
"""Test embedding storage with database error."""
mock_client = mocker.AsyncMock()
mock_client.execute_raw.side_effect = Exception("Database error")
embedding = [0.1, 0.2, 0.3]
result = await embeddings.store_embedding(
version_id="test-version-id", embedding=embedding, tx=mock_client
)
assert result is False
@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_success():
"""Test successful embedding retrieval."""
mock_result = [
{
"contentType": "STORE_AGENT",
"contentId": "test-version-id",
"userId": None,
"embedding": "[0.1,0.2,0.3]",
"searchableText": "Test text",
"metadata": {},
"createdAt": "2024-01-01T00:00:00Z",
"updatedAt": "2024-01-01T00:00:00Z",
}
]
with patch(
"backend.api.features.store.embeddings.query_raw_with_schema",
return_value=mock_result,
):
result = await embeddings.get_embedding("test-version-id")
assert result is not None
assert result["storeListingVersionId"] == "test-version-id"
assert result["embedding"] == "[0.1,0.2,0.3]"
@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_not_found():
"""Test embedding retrieval when not found."""
with patch(
"backend.api.features.store.embeddings.query_raw_with_schema",
return_value=[],
):
result = await embeddings.get_embedding("test-version-id")
assert result is None
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.store_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_already_exists(mock_get, mock_store, mock_generate):
"""Test ensure_embedding when embedding already exists."""
mock_get.return_value = {"embedding": "[0.1,0.2,0.3]"}
result = await embeddings.ensure_embedding(
version_id="test-id",
name="Test",
description="Test description",
sub_heading="Test heading",
categories=["test"],
)
assert result is True
mock_generate.assert_not_called()
mock_store.assert_not_called()
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.store_content_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
"""Test ensure_embedding creating new embedding."""
mock_get.return_value = None
mock_generate.return_value = [0.1, 0.2, 0.3]
mock_store.return_value = True
result = await embeddings.ensure_embedding(
version_id="test-id",
name="Test",
description="Test description",
sub_heading="Test heading",
categories=["test"],
)
assert result is True
mock_generate.assert_called_once_with("Test Test heading Test description test")
mock_store.assert_called_once_with(
content_type=ContentType.STORE_AGENT,
content_id="test-id",
embedding=[0.1, 0.2, 0.3],
searchable_text="Test Test heading Test description test",
metadata={"name": "Test", "subHeading": "Test heading", "categories": ["test"]},
user_id=None,
tx=None,
)
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
"""Test ensure_embedding when generation fails."""
mock_get.return_value = None
mock_generate.return_value = None
result = await embeddings.ensure_embedding(
version_id="test-id",
name="Test",
description="Test description",
sub_heading="Test heading",
categories=["test"],
)
assert result is False
@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_stats():
"""Test embedding statistics retrieval."""
# Mock approved count query and embedded count query
mock_approved_result = [{"count": 100}]
mock_embedded_result = [{"count": 75}]
with patch(
"backend.api.features.store.embeddings.query_raw_with_schema",
side_effect=[mock_approved_result, mock_embedded_result],
):
result = await embeddings.get_embedding_stats()
assert result["total_approved"] == 100
assert result["with_embeddings"] == 75
assert result["without_embeddings"] == 25
assert result["coverage_percent"] == 75.0
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.ensure_embedding")
async def test_backfill_missing_embeddings_success(mock_ensure):
"""Test backfill with successful embedding generation."""
# Mock missing embeddings query
mock_missing = [
{
"id": "version-1",
"name": "Agent 1",
"description": "Description 1",
"subHeading": "Heading 1",
"categories": ["AI"],
},
{
"id": "version-2",
"name": "Agent 2",
"description": "Description 2",
"subHeading": "Heading 2",
"categories": ["Productivity"],
},
]
# Mock ensure_embedding to succeed for first, fail for second
mock_ensure.side_effect = [True, False]
with patch(
"backend.api.features.store.embeddings.query_raw_with_schema",
return_value=mock_missing,
):
result = await embeddings.backfill_missing_embeddings(batch_size=5)
assert result["processed"] == 2
assert result["success"] == 1
assert result["failed"] == 1
assert mock_ensure.call_count == 2
@pytest.mark.asyncio(loop_scope="session")
async def test_backfill_missing_embeddings_no_missing():
"""Test backfill when no embeddings are missing."""
with patch(
"backend.api.features.store.embeddings.query_raw_with_schema",
return_value=[],
):
result = await embeddings.backfill_missing_embeddings(batch_size=5)
assert result["processed"] == 0
assert result["success"] == 0
assert result["failed"] == 0
assert result["message"] == "No missing embeddings"
@pytest.mark.asyncio(loop_scope="session")
async def test_embedding_to_vector_string():
"""Test embedding to PostgreSQL vector string conversion."""
embedding = [0.1, 0.2, 0.3, -0.4]
result = embeddings.embedding_to_vector_string(embedding)
assert result == "[0.1,0.2,0.3,-0.4]"
@pytest.mark.asyncio(loop_scope="session")
async def test_embed_query():
"""Test embed_query function (alias for generate_embedding)."""
with patch(
"backend.api.features.store.embeddings.generate_embedding"
) as mock_generate:
mock_generate.return_value = [0.1, 0.2, 0.3]
result = await embeddings.embed_query("test query")
assert result == [0.1, 0.2, 0.3]
mock_generate.assert_called_once_with("test query")

View File

@@ -1,393 +0,0 @@
"""
Hybrid Search for Store Agents
Combines semantic (embedding) search with lexical (tsvector) search
for improved relevance in marketplace agent discovery.
"""
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Literal
from backend.api.features.store.embeddings import (
embed_query,
embedding_to_vector_string,
)
from backend.data.db import query_raw_with_schema
logger = logging.getLogger(__name__)
@dataclass
class HybridSearchWeights:
"""Weights for combining search signals."""
semantic: float = 0.30 # Embedding cosine similarity
lexical: float = 0.30 # tsvector ts_rank_cd score
category: float = 0.20 # Category match boost
recency: float = 0.10 # Newer agents ranked higher
popularity: float = 0.10 # Agent usage/runs (PageRank-like)
def __post_init__(self):
"""Validate weights are non-negative and sum to approximately 1.0."""
total = (
self.semantic
+ self.lexical
+ self.category
+ self.recency
+ self.popularity
)
if any(
w < 0
for w in [
self.semantic,
self.lexical,
self.category,
self.recency,
self.popularity,
]
):
raise ValueError("All weights must be non-negative")
if not (0.99 <= total <= 1.01):
raise ValueError(f"Weights must sum to ~1.0, got {total:.3f}")
DEFAULT_WEIGHTS = HybridSearchWeights()
# Minimum relevance score threshold - agents below this are filtered out
# With weights (0.30 semantic + 0.30 lexical + 0.20 category + 0.10 recency + 0.10 popularity):
# - 0.20 means at least ~60% semantic match OR strong lexical match required
# - Ensures only genuinely relevant results are returned
# - Recency/popularity alone (0.10 each) won't pass the threshold
DEFAULT_MIN_SCORE = 0.20
@dataclass
class HybridSearchResult:
"""A single search result with score breakdown."""
slug: str
agent_name: str
agent_image: str
creator_username: str
creator_avatar: str
sub_heading: str
description: str
runs: int
rating: float
categories: list[str]
featured: bool
is_available: bool
updated_at: datetime
# Score breakdown (for debugging/tuning)
combined_score: float
semantic_score: float = 0.0
lexical_score: float = 0.0
category_score: float = 0.0
recency_score: float = 0.0
popularity_score: float = 0.0
async def hybrid_search(
query: str,
featured: bool = False,
creators: list[str] | None = None,
category: str | None = None,
sorted_by: (
Literal["relevance", "rating", "runs", "name", "updated_at"] | None
) = None,
page: int = 1,
page_size: int = 20,
weights: HybridSearchWeights | None = None,
min_score: float | None = None,
) -> tuple[list[dict[str, Any]], int]:
"""
Perform hybrid search combining semantic and lexical signals.
Args:
query: Search query string
featured: Filter for featured agents only
creators: Filter by creator usernames
category: Filter by category
sorted_by: Sort order (relevance uses hybrid scoring)
page: Page number (1-indexed)
page_size: Results per page
weights: Custom weights for search signals
min_score: Minimum relevance score threshold (0-1). Results below
this score are filtered out. Defaults to DEFAULT_MIN_SCORE.
Returns:
Tuple of (results list, total count). Returns empty list if no
results meet the minimum relevance threshold.
"""
# Validate inputs
query = query.strip()
if not query:
return [], 0 # Empty query returns no results
if page < 1:
page = 1
if page_size < 1:
page_size = 1
if page_size > 100: # Cap at reasonable limit to prevent performance issues
page_size = 100
if weights is None:
weights = DEFAULT_WEIGHTS
if min_score is None:
min_score = DEFAULT_MIN_SCORE
offset = (page - 1) * page_size
# Generate query embedding
query_embedding = await embed_query(query)
# Build WHERE clause conditions
where_parts: list[str] = ["sa.is_available = true"]
params: list[Any] = []
param_index = 1
# Add search query for lexical matching
params.append(query)
query_param = f"${param_index}"
param_index += 1
# Add lowercased query for category matching
params.append(query.lower())
query_lower_param = f"${param_index}"
param_index += 1
if featured:
where_parts.append("sa.featured = true")
if creators:
where_parts.append(f"sa.creator_username = ANY(${param_index})")
params.append(creators)
param_index += 1
if category:
where_parts.append(f"${param_index} = ANY(sa.categories)")
params.append(category)
param_index += 1
# Safe: where_parts only contains hardcoded strings with $N parameter placeholders
# No user input is concatenated directly into the SQL string
where_clause = " AND ".join(where_parts)
# Embedding is required for hybrid search - fail fast if unavailable
if query_embedding is None or not query_embedding:
# Log detailed error server-side
logger.error(
"Failed to generate query embedding. "
"Check that openai_internal_api_key is configured and OpenAI API is accessible."
)
# Raise generic error to client
raise ValueError("Search service temporarily unavailable")
# Add embedding parameter
embedding_str = embedding_to_vector_string(query_embedding)
params.append(embedding_str)
embedding_param = f"${param_index}"
param_index += 1
# Add weight parameters for SQL calculation
params.append(weights.semantic)
weight_semantic_param = f"${param_index}"
param_index += 1
params.append(weights.lexical)
weight_lexical_param = f"${param_index}"
param_index += 1
params.append(weights.category)
weight_category_param = f"${param_index}"
param_index += 1
params.append(weights.recency)
weight_recency_param = f"${param_index}"
param_index += 1
params.append(weights.popularity)
weight_popularity_param = f"${param_index}"
param_index += 1
# Add min_score parameter
params.append(min_score)
min_score_param = f"${param_index}"
param_index += 1
# Optimized hybrid search query:
# 1. Direct join to UnifiedContentEmbedding via contentId=storeListingVersionId (no redundant JOINs)
# 2. UNION approach (deduplicates agents matching both branches)
# 3. COUNT(*) OVER() to get total count in single query
# 4. Optimized category matching with EXISTS + unnest
# 5. Pre-calculated max values for lexical and popularity normalization
# 6. Simplified recency calculation with linear decay
# 7. Logarithmic popularity scaling to prevent viral agents from dominating
sql_query = f"""
WITH candidates AS (
-- Lexical matches (uses GIN index on search column)
SELECT sa."storeListingVersionId"
FROM {{schema_prefix}}"StoreAgent" sa
WHERE {where_clause}
AND sa.search @@ plainto_tsquery('english', {query_param})
UNION
-- Semantic matches (uses HNSW index on embedding with KNN)
SELECT "storeListingVersionId"
FROM (
SELECT sa."storeListingVersionId", uce.embedding
FROM {{schema_prefix}}"StoreAgent" sa
INNER JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce
ON sa."storeListingVersionId" = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
WHERE {where_clause}
ORDER BY uce.embedding <=> {embedding_param}::vector
LIMIT 200
) semantic_results
),
search_scores AS (
SELECT
sa.slug,
sa.agent_name,
sa.agent_image,
sa.creator_username,
sa.creator_avatar,
sa.sub_heading,
sa.description,
sa.runs,
sa.rating,
sa.categories,
sa.featured,
sa.is_available,
sa.updated_at,
-- Semantic score: cosine similarity (1 - distance)
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
-- Lexical score: ts_rank_cd (will be normalized later)
COALESCE(ts_rank_cd(sa.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
-- Category match: optimized with unnest for better performance
CASE
WHEN EXISTS (
SELECT 1 FROM unnest(sa.categories) cat
WHERE LOWER(cat) LIKE '%' || {query_lower_param} || '%'
)
THEN 1.0
ELSE 0.0
END as category_score,
-- Recency score: linear decay over 90 days (simpler than exponential)
GREATEST(0, 1 - EXTRACT(EPOCH FROM (NOW() - sa.updated_at)) / (90 * 24 * 3600)) as recency_score,
-- Popularity raw: agent runs count (will be normalized with log scaling)
sa.runs as popularity_raw
FROM candidates c
INNER JOIN {{schema_prefix}}"StoreAgent" sa
ON c."storeListingVersionId" = sa."storeListingVersionId"
LEFT JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce
ON sa."storeListingVersionId" = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
),
max_lexical AS (
SELECT MAX(lexical_raw) as max_val FROM search_scores
),
max_popularity AS (
SELECT MAX(popularity_raw) as max_val FROM search_scores
),
normalized AS (
SELECT
ss.*,
-- Normalize lexical score by pre-calculated max
CASE
WHEN ml.max_val > 0
THEN ss.lexical_raw / ml.max_val
ELSE 0
END as lexical_score,
-- Normalize popularity with logarithmic scaling to prevent viral agents from dominating
-- LOG(1 + runs) / LOG(1 + max_runs) ensures score is 0-1 range
CASE
WHEN mp.max_val > 0 AND ss.popularity_raw > 0
THEN LN(1 + ss.popularity_raw) / LN(1 + mp.max_val)
ELSE 0
END as popularity_score
FROM search_scores ss
CROSS JOIN max_lexical ml
CROSS JOIN max_popularity mp
),
scored AS (
SELECT
slug,
agent_name,
agent_image,
creator_username,
creator_avatar,
sub_heading,
description,
runs,
rating,
categories,
featured,
is_available,
updated_at,
semantic_score,
lexical_score,
category_score,
recency_score,
popularity_score,
(
{weight_semantic_param} * semantic_score +
{weight_lexical_param} * lexical_score +
{weight_category_param} * category_score +
{weight_recency_param} * recency_score +
{weight_popularity_param} * popularity_score
) as combined_score
FROM normalized
),
filtered AS (
SELECT
*,
COUNT(*) OVER () as total_count
FROM scored
WHERE combined_score >= {min_score_param}
)
SELECT * FROM filtered
ORDER BY combined_score DESC
LIMIT ${param_index} OFFSET ${param_index + 1}
"""
# Add pagination params
params.extend([page_size, offset])
# Execute search query - includes total_count via window function
results = await query_raw_with_schema(
sql_query, *params, set_public_search_path=True
)
# Extract total count from first result (all rows have same count)
total = results[0]["total_count"] if results else 0
# Remove total_count from results before returning
for result in results:
result.pop("total_count", None)
# Log without sensitive query content
logger.info(f"Hybrid search: {len(results)} results, {total} total")
return results, total
async def hybrid_search_simple(
query: str,
page: int = 1,
page_size: int = 20,
) -> tuple[list[dict[str, Any]], int]:
"""
Simplified hybrid search for common use cases.
Uses default weights and no filters.
"""
return await hybrid_search(
query=query,
page=page,
page_size=page_size,
)

View File

@@ -1,334 +0,0 @@
"""
Integration tests for hybrid search with schema handling.
These tests verify that hybrid search works correctly across different database schemas.
"""
from unittest.mock import patch
import pytest
from backend.api.features.store.hybrid_search import HybridSearchWeights, hybrid_search
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_with_schema_handling():
"""Test that hybrid search correctly handles database schema prefixes."""
# Test with a mock query to ensure schema handling works
query = "test agent"
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
# Mock the query result
mock_query.return_value = [
{
"slug": "test/agent",
"agent_name": "Test Agent",
"agent_image": "test.png",
"creator_username": "test",
"creator_avatar": "avatar.png",
"sub_heading": "Test sub-heading",
"description": "Test description",
"runs": 10,
"rating": 4.5,
"categories": ["test"],
"featured": False,
"is_available": True,
"updated_at": "2024-01-01T00:00:00Z",
"combined_score": 0.8,
"semantic_score": 0.7,
"lexical_score": 0.6,
"category_score": 0.5,
"recency_score": 0.4,
"total_count": 1,
}
]
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536 # Mock embedding
results, total = await hybrid_search(
query=query,
page=1,
page_size=20,
)
# Verify the query was called
assert mock_query.called
# Verify the SQL template uses schema_prefix placeholder
call_args = mock_query.call_args
sql_template = call_args[0][0]
assert "{schema_prefix}" in sql_template
# Verify results
assert len(results) == 1
assert total == 1
assert results[0]["slug"] == "test/agent"
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_with_public_schema():
"""Test hybrid search when using public schema (no prefix needed)."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "public"
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
mock_query.return_value = []
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
results, total = await hybrid_search(
query="test",
page=1,
page_size=20,
)
# Verify the mock was set up correctly
assert mock_schema.return_value == "public"
# Results should work even with empty results
assert results == []
assert total == 0
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_with_custom_schema():
"""Test hybrid search when using custom schema (e.g., 'platform')."""
with patch("backend.data.db.get_database_schema") as mock_schema:
mock_schema.return_value = "platform"
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
mock_query.return_value = []
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
results, total = await hybrid_search(
query="test",
page=1,
page_size=20,
)
# Verify the mock was set up correctly
assert mock_schema.return_value == "platform"
assert results == []
assert total == 0
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_without_embeddings():
"""Test hybrid search fails fast when embeddings are unavailable."""
# Patch where the function is used, not where it's defined
with patch("backend.api.features.store.hybrid_search.embed_query") as mock_embed:
# Simulate embedding failure
mock_embed.return_value = None
# Should raise ValueError with helpful message
with pytest.raises(ValueError) as exc_info:
await hybrid_search(
query="test",
page=1,
page_size=20,
)
# Verify error message is generic (doesn't leak implementation details)
assert "Search service temporarily unavailable" in str(exc_info.value)
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_with_filters():
"""Test hybrid search with various filters."""
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
mock_query.return_value = []
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
# Test with featured filter
results, total = await hybrid_search(
query="test",
featured=True,
creators=["user1", "user2"],
category="productivity",
page=1,
page_size=10,
)
# Verify filters were applied in the query
call_args = mock_query.call_args
params = call_args[0][1:] # Skip SQL template
# Should have query, query_lower, creators array, category
assert len(params) >= 4
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_weights():
"""Test hybrid search with custom weights."""
custom_weights = HybridSearchWeights(
semantic=0.5,
lexical=0.3,
category=0.1,
recency=0.1,
popularity=0.0,
)
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
mock_query.return_value = []
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
results, total = await hybrid_search(
query="test",
weights=custom_weights,
page=1,
page_size=20,
)
# Verify custom weights were used in the query
call_args = mock_query.call_args
sql_template = call_args[0][0]
params = call_args[0][1:] # Get all parameters passed
# Check that SQL uses parameterized weights (not f-string interpolation)
assert "$" in sql_template # Verify parameterization is used
# Check that custom weights are in the params
assert 0.5 in params # semantic weight
assert 0.3 in params # lexical weight
assert 0.1 in params # category and recency weights
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_min_score_filtering():
"""Test hybrid search minimum score threshold."""
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
# Return results with varying scores
mock_query.return_value = [
{
"slug": "high-score/agent",
"agent_name": "High Score Agent",
"combined_score": 0.8,
"total_count": 1,
# ... other fields
}
]
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
# Test with custom min_score
results, total = await hybrid_search(
query="test",
min_score=0.5, # High threshold
page=1,
page_size=20,
)
# Verify min_score was applied in query
call_args = mock_query.call_args
sql_template = call_args[0][0]
params = call_args[0][1:] # Get all parameters
# Check that SQL uses parameterized min_score
assert "combined_score >=" in sql_template
assert "$" in sql_template # Verify parameterization
# Check that custom min_score is in the params
assert 0.5 in params
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_pagination():
"""Test hybrid search pagination."""
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
mock_query.return_value = []
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
# Test page 2 with page_size 10
results, total = await hybrid_search(
query="test",
page=2,
page_size=10,
)
# Verify pagination parameters
call_args = mock_query.call_args
params = call_args[0]
# Last two params should be LIMIT and OFFSET
limit = params[-2]
offset = params[-1]
assert limit == 10 # page_size
assert offset == 10 # (page - 1) * page_size = (2 - 1) * 10
@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_hybrid_search_error_handling():
"""Test hybrid search error handling."""
with patch(
"backend.api.features.store.hybrid_search.query_raw_with_schema"
) as mock_query:
# Simulate database error
mock_query.side_effect = Exception("Database connection error")
with patch(
"backend.api.features.store.hybrid_search.embed_query"
) as mock_embed:
mock_embed.return_value = [0.1] * 1536
# Should raise exception
with pytest.raises(Exception) as exc_info:
await hybrid_search(
query="test",
page=1,
page_size=20,
)
assert "Database connection error" in str(exc_info.value)
if __name__ == "__main__":
pytest.main([__file__, "-v", "-s"])

View File

@@ -110,7 +110,6 @@ class Profile(pydantic.BaseModel):
class StoreSubmission(pydantic.BaseModel):
listing_id: str
agent_id: str
agent_version: int
name: str
@@ -165,12 +164,8 @@ class StoreListingsWithVersionsResponse(pydantic.BaseModel):
class StoreSubmissionRequest(pydantic.BaseModel):
agent_id: str = pydantic.Field(
..., min_length=1, description="Agent ID cannot be empty"
)
agent_version: int = pydantic.Field(
..., gt=0, description="Agent version must be greater than 0"
)
agent_id: str
agent_version: int
slug: str
name: str
sub_heading: str

View File

@@ -138,7 +138,6 @@ def test_creator_details():
def test_store_submission():
submission = store_model.StoreSubmission(
listing_id="listing123",
agent_id="agent123",
agent_version=1,
sub_heading="Test subheading",
@@ -160,7 +159,6 @@ def test_store_submissions_response():
response = store_model.StoreSubmissionsResponse(
submissions=[
store_model.StoreSubmission(
listing_id="listing123",
agent_id="agent123",
agent_version=1,
sub_heading="Test subheading",

View File

@@ -521,7 +521,6 @@ def test_get_submissions_success(
mocked_value = store_model.StoreSubmissionsResponse(
submissions=[
store_model.StoreSubmission(
listing_id="test-listing-id",
name="Test Agent",
description="Test agent description",
image_urls=["test.jpg"],

View File

@@ -64,6 +64,7 @@ from backend.data.onboarding import (
complete_re_run_agent,
get_recommended_agents,
get_user_onboarding,
increment_runs,
onboarding_enabled,
reset_user_onboarding,
update_user_onboarding,
@@ -974,6 +975,7 @@ async def execute_graph(
# Record successful graph execution
record_graph_execution(graph_id=graph_id, status="success", user_id=user_id)
record_graph_operation(operation="execute", status="success")
await increment_runs(user_id)
await complete_re_run_agent(user_id, graph_id)
if source == "library":
await complete_onboarding_step(

View File

@@ -6,9 +6,6 @@ import hashlib
import hmac
import logging
from enum import Enum
from typing import cast
from prisma.types import Serializable
from backend.sdk import (
BaseWebhooksManager,
@@ -87,9 +84,7 @@ class AirtableWebhookManager(BaseWebhooksManager):
# update webhook config
await update_webhook(
webhook.id,
config=cast(
dict[str, Serializable], {"base_id": base_id, "cursor": response.cursor}
),
config={"base_id": base_id, "cursor": response.cursor},
)
event_type = "notification"

View File

@@ -1,184 +0,0 @@
"""
Shared helpers for Human-In-The-Loop (HITL) review functionality.
Used by both the dedicated HumanInTheLoopBlock and blocks that require human review.
"""
import logging
from typing import Any, Optional
from prisma.enums import ReviewStatus
from pydantic import BaseModel
from backend.data.execution import ExecutionContext, ExecutionStatus
from backend.data.human_review import ReviewResult
from backend.executor.manager import async_update_node_execution_status
from backend.util.clients import get_database_manager_async_client
logger = logging.getLogger(__name__)
class ReviewDecision(BaseModel):
"""Result of a review decision."""
should_proceed: bool
message: str
review_result: ReviewResult
class HITLReviewHelper:
"""Helper class for Human-In-The-Loop review operations."""
@staticmethod
async def get_or_create_human_review(**kwargs) -> Optional[ReviewResult]:
"""Create or retrieve a human review from the database."""
return await get_database_manager_async_client().get_or_create_human_review(
**kwargs
)
@staticmethod
async def update_node_execution_status(**kwargs) -> None:
"""Update the execution status of a node."""
await async_update_node_execution_status(
db_client=get_database_manager_async_client(), **kwargs
)
@staticmethod
async def update_review_processed_status(
node_exec_id: str, processed: bool
) -> None:
"""Update the processed status of a review."""
return await get_database_manager_async_client().update_review_processed_status(
node_exec_id, processed
)
@staticmethod
async def _handle_review_request(
input_data: Any,
user_id: str,
node_exec_id: str,
graph_exec_id: str,
graph_id: str,
graph_version: int,
execution_context: ExecutionContext,
block_name: str = "Block",
editable: bool = False,
) -> Optional[ReviewResult]:
"""
Handle a review request for a block that requires human review.
Args:
input_data: The input data to be reviewed
user_id: ID of the user requesting the review
node_exec_id: ID of the node execution
graph_exec_id: ID of the graph execution
graph_id: ID of the graph
graph_version: Version of the graph
execution_context: Current execution context
block_name: Name of the block requesting review
editable: Whether the reviewer can edit the data
Returns:
ReviewResult if review is complete, None if waiting for human input
Raises:
Exception: If review creation or status update fails
"""
# Skip review if safe mode is disabled - return auto-approved result
if not execution_context.safe_mode:
logger.info(
f"Block {block_name} skipping review for node {node_exec_id} - safe mode disabled"
)
return ReviewResult(
data=input_data,
status=ReviewStatus.APPROVED,
message="Auto-approved (safe mode disabled)",
processed=True,
node_exec_id=node_exec_id,
)
result = await HITLReviewHelper.get_or_create_human_review(
user_id=user_id,
node_exec_id=node_exec_id,
graph_exec_id=graph_exec_id,
graph_id=graph_id,
graph_version=graph_version,
input_data=input_data,
message=f"Review required for {block_name} execution",
editable=editable,
)
if result is None:
logger.info(
f"Block {block_name} pausing execution for node {node_exec_id} - awaiting human review"
)
await HITLReviewHelper.update_node_execution_status(
exec_id=node_exec_id,
status=ExecutionStatus.REVIEW,
)
return None # Signal that execution should pause
# Mark review as processed if not already done
if not result.processed:
await HITLReviewHelper.update_review_processed_status(
node_exec_id=node_exec_id, processed=True
)
return result
@staticmethod
async def handle_review_decision(
input_data: Any,
user_id: str,
node_exec_id: str,
graph_exec_id: str,
graph_id: str,
graph_version: int,
execution_context: ExecutionContext,
block_name: str = "Block",
editable: bool = False,
) -> Optional[ReviewDecision]:
"""
Handle a review request and return the decision in a single call.
Args:
input_data: The input data to be reviewed
user_id: ID of the user requesting the review
node_exec_id: ID of the node execution
graph_exec_id: ID of the graph execution
graph_id: ID of the graph
graph_version: Version of the graph
execution_context: Current execution context
block_name: Name of the block requesting review
editable: Whether the reviewer can edit the data
Returns:
ReviewDecision if review is complete (approved/rejected),
None if execution should pause (awaiting review)
"""
review_result = await HITLReviewHelper._handle_review_request(
input_data=input_data,
user_id=user_id,
node_exec_id=node_exec_id,
graph_exec_id=graph_exec_id,
graph_id=graph_id,
graph_version=graph_version,
execution_context=execution_context,
block_name=block_name,
editable=editable,
)
if review_result is None:
# Still awaiting review - return None to pause execution
return None
# Review is complete, determine outcome
should_proceed = review_result.status == ReviewStatus.APPROVED
message = review_result.message or (
"Execution approved by reviewer"
if should_proceed
else "Execution rejected by reviewer"
)
return ReviewDecision(
should_proceed=should_proceed, message=message, review_result=review_result
)

View File

@@ -3,7 +3,6 @@ from typing import Any
from prisma.enums import ReviewStatus
from backend.blocks.helpers.review import HITLReviewHelper
from backend.data.block import (
Block,
BlockCategory,
@@ -12,9 +11,11 @@ from backend.data.block import (
BlockSchemaOutput,
BlockType,
)
from backend.data.execution import ExecutionContext
from backend.data.execution import ExecutionContext, ExecutionStatus
from backend.data.human_review import ReviewResult
from backend.data.model import SchemaField
from backend.executor.manager import async_update_node_execution_status
from backend.util.clients import get_database_manager_async_client
logger = logging.getLogger(__name__)
@@ -71,26 +72,32 @@ class HumanInTheLoopBlock(Block):
("approved_data", {"name": "John Doe", "age": 30}),
],
test_mock={
"handle_review_decision": lambda **kwargs: type(
"ReviewDecision",
(),
{
"should_proceed": True,
"message": "Test approval message",
"review_result": ReviewResult(
data={"name": "John Doe", "age": 30},
status=ReviewStatus.APPROVED,
message="",
processed=False,
node_exec_id="test-node-exec-id",
),
},
)(),
"get_or_create_human_review": lambda *_args, **_kwargs: ReviewResult(
data={"name": "John Doe", "age": 30},
status=ReviewStatus.APPROVED,
message="",
processed=False,
node_exec_id="test-node-exec-id",
),
"update_node_execution_status": lambda *_args, **_kwargs: None,
"update_review_processed_status": lambda *_args, **_kwargs: None,
},
)
async def handle_review_decision(self, **kwargs):
return await HITLReviewHelper.handle_review_decision(**kwargs)
async def get_or_create_human_review(self, **kwargs):
return await get_database_manager_async_client().get_or_create_human_review(
**kwargs
)
async def update_node_execution_status(self, **kwargs):
return await async_update_node_execution_status(
db_client=get_database_manager_async_client(), **kwargs
)
async def update_review_processed_status(self, node_exec_id: str, processed: bool):
return await get_database_manager_async_client().update_review_processed_status(
node_exec_id, processed
)
async def run(
self,
@@ -102,7 +109,7 @@ class HumanInTheLoopBlock(Block):
graph_id: str,
graph_version: int,
execution_context: ExecutionContext,
**_kwargs,
**kwargs,
) -> BlockOutput:
if not execution_context.safe_mode:
logger.info(
@@ -112,28 +119,48 @@ class HumanInTheLoopBlock(Block):
yield "review_message", "Auto-approved (safe mode disabled)"
return
decision = await self.handle_review_decision(
input_data=input_data.data,
user_id=user_id,
node_exec_id=node_exec_id,
graph_exec_id=graph_exec_id,
graph_id=graph_id,
graph_version=graph_version,
execution_context=execution_context,
block_name=self.name,
editable=input_data.editable,
)
try:
result = await self.get_or_create_human_review(
user_id=user_id,
node_exec_id=node_exec_id,
graph_exec_id=graph_exec_id,
graph_id=graph_id,
graph_version=graph_version,
input_data=input_data.data,
message=input_data.name,
editable=input_data.editable,
)
except Exception as e:
logger.error(f"Error in HITL block for node {node_exec_id}: {str(e)}")
raise
if decision is None:
return
if result is None:
logger.info(
f"HITL block pausing execution for node {node_exec_id} - awaiting human review"
)
try:
await self.update_node_execution_status(
exec_id=node_exec_id,
status=ExecutionStatus.REVIEW,
)
return
except Exception as e:
logger.error(
f"Failed to update node status for HITL block {node_exec_id}: {str(e)}"
)
raise
status = decision.review_result.status
if status == ReviewStatus.APPROVED:
yield "approved_data", decision.review_result.data
elif status == ReviewStatus.REJECTED:
yield "rejected_data", decision.review_result.data
else:
raise RuntimeError(f"Unexpected review status: {status}")
if not result.processed:
await self.update_review_processed_status(
node_exec_id=node_exec_id, processed=True
)
if decision.message:
yield "review_message", decision.message
if result.status == ReviewStatus.APPROVED:
yield "approved_data", result.data
if result.message:
yield "review_message", result.message
elif result.status == ReviewStatus.REJECTED:
yield "rejected_data", result.data
if result.message:
yield "review_message", result.message

File diff suppressed because it is too large Load Diff

View File

@@ -18,7 +18,6 @@ from backend.data.model import (
SchemaField,
)
from backend.integrations.providers import ProviderName
from backend.util.request import DEFAULT_USER_AGENT
class GetWikipediaSummaryBlock(Block, GetRequest):
@@ -40,27 +39,17 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
output_schema=GetWikipediaSummaryBlock.Output,
test_input={"topic": "Artificial Intelligence"},
test_output=("summary", "summary content"),
test_mock={
"get_request": lambda url, headers, json: {"extract": "summary content"}
},
test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
topic = input_data.topic
# URL-encode the topic to handle spaces and special characters
encoded_topic = quote(topic, safe="")
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"
# Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
# - User-Agent: Required, must identify the bot
# - Accept-Encoding: gzip recommended to reduce bandwidth
headers = {
"User-Agent": DEFAULT_USER_AGENT,
"Accept-Encoding": "gzip, deflate",
}
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
# Note: User-Agent is now automatically set by the request library
# to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
try:
response = await self.get_request(url, headers=headers, json=True)
response = await self.get_request(url, json=True)
if "extract" not in response:
raise ValueError(f"Unable to parse Wikipedia response: {response}")
yield "summary", response["extract"]

View File

@@ -391,12 +391,8 @@ class SmartDecisionMakerBlock(Block):
"""
block = sink_node.block
# Use custom name from node metadata if set, otherwise fall back to block.name
custom_name = sink_node.metadata.get("customized_name")
tool_name = custom_name if custom_name else block.name
tool_function: dict[str, Any] = {
"name": SmartDecisionMakerBlock.cleanup(tool_name),
"name": SmartDecisionMakerBlock.cleanup(block.name),
"description": block.description,
}
sink_block_input_schema = block.input_schema
@@ -493,24 +489,14 @@ class SmartDecisionMakerBlock(Block):
f"Sink graph metadata not found: {graph_id} {graph_version}"
)
# Use custom name from node metadata if set, otherwise fall back to graph name
custom_name = sink_node.metadata.get("customized_name")
tool_name = custom_name if custom_name else sink_graph_meta.name
tool_function: dict[str, Any] = {
"name": SmartDecisionMakerBlock.cleanup(tool_name),
"name": SmartDecisionMakerBlock.cleanup(sink_graph_meta.name),
"description": sink_graph_meta.description,
}
properties = {}
field_mapping = {}
for link in links:
field_name = link.sink_name
clean_field_name = SmartDecisionMakerBlock.cleanup(field_name)
field_mapping[clean_field_name] = field_name
sink_block_input_schema = sink_node.input_default["input_schema"]
sink_block_properties = sink_block_input_schema.get("properties", {}).get(
link.sink_name, {}
@@ -520,7 +506,7 @@ class SmartDecisionMakerBlock(Block):
if "description" in sink_block_properties
else f"The {link.sink_name} of the tool"
)
properties[clean_field_name] = {
properties[link.sink_name] = {
"type": "string",
"description": description,
"default": json.dumps(sink_block_properties.get("default", None)),
@@ -533,7 +519,7 @@ class SmartDecisionMakerBlock(Block):
"strict": True,
}
tool_function["_field_mapping"] = field_mapping
# Store node info for later use in output processing
tool_function["_sink_node_id"] = sink_node.id
return {"type": "function", "function": tool_function}
@@ -989,28 +975,10 @@ class SmartDecisionMakerBlock(Block):
graph_version: int,
execution_context: ExecutionContext,
execution_processor: "ExecutionProcessor",
nodes_to_skip: set[str] | None = None,
**kwargs,
) -> BlockOutput:
tool_functions = await self._create_tool_node_signatures(node_id)
original_tool_count = len(tool_functions)
# Filter out tools for nodes that should be skipped (e.g., missing optional credentials)
if nodes_to_skip:
tool_functions = [
tf
for tf in tool_functions
if tf.get("function", {}).get("_sink_node_id") not in nodes_to_skip
]
# Only raise error if we had tools but they were all filtered out
if original_tool_count > 0 and not tool_functions:
raise ValueError(
"No available tools to execute - all downstream nodes are unavailable "
"(possibly due to missing optional credentials)"
)
yield "tool_functions", json.dumps(tool_functions)
conversation_history = input_data.conversation_history or []
@@ -1161,9 +1129,8 @@ class SmartDecisionMakerBlock(Block):
original_field_name = field_mapping.get(clean_arg_name, clean_arg_name)
arg_value = tool_args.get(clean_arg_name)
# Use original_field_name directly (not sanitized) to match link sink_name
# The field_mapping already translates from LLM's cleaned names to original names
emit_key = f"tools_^_{sink_node_id}_~_{original_field_name}"
sanitized_arg_name = self.cleanup(original_field_name)
emit_key = f"tools_^_{sink_node_id}_~_{sanitized_arg_name}"
logger.debug(
"[SmartDecisionMakerBlock|geid:%s|neid:%s] emit %s",

View File

@@ -1057,153 +1057,3 @@ async def test_smart_decision_maker_traditional_mode_default():
) # Should yield individual tool parameters
assert "tools_^_test-sink-node-id_~_max_keyword_difficulty" in outputs
assert "conversations" in outputs
@pytest.mark.asyncio
async def test_smart_decision_maker_uses_customized_name_for_blocks():
"""Test that SmartDecisionMakerBlock uses customized_name from node metadata for tool names."""
from unittest.mock import MagicMock
from backend.blocks.basic import StoreValueBlock
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.graph import Link, Node
# Create a mock node with customized_name in metadata
mock_node = MagicMock(spec=Node)
mock_node.id = "test-node-id"
mock_node.block_id = StoreValueBlock().id
mock_node.metadata = {"customized_name": "My Custom Tool Name"}
mock_node.block = StoreValueBlock()
# Create a mock link
mock_link = MagicMock(spec=Link)
mock_link.sink_name = "input"
# Call the function directly
result = await SmartDecisionMakerBlock._create_block_function_signature(
mock_node, [mock_link]
)
# Verify the tool name uses the customized name (cleaned up)
assert result["type"] == "function"
assert result["function"]["name"] == "my_custom_tool_name" # Cleaned version
assert result["function"]["_sink_node_id"] == "test-node-id"
@pytest.mark.asyncio
async def test_smart_decision_maker_falls_back_to_block_name():
"""Test that SmartDecisionMakerBlock falls back to block.name when no customized_name."""
from unittest.mock import MagicMock
from backend.blocks.basic import StoreValueBlock
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.graph import Link, Node
# Create a mock node without customized_name
mock_node = MagicMock(spec=Node)
mock_node.id = "test-node-id"
mock_node.block_id = StoreValueBlock().id
mock_node.metadata = {} # No customized_name
mock_node.block = StoreValueBlock()
# Create a mock link
mock_link = MagicMock(spec=Link)
mock_link.sink_name = "input"
# Call the function directly
result = await SmartDecisionMakerBlock._create_block_function_signature(
mock_node, [mock_link]
)
# Verify the tool name uses the block's default name
assert result["type"] == "function"
assert result["function"]["name"] == "storevalueblock" # Default block name cleaned
assert result["function"]["_sink_node_id"] == "test-node-id"
@pytest.mark.asyncio
async def test_smart_decision_maker_uses_customized_name_for_agents():
"""Test that SmartDecisionMakerBlock uses customized_name from metadata for agent nodes."""
from unittest.mock import AsyncMock, MagicMock, patch
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.graph import Link, Node
# Create a mock node with customized_name in metadata
mock_node = MagicMock(spec=Node)
mock_node.id = "test-agent-node-id"
mock_node.metadata = {"customized_name": "My Custom Agent"}
mock_node.input_default = {
"graph_id": "test-graph-id",
"graph_version": 1,
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
}
# Create a mock link
mock_link = MagicMock(spec=Link)
mock_link.sink_name = "test_input"
# Mock the database client
mock_graph_meta = MagicMock()
mock_graph_meta.name = "Original Agent Name"
mock_graph_meta.description = "Agent description"
mock_db_client = AsyncMock()
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
with patch(
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
return_value=mock_db_client,
):
result = await SmartDecisionMakerBlock._create_agent_function_signature(
mock_node, [mock_link]
)
# Verify the tool name uses the customized name (cleaned up)
assert result["type"] == "function"
assert result["function"]["name"] == "my_custom_agent" # Cleaned version
assert result["function"]["_sink_node_id"] == "test-agent-node-id"
@pytest.mark.asyncio
async def test_smart_decision_maker_agent_falls_back_to_graph_name():
"""Test that agent node falls back to graph name when no customized_name."""
from unittest.mock import AsyncMock, MagicMock, patch
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.graph import Link, Node
# Create a mock node without customized_name
mock_node = MagicMock(spec=Node)
mock_node.id = "test-agent-node-id"
mock_node.metadata = {} # No customized_name
mock_node.input_default = {
"graph_id": "test-graph-id",
"graph_version": 1,
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
}
# Create a mock link
mock_link = MagicMock(spec=Link)
mock_link.sink_name = "test_input"
# Mock the database client
mock_graph_meta = MagicMock()
mock_graph_meta.name = "Original Agent Name"
mock_graph_meta.description = "Agent description"
mock_db_client = AsyncMock()
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
with patch(
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
return_value=mock_db_client,
):
result = await SmartDecisionMakerBlock._create_agent_function_signature(
mock_node, [mock_link]
)
# Verify the tool name uses the graph's default name
assert result["type"] == "function"
assert result["function"]["name"] == "original_agent_name" # Graph name cleaned
assert result["function"]["_sink_node_id"] == "test-agent-node-id"

View File

@@ -15,7 +15,6 @@ async def test_smart_decision_maker_handles_dynamic_dict_fields():
mock_node.block = CreateDictionaryBlock()
mock_node.block_id = CreateDictionaryBlock().id
mock_node.input_default = {}
mock_node.metadata = {}
# Create mock links with dynamic dictionary fields
mock_links = [
@@ -78,7 +77,6 @@ async def test_smart_decision_maker_handles_dynamic_list_fields():
mock_node.block = AddToListBlock()
mock_node.block_id = AddToListBlock().id
mock_node.input_default = {}
mock_node.metadata = {}
# Create mock links with dynamic list fields
mock_links = [

View File

@@ -44,7 +44,6 @@ async def test_create_block_function_signature_with_dict_fields():
mock_node.block = CreateDictionaryBlock()
mock_node.block_id = CreateDictionaryBlock().id
mock_node.input_default = {}
mock_node.metadata = {}
# Create mock links with dynamic dictionary fields (source sanitized, sink original)
mock_links = [
@@ -107,7 +106,6 @@ async def test_create_block_function_signature_with_list_fields():
mock_node.block = AddToListBlock()
mock_node.block_id = AddToListBlock().id
mock_node.input_default = {}
mock_node.metadata = {}
# Create mock links with dynamic list fields
mock_links = [
@@ -161,7 +159,6 @@ async def test_create_block_function_signature_with_object_fields():
mock_node.block = MatchTextPatternBlock()
mock_node.block_id = MatchTextPatternBlock().id
mock_node.input_default = {}
mock_node.metadata = {}
# Create mock links with dynamic object fields
mock_links = [
@@ -211,13 +208,11 @@ async def test_create_tool_node_signatures():
mock_dict_node.block = CreateDictionaryBlock()
mock_dict_node.block_id = CreateDictionaryBlock().id
mock_dict_node.input_default = {}
mock_dict_node.metadata = {}
mock_list_node = Mock()
mock_list_node.block = AddToListBlock()
mock_list_node.block_id = AddToListBlock().id
mock_list_node.input_default = {}
mock_list_node.metadata = {}
# Mock links with dynamic fields
dict_link1 = Mock(
@@ -428,7 +423,6 @@ async def test_mixed_regular_and_dynamic_fields():
mock_node.block.name = "TestBlock"
mock_node.block.description = "A test block"
mock_node.block.input_schema = Mock()
mock_node.metadata = {}
# Mock the get_field_schema to return a proper schema for regular fields
def get_field_schema(field_name):

View File

@@ -1,3 +1,3 @@
from .blog import WordPressCreatePostBlock, WordPressGetAllPostsBlock
from .blog import WordPressCreatePostBlock
__all__ = ["WordPressCreatePostBlock", "WordPressGetAllPostsBlock"]
__all__ = ["WordPressCreatePostBlock"]

View File

@@ -161,7 +161,7 @@ async def oauth_exchange_code_for_tokens(
grant_type="authorization_code",
).model_dump(exclude_none=True)
response = await Requests(raise_for_status=False).post(
response = await Requests().post(
f"{WORDPRESS_BASE_URL}oauth2/token",
headers=headers,
data=data,
@@ -205,7 +205,7 @@ async def oauth_refresh_tokens(
grant_type="refresh_token",
).model_dump(exclude_none=True)
response = await Requests(raise_for_status=False).post(
response = await Requests().post(
f"{WORDPRESS_BASE_URL}oauth2/token",
headers=headers,
data=data,
@@ -252,7 +252,7 @@ async def validate_token(
"token": token,
}
response = await Requests(raise_for_status=False).get(
response = await Requests().get(
f"{WORDPRESS_BASE_URL}oauth2/token-info",
params=params,
)
@@ -296,7 +296,7 @@ async def make_api_request(
url = f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}"
request_method = getattr(Requests(raise_for_status=False), method.lower())
request_method = getattr(Requests(), method.lower())
response = await request_method(
url,
headers=headers,
@@ -476,7 +476,6 @@ async def create_post(
data["tags"] = ",".join(str(t) for t in data["tags"])
# Make the API request
site = normalize_site(site)
endpoint = f"/rest/v1.1/sites/{site}/posts/new"
headers = {
@@ -484,7 +483,7 @@ async def create_post(
"Content-Type": "application/x-www-form-urlencoded",
}
response = await Requests(raise_for_status=False).post(
response = await Requests().post(
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
headers=headers,
data=data,
@@ -500,132 +499,3 @@ async def create_post(
)
error_message = error_data.get("message", response.text)
raise ValueError(f"Failed to create post: {response.status} - {error_message}")
class Post(BaseModel):
"""Response model for individual posts in a posts list response.
This is a simplified version compared to PostResponse, as the list endpoint
returns less detailed information than the create/get single post endpoints.
"""
ID: int
site_ID: int
author: PostAuthor
date: datetime
modified: datetime
title: str
URL: str
short_URL: str
content: str | None = None
excerpt: str | None = None
slug: str
guid: str
status: str
sticky: bool
password: str | None = ""
parent: Union[Dict[str, Any], bool, None] = None
type: str
discussion: Dict[str, Union[str, bool, int]] | None = None
likes_enabled: bool | None = None
sharing_enabled: bool | None = None
like_count: int | None = None
i_like: bool | None = None
is_reblogged: bool | None = None
is_following: bool | None = None
global_ID: str | None = None
featured_image: str | None = None
post_thumbnail: Dict[str, Any] | None = None
format: str | None = None
geo: Union[Dict[str, Any], bool, None] = None
menu_order: int | None = None
page_template: str | None = None
publicize_URLs: List[str] | None = None
terms: Dict[str, Dict[str, Any]] | None = None
tags: Dict[str, Dict[str, Any]] | None = None
categories: Dict[str, Dict[str, Any]] | None = None
attachments: Dict[str, Dict[str, Any]] | None = None
attachment_count: int | None = None
metadata: List[Dict[str, Any]] | None = None
meta: Dict[str, Any] | None = None
capabilities: Dict[str, bool] | None = None
revisions: List[int] | None = None
other_URLs: Dict[str, Any] | None = None
class PostsResponse(BaseModel):
"""Response model for WordPress posts list."""
found: int
posts: List[Post]
meta: Dict[str, Any]
def normalize_site(site: str) -> str:
"""
Normalize a site identifier by stripping protocol and trailing slashes.
Args:
site: Site URL, domain, or ID (e.g., "https://myblog.wordpress.com/", "myblog.wordpress.com", "123456789")
Returns:
Normalized site identifier (domain or ID only)
"""
site = site.strip()
if site.startswith("https://"):
site = site[8:]
elif site.startswith("http://"):
site = site[7:]
return site.rstrip("/")
async def get_posts(
credentials: Credentials,
site: str,
status: PostStatus | None = None,
number: int = 100,
offset: int = 0,
) -> PostsResponse:
"""
Get posts from a WordPress site.
Args:
credentials: OAuth credentials
site: Site ID or domain (e.g., "myblog.wordpress.com" or "123456789")
status: Filter by post status using PostStatus enum, or None for all
number: Number of posts to retrieve (max 100)
offset: Number of posts to skip (for pagination)
Returns:
PostsResponse with the list of posts
"""
site = normalize_site(site)
endpoint = f"/rest/v1.1/sites/{site}/posts"
headers = {
"Authorization": credentials.auth_header(),
}
params: Dict[str, Any] = {
"number": max(1, min(number, 100)), # 1100 posts per request
"offset": offset,
}
if status:
params["status"] = status.value
response = await Requests(raise_for_status=False).get(
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
headers=headers,
params=params,
)
if response.ok:
return PostsResponse.model_validate(response.json())
error_data = (
response.json()
if response.headers.get("content-type", "").startswith("application/json")
else {}
)
error_message = error_data.get("message", response.text)
raise ValueError(f"Failed to get posts: {response.status} - {error_message}")

View File

@@ -9,15 +9,7 @@ from backend.sdk import (
SchemaField,
)
from ._api import (
CreatePostRequest,
Post,
PostResponse,
PostsResponse,
PostStatus,
create_post,
get_posts,
)
from ._api import CreatePostRequest, PostResponse, PostStatus, create_post
from ._config import wordpress
@@ -57,15 +49,8 @@ class WordPressCreatePostBlock(Block):
media_urls: list[str] = SchemaField(
description="URLs of images to sideload and attach to the post", default=[]
)
publish_as_draft: bool = SchemaField(
description="If True, publishes the post as a draft. If False, publishes it publicly.",
default=False,
)
class Output(BlockSchemaOutput):
site: str = SchemaField(
description="The site ID or domain (pass-through for chaining with other blocks)"
)
post_id: int = SchemaField(description="The ID of the created post")
post_url: str = SchemaField(description="The full URL of the created post")
short_url: str = SchemaField(description="The shortened wp.me URL")
@@ -93,9 +78,7 @@ class WordPressCreatePostBlock(Block):
tags=input_data.tags,
featured_image=input_data.featured_image,
media_urls=input_data.media_urls,
status=(
PostStatus.DRAFT if input_data.publish_as_draft else PostStatus.PUBLISH
),
status=PostStatus.PUBLISH,
)
post_response: PostResponse = await create_post(
@@ -104,69 +87,7 @@ class WordPressCreatePostBlock(Block):
post_data=post_request,
)
yield "site", input_data.site
yield "post_id", post_response.ID
yield "post_url", post_response.URL
yield "short_url", post_response.short_URL
yield "post_data", post_response.model_dump()
class WordPressGetAllPostsBlock(Block):
"""
Fetches all posts from a WordPress.com site or Jetpack-enabled site.
Supports filtering by status and pagination.
"""
class Input(BlockSchemaInput):
credentials: CredentialsMetaInput = wordpress.credentials_field()
site: str = SchemaField(
description="Site ID or domain (e.g., 'myblog.wordpress.com' or '123456789')"
)
status: PostStatus | None = SchemaField(
description="Filter by post status, or None for all",
default=None,
)
number: int = SchemaField(
description="Number of posts to retrieve (max 100 per request)", default=20
)
offset: int = SchemaField(
description="Number of posts to skip (for pagination)", default=0
)
class Output(BlockSchemaOutput):
site: str = SchemaField(
description="The site ID or domain (pass-through for chaining with other blocks)"
)
found: int = SchemaField(description="Total number of posts found")
posts: list[Post] = SchemaField(
description="List of post objects with their details"
)
post: Post = SchemaField(
description="Individual post object (yielded for each post)"
)
def __init__(self):
super().__init__(
id="97728fa7-7f6f-4789-ba0c-f2c114119536",
description="Fetch all posts from WordPress.com or Jetpack sites",
categories={BlockCategory.SOCIAL},
input_schema=self.Input,
output_schema=self.Output,
)
async def run(
self, input_data: Input, *, credentials: Credentials, **kwargs
) -> BlockOutput:
posts_response: PostsResponse = await get_posts(
credentials=credentials,
site=input_data.site,
status=input_data.status,
number=input_data.number,
offset=input_data.offset,
)
yield "site", input_data.site
yield "found", posts_response.found
yield "posts", posts_response.posts
for post in posts_response.posts:
yield "post", post

View File

@@ -50,8 +50,6 @@ from .model import (
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from backend.data.execution import ExecutionContext
from .graph import Link
app_config = Config()
@@ -474,7 +472,6 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
self.block_type = block_type
self.webhook_config = webhook_config
self.execution_stats: NodeExecutionStats = NodeExecutionStats()
self.requires_human_review: bool = False
if self.webhook_config:
if isinstance(self.webhook_config, BlockWebhookConfig):
@@ -617,77 +614,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
block_id=self.id,
) from ex
async def is_block_exec_need_review(
self,
input_data: BlockInput,
*,
user_id: str,
node_exec_id: str,
graph_exec_id: str,
graph_id: str,
graph_version: int,
execution_context: "ExecutionContext",
**kwargs,
) -> tuple[bool, BlockInput]:
"""
Check if this block execution needs human review and handle the review process.
Returns:
Tuple of (should_pause, input_data_to_use)
- should_pause: True if execution should be paused for review
- input_data_to_use: The input data to use (may be modified by reviewer)
"""
# Skip review if not required or safe mode is disabled
if not self.requires_human_review or not execution_context.safe_mode:
return False, input_data
from backend.blocks.helpers.review import HITLReviewHelper
# Handle the review request and get decision
decision = await HITLReviewHelper.handle_review_decision(
input_data=input_data,
user_id=user_id,
node_exec_id=node_exec_id,
graph_exec_id=graph_exec_id,
graph_id=graph_id,
graph_version=graph_version,
execution_context=execution_context,
block_name=self.name,
editable=True,
)
if decision is None:
# We're awaiting review - pause execution
return True, input_data
if not decision.should_proceed:
# Review was rejected, raise an error to stop execution
raise BlockExecutionError(
message=f"Block execution rejected by reviewer: {decision.message}",
block_name=self.name,
block_id=self.id,
)
# Review was approved - use the potentially modified data
# ReviewResult.data must be a dict for block inputs
reviewed_data = decision.review_result.data
if not isinstance(reviewed_data, dict):
raise BlockExecutionError(
message=f"Review data must be a dict for block input, got {type(reviewed_data).__name__}",
block_name=self.name,
block_id=self.id,
)
return False, reviewed_data
async def _execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
# Check for review requirement and get potentially modified input data
should_pause, input_data = await self.is_block_exec_need_review(
input_data, **kwargs
)
if should_pause:
return
# Validate the input data (original or reviewer-modified) once
if error := self.input_schema.validate_data(input_data):
raise BlockInputError(
message=f"Unable to execute block with invalid input data: {error}",
@@ -695,7 +622,6 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
block_id=self.id,
)
# Use the validated input data
async for output_name, output_data in self.run(
self.input_schema(**{k: v for k, v in input_data.items() if v is not None}),
**kwargs,

View File

@@ -38,20 +38,6 @@ POOL_TIMEOUT = os.getenv("DB_POOL_TIMEOUT")
if POOL_TIMEOUT:
DATABASE_URL = add_param(DATABASE_URL, "pool_timeout", POOL_TIMEOUT)
# Add public schema to search_path for pgvector type access
# The vector extension is in public schema, but search_path is determined by schema parameter
# Extract the schema from DATABASE_URL or default to 'public' (matching get_database_schema())
parsed_url = urlparse(DATABASE_URL)
url_params = dict(parse_qsl(parsed_url.query))
db_schema = url_params.get("schema", "public")
# Build search_path, avoiding duplicates if db_schema is already 'public'
search_path_schemas = list(
dict.fromkeys([db_schema, "public"])
) # Preserves order, removes duplicates
search_path = ",".join(search_path_schemas)
# This allows using ::vector without schema qualification
DATABASE_URL = add_param(DATABASE_URL, "options", f"-c search_path={search_path}")
HTTP_TIMEOUT = int(POOL_TIMEOUT) if POOL_TIMEOUT else None
prisma = Prisma(
@@ -122,102 +108,21 @@ def get_database_schema() -> str:
return query_params.get("schema", "public")
async def _raw_with_schema(
query_template: str,
*args,
execute: bool = False,
client: Prisma | None = None,
set_public_search_path: bool = False,
) -> list[dict] | int:
"""Internal: Execute raw SQL with proper schema handling.
Use query_raw_with_schema() or execute_raw_with_schema() instead.
Args:
query_template: SQL query with {schema_prefix} placeholder
*args: Query parameters
execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
client: Optional Prisma client for transactions (only used when execute=True).
set_public_search_path: If True, sets search_path to include public schema.
Needed for pgvector types and other public schema objects.
Returns:
- list[dict] if execute=False (query results)
- int if execute=True (number of affected rows)
"""
async def query_raw_with_schema(query_template: str, *args) -> list[dict]:
"""Execute raw SQL query with proper schema handling."""
schema = get_database_schema()
schema_prefix = f'"{schema}".' if schema != "public" else ""
formatted_query = query_template.format(schema_prefix=schema_prefix)
import prisma as prisma_module
db_client = client if client else prisma_module.get_client()
# Set search_path to include public schema if requested
# Prisma doesn't support the 'options' connection parameter, so we set it per-session
# This is idempotent and safe to call multiple times
if set_public_search_path:
await db_client.execute_raw(f"SET search_path = {schema}, public") # type: ignore
if execute:
result = await db_client.execute_raw(formatted_query, *args) # type: ignore
else:
result = await db_client.query_raw(formatted_query, *args) # type: ignore
result = await prisma_module.get_client().query_raw(
formatted_query, *args # type: ignore
)
return result
async def query_raw_with_schema(
query_template: str, *args, set_public_search_path: bool = False
) -> list[dict]:
"""Execute raw SQL SELECT query with proper schema handling.
Args:
query_template: SQL query with {schema_prefix} placeholder
*args: Query parameters
set_public_search_path: If True, sets search_path to include public schema.
Needed for pgvector types and other public schema objects.
Returns:
List of result rows as dictionaries
Example:
results = await query_raw_with_schema(
'SELECT * FROM {schema_prefix}"User" WHERE id = $1',
user_id
)
"""
return await _raw_with_schema(query_template, *args, execute=False, set_public_search_path=set_public_search_path) # type: ignore
async def execute_raw_with_schema(
query_template: str,
*args,
client: Prisma | None = None,
set_public_search_path: bool = False,
) -> int:
"""Execute raw SQL command (INSERT/UPDATE/DELETE) with proper schema handling.
Args:
query_template: SQL query with {schema_prefix} placeholder
*args: Query parameters
client: Optional Prisma client for transactions
set_public_search_path: If True, sets search_path to include public schema.
Needed for pgvector types and other public schema objects.
Returns:
Number of affected rows
Example:
await execute_raw_with_schema(
'INSERT INTO {schema_prefix}"User" (id, name) VALUES ($1, $2)',
user_id, name,
client=tx # Optional transaction client
)
"""
return await _raw_with_schema(query_template, *args, execute=True, client=client, set_public_search_path=set_public_search_path) # type: ignore
class BaseDbModel(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))

View File

@@ -383,7 +383,6 @@ class GraphExecutionWithNodes(GraphExecution):
self,
execution_context: ExecutionContext,
compiled_nodes_input_masks: Optional[NodesInputMasks] = None,
nodes_to_skip: Optional[set[str]] = None,
):
return GraphExecutionEntry(
user_id=self.user_id,
@@ -391,7 +390,6 @@ class GraphExecutionWithNodes(GraphExecution):
graph_version=self.graph_version or 0,
graph_exec_id=self.id,
nodes_input_masks=compiled_nodes_input_masks,
nodes_to_skip=nodes_to_skip or set(),
execution_context=execution_context,
)
@@ -1147,8 +1145,6 @@ class GraphExecutionEntry(BaseModel):
graph_id: str
graph_version: int
nodes_input_masks: Optional[NodesInputMasks] = None
nodes_to_skip: set[str] = Field(default_factory=set)
"""Node IDs that should be skipped due to optional credentials not being configured."""
execution_context: ExecutionContext = Field(default_factory=ExecutionContext)

View File

@@ -94,15 +94,6 @@ class Node(BaseDbModel):
input_links: list[Link] = []
output_links: list[Link] = []
@property
def credentials_optional(self) -> bool:
"""
Whether credentials are optional for this node.
When True and credentials are not configured, the node will be skipped
during execution rather than causing a validation error.
"""
return self.metadata.get("credentials_optional", False)
@property
def block(self) -> AnyBlockSchema | "_UnknownBlockBase":
"""Get the block for this node. Returns UnknownBlock if block is deleted/missing."""
@@ -244,10 +235,7 @@ class BaseGraph(BaseDbModel):
return any(
node.block_id
for node in self.nodes
if (
node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
or node.block.requires_human_review
)
if node.block.block_type == BlockType.HUMAN_IN_THE_LOOP
)
@property
@@ -338,35 +326,7 @@ class Graph(BaseGraph):
@computed_field
@property
def credentials_input_schema(self) -> dict[str, Any]:
schema = self._credentials_input_schema.jsonschema()
# Determine which credential fields are required based on credentials_optional metadata
graph_credentials_inputs = self.aggregate_credentials_inputs()
required_fields = []
# Build a map of node_id -> node for quick lookup
all_nodes = {node.id: node for node in self.nodes}
for sub_graph in self.sub_graphs:
for node in sub_graph.nodes:
all_nodes[node.id] = node
for field_key, (
_field_info,
node_field_pairs,
) in graph_credentials_inputs.items():
# A field is required if ANY node using it has credentials_optional=False
is_required = False
for node_id, _field_name in node_field_pairs:
node = all_nodes.get(node_id)
if node and not node.credentials_optional:
is_required = True
break
if is_required:
required_fields.append(field_key)
schema["required"] = required_fields
return schema
return self._credentials_input_schema.jsonschema()
@property
def _credentials_input_schema(self) -> type[BlockSchema]:

View File

@@ -1,6 +1,5 @@
import json
from typing import Any
from unittest.mock import AsyncMock, patch
from uuid import UUID
import fastapi.exceptions
@@ -19,17 +18,6 @@ from backend.usecases.sample import create_test_user
from backend.util.test import SpinTestServer
@pytest.fixture(scope="session", autouse=True)
def mock_embedding_functions():
"""Mock embedding functions for all tests to avoid database/API dependencies."""
with patch(
"backend.api.features.store.db.ensure_embedding",
new_callable=AsyncMock,
return_value=True,
):
yield
@pytest.mark.asyncio(loop_scope="session")
async def test_graph_creation(server: SpinTestServer, snapshot: Snapshot):
"""
@@ -408,58 +396,3 @@ async def test_access_store_listing_graph(server: SpinTestServer):
created_graph.id, created_graph.version, "3e53486c-cf57-477e-ba2a-cb02dc828e1b"
)
assert got_graph is not None
# ============================================================================
# Tests for Optional Credentials Feature
# ============================================================================
def test_node_credentials_optional_default():
"""Test that credentials_optional defaults to False when not set in metadata."""
node = Node(
id="test_node",
block_id=StoreValueBlock().id,
input_default={},
metadata={},
)
assert node.credentials_optional is False
def test_node_credentials_optional_true():
"""Test that credentials_optional returns True when explicitly set."""
node = Node(
id="test_node",
block_id=StoreValueBlock().id,
input_default={},
metadata={"credentials_optional": True},
)
assert node.credentials_optional is True
def test_node_credentials_optional_false():
"""Test that credentials_optional returns False when explicitly set to False."""
node = Node(
id="test_node",
block_id=StoreValueBlock().id,
input_default={},
metadata={"credentials_optional": False},
)
assert node.credentials_optional is False
def test_node_credentials_optional_with_other_metadata():
"""Test that credentials_optional works correctly with other metadata present."""
node = Node(
id="test_node",
block_id=StoreValueBlock().id,
input_default={},
metadata={
"position": {"x": 100, "y": 200},
"customized_name": "My Custom Node",
"credentials_optional": True,
},
)
assert node.credentials_optional is True
assert node.metadata["position"] == {"x": 100, "y": 200}
assert node.metadata["customized_name"] == "My Custom Node"

View File

@@ -334,7 +334,7 @@ async def _get_user_timezone(user_id: str) -> str:
return get_user_timezone_or_utc(user.timezone if user else None)
async def increment_onboarding_runs(user_id: str):
async def increment_runs(user_id: str):
"""
Increment a user's run counters and trigger any onboarding milestones.
"""

View File

@@ -1,404 +0,0 @@
"""Data models and access layer for user business understanding."""
import logging
from datetime import datetime
from typing import Any, Optional, cast
import pydantic
from prisma.models import CoPilotUnderstanding
from backend.data.redis_client import get_redis_async
from backend.util.json import SafeJson
logger = logging.getLogger(__name__)
# Cache configuration
CACHE_KEY_PREFIX = "understanding"
CACHE_TTL_SECONDS = 48 * 60 * 60 # 48 hours
def _cache_key(user_id: str) -> str:
"""Generate cache key for user business understanding."""
return f"{CACHE_KEY_PREFIX}:{user_id}"
def _json_to_list(value: Any) -> list[str]:
"""Convert Json field to list[str], handling None."""
if value is None:
return []
if isinstance(value, list):
return cast(list[str], value)
return []
class BusinessUnderstandingInput(pydantic.BaseModel):
"""Input model for updating business understanding - all fields optional for incremental updates."""
# User info
user_name: Optional[str] = pydantic.Field(None, description="The user's name")
job_title: Optional[str] = pydantic.Field(None, description="The user's job title")
# Business basics
business_name: Optional[str] = pydantic.Field(
None, description="Name of the user's business"
)
industry: Optional[str] = pydantic.Field(None, description="Industry or sector")
business_size: Optional[str] = pydantic.Field(
None, description="Company size (e.g., '1-10', '11-50')"
)
user_role: Optional[str] = pydantic.Field(
None,
description="User's role in the organization (e.g., 'decision maker', 'implementer')",
)
# Processes & activities
key_workflows: Optional[list[str]] = pydantic.Field(
None, description="Key business workflows"
)
daily_activities: Optional[list[str]] = pydantic.Field(
None, description="Daily activities performed"
)
# Pain points & goals
pain_points: Optional[list[str]] = pydantic.Field(
None, description="Current pain points"
)
bottlenecks: Optional[list[str]] = pydantic.Field(
None, description="Process bottlenecks"
)
manual_tasks: Optional[list[str]] = pydantic.Field(
None, description="Manual/repetitive tasks"
)
automation_goals: Optional[list[str]] = pydantic.Field(
None, description="Desired automation goals"
)
# Current tools
current_software: Optional[list[str]] = pydantic.Field(
None, description="Software/tools currently used"
)
existing_automation: Optional[list[str]] = pydantic.Field(
None, description="Existing automations"
)
# Additional context
additional_notes: Optional[str] = pydantic.Field(
None, description="Any additional context"
)
class BusinessUnderstanding(pydantic.BaseModel):
"""Full business understanding model returned from database."""
id: str
user_id: str
created_at: datetime
updated_at: datetime
# User info
user_name: Optional[str] = None
job_title: Optional[str] = None
# Business basics
business_name: Optional[str] = None
industry: Optional[str] = None
business_size: Optional[str] = None
user_role: Optional[str] = None
# Processes & activities
key_workflows: list[str] = pydantic.Field(default_factory=list)
daily_activities: list[str] = pydantic.Field(default_factory=list)
# Pain points & goals
pain_points: list[str] = pydantic.Field(default_factory=list)
bottlenecks: list[str] = pydantic.Field(default_factory=list)
manual_tasks: list[str] = pydantic.Field(default_factory=list)
automation_goals: list[str] = pydantic.Field(default_factory=list)
# Current tools
current_software: list[str] = pydantic.Field(default_factory=list)
existing_automation: list[str] = pydantic.Field(default_factory=list)
# Additional context
additional_notes: Optional[str] = None
@classmethod
def from_db(cls, db_record: CoPilotUnderstanding) -> "BusinessUnderstanding":
"""Convert database record to Pydantic model."""
data = db_record.data if isinstance(db_record.data, dict) else {}
business = (
data.get("business", {}) if isinstance(data.get("business"), dict) else {}
)
return cls(
id=db_record.id,
user_id=db_record.userId,
created_at=db_record.createdAt,
updated_at=db_record.updatedAt,
user_name=data.get("name"),
job_title=business.get("job_title"),
business_name=business.get("business_name"),
industry=business.get("industry"),
business_size=business.get("business_size"),
user_role=business.get("user_role"),
key_workflows=_json_to_list(business.get("key_workflows")),
daily_activities=_json_to_list(business.get("daily_activities")),
pain_points=_json_to_list(business.get("pain_points")),
bottlenecks=_json_to_list(business.get("bottlenecks")),
manual_tasks=_json_to_list(business.get("manual_tasks")),
automation_goals=_json_to_list(business.get("automation_goals")),
current_software=_json_to_list(business.get("current_software")),
existing_automation=_json_to_list(business.get("existing_automation")),
additional_notes=business.get("additional_notes"),
)
def _merge_lists(existing: list | None, new: list | None) -> list | None:
"""Merge two lists, removing duplicates while preserving order."""
if new is None:
return existing
if existing is None:
return new
# Preserve order, add new items that don't exist
merged = list(existing)
for item in new:
if item not in merged:
merged.append(item)
return merged
async def _get_from_cache(user_id: str) -> Optional[BusinessUnderstanding]:
"""Get business understanding from Redis cache."""
try:
redis = await get_redis_async()
cached_data = await redis.get(_cache_key(user_id))
if cached_data:
return BusinessUnderstanding.model_validate_json(cached_data)
except Exception as e:
logger.warning(f"Failed to get understanding from cache: {e}")
return None
async def _set_cache(user_id: str, understanding: BusinessUnderstanding) -> None:
"""Set business understanding in Redis cache with TTL."""
try:
redis = await get_redis_async()
await redis.setex(
_cache_key(user_id),
CACHE_TTL_SECONDS,
understanding.model_dump_json(),
)
except Exception as e:
logger.warning(f"Failed to set understanding in cache: {e}")
async def _delete_cache(user_id: str) -> None:
"""Delete business understanding from Redis cache."""
try:
redis = await get_redis_async()
await redis.delete(_cache_key(user_id))
except Exception as e:
logger.warning(f"Failed to delete understanding from cache: {e}")
async def get_business_understanding(
user_id: str,
) -> Optional[BusinessUnderstanding]:
"""Get the business understanding for a user.
Checks cache first, falls back to database if not cached.
Results are cached for 48 hours.
"""
# Try cache first
cached = await _get_from_cache(user_id)
if cached:
logger.debug(f"Business understanding cache hit for user {user_id}")
return cached
# Cache miss - load from database
logger.debug(f"Business understanding cache miss for user {user_id}")
record = await CoPilotUnderstanding.prisma().find_unique(where={"userId": user_id})
if record is None:
return None
understanding = BusinessUnderstanding.from_db(record)
# Store in cache for next time
await _set_cache(user_id, understanding)
return understanding
async def upsert_business_understanding(
user_id: str,
input_data: BusinessUnderstandingInput,
) -> BusinessUnderstanding:
"""
Create or update business understanding with incremental merge strategy.
- String fields: new value overwrites if provided (not None)
- List fields: new items are appended to existing (deduplicated)
Data is stored as: {name: ..., business: {version: 1, ...}}
"""
# Get existing record for merge
existing = await CoPilotUnderstanding.prisma().find_unique(
where={"userId": user_id}
)
# Get existing data structure or start fresh
existing_data: dict[str, Any] = {}
if existing and isinstance(existing.data, dict):
existing_data = dict(existing.data)
existing_business: dict[str, Any] = {}
if isinstance(existing_data.get("business"), dict):
existing_business = dict(existing_data["business"])
# Business fields (stored inside business object)
business_string_fields = [
"job_title",
"business_name",
"industry",
"business_size",
"user_role",
"additional_notes",
]
business_list_fields = [
"key_workflows",
"daily_activities",
"pain_points",
"bottlenecks",
"manual_tasks",
"automation_goals",
"current_software",
"existing_automation",
]
# Handle top-level name field
if input_data.user_name is not None:
existing_data["name"] = input_data.user_name
# Business string fields - overwrite if provided
for field in business_string_fields:
value = getattr(input_data, field)
if value is not None:
existing_business[field] = value
# Business list fields - merge with existing
for field in business_list_fields:
value = getattr(input_data, field)
if value is not None:
existing_list = _json_to_list(existing_business.get(field))
merged = _merge_lists(existing_list, value)
existing_business[field] = merged
# Set version and nest business data
existing_business["version"] = 1
existing_data["business"] = existing_business
# Upsert with the merged data
record = await CoPilotUnderstanding.prisma().upsert(
where={"userId": user_id},
data={
"create": {"userId": user_id, "data": SafeJson(existing_data)},
"update": {"data": SafeJson(existing_data)},
},
)
understanding = BusinessUnderstanding.from_db(record)
# Update cache with new understanding
await _set_cache(user_id, understanding)
return understanding
async def clear_business_understanding(user_id: str) -> bool:
"""Clear/delete business understanding for a user from both DB and cache."""
# Delete from cache first
await _delete_cache(user_id)
try:
await CoPilotUnderstanding.prisma().delete(where={"userId": user_id})
return True
except Exception:
# Record might not exist
return False
def format_understanding_for_prompt(understanding: BusinessUnderstanding) -> str:
"""Format business understanding as text for system prompt injection."""
sections = []
# User info section
user_info = []
if understanding.user_name:
user_info.append(f"Name: {understanding.user_name}")
if understanding.job_title:
user_info.append(f"Job Title: {understanding.job_title}")
if user_info:
sections.append("## User\n" + "\n".join(user_info))
# Business section
business_info = []
if understanding.business_name:
business_info.append(f"Company: {understanding.business_name}")
if understanding.industry:
business_info.append(f"Industry: {understanding.industry}")
if understanding.business_size:
business_info.append(f"Size: {understanding.business_size}")
if understanding.user_role:
business_info.append(f"Role Context: {understanding.user_role}")
if business_info:
sections.append("## Business\n" + "\n".join(business_info))
# Processes section
processes = []
if understanding.key_workflows:
processes.append(f"Key Workflows: {', '.join(understanding.key_workflows)}")
if understanding.daily_activities:
processes.append(
f"Daily Activities: {', '.join(understanding.daily_activities)}"
)
if processes:
sections.append("## Processes\n" + "\n".join(processes))
# Pain points section
pain_points = []
if understanding.pain_points:
pain_points.append(f"Pain Points: {', '.join(understanding.pain_points)}")
if understanding.bottlenecks:
pain_points.append(f"Bottlenecks: {', '.join(understanding.bottlenecks)}")
if understanding.manual_tasks:
pain_points.append(f"Manual Tasks: {', '.join(understanding.manual_tasks)}")
if pain_points:
sections.append("## Pain Points\n" + "\n".join(pain_points))
# Goals section
if understanding.automation_goals:
sections.append(
"## Automation Goals\n"
+ "\n".join(f"- {goal}" for goal in understanding.automation_goals)
)
# Current tools section
tools_info = []
if understanding.current_software:
tools_info.append(
f"Current Software: {', '.join(understanding.current_software)}"
)
if understanding.existing_automation:
tools_info.append(
f"Existing Automation: {', '.join(understanding.existing_automation)}"
)
if tools_info:
sections.append("## Current Tools\n" + "\n".join(tools_info))
# Additional notes
if understanding.additional_notes:
sections.append(f"## Additional Context\n{understanding.additional_notes}")
if not sections:
return ""
return "# User Business Context\n\n" + "\n\n".join(sections)

View File

@@ -7,10 +7,6 @@ from backend.api.features.library.db import (
list_library_agents,
)
from backend.api.features.store.db import get_store_agent_details, get_store_agents
from backend.api.features.store.embeddings import (
backfill_missing_embeddings,
get_embedding_stats,
)
from backend.data import db
from backend.data.analytics import (
get_accuracy_trends_and_alerts,
@@ -24,7 +20,6 @@ from backend.data.execution import (
get_execution_kv_data,
get_execution_outputs_by_node_exec_id,
get_frequently_executed_graphs,
get_graph_execution,
get_graph_execution_meta,
get_graph_executions,
get_graph_executions_count,
@@ -62,7 +57,6 @@ from backend.data.notifications import (
get_user_notification_oldest_message_in_batch,
remove_notifications_from_batch,
)
from backend.data.onboarding import increment_onboarding_runs
from backend.data.user import (
get_active_user_ids_in_timerange,
get_user_by_id,
@@ -146,7 +140,6 @@ class DatabaseManager(AppService):
get_child_graph_executions = _(get_child_graph_executions)
get_graph_executions = _(get_graph_executions)
get_graph_executions_count = _(get_graph_executions_count)
get_graph_execution = _(get_graph_execution)
get_graph_execution_meta = _(get_graph_execution_meta)
create_graph_execution = _(create_graph_execution)
get_node_execution = _(get_node_execution)
@@ -211,17 +204,10 @@ class DatabaseManager(AppService):
add_store_agent_to_library = _(add_store_agent_to_library)
validate_graph_execution_permissions = _(validate_graph_execution_permissions)
# Onboarding
increment_onboarding_runs = _(increment_onboarding_runs)
# Store
get_store_agents = _(get_store_agents)
get_store_agent_details = _(get_store_agent_details)
# Store Embeddings
get_embedding_stats = _(get_embedding_stats)
backfill_missing_embeddings = _(backfill_missing_embeddings)
# Summary data - async
get_user_execution_summary_data = _(get_user_execution_summary_data)
@@ -273,10 +259,6 @@ class DatabaseManagerClient(AppServiceClient):
get_store_agents = _(d.get_store_agents)
get_store_agent_details = _(d.get_store_agent_details)
# Store Embeddings
get_embedding_stats = _(d.get_embedding_stats)
backfill_missing_embeddings = _(d.backfill_missing_embeddings)
class DatabaseManagerAsyncClient(AppServiceClient):
d = DatabaseManager
@@ -292,7 +274,6 @@ class DatabaseManagerAsyncClient(AppServiceClient):
get_graph = d.get_graph
get_graph_metadata = d.get_graph_metadata
get_graph_settings = d.get_graph_settings
get_graph_execution = d.get_graph_execution
get_graph_execution_meta = d.get_graph_execution_meta
get_node = d.get_node
get_node_execution = d.get_node_execution
@@ -337,9 +318,6 @@ class DatabaseManagerAsyncClient(AppServiceClient):
add_store_agent_to_library = d.add_store_agent_to_library
validate_graph_execution_permissions = d.validate_graph_execution_permissions
# Onboarding
increment_onboarding_runs = d.increment_onboarding_runs
# Store
get_store_agents = d.get_store_agents
get_store_agent_details = d.get_store_agent_details

View File

@@ -178,7 +178,6 @@ async def execute_node(
execution_processor: "ExecutionProcessor",
execution_stats: NodeExecutionStats | None = None,
nodes_input_masks: Optional[NodesInputMasks] = None,
nodes_to_skip: Optional[set[str]] = None,
) -> BlockOutput:
"""
Execute a node in the graph. This will trigger a block execution on a node,
@@ -246,7 +245,6 @@ async def execute_node(
"user_id": user_id,
"execution_context": execution_context,
"execution_processor": execution_processor,
"nodes_to_skip": nodes_to_skip or set(),
}
# Last-minute fetch credentials + acquire a system-wide read-write lock to prevent
@@ -544,7 +542,6 @@ class ExecutionProcessor:
node_exec_progress: NodeExecutionProgress,
nodes_input_masks: Optional[NodesInputMasks],
graph_stats_pair: tuple[GraphExecutionStats, threading.Lock],
nodes_to_skip: Optional[set[str]] = None,
) -> NodeExecutionStats:
log_metadata = LogMetadata(
logger=_logger,
@@ -567,7 +564,6 @@ class ExecutionProcessor:
db_client=db_client,
log_metadata=log_metadata,
nodes_input_masks=nodes_input_masks,
nodes_to_skip=nodes_to_skip,
)
if isinstance(status, BaseException):
raise status
@@ -613,7 +609,6 @@ class ExecutionProcessor:
db_client: "DatabaseManagerAsyncClient",
log_metadata: LogMetadata,
nodes_input_masks: Optional[NodesInputMasks] = None,
nodes_to_skip: Optional[set[str]] = None,
) -> ExecutionStatus:
status = ExecutionStatus.RUNNING
@@ -650,7 +645,6 @@ class ExecutionProcessor:
execution_processor=self,
execution_stats=stats,
nodes_input_masks=nodes_input_masks,
nodes_to_skip=nodes_to_skip,
):
await persist_output(output_name, output_data)
@@ -962,21 +956,6 @@ class ExecutionProcessor:
queued_node_exec = execution_queue.get()
# Check if this node should be skipped due to optional credentials
if queued_node_exec.node_id in graph_exec.nodes_to_skip:
log_metadata.info(
f"Skipping node execution {queued_node_exec.node_exec_id} "
f"for node {queued_node_exec.node_id} - optional credentials not configured"
)
# Mark the node as completed without executing
# No outputs will be produced, so downstream nodes won't trigger
update_node_execution_status(
db_client=db_client,
exec_id=queued_node_exec.node_exec_id,
status=ExecutionStatus.COMPLETED,
)
continue
log_metadata.debug(
f"Dispatching node execution {queued_node_exec.node_exec_id} "
f"for node {queued_node_exec.node_id}",
@@ -1037,7 +1016,6 @@ class ExecutionProcessor:
execution_stats,
execution_stats_lock,
),
nodes_to_skip=graph_exec.nodes_to_skip,
),
self.node_execution_loop,
)

View File

@@ -1,5 +1,4 @@
import logging
from unittest.mock import AsyncMock, patch
import fastapi.responses
import pytest
@@ -20,17 +19,6 @@ from backend.util.test import SpinTestServer, wait_execution
logger = logging.getLogger(__name__)
@pytest.fixture(scope="session", autouse=True)
def mock_embedding_functions():
"""Mock embedding functions for all tests to avoid database/API dependencies."""
with patch(
"backend.api.features.store.db.ensure_embedding",
new_callable=AsyncMock,
return_value=True,
):
yield
async def create_graph(s: SpinTestServer, g: graph.Graph, u: User) -> graph.Graph:
logger.info(f"Creating graph for user {u.id}")
return await s.agent_server.test_create_graph(CreateGraph(graph=g), u.id)

View File

@@ -2,7 +2,6 @@ import asyncio
import logging
import os
import threading
import time
import uuid
from enum import Enum
from typing import Optional
@@ -28,6 +27,7 @@ from backend.data.auth.oauth import cleanup_expired_oauth_tokens
from backend.data.block import BlockInput
from backend.data.execution import GraphExecutionWithNodes
from backend.data.model import CredentialsMetaInput
from backend.data.onboarding import increment_runs
from backend.executor import utils as execution_utils
from backend.monitoring import (
NotificationJobArgs,
@@ -37,7 +37,7 @@ from backend.monitoring import (
report_execution_accuracy_alerts,
report_late_executions,
)
from backend.util.clients import get_database_manager_client, get_scheduler_client
from backend.util.clients import get_scheduler_client
from backend.util.cloud_storage import cleanup_expired_files_async
from backend.util.exceptions import (
GraphNotFoundError,
@@ -156,6 +156,7 @@ async def _execute_graph(**kwargs):
inputs=args.input_data,
graph_credentials_inputs=args.input_credentials,
)
await increment_runs(args.user_id)
elapsed = asyncio.get_event_loop().time() - start_time
logger.info(
f"Graph execution started with ID {graph_exec.id} for graph {args.graph_id} "
@@ -253,74 +254,6 @@ def execution_accuracy_alerts():
return report_execution_accuracy_alerts()
def ensure_embeddings_coverage():
"""
Ensure approved store agents have embeddings for hybrid search.
Processes ALL missing embeddings in batches of 10 until 100% coverage.
Missing embeddings = agents invisible in hybrid search.
Schedule: Runs every 6 hours (balanced between coverage and API costs).
- Catches agents approved between scheduled runs
- Batch size 10: gradual processing to avoid rate limits
- Manual trigger available via execute_ensure_embeddings_coverage endpoint
"""
db_client = get_database_manager_client()
stats = db_client.get_embedding_stats()
# Check for error from get_embedding_stats() first
if "error" in stats:
logger.error(
f"Failed to get embedding stats: {stats['error']} - skipping backfill"
)
return {"processed": 0, "success": 0, "failed": 0, "error": stats["error"]}
if stats["without_embeddings"] == 0:
logger.info("All approved agents have embeddings, skipping backfill")
return {"processed": 0, "success": 0, "failed": 0}
logger.info(
f"Found {stats['without_embeddings']} agents without embeddings "
f"({stats['coverage_percent']}% coverage) - processing all"
)
total_processed = 0
total_success = 0
total_failed = 0
# Process in batches until no more missing embeddings
while True:
result = db_client.backfill_missing_embeddings(batch_size=10)
total_processed += result["processed"]
total_success += result["success"]
total_failed += result["failed"]
if result["processed"] == 0:
# No more missing embeddings
break
if result["success"] == 0 and result["processed"] > 0:
# All attempts in this batch failed - stop to avoid infinite loop
logger.error(
f"All {result['processed']} embedding attempts failed - stopping backfill"
)
break
# Small delay between batches to avoid rate limits
time.sleep(1)
logger.info(
f"Embedding backfill completed: {total_success}/{total_processed} succeeded, "
f"{total_failed} failed"
)
return {
"processed": total_processed,
"success": total_success,
"failed": total_failed,
}
# Monitoring functions are now imported from monitoring module
@@ -542,19 +475,6 @@ class Scheduler(AppService):
jobstore=Jobstores.EXECUTION.value,
)
# Embedding Coverage - Every 6 hours
# Ensures all approved agents have embeddings for hybrid search
# Critical: missing embeddings = agents invisible in search
self.scheduler.add_job(
ensure_embeddings_coverage,
id="ensure_embeddings_coverage",
trigger="interval",
hours=6,
replace_existing=True,
max_instances=1, # Prevent overlapping runs
jobstore=Jobstores.EXECUTION.value,
)
self.scheduler.add_listener(job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
self.scheduler.add_listener(job_missed_listener, EVENT_JOB_MISSED)
self.scheduler.add_listener(job_max_instances_listener, EVENT_JOB_MAX_INSTANCES)
@@ -712,11 +632,6 @@ class Scheduler(AppService):
"""Manually trigger execution accuracy alert checking."""
return execution_accuracy_alerts()
@expose
def execute_ensure_embeddings_coverage(self):
"""Manually trigger embedding backfill for approved store agents."""
return ensure_embeddings_coverage()
class SchedulerClient(AppServiceClient):
@classmethod

View File

@@ -10,7 +10,6 @@ from pydantic import BaseModel, JsonValue, ValidationError
from backend.data import execution as execution_db
from backend.data import graph as graph_db
from backend.data import onboarding as onboarding_db
from backend.data import user as user_db
from backend.data.block import (
Block,
@@ -32,6 +31,7 @@ from backend.data.execution import (
GraphExecutionStats,
GraphExecutionWithNodes,
NodesInputMasks,
get_graph_execution,
)
from backend.data.graph import GraphModel, Node
from backend.data.model import USER_TIMEZONE_NOT_SET, CredentialsMetaInput
@@ -239,19 +239,14 @@ async def _validate_node_input_credentials(
graph: GraphModel,
user_id: str,
nodes_input_masks: Optional[NodesInputMasks] = None,
) -> tuple[dict[str, dict[str, str]], set[str]]:
) -> dict[str, dict[str, str]]:
"""
Checks all credentials for all nodes of the graph and returns structured errors
and a set of nodes that should be skipped due to optional missing credentials.
Checks all credentials for all nodes of the graph and returns structured errors.
Returns:
tuple[
dict[node_id, dict[field_name, error_message]]: Credential validation errors per node,
set[node_id]: Nodes that should be skipped (optional credentials not configured)
]
dict[node_id, dict[field_name, error_message]]: Credential validation errors per node
"""
credential_errors: dict[str, dict[str, str]] = defaultdict(dict)
nodes_to_skip: set[str] = set()
for node in graph.nodes:
block = node.block
@@ -261,46 +256,27 @@ async def _validate_node_input_credentials(
if not credentials_fields:
continue
# Track if any credential field is missing for this node
has_missing_credentials = False
for field_name, credentials_meta_type in credentials_fields.items():
try:
# Check nodes_input_masks first, then input_default
field_value = None
if (
nodes_input_masks
and (node_input_mask := nodes_input_masks.get(node.id))
and field_name in node_input_mask
):
field_value = node_input_mask[field_name]
credentials_meta = credentials_meta_type.model_validate(
node_input_mask[field_name]
)
elif field_name in node.input_default:
# For optional credentials, don't use input_default - treat as missing
# This prevents stale credential IDs from failing validation
if node.credentials_optional:
field_value = None
else:
field_value = node.input_default[field_name]
# Check if credentials are missing (None, empty, or not present)
if field_value is None or (
isinstance(field_value, dict) and not field_value.get("id")
):
has_missing_credentials = True
# If node has credentials_optional flag, mark for skipping instead of error
if node.credentials_optional:
continue # Don't add error, will be marked for skip after loop
else:
credential_errors[node.id][
field_name
] = "These credentials are required"
continue
credentials_meta = credentials_meta_type.model_validate(field_value)
credentials_meta = credentials_meta_type.model_validate(
node.input_default[field_name]
)
else:
# Missing credentials
credential_errors[node.id][
field_name
] = "These credentials are required"
continue
except ValidationError as e:
# Validation error means credentials were provided but invalid
# This should always be an error, even if optional
credential_errors[node.id][field_name] = f"Invalid credentials: {e}"
continue
@@ -311,7 +287,6 @@ async def _validate_node_input_credentials(
)
except Exception as e:
# Handle any errors fetching credentials
# If credentials were explicitly configured but unavailable, it's an error
credential_errors[node.id][
field_name
] = f"Credentials not available: {e}"
@@ -338,19 +313,7 @@ async def _validate_node_input_credentials(
] = "Invalid credentials: type/provider mismatch"
continue
# If node has optional credentials and any are missing, mark for skipping
# But only if there are no other errors for this node
if (
has_missing_credentials
and node.credentials_optional
and node.id not in credential_errors
):
nodes_to_skip.add(node.id)
logger.info(
f"Node #{node.id} will be skipped: optional credentials not configured"
)
return credential_errors, nodes_to_skip
return credential_errors
def make_node_credentials_input_map(
@@ -392,25 +355,21 @@ async def validate_graph_with_credentials(
graph: GraphModel,
user_id: str,
nodes_input_masks: Optional[NodesInputMasks] = None,
) -> tuple[Mapping[str, Mapping[str, str]], set[str]]:
) -> Mapping[str, Mapping[str, str]]:
"""
Validate graph including credentials and return structured errors per node,
along with a set of nodes that should be skipped due to optional missing credentials.
Validate graph including credentials and return structured errors per node.
Returns:
tuple[
dict[node_id, dict[field_name, error_message]]: Validation errors per node,
set[node_id]: Nodes that should be skipped (optional credentials not configured)
]
dict[node_id, dict[field_name, error_message]]: Validation errors per node
"""
# Get input validation errors
node_input_errors = GraphModel.validate_graph_get_errors(
graph, for_run=True, nodes_input_masks=nodes_input_masks
)
# Get credential input/availability/validation errors and nodes to skip
node_credential_input_errors, nodes_to_skip = (
await _validate_node_input_credentials(graph, user_id, nodes_input_masks)
# Get credential input/availability/validation errors
node_credential_input_errors = await _validate_node_input_credentials(
graph, user_id, nodes_input_masks
)
# Merge credential errors with structural errors
@@ -419,7 +378,7 @@ async def validate_graph_with_credentials(
node_input_errors[node_id] = {}
node_input_errors[node_id].update(field_errors)
return node_input_errors, nodes_to_skip
return node_input_errors
async def _construct_starting_node_execution_input(
@@ -427,7 +386,7 @@ async def _construct_starting_node_execution_input(
user_id: str,
graph_inputs: BlockInput,
nodes_input_masks: Optional[NodesInputMasks] = None,
) -> tuple[list[tuple[str, BlockInput]], set[str]]:
) -> list[tuple[str, BlockInput]]:
"""
Validates and prepares the input data for executing a graph.
This function checks the graph for starting nodes, validates the input data
@@ -441,14 +400,11 @@ async def _construct_starting_node_execution_input(
node_credentials_map: `dict[node_id, dict[input_name, CredentialsMetaInput]]`
Returns:
tuple[
list[tuple[str, BlockInput]]: A list of tuples, each containing the node ID
and the corresponding input data for that node.
set[str]: Node IDs that should be skipped (optional credentials not configured)
]
list[tuple[str, BlockInput]]: A list of tuples, each containing the node ID and
the corresponding input data for that node.
"""
# Use new validation function that includes credentials
validation_errors, nodes_to_skip = await validate_graph_with_credentials(
validation_errors = await validate_graph_with_credentials(
graph, user_id, nodes_input_masks
)
n_error_nodes = len(validation_errors)
@@ -489,7 +445,7 @@ async def _construct_starting_node_execution_input(
"No starting nodes found for the graph, make sure an AgentInput or blocks with no inbound links are present as starting nodes."
)
return nodes_input, nodes_to_skip
return nodes_input
async def validate_and_construct_node_execution_input(
@@ -500,7 +456,7 @@ async def validate_and_construct_node_execution_input(
graph_credentials_inputs: Optional[Mapping[str, CredentialsMetaInput]] = None,
nodes_input_masks: Optional[NodesInputMasks] = None,
is_sub_graph: bool = False,
) -> tuple[GraphModel, list[tuple[str, BlockInput]], NodesInputMasks, set[str]]:
) -> tuple[GraphModel, list[tuple[str, BlockInput]], NodesInputMasks]:
"""
Public wrapper that handles graph fetching, credential mapping, and validation+construction.
This centralizes the logic used by both scheduler validation and actual execution.
@@ -517,7 +473,6 @@ async def validate_and_construct_node_execution_input(
GraphModel: Full graph object for the given `graph_id`.
list[tuple[node_id, BlockInput]]: Starting node IDs with corresponding inputs.
dict[str, BlockInput]: Node input masks including all passed-in credentials.
set[str]: Node IDs that should be skipped (optional credentials not configured).
Raises:
NotFoundError: If the graph is not found.
@@ -559,16 +514,14 @@ async def validate_and_construct_node_execution_input(
nodes_input_masks or {},
)
starting_nodes_input, nodes_to_skip = (
await _construct_starting_node_execution_input(
graph=graph,
user_id=user_id,
graph_inputs=graph_inputs,
nodes_input_masks=nodes_input_masks,
)
starting_nodes_input = await _construct_starting_node_execution_input(
graph=graph,
user_id=user_id,
graph_inputs=graph_inputs,
nodes_input_masks=nodes_input_masks,
)
return graph, starting_nodes_input, nodes_input_masks, nodes_to_skip
return graph, starting_nodes_input, nodes_input_masks
def _merge_nodes_input_masks(
@@ -809,14 +762,13 @@ async def add_graph_execution(
edb = execution_db
udb = user_db
gdb = graph_db
odb = onboarding_db
else:
edb = udb = gdb = odb = get_database_manager_async_client()
edb = udb = gdb = get_database_manager_async_client()
# Get or create the graph execution
if graph_exec_id:
# Resume existing execution
graph_exec = await edb.get_graph_execution(
graph_exec = await get_graph_execution(
user_id=user_id,
execution_id=graph_exec_id,
include_node_executions=True,
@@ -827,9 +779,6 @@ async def add_graph_execution(
# Use existing execution's compiled input masks
compiled_nodes_input_masks = graph_exec.nodes_input_masks or {}
# For resumed executions, nodes_to_skip was already determined at creation time
# TODO: Consider storing nodes_to_skip in DB if we need to preserve it across resumes
nodes_to_skip: set[str] = set()
logger.info(f"Resuming graph execution #{graph_exec.id} for graph #{graph_id}")
else:
@@ -838,7 +787,7 @@ async def add_graph_execution(
)
# Create new execution
graph, starting_nodes_input, compiled_nodes_input_masks, nodes_to_skip = (
graph, starting_nodes_input, compiled_nodes_input_masks = (
await validate_and_construct_node_execution_input(
graph_id=graph_id,
user_id=user_id,
@@ -887,12 +836,10 @@ async def add_graph_execution(
try:
graph_exec_entry = graph_exec.to_graph_execution_entry(
compiled_nodes_input_masks=compiled_nodes_input_masks,
nodes_to_skip=nodes_to_skip,
execution_context=execution_context,
)
logger.info(f"Publishing execution {graph_exec.id} to execution queue")
# Publish to execution queue for executor to pick up
exec_queue = await get_async_execution_queue()
await exec_queue.publish_message(
routing_key=GRAPH_EXECUTION_ROUTING_KEY,
@@ -901,12 +848,14 @@ async def add_graph_execution(
)
logger.info(f"Published execution {graph_exec.id} to RabbitMQ queue")
# Update execution status to QUEUED
graph_exec.status = ExecutionStatus.QUEUED
await edb.update_graph_execution_stats(
graph_exec_id=graph_exec.id,
status=graph_exec.status,
)
await get_async_execution_event_bus().publish(graph_exec)
return graph_exec
except BaseException as e:
err = str(e) or type(e).__name__
if not graph_exec:
@@ -927,24 +876,6 @@ async def add_graph_execution(
)
raise
try:
await get_async_execution_event_bus().publish(graph_exec)
logger.info(f"Published update for execution #{graph_exec.id} to event bus")
except Exception as e:
logger.error(
f"Failed to publish execution event for graph exec #{graph_exec.id}: {e}"
)
try:
await odb.increment_onboarding_runs(user_id)
logger.info(
f"Incremented user #{user_id} onboarding runs for exec #{graph_exec.id}"
)
except Exception as e:
logger.error(f"Failed to increment onboarding runs for user #{user_id}: {e}")
return graph_exec
# ============ Execution Output Helpers ============ #

View File

@@ -367,13 +367,10 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
)
# Setup mock returns
# The function returns (graph, starting_nodes_input, compiled_nodes_input_masks, nodes_to_skip)
nodes_to_skip: set[str] = set()
mock_validate.return_value = (
mock_graph,
starting_nodes_input,
compiled_nodes_input_masks,
nodes_to_skip,
)
mock_prisma.is_connected.return_value = True
mock_edb.create_graph_execution = mocker.AsyncMock(return_value=mock_graph_exec)
@@ -459,212 +456,3 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
# Both executions should succeed (though they create different objects)
assert result1 == mock_graph_exec
assert result2 == mock_graph_exec_2
# ============================================================================
# Tests for Optional Credentials Feature
# ============================================================================
@pytest.mark.asyncio
async def test_validate_node_input_credentials_returns_nodes_to_skip(
mocker: MockerFixture,
):
"""
Test that _validate_node_input_credentials returns nodes_to_skip set
for nodes with credentials_optional=True and missing credentials.
"""
from backend.executor.utils import _validate_node_input_credentials
# Create a mock node with credentials_optional=True
mock_node = mocker.MagicMock()
mock_node.id = "node-with-optional-creds"
mock_node.credentials_optional = True
mock_node.input_default = {} # No credentials configured
# Create a mock block with credentials field
mock_block = mocker.MagicMock()
mock_credentials_field_type = mocker.MagicMock()
mock_block.input_schema.get_credentials_fields.return_value = {
"credentials": mock_credentials_field_type
}
mock_node.block = mock_block
# Create mock graph
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
# Call the function
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="test-user-id",
nodes_input_masks=None,
)
# Node should be in nodes_to_skip, not in errors
assert mock_node.id in nodes_to_skip
assert mock_node.id not in errors
@pytest.mark.asyncio
async def test_validate_node_input_credentials_required_missing_creds_error(
mocker: MockerFixture,
):
"""
Test that _validate_node_input_credentials returns errors
for nodes with credentials_optional=False and missing credentials.
"""
from backend.executor.utils import _validate_node_input_credentials
# Create a mock node with credentials_optional=False (required)
mock_node = mocker.MagicMock()
mock_node.id = "node-with-required-creds"
mock_node.credentials_optional = False
mock_node.input_default = {} # No credentials configured
# Create a mock block with credentials field
mock_block = mocker.MagicMock()
mock_credentials_field_type = mocker.MagicMock()
mock_block.input_schema.get_credentials_fields.return_value = {
"credentials": mock_credentials_field_type
}
mock_node.block = mock_block
# Create mock graph
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
# Call the function
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="test-user-id",
nodes_input_masks=None,
)
# Node should be in errors, not in nodes_to_skip
assert mock_node.id in errors
assert "credentials" in errors[mock_node.id]
assert "required" in errors[mock_node.id]["credentials"].lower()
assert mock_node.id not in nodes_to_skip
@pytest.mark.asyncio
async def test_validate_graph_with_credentials_returns_nodes_to_skip(
mocker: MockerFixture,
):
"""
Test that validate_graph_with_credentials returns nodes_to_skip set
from _validate_node_input_credentials.
"""
from backend.executor.utils import validate_graph_with_credentials
# Mock _validate_node_input_credentials to return specific values
mock_validate = mocker.patch(
"backend.executor.utils._validate_node_input_credentials"
)
expected_errors = {"node1": {"field": "error"}}
expected_nodes_to_skip = {"node2", "node3"}
mock_validate.return_value = (expected_errors, expected_nodes_to_skip)
# Mock GraphModel with validate_graph_get_errors method
mock_graph = mocker.MagicMock()
mock_graph.validate_graph_get_errors.return_value = {}
# Call the function
errors, nodes_to_skip = await validate_graph_with_credentials(
graph=mock_graph,
user_id="test-user-id",
nodes_input_masks=None,
)
# Verify nodes_to_skip is passed through
assert nodes_to_skip == expected_nodes_to_skip
assert "node1" in errors
@pytest.mark.asyncio
async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
"""
Test that add_graph_execution properly passes nodes_to_skip
to the graph execution entry.
"""
from backend.data.execution import GraphExecutionWithNodes
from backend.executor.utils import add_graph_execution
# Mock data
graph_id = "test-graph-id"
user_id = "test-user-id"
inputs = {"test_input": "test_value"}
graph_version = 1
# Mock the graph object
mock_graph = mocker.MagicMock()
mock_graph.version = graph_version
# Starting nodes and masks
starting_nodes_input = [("node1", {"input1": "value1"})]
compiled_nodes_input_masks = {}
nodes_to_skip = {"skipped-node-1", "skipped-node-2"}
# Mock the graph execution object
mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
mock_graph_exec.id = "execution-id-123"
mock_graph_exec.node_executions = []
# Track what's passed to to_graph_execution_entry
captured_kwargs = {}
def capture_to_entry(**kwargs):
captured_kwargs.update(kwargs)
return mocker.MagicMock()
mock_graph_exec.to_graph_execution_entry.side_effect = capture_to_entry
# Setup mocks
mock_validate = mocker.patch(
"backend.executor.utils.validate_and_construct_node_execution_input"
)
mock_edb = mocker.patch("backend.executor.utils.execution_db")
mock_prisma = mocker.patch("backend.executor.utils.prisma")
mock_udb = mocker.patch("backend.executor.utils.user_db")
mock_gdb = mocker.patch("backend.executor.utils.graph_db")
mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
mock_get_event_bus = mocker.patch(
"backend.executor.utils.get_async_execution_event_bus"
)
# Setup returns - include nodes_to_skip in the tuple
mock_validate.return_value = (
mock_graph,
starting_nodes_input,
compiled_nodes_input_masks,
nodes_to_skip, # This should be passed through
)
mock_prisma.is_connected.return_value = True
mock_edb.create_graph_execution = mocker.AsyncMock(return_value=mock_graph_exec)
mock_edb.update_graph_execution_stats = mocker.AsyncMock(
return_value=mock_graph_exec
)
mock_edb.update_node_execution_status_batch = mocker.AsyncMock()
mock_user = mocker.MagicMock()
mock_user.timezone = "UTC"
mock_settings = mocker.MagicMock()
mock_settings.human_in_the_loop_safe_mode = True
mock_udb.get_user_by_id = mocker.AsyncMock(return_value=mock_user)
mock_gdb.get_graph_settings = mocker.AsyncMock(return_value=mock_settings)
mock_get_queue.return_value = mocker.AsyncMock()
mock_get_event_bus.return_value = mocker.MagicMock(publish=mocker.AsyncMock())
# Call the function
await add_graph_execution(
graph_id=graph_id,
user_id=user_id,
inputs=inputs,
graph_version=graph_version,
)
# Verify nodes_to_skip was passed to to_graph_execution_entry
assert "nodes_to_skip" in captured_kwargs
assert captured_kwargs["nodes_to_skip"] == nodes_to_skip

View File

@@ -245,21 +245,6 @@ DEFAULT_CREDENTIALS = [
webshare_proxy_credentials,
]
SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
# Set of providers that have system credentials available
SYSTEM_PROVIDERS = {cred.provider for cred in DEFAULT_CREDENTIALS}
def is_system_credential(credential_id: str) -> bool:
"""Check if a credential ID belongs to a system-managed credential."""
return credential_id in SYSTEM_CREDENTIAL_IDS
def is_system_provider(provider: str) -> bool:
"""Check if a provider has system-managed credentials available."""
return provider in SYSTEM_PROVIDERS
class IntegrationCredentialsStore:
def __init__(self):

View File

@@ -8,7 +8,6 @@ from .discord import DiscordOAuthHandler
from .github import GitHubOAuthHandler
from .google import GoogleOAuthHandler
from .notion import NotionOAuthHandler
from .reddit import RedditOAuthHandler
from .twitter import TwitterOAuthHandler
if TYPE_CHECKING:
@@ -21,7 +20,6 @@ _ORIGINAL_HANDLERS = [
GitHubOAuthHandler,
GoogleOAuthHandler,
NotionOAuthHandler,
RedditOAuthHandler,
TwitterOAuthHandler,
TodoistOAuthHandler,
]

View File

@@ -1,208 +0,0 @@
import time
import urllib.parse
from typing import ClassVar, Optional
from pydantic import SecretStr
from backend.data.model import OAuth2Credentials
from backend.integrations.oauth.base import BaseOAuthHandler
from backend.integrations.providers import ProviderName
from backend.util.request import Requests
from backend.util.settings import Settings
settings = Settings()
class RedditOAuthHandler(BaseOAuthHandler):
"""
Reddit OAuth 2.0 handler.
Based on the documentation at:
- https://github.com/reddit-archive/reddit/wiki/OAuth2
Notes:
- Reddit requires `duration=permanent` to get refresh tokens
- Access tokens expire after 1 hour (3600 seconds)
- Reddit requires HTTP Basic Auth for token requests
- Reddit requires a unique User-Agent header
"""
PROVIDER_NAME = ProviderName.REDDIT
DEFAULT_SCOPES: ClassVar[list[str]] = [
"identity", # Get username, verify auth
"read", # Access posts and comments
"submit", # Submit new posts and comments
"edit", # Edit own posts and comments
"history", # Access user's post history
"privatemessages", # Access inbox and send private messages
"flair", # Access and set flair on posts/subreddits
]
AUTHORIZE_URL = "https://www.reddit.com/api/v1/authorize"
TOKEN_URL = "https://www.reddit.com/api/v1/access_token"
USERNAME_URL = "https://oauth.reddit.com/api/v1/me"
REVOKE_URL = "https://www.reddit.com/api/v1/revoke_token"
def __init__(self, client_id: str, client_secret: str, redirect_uri: str):
self.client_id = client_id
self.client_secret = client_secret
self.redirect_uri = redirect_uri
def get_login_url(
self, scopes: list[str], state: str, code_challenge: Optional[str]
) -> str:
"""Generate Reddit OAuth 2.0 authorization URL"""
scopes = self.handle_default_scopes(scopes)
params = {
"response_type": "code",
"client_id": self.client_id,
"redirect_uri": self.redirect_uri,
"scope": " ".join(scopes),
"state": state,
"duration": "permanent", # Required for refresh tokens
}
return f"{self.AUTHORIZE_URL}?{urllib.parse.urlencode(params)}"
async def exchange_code_for_tokens(
self, code: str, scopes: list[str], code_verifier: Optional[str]
) -> OAuth2Credentials:
"""Exchange authorization code for access tokens"""
scopes = self.handle_default_scopes(scopes)
headers = {
"Content-Type": "application/x-www-form-urlencoded",
"User-Agent": settings.config.reddit_user_agent,
}
data = {
"grant_type": "authorization_code",
"code": code,
"redirect_uri": self.redirect_uri,
}
# Reddit requires HTTP Basic Auth for token requests
auth = (self.client_id, self.client_secret)
response = await Requests().post(
self.TOKEN_URL, headers=headers, data=data, auth=auth
)
if not response.ok:
error_text = response.text()
raise ValueError(
f"Reddit token exchange failed: {response.status} - {error_text}"
)
tokens = response.json()
if "error" in tokens:
raise ValueError(f"Reddit OAuth error: {tokens.get('error')}")
username = await self._get_username(tokens["access_token"])
return OAuth2Credentials(
provider=self.PROVIDER_NAME,
title=None,
username=username,
access_token=tokens["access_token"],
refresh_token=tokens.get("refresh_token"),
access_token_expires_at=int(time.time()) + tokens.get("expires_in", 3600),
refresh_token_expires_at=None, # Reddit refresh tokens don't expire
scopes=scopes,
)
async def _get_username(self, access_token: str) -> str:
"""Get the username from the access token"""
headers = {
"Authorization": f"Bearer {access_token}",
"User-Agent": settings.config.reddit_user_agent,
}
response = await Requests().get(self.USERNAME_URL, headers=headers)
if not response.ok:
raise ValueError(f"Failed to get Reddit username: {response.status}")
data = response.json()
return data.get("name", "unknown")
async def _refresh_tokens(
self, credentials: OAuth2Credentials
) -> OAuth2Credentials:
"""Refresh access tokens using refresh token"""
if not credentials.refresh_token:
raise ValueError("No refresh token available")
headers = {
"Content-Type": "application/x-www-form-urlencoded",
"User-Agent": settings.config.reddit_user_agent,
}
data = {
"grant_type": "refresh_token",
"refresh_token": credentials.refresh_token.get_secret_value(),
}
auth = (self.client_id, self.client_secret)
response = await Requests().post(
self.TOKEN_URL, headers=headers, data=data, auth=auth
)
if not response.ok:
error_text = response.text()
raise ValueError(
f"Reddit token refresh failed: {response.status} - {error_text}"
)
tokens = response.json()
if "error" in tokens:
raise ValueError(f"Reddit OAuth error: {tokens.get('error')}")
username = await self._get_username(tokens["access_token"])
# Reddit may or may not return a new refresh token
new_refresh_token = tokens.get("refresh_token")
if new_refresh_token:
refresh_token: SecretStr | None = SecretStr(new_refresh_token)
elif credentials.refresh_token:
# Keep the existing refresh token
refresh_token = credentials.refresh_token
else:
refresh_token = None
return OAuth2Credentials(
id=credentials.id,
provider=self.PROVIDER_NAME,
title=credentials.title,
username=username,
access_token=tokens["access_token"],
refresh_token=refresh_token,
access_token_expires_at=int(time.time()) + tokens.get("expires_in", 3600),
refresh_token_expires_at=None,
scopes=credentials.scopes,
)
async def revoke_tokens(self, credentials: OAuth2Credentials) -> bool:
"""Revoke the access token"""
headers = {
"Content-Type": "application/x-www-form-urlencoded",
"User-Agent": settings.config.reddit_user_agent,
}
data = {
"token": credentials.access_token.get_secret_value(),
"token_type_hint": "access_token",
}
auth = (self.client_id, self.client_secret)
response = await Requests().post(
self.REVOKE_URL, headers=headers, data=data, auth=auth
)
# Reddit returns 204 No Content on successful revocation
return response.ok

View File

@@ -10,7 +10,6 @@ from backend.util.settings import Settings
settings = Settings()
if TYPE_CHECKING:
from openai import AsyncOpenAI
from supabase import AClient, Client
from backend.data.execution import (
@@ -140,24 +139,6 @@ async def get_async_supabase() -> "AClient":
)
# ============ OpenAI Client ============ #
@cached(ttl_seconds=3600)
def get_openai_client() -> "AsyncOpenAI | None":
"""
Get a process-cached async OpenAI client for embeddings.
Returns None if API key is not configured.
"""
from openai import AsyncOpenAI
api_key = settings.secrets.openai_internal_api_key
if not api_key:
return None
return AsyncOpenAI(api_key=api_key)
# ============ Notification Queue Helpers ============ #

View File

@@ -264,7 +264,7 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
)
reddit_user_agent: str = Field(
default="web:AutoGPT:v0.6.0 (by /u/autogpt)",
default="AutoGPT:1.0 (by /u/autogpt)",
description="The user agent for the Reddit API",
)
@@ -658,14 +658,6 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
ayrshare_api_key: str = Field(default="", description="Ayrshare API Key")
ayrshare_jwt_key: str = Field(default="", description="Ayrshare private Key")
# Langfuse prompt management
langfuse_public_key: str = Field(default="", description="Langfuse public key")
langfuse_secret_key: str = Field(default="", description="Langfuse secret key")
langfuse_host: str = Field(
default="https://cloud.langfuse.com", description="Langfuse host URL"
)
# Add more secret fields as needed
model_config = SettingsConfigDict(
env_file=".env",

View File

@@ -1,227 +0,0 @@
#!/usr/bin/env python3
"""
Generate a lightweight stub for prisma/types.py that collapses all exported
symbols to Any. This prevents Pyright from spending time/budget on Prisma's
query DSL types while keeping runtime behavior unchanged.
Usage:
poetry run gen-prisma-stub
This script automatically finds the prisma package location and generates
the types.pyi stub file in the same directory as types.py.
"""
from __future__ import annotations
import ast
import importlib.util
import sys
from pathlib import Path
from typing import Iterable, Set
def _iter_assigned_names(target: ast.expr) -> Iterable[str]:
"""Extract names from assignment targets (handles tuple unpacking)."""
if isinstance(target, ast.Name):
yield target.id
elif isinstance(target, (ast.Tuple, ast.List)):
for elt in target.elts:
yield from _iter_assigned_names(elt)
def _is_private(name: str) -> bool:
"""Check if a name is private (starts with _ but not __)."""
return name.startswith("_") and not name.startswith("__")
def _is_safe_type_alias(node: ast.Assign) -> bool:
"""Check if an assignment is a safe type alias that shouldn't be stubbed.
Safe types are:
- Literal types (don't cause type budget issues)
- Simple type references (SortMode, SortOrder, etc.)
- TypeVar definitions
"""
if not node.value:
return False
# Check if it's a Subscript (like Literal[...], Union[...], TypeVar[...])
if isinstance(node.value, ast.Subscript):
# Get the base type name
if isinstance(node.value.value, ast.Name):
base_name = node.value.value.id
# Literal types are safe
if base_name == "Literal":
return True
# TypeVar is safe
if base_name == "TypeVar":
return True
elif isinstance(node.value.value, ast.Attribute):
# Handle typing_extensions.Literal etc.
if node.value.value.attr == "Literal":
return True
# Check if it's a simple Name reference (like SortMode = _types.SortMode)
if isinstance(node.value, ast.Attribute):
return True
# Check if it's a Call (like TypeVar(...))
if isinstance(node.value, ast.Call):
if isinstance(node.value.func, ast.Name):
if node.value.func.id == "TypeVar":
return True
return False
def collect_top_level_symbols(
tree: ast.Module, source_lines: list[str]
) -> tuple[Set[str], Set[str], list[str], Set[str]]:
"""Collect all top-level symbols from an AST module.
Returns:
Tuple of (class_names, function_names, safe_variable_sources, unsafe_variable_names)
safe_variable_sources contains the actual source code lines for safe variables
"""
classes: Set[str] = set()
functions: Set[str] = set()
safe_variable_sources: list[str] = []
unsafe_variables: Set[str] = set()
for node in tree.body:
if isinstance(node, ast.ClassDef):
if not _is_private(node.name):
classes.add(node.name)
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
if not _is_private(node.name):
functions.add(node.name)
elif isinstance(node, ast.Assign):
is_safe = _is_safe_type_alias(node)
names = []
for t in node.targets:
for n in _iter_assigned_names(t):
if not _is_private(n):
names.append(n)
if names:
if is_safe:
# Extract the source code for this assignment
start_line = node.lineno - 1 # 0-indexed
end_line = node.end_lineno if node.end_lineno else node.lineno
source = "\n".join(source_lines[start_line:end_line])
safe_variable_sources.append(source)
else:
unsafe_variables.update(names)
elif isinstance(node, ast.AnnAssign) and node.target:
# Annotated assignments are always stubbed
for n in _iter_assigned_names(node.target):
if not _is_private(n):
unsafe_variables.add(n)
return classes, functions, safe_variable_sources, unsafe_variables
def find_prisma_types_path() -> Path:
"""Find the prisma types.py file in the installed package."""
spec = importlib.util.find_spec("prisma")
if spec is None or spec.origin is None:
raise RuntimeError("Could not find prisma package. Is it installed?")
prisma_dir = Path(spec.origin).parent
types_path = prisma_dir / "types.py"
if not types_path.exists():
raise RuntimeError(f"prisma/types.py not found at {types_path}")
return types_path
def generate_stub(src_path: Path, stub_path: Path) -> int:
"""Generate the .pyi stub file from the source types.py."""
code = src_path.read_text(encoding="utf-8", errors="ignore")
source_lines = code.splitlines()
tree = ast.parse(code, filename=str(src_path))
classes, functions, safe_variable_sources, unsafe_variables = (
collect_top_level_symbols(tree, source_lines)
)
header = """\
# -*- coding: utf-8 -*-
# Auto-generated stub file - DO NOT EDIT
# Generated by gen_prisma_types_stub.py
#
# This stub intentionally collapses complex Prisma query DSL types to Any.
# Prisma's generated types can explode Pyright's type inference budgets
# on large schemas. We collapse them to Any so the rest of the codebase
# can remain strongly typed while keeping runtime behavior unchanged.
#
# Safe types (Literal, TypeVar, simple references) are preserved from the
# original types.py to maintain proper type checking where possible.
from __future__ import annotations
from typing import Any
from typing_extensions import Literal
# Re-export commonly used typing constructs that may be imported from this module
from typing import TYPE_CHECKING, TypeVar, Generic, Union, Optional, List, Dict
# Base type alias for stubbed Prisma types - allows any dict structure
_PrismaDict = dict[str, Any]
"""
lines = [header]
# Include safe variable definitions (Literal types, TypeVars, etc.)
lines.append("# Safe type definitions preserved from original types.py")
for source in safe_variable_sources:
lines.append(source)
lines.append("")
# Stub all classes and unsafe variables uniformly as dict[str, Any] aliases
# This allows:
# 1. Use in type annotations: x: SomeType
# 2. Constructor calls: SomeType(...)
# 3. Dict literal assignments: x: SomeType = {...}
lines.append(
"# Stubbed types (collapsed to dict[str, Any] to prevent type budget exhaustion)"
)
all_stubbed = sorted(classes | unsafe_variables)
for name in all_stubbed:
lines.append(f"{name} = _PrismaDict")
lines.append("")
# Stub functions
for name in sorted(functions):
lines.append(f"def {name}(*args: Any, **kwargs: Any) -> Any: ...")
lines.append("")
stub_path.write_text("\n".join(lines), encoding="utf-8")
return (
len(classes)
+ len(functions)
+ len(safe_variable_sources)
+ len(unsafe_variables)
)
def main() -> None:
"""Main entry point."""
try:
types_path = find_prisma_types_path()
stub_path = types_path.with_suffix(".pyi")
print(f"Found prisma types.py at: {types_path}")
print(f"Generating stub at: {stub_path}")
num_symbols = generate_stub(types_path, stub_path)
print(f"Generated {stub_path.name} with {num_symbols} Any-typed symbols")
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -25,9 +25,6 @@ def run(*command: str) -> None:
def lint():
# Generate Prisma types stub before running pyright to prevent type budget exhaustion
run("gen-prisma-stub")
lint_step_args: list[list[str]] = [
["ruff", "check", *TARGET_DIRS, "--exit-zero"],
["ruff", "format", "--diff", "--check", LIBS_DIR],
@@ -52,6 +49,4 @@ def format():
run("ruff", "format", LIBS_DIR)
run("isort", "--profile", "black", BACKEND_DIR)
run("black", BACKEND_DIR)
# Generate Prisma types stub before running pyright to prevent type budget exhaustion
run("gen-prisma-stub")
run("pyright", *TARGET_DIRS)

View File

@@ -1,46 +0,0 @@
-- CreateExtension
-- Supabase: pgvector must be enabled via Dashboard → Database → Extensions first
-- Create in public schema so vector type is available across all schemas
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "public";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'vector extension not available or already exists, skipping';
END $$;
-- CreateEnum
CREATE TYPE "ContentType" AS ENUM ('STORE_AGENT', 'BLOCK', 'INTEGRATION', 'DOCUMENTATION', 'LIBRARY_AGENT');
-- CreateTable
CREATE TABLE "UnifiedContentEmbedding" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"contentType" "ContentType" NOT NULL,
"contentId" TEXT NOT NULL,
"userId" TEXT,
"embedding" public.vector(1536) NOT NULL,
"searchableText" TEXT NOT NULL,
"metadata" JSONB NOT NULL DEFAULT '{}',
CONSTRAINT "UnifiedContentEmbedding_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX "UnifiedContentEmbedding_contentType_idx" ON "UnifiedContentEmbedding"("contentType");
-- CreateIndex
CREATE INDEX "UnifiedContentEmbedding_userId_idx" ON "UnifiedContentEmbedding"("userId");
-- CreateIndex
CREATE INDEX "UnifiedContentEmbedding_contentType_userId_idx" ON "UnifiedContentEmbedding"("contentType", "userId");
-- CreateIndex
-- NULLS NOT DISTINCT ensures only one public (NULL userId) embedding per contentType+contentId
-- Requires PostgreSQL 15+. Supabase uses PostgreSQL 15+.
CREATE UNIQUE INDEX "UnifiedContentEmbedding_contentType_contentId_userId_key" ON "UnifiedContentEmbedding"("contentType", "contentId", "userId") NULLS NOT DISTINCT;
-- CreateIndex
-- HNSW index for fast vector similarity search on embeddings
-- Uses cosine distance operator (<=>), which matches the query in hybrid_search.py
CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" public.vector_cosine_ops);

View File

@@ -1,71 +0,0 @@
-- Acknowledge Supabase-managed extensions to prevent drift warnings
-- These extensions are pre-installed by Supabase in specific schemas
-- This migration ensures they exist where available (Supabase) or skips gracefully (CI)
-- Create schemas (safe in both CI and Supabase)
CREATE SCHEMA IF NOT EXISTS "extensions";
-- Extensions that exist in both CI and Supabase
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "pgcrypto" WITH SCHEMA "extensions";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pgcrypto extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA "extensions";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'uuid-ossp extension not available, skipping';
END $$;
-- Supabase-specific extensions (skip gracefully in CI)
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "pg_stat_statements" WITH SCHEMA "extensions";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pg_stat_statements extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "pg_net" WITH SCHEMA "extensions";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pg_net extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS "pgjwt" WITH SCHEMA "extensions";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pgjwt extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE SCHEMA IF NOT EXISTS "graphql";
CREATE EXTENSION IF NOT EXISTS "pg_graphql" WITH SCHEMA "graphql";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pg_graphql extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE SCHEMA IF NOT EXISTS "pgsodium";
CREATE EXTENSION IF NOT EXISTS "pgsodium" WITH SCHEMA "pgsodium";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'pgsodium extension not available, skipping';
END $$;
DO $$
BEGIN
CREATE SCHEMA IF NOT EXISTS "vault";
CREATE EXTENSION IF NOT EXISTS "supabase_vault" WITH SCHEMA "vault";
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'supabase_vault extension not available, skipping';
END $$;
-- Return to platform
CREATE SCHEMA IF NOT EXISTS "platform";

View File

@@ -1,64 +0,0 @@
-- CreateTable
CREATE TABLE "CoPilotUnderstanding" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"userId" TEXT NOT NULL,
"data" JSONB,
CONSTRAINT "CoPilotUnderstanding_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "ChatSession" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"userId" TEXT NOT NULL,
"title" TEXT,
"credentials" JSONB NOT NULL DEFAULT '{}',
"successfulAgentRuns" JSONB NOT NULL DEFAULT '{}',
"successfulAgentSchedules" JSONB NOT NULL DEFAULT '{}',
"totalPromptTokens" INTEGER NOT NULL DEFAULT 0,
"totalCompletionTokens" INTEGER NOT NULL DEFAULT 0,
CONSTRAINT "ChatSession_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "ChatMessage" (
"id" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"sessionId" TEXT NOT NULL,
"role" TEXT NOT NULL,
"content" TEXT,
"name" TEXT,
"toolCallId" TEXT,
"refusal" TEXT,
"toolCalls" JSONB,
"functionCall" JSONB,
"sequence" INTEGER NOT NULL,
CONSTRAINT "ChatMessage_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "CoPilotUnderstanding_userId_key" ON "CoPilotUnderstanding"("userId");
-- CreateIndex
CREATE INDEX "CoPilotUnderstanding_userId_idx" ON "CoPilotUnderstanding"("userId");
-- CreateIndex
CREATE INDEX "ChatSession_userId_updatedAt_idx" ON "ChatSession"("userId", "updatedAt");
-- CreateIndex
CREATE UNIQUE INDEX "ChatMessage_sessionId_sequence_key" ON "ChatMessage"("sessionId", "sequence");
-- AddForeignKey
ALTER TABLE "CoPilotUnderstanding" ADD CONSTRAINT "CoPilotUnderstanding_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "ChatSession" ADD CONSTRAINT "ChatSession_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "ChatMessage" ADD CONSTRAINT "ChatMessage_sessionId_fkey" FOREIGN KEY ("sessionId") REFERENCES "ChatSession"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -2777,30 +2777,6 @@ enabler = ["pytest-enabler (>=2.2)"]
test = ["pyfakefs", "pytest (>=6,!=8.1.*)"]
type = ["pygobject-stubs", "pytest-mypy", "shtab", "types-pywin32"]
[[package]]
name = "langfuse"
version = "3.11.2"
description = "A client library for accessing langfuse"
optional = false
python-versions = "<4.0,>=3.10"
groups = ["main"]
files = [
{file = "langfuse-3.11.2-py3-none-any.whl", hash = "sha256:84faea9f909694023cc7f0eb45696be190248c8790424f22af57ca4cd7a29f2d"},
{file = "langfuse-3.11.2.tar.gz", hash = "sha256:ab5f296a8056815b7288c7f25bc308a5e79f82a8634467b25daffdde99276e09"},
]
[package.dependencies]
backoff = ">=1.10.0"
httpx = ">=0.15.4,<1.0"
openai = ">=0.27.8"
opentelemetry-api = ">=1.33.1,<2.0.0"
opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0"
opentelemetry-sdk = ">=1.33.1,<2.0.0"
packaging = ">=23.2,<26.0"
pydantic = ">=1.10.7,<3.0"
requests = ">=2,<3"
wrapt = ">=1.14,<2.0"
[[package]]
name = "launchdarkly-eventsource"
version = "1.3.0"
@@ -3492,90 +3468,6 @@ files = [
importlib-metadata = ">=6.0,<8.8.0"
typing-extensions = ">=4.5.0"
[[package]]
name = "opentelemetry-exporter-otlp-proto-common"
version = "1.35.0"
description = "OpenTelemetry Protobuf encoding"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "opentelemetry_exporter_otlp_proto_common-1.35.0-py3-none-any.whl", hash = "sha256:863465de697ae81279ede660f3918680b4480ef5f69dcdac04f30722ed7b74cc"},
{file = "opentelemetry_exporter_otlp_proto_common-1.35.0.tar.gz", hash = "sha256:6f6d8c39f629b9fa5c79ce19a2829dbd93034f8ac51243cdf40ed2196f00d7eb"},
]
[package.dependencies]
opentelemetry-proto = "1.35.0"
[[package]]
name = "opentelemetry-exporter-otlp-proto-http"
version = "1.35.0"
description = "OpenTelemetry Collector Protobuf over HTTP Exporter"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "opentelemetry_exporter_otlp_proto_http-1.35.0-py3-none-any.whl", hash = "sha256:9a001e3df3c7f160fb31056a28ed7faa2de7df68877ae909516102ae36a54e1d"},
{file = "opentelemetry_exporter_otlp_proto_http-1.35.0.tar.gz", hash = "sha256:cf940147f91b450ef5f66e9980d40eb187582eed399fa851f4a7a45bb880de79"},
]
[package.dependencies]
googleapis-common-protos = ">=1.52,<2.0"
opentelemetry-api = ">=1.15,<2.0"
opentelemetry-exporter-otlp-proto-common = "1.35.0"
opentelemetry-proto = "1.35.0"
opentelemetry-sdk = ">=1.35.0,<1.36.0"
requests = ">=2.7,<3.0"
typing-extensions = ">=4.5.0"
[[package]]
name = "opentelemetry-proto"
version = "1.35.0"
description = "OpenTelemetry Python Proto"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "opentelemetry_proto-1.35.0-py3-none-any.whl", hash = "sha256:98fffa803164499f562718384e703be8d7dfbe680192279a0429cb150a2f8809"},
{file = "opentelemetry_proto-1.35.0.tar.gz", hash = "sha256:532497341bd3e1c074def7c5b00172601b28bb83b48afc41a4b779f26eb4ee05"},
]
[package.dependencies]
protobuf = ">=5.0,<7.0"
[[package]]
name = "opentelemetry-sdk"
version = "1.35.0"
description = "OpenTelemetry Python SDK"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "opentelemetry_sdk-1.35.0-py3-none-any.whl", hash = "sha256:223d9e5f5678518f4842311bb73966e0b6db5d1e0b74e35074c052cd2487f800"},
{file = "opentelemetry_sdk-1.35.0.tar.gz", hash = "sha256:2a400b415ab68aaa6f04e8a6a9f6552908fb3090ae2ff78d6ae0c597ac581954"},
]
[package.dependencies]
opentelemetry-api = "1.35.0"
opentelemetry-semantic-conventions = "0.56b0"
typing-extensions = ">=4.5.0"
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.56b0"
description = "OpenTelemetry Semantic Conventions"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "opentelemetry_semantic_conventions-0.56b0-py3-none-any.whl", hash = "sha256:df44492868fd6b482511cc43a942e7194be64e94945f572db24df2e279a001a2"},
{file = "opentelemetry_semantic_conventions-0.56b0.tar.gz", hash = "sha256:c114c2eacc8ff6d3908cb328c811eaf64e6d68623840be9224dc829c4fd6c2ea"},
]
[package.dependencies]
opentelemetry-api = "1.35.0"
typing-extensions = ">=4.5.0"
[[package]]
name = "orjson"
version = "3.11.3"
@@ -7030,97 +6922,6 @@ files = [
{file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"},
]
[[package]]
name = "wrapt"
version = "1.17.3"
description = "Module for decorators, wrappers and monkey patching."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04"},
{file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2"},
{file = "wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c"},
{file = "wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775"},
{file = "wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd"},
{file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05"},
{file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418"},
{file = "wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390"},
{file = "wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6"},
{file = "wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18"},
{file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7"},
{file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85"},
{file = "wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f"},
{file = "wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311"},
{file = "wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1"},
{file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5"},
{file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2"},
{file = "wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89"},
{file = "wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77"},
{file = "wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a"},
{file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0"},
{file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba"},
{file = "wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd"},
{file = "wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828"},
{file = "wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9"},
{file = "wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396"},
{file = "wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc"},
{file = "wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe"},
{file = "wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c"},
{file = "wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6"},
{file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0"},
{file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77"},
{file = "wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7"},
{file = "wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277"},
{file = "wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d"},
{file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa"},
{file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050"},
{file = "wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8"},
{file = "wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb"},
{file = "wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16"},
{file = "wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39"},
{file = "wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235"},
{file = "wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c"},
{file = "wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b"},
{file = "wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa"},
{file = "wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7"},
{file = "wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4"},
{file = "wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10"},
{file = "wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6"},
{file = "wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58"},
{file = "wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a"},
{file = "wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067"},
{file = "wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454"},
{file = "wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e"},
{file = "wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f"},
{file = "wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056"},
{file = "wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804"},
{file = "wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977"},
{file = "wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116"},
{file = "wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6"},
{file = "wrapt-1.17.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:70d86fa5197b8947a2fa70260b48e400bf2ccacdcab97bb7de47e3d1e6312225"},
{file = "wrapt-1.17.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:df7d30371a2accfe4013e90445f6388c570f103d61019b6b7c57e0265250072a"},
{file = "wrapt-1.17.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:caea3e9c79d5f0d2c6d9ab96111601797ea5da8e6d0723f77eabb0d4068d2b2f"},
{file = "wrapt-1.17.3-cp38-cp38-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:758895b01d546812d1f42204bd443b8c433c44d090248bf22689df673ccafe00"},
{file = "wrapt-1.17.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56"},
{file = "wrapt-1.17.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:656873859b3b50eeebe6db8b1455e99d90c26ab058db8e427046dbc35c3140a5"},
{file = "wrapt-1.17.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a9a2203361a6e6404f80b99234fe7fb37d1fc73487b5a78dc1aa5b97201e0f22"},
{file = "wrapt-1.17.3-cp38-cp38-win32.whl", hash = "sha256:55cbbc356c2842f39bcc553cf695932e8b30e30e797f961860afb308e6b1bb7c"},
{file = "wrapt-1.17.3-cp38-cp38-win_amd64.whl", hash = "sha256:ad85e269fe54d506b240d2d7b9f5f2057c2aa9a2ea5b32c66f8902f768117ed2"},
{file = "wrapt-1.17.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30ce38e66630599e1193798285706903110d4f057aab3168a34b7fdc85569afc"},
{file = "wrapt-1.17.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:65d1d00fbfb3ea5f20add88bbc0f815150dbbde3b026e6c24759466c8b5a9ef9"},
{file = "wrapt-1.17.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7c06742645f914f26c7f1fa47b8bc4c91d222f76ee20116c43d5ef0912bba2d"},
{file = "wrapt-1.17.3-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e18f01b0c3e4a07fe6dfdb00e29049ba17eadbc5e7609a2a3a4af83ab7d710a"},
{file = "wrapt-1.17.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f5f51a6466667a5a356e6381d362d259125b57f059103dd9fdc8c0cf1d14139"},
{file = "wrapt-1.17.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:59923aa12d0157f6b82d686c3fd8e1166fa8cdfb3e17b42ce3b6147ff81528df"},
{file = "wrapt-1.17.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46acc57b331e0b3bcb3e1ca3b421d65637915cfcd65eb783cb2f78a511193f9b"},
{file = "wrapt-1.17.3-cp39-cp39-win32.whl", hash = "sha256:3e62d15d3cfa26e3d0788094de7b64efa75f3a53875cdbccdf78547aed547a81"},
{file = "wrapt-1.17.3-cp39-cp39-win_amd64.whl", hash = "sha256:1f23fa283f51c890eda8e34e4937079114c74b4c81d2b2f1f1d94948f5cc3d7f"},
{file = "wrapt-1.17.3-cp39-cp39-win_arm64.whl", hash = "sha256:24c2ed34dc222ed754247a2702b1e1e89fdbaa4016f324b4b8f1a802d4ffe87f"},
{file = "wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22"},
{file = "wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0"},
]
[[package]]
name = "xattr"
version = "1.2.0"
@@ -7494,4 +7295,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "86838b5ae40d606d6e01a14dad8a56c389d890d7a6a0c274a6602cca80f0df84"
content-hash = "a93ba0cea3b465cb6ec3e3f258b383b09f84ea352ccfdbfa112902cde5653fc6"

View File

@@ -33,7 +33,6 @@ html2text = "^2024.2.26"
jinja2 = "^3.1.6"
jsonref = "^1.1.0"
jsonschema = "^4.25.0"
langfuse = "^3.11.0"
launchdarkly-server-sdk = "^9.12.0"
mem0ai = "^0.1.115"
moviepy = "^2.1.2"
@@ -118,7 +117,6 @@ lint = "linter:lint"
test = "run_tests:test"
load-store-agents = "test.load_store_agents:run"
export-api-schema = "backend.cli.generate_openapi_json:main"
gen-prisma-stub = "gen_prisma_types_stub:main"
oauth-tool = "backend.cli.oauth_tool:cli"
[tool.isort]
@@ -136,9 +134,6 @@ ignore_patterns = []
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
# Disable syrupy plugin to avoid conflict with pytest-snapshot
# Both provide --snapshot-update argument causing ArgumentError
addopts = "-p no:syrupy"
filterwarnings = [
"ignore:'audioop' is deprecated:DeprecationWarning:discord.player",
"ignore:invalid escape sequence:DeprecationWarning:tweepy.api",

View File

@@ -1,15 +1,14 @@
datasource db {
provider = "postgresql"
url = env("DATABASE_URL")
directUrl = env("DIRECT_URL")
extensions = [pgvector(map: "vector")]
provider = "postgresql"
url = env("DATABASE_URL")
directUrl = env("DIRECT_URL")
}
generator client {
provider = "prisma-client-py"
recursive_type_depth = -1
interface = "asyncio"
previewFeatures = ["views", "fullTextSearch", "postgresqlExtensions"]
previewFeatures = ["views", "fullTextSearch"]
partial_type_generator = "backend/data/partial_types.py"
}
@@ -48,13 +47,12 @@ model User {
AnalyticsMetrics AnalyticsMetrics[]
CreditTransactions CreditTransaction[]
UserBalance UserBalance?
ChatSessions ChatSession[]
AgentPresets AgentPreset[]
LibraryAgents LibraryAgent[]
AgentPresets AgentPreset[]
LibraryAgents LibraryAgent[]
Profile Profile[]
UserOnboarding UserOnboarding?
CoPilotUnderstanding CoPilotUnderstanding?
BuilderSearchHistory BuilderSearchHistory[]
StoreListings StoreListing[]
StoreListingReviews StoreListingReview[]
@@ -123,84 +121,19 @@ model UserOnboarding {
User User @relation(fields: [userId], references: [id], onDelete: Cascade)
}
model CoPilotUnderstanding {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @default(now()) @updatedAt
userId String @unique
User User @relation(fields: [userId], references: [id], onDelete: Cascade)
data Json?
@@index([userId])
}
model BuilderSearchHistory {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @default(now()) @updatedAt
searchQuery String
filter String[] @default([])
byCreator String[] @default([])
filter String[] @default([])
byCreator String[] @default([])
userId String
User User @relation(fields: [userId], references: [id], onDelete: Cascade)
}
////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
//////////////// CHAT SESSION TABLES ///////////////////
////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
model ChatSession {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @default(now()) @updatedAt
userId String
User User @relation(fields: [userId], references: [id], onDelete: Cascade)
// Session metadata
title String?
credentials Json @default("{}") // Map of provider -> credential metadata
// Rate limiting counters (stored as JSON maps)
successfulAgentRuns Json @default("{}") // Map of graph_id -> count
successfulAgentSchedules Json @default("{}") // Map of graph_id -> count
// Usage tracking
totalPromptTokens Int @default(0)
totalCompletionTokens Int @default(0)
Messages ChatMessage[]
@@index([userId, updatedAt])
}
model ChatMessage {
id String @id @default(uuid())
createdAt DateTime @default(now())
sessionId String
Session ChatSession @relation(fields: [sessionId], references: [id], onDelete: Cascade)
// Message content
role String // "user", "assistant", "system", "tool", "function"
content String?
name String?
toolCallId String?
refusal String?
toolCalls Json? // List of tool calls for assistant messages
functionCall Json? // Deprecated but kept for compatibility
// Ordering within session
sequence Int
@@unique([sessionId, sequence])
}
// This model describes the Agent Graph/Flow (Multi Agent System).
model AgentGraph {
id String @default(uuid())
@@ -788,25 +721,26 @@ view StoreAgent {
storeListingVersionId String
updated_at DateTime
slug String
agent_name String
agent_video String?
agent_output_demo String?
agent_image String[]
slug String
agent_name String
agent_video String?
agent_output_demo String?
agent_image String[]
featured Boolean @default(false)
creator_username String?
creator_avatar String?
sub_heading String
description String
categories String[]
runs Int
rating Float
versions String[]
agentGraphVersions String[]
agentGraphId String
is_available Boolean @default(true)
useForOnboarding Boolean @default(false)
featured Boolean @default(false)
creator_username String?
creator_avatar String?
sub_heading String
description String
categories String[]
search Unsupported("tsvector")? @default(dbgenerated("''::tsvector"))
runs Int
rating Float
versions String[]
agentGraphVersions String[]
agentGraphId String
is_available Boolean @default(true)
useForOnboarding Boolean @default(false)
// Materialized views used (refreshed every 15 minutes via pg_cron):
// - mv_agent_run_counts - Pre-aggregated agent execution counts by agentGraphId
@@ -922,14 +856,14 @@ model StoreListingVersion {
AgentGraph AgentGraph @relation(fields: [agentGraphId, agentGraphVersion], references: [id, version])
// Content fields
name String
subHeading String
videoUrl String?
agentOutputDemoUrl String?
imageUrls String[]
description String
instructions String?
categories String[]
name String
subHeading String
videoUrl String?
agentOutputDemoUrl String?
imageUrls String[]
description String
instructions String?
categories String[]
isFeatured Boolean @default(false)
@@ -965,9 +899,6 @@ model StoreListingVersion {
// Reviews for this specific version
Reviews StoreListingReview[]
// Note: Embeddings now stored in UnifiedContentEmbedding table
// Use contentType=STORE_AGENT and contentId=storeListingVersionId
@@unique([storeListingId, version])
@@index([storeListingId, submissionStatus, isAvailable])
@@index([submissionStatus])
@@ -975,42 +906,6 @@ model StoreListingVersion {
@@index([agentGraphId, agentGraphVersion]) // Non-unique index for efficient lookups
}
// Content type enum for unified search across store agents, blocks, docs
// Note: BLOCK/INTEGRATION are file-based (Python classes), not DB records
// DOCUMENTATION are file-based (.md files), not DB records
// Only STORE_AGENT and LIBRARY_AGENT are stored in database
enum ContentType {
STORE_AGENT // Database: StoreListingVersion
BLOCK // File-based: Python classes in /backend/blocks/
INTEGRATION // File-based: Python classes (blocks with credentials)
DOCUMENTATION // File-based: .md/.mdx files
LIBRARY_AGENT // Database: User's personal agents
}
// Unified embeddings table for all searchable content types
// Supports both public content (userId=null) and user-specific content (userId=userID)
model UnifiedContentEmbedding {
id String @id @default(uuid())
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
// Content identification
contentType ContentType
contentId String // DB ID (storeListingVersionId) or file identifier (block.id, file_path)
userId String? // NULL for public content (store, blocks, docs), userId for private content (library agents)
// Search data
embedding Unsupported("vector(1536)") // pgvector embedding (extension in platform schema)
searchableText String // Combined text for search and fallback
metadata Json @default("{}") // Content-specific metadata
@@unique([contentType, contentId, userId], map: "UnifiedContentEmbedding_contentType_contentId_userId_key")
@@index([contentType])
@@index([userId])
@@index([contentType, userId])
@@index([embedding], map: "UnifiedContentEmbedding_embedding_idx")
}
model StoreListingReview {
id String @id @default(uuid())
createdAt DateTime @default(now())
@@ -1103,16 +998,16 @@ model OAuthApplication {
updatedAt DateTime @updatedAt
// Application metadata
name String
description String?
logoUrl String? // URL to app logo stored in GCS
clientId String @unique
clientSecret String // Hashed with Scrypt (same as API keys)
clientSecretSalt String // Salt for Scrypt hashing
name String
description String?
logoUrl String? // URL to app logo stored in GCS
clientId String @unique
clientSecret String // Hashed with Scrypt (same as API keys)
clientSecretSalt String // Salt for Scrypt hashing
// OAuth configuration
redirectUris String[] // Allowed callback URLs
grantTypes String[] @default(["authorization_code", "refresh_token"])
grantTypes String[] @default(["authorization_code", "refresh_token"])
scopes APIKeyPermission[] // Which permissions the app can request
// Application management

View File

@@ -2,7 +2,6 @@
"created_at": "2025-09-04T13:37:00",
"credentials_input_schema": {
"properties": {},
"required": [],
"title": "TestGraphCredentialsInputSchema",
"type": "object"
},

View File

@@ -2,7 +2,6 @@
{
"credentials_input_schema": {
"properties": {},
"required": [],
"title": "TestGraphCredentialsInputSchema",
"type": "object"
},

View File

@@ -4,7 +4,6 @@
"id": "test-agent-1",
"graph_id": "test-agent-1",
"graph_version": 1,
"owner_user_id": "3e53486c-cf57-477e-ba2a-cb02dc828e1a",
"image_url": null,
"creator_name": "Test Creator",
"creator_image_url": "",
@@ -42,7 +41,6 @@
"id": "test-agent-2",
"graph_id": "test-agent-2",
"graph_version": 1,
"owner_user_id": "3e53486c-cf57-477e-ba2a-cb02dc828e1a",
"image_url": null,
"creator_name": "Test Creator",
"creator_image_url": "",

View File

@@ -1,7 +1,6 @@
{
"submissions": [
{
"listing_id": "test-listing-id",
"agent_id": "test-agent-id",
"agent_version": 1,
"name": "Test Agent",

View File

@@ -37,7 +37,7 @@ services:
context: ../
dockerfile: autogpt_platform/backend/Dockerfile
target: migrate
command: ["sh", "-c", "poetry run prisma generate && poetry run gen-prisma-stub && poetry run prisma migrate deploy"]
command: ["sh", "-c", "poetry run prisma generate && poetry run prisma migrate deploy"]
develop:
watch:
- path: ./

View File

@@ -708,7 +708,10 @@ export function CreateButton() {
## 🧪 Testing & Storybook
- See `TESTING.md` for Playwright setup, E2E data seeding, and Storybook usage.
- End-to-end: [Playwright](https://playwright.dev/docs/intro) (`pnpm test`, `pnpm test-ui`)
- [Storybook](https://storybook.js.org/docs) for isolated UI development (`pnpm storybook` / `pnpm build-storybook`)
- For Storybook tests in CI, see [`@storybook/test-runner`](https://storybook.js.org/docs/writing-tests/test-runner) (`test-storybook:ci`)
- When changing components in `src/components`, update or add stories and visually verify in Storybook/Chromatic
---

View File

@@ -5,7 +5,6 @@ This is the frontend for AutoGPT's next generation
This project uses [**pnpm**](https://pnpm.io/) as the package manager via **corepack**. [Corepack](https://github.com/nodejs/corepack) is a Node.js tool that automatically manages package managers without requiring global installations.
For architecture, conventions, data fetching, feature flags, design system usage, state management, and PR process, see [CONTRIBUTING.md](./CONTRIBUTING.md).
For Playwright and Storybook testing setup, see [TESTING.md](./TESTING.md).
### Prerequisites

View File

@@ -1,57 +0,0 @@
# Frontend Testing 🧪
## Quick Start (local) 🚀
1. Start the backend + Supabase stack:
- From `autogpt_platform`: `docker compose --profile local up deps_backend -d`
- Or run the full stack: `docker compose up -d`
2. Seed rich E2E data (creates `test123@gmail.com` with library agents):
- From `autogpt_platform/backend`: `poetry run python test/e2e_test_data.py`
3. Run Playwright:
- From `autogpt_platform/frontend`: `pnpm test` or `pnpm test-ui`
## How Playwright setup works 🎭
- Playwright runs from `frontend/playwright.config.ts` with a global setup step.
- The global setup creates a user pool via the real signup UI and stores it in `frontend/.auth/user-pool.json`.
- Most tests call `getTestUser()` (from `src/tests/utils/auth.ts`) which pulls a random user from that pool.
- these users do not contain library agents, it's user that just "signed up" on the platform, hence some tests to make use of users created via script (see below) with more data
## Test users 👤
- **User pool (basic users)**
Created automatically by the Playwright global setup through `/signup`.
Used by `getTestUser()` in `src/tests/utils/auth.ts`.
- **Rich user with library agents**
Created by `backend/test/e2e_test_data.py`.
Accessed via `getTestUserWithLibraryAgents()` in `src/tests/credentials/index.ts`.
Use the rich user when a test needs existing library agents (e.g. `library.spec.ts`).
## Resetting or wiping the DB 🔁
If you reset the Docker DB and logins start failing:
1. Delete `frontend/.auth/user-pool.json` so the pool is regenerated.
2. Re-run the E2E data script to recreate the rich user + library agents:
- `poetry run python test/e2e_test_data.py`
## Storybook 📚
## Flow diagram 🗺️
```mermaid
flowchart TD
A[Start Docker stack] --> B[Run e2e_test_data.py]
B --> C[Run Playwright tests]
C --> D[Global setup creates user pool]
D --> E{Test needs rich data?}
E -->|No| F[getTestUser from user pool]
E -->|Yes| G[getTestUserWithLibraryAgents]
```
- `pnpm storybook` Run Storybook locally
- `pnpm build-storybook` Build a static Storybook
- CI runner: `pnpm test-storybook`
- When changing components in `src/components`, update or add stories and verify in Storybook/Chromatic.

View File

@@ -3,13 +3,6 @@ import { withSentryConfig } from "@sentry/nextjs";
/** @type {import('next').NextConfig} */
const nextConfig = {
productionBrowserSourceMaps: true,
// Externalize OpenTelemetry packages to fix Turbopack HMR issues
serverExternalPackages: [
"@opentelemetry/instrumentation",
"@opentelemetry/sdk-node",
"import-in-the-middle",
"require-in-the-middle",
],
experimental: {
serverActions: {
bodySizeLimit: "256mb",

View File

@@ -32,7 +32,6 @@
"@hookform/resolvers": "5.2.2",
"@next/third-parties": "15.4.6",
"@phosphor-icons/react": "2.1.10",
"@radix-ui/react-accordion": "1.2.12",
"@radix-ui/react-alert-dialog": "1.1.15",
"@radix-ui/react-avatar": "1.1.10",
"@radix-ui/react-checkbox": "1.3.3",
@@ -55,7 +54,7 @@
"@radix-ui/react-tooltip": "1.2.8",
"@rjsf/core": "6.1.2",
"@rjsf/utils": "6.1.2",
"@rjsf/validator-ajv8": "6.1.2",
"@rjsf/validator-ajv8": "5.24.13",
"@sentry/nextjs": "10.27.0",
"@supabase/ssr": "0.7.0",
"@supabase/supabase-js": "2.78.0",
@@ -93,6 +92,7 @@
"react-currency-input-field": "4.0.3",
"react-day-picker": "9.11.1",
"react-dom": "18.3.1",
"react-drag-drop-files": "2.4.0",
"react-hook-form": "7.66.0",
"react-icons": "5.5.0",
"react-markdown": "9.0.3",
@@ -118,7 +118,6 @@
},
"devDependencies": {
"@chromatic-com/storybook": "4.1.2",
"@opentelemetry/instrumentation": "0.209.0",
"@playwright/test": "1.56.1",
"@storybook/addon-a11y": "9.1.5",
"@storybook/addon-docs": "9.1.5",
@@ -142,7 +141,6 @@
"eslint": "8.57.1",
"eslint-config-next": "15.5.7",
"eslint-plugin-storybook": "9.1.5",
"import-in-the-middle": "2.0.2",
"msw": "2.11.6",
"msw-storybook-addon": "2.0.6",
"orval": "7.13.0",
@@ -150,7 +148,7 @@
"postcss": "8.5.6",
"prettier": "3.6.2",
"prettier-plugin-tailwindcss": "0.7.1",
"require-in-the-middle": "8.0.1",
"require-in-the-middle": "7.5.2",
"storybook": "9.1.5",
"tailwindcss": "3.4.17",
"typescript": "5.9.3"
@@ -160,10 +158,5 @@
"public"
]
},
"pnpm": {
"overrides": {
"@opentelemetry/instrumentation": "0.209.0"
}
},
"packageManager": "pnpm@10.20.0+sha512.cf9998222162dd85864d0a8102e7892e7ba4ceadebbf5a31f9c2fce48dfce317a9c53b9f6464d1ef9042cba2e02ae02a9f7c143a2b438cd93c91840f0192b9dd"
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

View File

@@ -1,6 +1,6 @@
import { CredentialsInput } from "@/app/(platform)/library/agents/[id]/components/NewAgentLibraryView/components/modals/CredentialsInputs/CredentialsInput";
import { CredentialsMetaInput } from "@/app/api/__generated__/models/credentialsMetaInput";
import { GraphMeta } from "@/app/api/__generated__/models/graphMeta";
import { CredentialsInput } from "@/components/contextual/CredentialsInputs/CredentialsInputs";
import { useState } from "react";
import { getSchemaDefaultCredentials } from "../../helpers";
import { areAllCredentialsSet, getCredentialFields } from "./helpers";

Some files were not shown because too many files have changed in this diff Show More