mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-19 20:18:22 -05:00
Compare commits
25 Commits
pwuts/open
...
make-old-w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e0784f8f6b | ||
|
|
3040f39136 | ||
|
|
515504c604 | ||
|
|
18edeaeaf4 | ||
|
|
44182aff9c | ||
|
|
864c5a7846 | ||
|
|
699fffb1a8 | ||
|
|
f0641c2d26 | ||
|
|
94b6f74c95 | ||
|
|
46aabab3ea | ||
|
|
0a65df5102 | ||
|
|
6fbd208fe3 | ||
|
|
8fc174ca87 | ||
|
|
cacc89790f | ||
|
|
b9113bee02 | ||
|
|
3f65da03e7 | ||
|
|
9e96d11b2d | ||
|
|
4c264b7ae9 | ||
|
|
0adbc0bd05 | ||
|
|
8f3291bc92 | ||
|
|
7a20de880d | ||
|
|
ef8a6d2528 | ||
|
|
fd66be2aaa | ||
|
|
ae2cc97dc4 | ||
|
|
ea521eed26 |
@@ -1,37 +0,0 @@
|
||||
{
|
||||
"worktreeCopyPatterns": [
|
||||
".env*",
|
||||
".vscode/**",
|
||||
".auth/**",
|
||||
".claude/**",
|
||||
"autogpt_platform/.env*",
|
||||
"autogpt_platform/backend/.env*",
|
||||
"autogpt_platform/frontend/.env*",
|
||||
"autogpt_platform/frontend/.auth/**",
|
||||
"autogpt_platform/db/docker/.env*"
|
||||
],
|
||||
"worktreeCopyIgnores": [
|
||||
"**/node_modules/**",
|
||||
"**/dist/**",
|
||||
"**/.git/**",
|
||||
"**/Thumbs.db",
|
||||
"**/.DS_Store",
|
||||
"**/.next/**",
|
||||
"**/__pycache__/**",
|
||||
"**/.ruff_cache/**",
|
||||
"**/.pytest_cache/**",
|
||||
"**/*.pyc",
|
||||
"**/playwright-report/**",
|
||||
"**/logs/**",
|
||||
"**/site/**"
|
||||
],
|
||||
"worktreePathTemplate": "$BASE_PATH.worktree",
|
||||
"postCreateCmd": [
|
||||
"cd autogpt_platform/autogpt_libs && poetry install",
|
||||
"cd autogpt_platform/backend && poetry install && poetry run prisma generate",
|
||||
"cd autogpt_platform/frontend && pnpm install",
|
||||
"cd docs && pip install -r requirements.txt"
|
||||
],
|
||||
"terminalCommand": "code .",
|
||||
"deleteBranchWithWorktree": false
|
||||
}
|
||||
@@ -1,9 +1,6 @@
|
||||
# Ignore everything by default, selectively add things to context
|
||||
*
|
||||
|
||||
# Documentation (for embeddings/search)
|
||||
!docs/
|
||||
|
||||
# Platform - Libs
|
||||
!autogpt_platform/autogpt_libs/autogpt_libs/
|
||||
!autogpt_platform/autogpt_libs/pyproject.toml
|
||||
@@ -19,7 +16,6 @@
|
||||
!autogpt_platform/backend/poetry.lock
|
||||
!autogpt_platform/backend/README.md
|
||||
!autogpt_platform/backend/.env
|
||||
!autogpt_platform/backend/gen_prisma_types_stub.py
|
||||
|
||||
# Platform - Market
|
||||
!autogpt_platform/market/market/
|
||||
|
||||
2
.github/workflows/classic-autogpt-ci.yml
vendored
2
.github/workflows/classic-autogpt-ci.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
python-version: ["3.12", "3.13", "3.14"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
|
||||
|
||||
13
.github/workflows/classic-autogpts-ci.yml
vendored
13
.github/workflows/classic-autogpts-ci.yml
vendored
@@ -11,9 +11,6 @@ on:
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/run'
|
||||
- 'classic/cli.py'
|
||||
- 'classic/setup.py'
|
||||
- '!**/*.md'
|
||||
pull_request:
|
||||
branches: [ master, dev, release-* ]
|
||||
@@ -22,9 +19,6 @@ on:
|
||||
- 'classic/original_autogpt/**'
|
||||
- 'classic/forge/**'
|
||||
- 'classic/benchmark/**'
|
||||
- 'classic/run'
|
||||
- 'classic/cli.py'
|
||||
- 'classic/setup.py'
|
||||
- '!**/*.md'
|
||||
|
||||
defaults:
|
||||
@@ -59,10 +53,15 @@ jobs:
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ./classic/${{ matrix.agent-name }}/
|
||||
run: poetry install
|
||||
|
||||
- name: Run regression tests
|
||||
run: |
|
||||
./run agent start ${{ matrix.agent-name }}
|
||||
cd ${{ matrix.agent-name }}
|
||||
poetry run serve &
|
||||
sleep 10 # Wait for server to start
|
||||
poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
|
||||
poetry run agbenchmark --test=WriteFile
|
||||
env:
|
||||
|
||||
11
.github/workflows/classic-benchmark-ci.yml
vendored
11
.github/workflows/classic-benchmark-ci.yml
vendored
@@ -23,7 +23,7 @@ defaults:
|
||||
shell: bash
|
||||
|
||||
env:
|
||||
min-python-version: '3.10'
|
||||
min-python-version: '3.12'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
python-version: ["3.12", "3.13", "3.14"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
defaults:
|
||||
@@ -128,11 +128,16 @@ jobs:
|
||||
run: |
|
||||
curl -sSL https://install.python-poetry.org | python -
|
||||
|
||||
- name: Install agent dependencies
|
||||
working-directory: classic/${{ matrix.agent-name }}
|
||||
run: poetry install
|
||||
|
||||
- name: Run regression tests
|
||||
working-directory: classic
|
||||
run: |
|
||||
./run agent start ${{ matrix.agent-name }}
|
||||
cd ${{ matrix.agent-name }}
|
||||
poetry run python -m forge &
|
||||
sleep 10 # Wait for server to start
|
||||
|
||||
set +e # Ignore non-zero exit codes and continue execution
|
||||
echo "Running the following command: poetry run agbenchmark --maintain --mock"
|
||||
|
||||
2
.github/workflows/classic-forge-ci.yml
vendored
2
.github/workflows/classic-forge-ci.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
python-version: ["3.12", "3.13", "3.14"]
|
||||
platform-os: [ubuntu, macos, macos-arm64, windows]
|
||||
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
||||
|
||||
|
||||
60
.github/workflows/classic-frontend-ci.yml
vendored
60
.github/workflows/classic-frontend-ci.yml
vendored
@@ -1,60 +0,0 @@
|
||||
name: Classic - Frontend CI/CD
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
- 'ci-test*' # This will match any branch that starts with "ci-test"
|
||||
paths:
|
||||
- 'classic/frontend/**'
|
||||
- '.github/workflows/classic-frontend-ci.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'classic/frontend/**'
|
||||
- '.github/workflows/classic-frontend-ci.yml'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}
|
||||
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Flutter
|
||||
uses: subosito/flutter-action@v2
|
||||
with:
|
||||
flutter-version: '3.13.2'
|
||||
|
||||
- name: Build Flutter to Web
|
||||
run: |
|
||||
cd classic/frontend
|
||||
flutter build web --base-href /app/
|
||||
|
||||
# - name: Commit and Push to ${{ env.BUILD_BRANCH }}
|
||||
# if: github.event_name == 'push'
|
||||
# run: |
|
||||
# git config --local user.email "action@github.com"
|
||||
# git config --local user.name "GitHub Action"
|
||||
# git add classic/frontend/build/web
|
||||
# git checkout -B ${{ env.BUILD_BRANCH }}
|
||||
# git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
|
||||
# git push -f origin ${{ env.BUILD_BRANCH }}
|
||||
|
||||
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
|
||||
if: github.event_name == 'push'
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
with:
|
||||
add-paths: classic/frontend/build/web
|
||||
base: ${{ github.ref_name }}
|
||||
branch: ${{ env.BUILD_BRANCH }}
|
||||
delete-branch: true
|
||||
title: "Update frontend build in `${{ github.ref_name }}`"
|
||||
body: "This PR updates the frontend build based on commit ${{ github.sha }}."
|
||||
commit-message: "Update frontend build based on commit ${{ github.sha }}"
|
||||
4
.github/workflows/classic-python-checks.yml
vendored
4
.github/workflows/classic-python-checks.yml
vendored
@@ -59,7 +59,7 @@ jobs:
|
||||
needs: get-changed-parts
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: "3.10"
|
||||
min-python-version: "3.12"
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
needs: get-changed-parts
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
min-python-version: "3.10"
|
||||
min-python-version: "3.12"
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
2
.github/workflows/claude-dependabot.yml
vendored
2
.github/workflows/claude-dependabot.yml
vendored
@@ -74,7 +74,7 @@ jobs:
|
||||
|
||||
- name: Generate Prisma Client
|
||||
working-directory: autogpt_platform/backend
|
||||
run: poetry run prisma generate && poetry run gen-prisma-stub
|
||||
run: poetry run prisma generate
|
||||
|
||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||
- name: Set up Node.js
|
||||
|
||||
2
.github/workflows/claude.yml
vendored
2
.github/workflows/claude.yml
vendored
@@ -90,7 +90,7 @@ jobs:
|
||||
|
||||
- name: Generate Prisma Client
|
||||
working-directory: autogpt_platform/backend
|
||||
run: poetry run prisma generate && poetry run gen-prisma-stub
|
||||
run: poetry run prisma generate
|
||||
|
||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||
- name: Set up Node.js
|
||||
|
||||
12
.github/workflows/copilot-setup-steps.yml
vendored
12
.github/workflows/copilot-setup-steps.yml
vendored
@@ -72,7 +72,7 @@ jobs:
|
||||
|
||||
- name: Generate Prisma Client
|
||||
working-directory: autogpt_platform/backend
|
||||
run: poetry run prisma generate && poetry run gen-prisma-stub
|
||||
run: poetry run prisma generate
|
||||
|
||||
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
|
||||
- name: Set up Node.js
|
||||
@@ -108,16 +108,6 @@ jobs:
|
||||
# run: pnpm playwright install --with-deps chromium
|
||||
|
||||
# Docker setup for development environment
|
||||
- name: Free up disk space
|
||||
run: |
|
||||
# Remove large unused tools to free disk space for Docker builds
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /opt/ghc
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||
sudo docker system prune -af
|
||||
df -h
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
|
||||
4
.github/workflows/platform-backend-ci.yml
vendored
4
.github/workflows/platform-backend-ci.yml
vendored
@@ -134,7 +134,7 @@ jobs:
|
||||
run: poetry install
|
||||
|
||||
- name: Generate Prisma Client
|
||||
run: poetry run prisma generate && poetry run gen-prisma-stub
|
||||
run: poetry run prisma generate
|
||||
|
||||
- id: supabase
|
||||
name: Start Supabase
|
||||
@@ -176,7 +176,7 @@ jobs:
|
||||
}
|
||||
|
||||
- name: Run Database Migrations
|
||||
run: poetry run prisma migrate deploy
|
||||
run: poetry run prisma migrate dev --name updates
|
||||
env:
|
||||
DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
|
||||
DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
|
||||
|
||||
25
.github/workflows/platform-frontend-ci.yml
vendored
25
.github/workflows/platform-frontend-ci.yml
vendored
@@ -11,7 +11,6 @@ on:
|
||||
- ".github/workflows/platform-frontend-ci.yml"
|
||||
- "autogpt_platform/frontend/**"
|
||||
merge_group:
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event_name == 'merge_group' && format('merge-queue-{0}', github.ref) || format('{0}-{1}', github.ref, github.event.pull_request.number || github.sha) }}
|
||||
@@ -152,14 +151,6 @@ jobs:
|
||||
run: |
|
||||
cp ../.env.default ../.env
|
||||
|
||||
- name: Copy backend .env and set OpenAI API key
|
||||
run: |
|
||||
cp ../backend/.env.default ../backend/.env
|
||||
echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
|
||||
env:
|
||||
# Used by E2E test data script to generate embeddings for approved store agents
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
@@ -235,25 +226,13 @@ jobs:
|
||||
|
||||
- name: Run Playwright tests
|
||||
run: pnpm test:no-build
|
||||
continue-on-error: false
|
||||
|
||||
- name: Upload Playwright report
|
||||
if: always()
|
||||
- name: Upload Playwright artifacts
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: playwright-report
|
||||
path: playwright-report
|
||||
if-no-files-found: ignore
|
||||
retention-days: 3
|
||||
|
||||
- name: Upload Playwright test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: playwright-test-results
|
||||
path: test-results
|
||||
if-no-files-found: ignore
|
||||
retention-days: 3
|
||||
|
||||
- name: Print Final Docker Compose logs
|
||||
if: always()
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -3,6 +3,7 @@
|
||||
classic/original_autogpt/keys.py
|
||||
classic/original_autogpt/*.json
|
||||
auto_gpt_workspace/*
|
||||
.autogpt/
|
||||
*.mpeg
|
||||
.env
|
||||
# Root .env files
|
||||
@@ -177,5 +178,5 @@ autogpt_platform/backend/settings.py
|
||||
|
||||
*.ign.*
|
||||
.test-contents
|
||||
.claude/settings.local.json
|
||||
**/.claude/settings.local.json
|
||||
/autogpt_platform/backend/logs
|
||||
|
||||
@@ -6,14 +6,12 @@ start-core:
|
||||
|
||||
# Stop core services
|
||||
stop-core:
|
||||
docker compose stop
|
||||
docker compose stop deps
|
||||
|
||||
reset-db:
|
||||
docker compose stop db
|
||||
rm -rf db/docker/volumes/db/data
|
||||
cd backend && poetry run prisma migrate deploy
|
||||
cd backend && poetry run prisma generate
|
||||
cd backend && poetry run gen-prisma-stub
|
||||
|
||||
# View logs for core services
|
||||
logs-core:
|
||||
@@ -35,7 +33,6 @@ init-env:
|
||||
migrate:
|
||||
cd backend && poetry run prisma migrate deploy
|
||||
cd backend && poetry run prisma generate
|
||||
cd backend && poetry run gen-prisma-stub
|
||||
|
||||
run-backend:
|
||||
cd backend && poetry run app
|
||||
@@ -61,4 +58,4 @@ help:
|
||||
@echo " run-backend - Run the backend FastAPI server"
|
||||
@echo " run-frontend - Run the frontend Next.js development server"
|
||||
@echo " test-data - Run the test data creator"
|
||||
@echo " load-store-agents - Load store agents from agents/ folder into test database"
|
||||
@echo " load-store-agents - Load store agents from agents/ folder into test database"
|
||||
@@ -58,13 +58,6 @@ V0_API_KEY=
|
||||
OPEN_ROUTER_API_KEY=
|
||||
NVIDIA_API_KEY=
|
||||
|
||||
# Langfuse Prompt Management
|
||||
# Used for managing the CoPilot system prompt externally
|
||||
# Get credentials from https://cloud.langfuse.com or your self-hosted instance
|
||||
LANGFUSE_PUBLIC_KEY=
|
||||
LANGFUSE_SECRET_KEY=
|
||||
LANGFUSE_HOST=https://cloud.langfuse.com
|
||||
|
||||
# OAuth Credentials
|
||||
# For the OAuth callback URL, use <your_frontend_url>/auth/integrations/oauth_callback,
|
||||
# e.g. http://localhost:3000/auth/integrations/oauth_callback
|
||||
|
||||
1
autogpt_platform/backend/.gitignore
vendored
1
autogpt_platform/backend/.gitignore
vendored
@@ -18,4 +18,3 @@ load-tests/results/
|
||||
load-tests/*.json
|
||||
load-tests/*.log
|
||||
load-tests/node_modules/*
|
||||
migrations/*/rollback*.sql
|
||||
|
||||
@@ -48,8 +48,7 @@ RUN poetry install --no-ansi --no-root
|
||||
# Generate Prisma client
|
||||
COPY autogpt_platform/backend/schema.prisma ./
|
||||
COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/partial_types.py
|
||||
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
|
||||
RUN poetry run prisma generate && poetry run gen-prisma-stub
|
||||
RUN poetry run prisma generate
|
||||
|
||||
FROM debian:13-slim AS server_dependencies
|
||||
|
||||
@@ -100,7 +99,6 @@ COPY autogpt_platform/backend/migrations /app/autogpt_platform/backend/migration
|
||||
FROM server_dependencies AS server
|
||||
|
||||
COPY autogpt_platform/backend /app/autogpt_platform/backend
|
||||
COPY docs /app/docs
|
||||
RUN poetry install --no-ansi --only-root
|
||||
|
||||
ENV PORT=8000
|
||||
|
||||
@@ -1,57 +1,21 @@
|
||||
"""
|
||||
External API Application
|
||||
|
||||
This module defines the main FastAPI application for the external API,
|
||||
which mounts the v1 and v2 sub-applications.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import RedirectResponse
|
||||
|
||||
from backend.api.middleware.security import SecurityHeadersMiddleware
|
||||
from backend.monitoring.instrumentation import instrument_fastapi
|
||||
|
||||
from .v1.app import v1_app
|
||||
from .v2.app import v2_app
|
||||
|
||||
DESCRIPTION = """
|
||||
The external API provides programmatic access to the AutoGPT Platform for building
|
||||
integrations, automations, and custom applications.
|
||||
|
||||
### API Versions
|
||||
|
||||
| Version | End of Life | Path | Documentation |
|
||||
|---------------------|-------------|------------------------|---------------|
|
||||
| **v2** | | `/external-api/v2/...` | [v2 docs](v2/docs) |
|
||||
| **v1** (deprecated) | 2025-05-01 | `/external-api/v1/...` | [v1 docs](v1/docs) |
|
||||
|
||||
**Recommendation**: New integrations should use v2.
|
||||
|
||||
For authentication details and usage examples, see the
|
||||
[API Integration Guide](https://docs.agpt.co/platform/integrating/api-guide/).
|
||||
"""
|
||||
from .v1.routes import v1_router
|
||||
|
||||
external_api = FastAPI(
|
||||
title="AutoGPT Platform API",
|
||||
summary="External API for AutoGPT Platform integrations",
|
||||
description=DESCRIPTION,
|
||||
version="2.0.0",
|
||||
title="AutoGPT External API",
|
||||
description="External API for AutoGPT integrations",
|
||||
docs_url="/docs",
|
||||
redoc_url="/redoc",
|
||||
version="1.0",
|
||||
)
|
||||
|
||||
external_api.add_middleware(SecurityHeadersMiddleware)
|
||||
external_api.include_router(v1_router, prefix="/v1")
|
||||
|
||||
@external_api.get("/", include_in_schema=False)
|
||||
async def root_redirect() -> RedirectResponse:
|
||||
"""Redirect root to API documentation."""
|
||||
return RedirectResponse(url="/docs")
|
||||
|
||||
|
||||
# Mount versioned sub-applications
|
||||
# Each sub-app has its own /docs page at /v1/docs and /v2/docs
|
||||
external_api.mount("/v1", v1_app)
|
||||
external_api.mount("/v2", v2_app)
|
||||
|
||||
# Add Prometheus instrumentation to the main app
|
||||
# Add Prometheus instrumentation
|
||||
instrument_fastapi(
|
||||
external_api,
|
||||
service_name="external-api",
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
"""
|
||||
V1 External API Application
|
||||
|
||||
This module defines the FastAPI application for the v1 external API.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from backend.api.middleware.security import SecurityHeadersMiddleware
|
||||
|
||||
from .routes import v1_router
|
||||
|
||||
DESCRIPTION = """
|
||||
The v1 API provides access to core AutoGPT functionality for external integrations.
|
||||
|
||||
For authentication details and usage examples, see the
|
||||
[API Integration Guide](https://docs.agpt.co/platform/integrating/api-guide/).
|
||||
"""
|
||||
|
||||
v1_app = FastAPI(
|
||||
title="AutoGPT Platform API",
|
||||
summary="External API for AutoGPT Platform integrations (v1)",
|
||||
description=DESCRIPTION,
|
||||
version="1.0.0",
|
||||
docs_url="/docs",
|
||||
redoc_url="/redoc",
|
||||
openapi_url="/openapi.json",
|
||||
openapi_tags=[
|
||||
{"name": "user", "description": "User information"},
|
||||
{"name": "blocks", "description": "Block operations"},
|
||||
{"name": "graphs", "description": "Graph execution"},
|
||||
{"name": "store", "description": "Marketplace agents and creators"},
|
||||
{"name": "integrations", "description": "OAuth credential management"},
|
||||
{"name": "tools", "description": "AI assistant tools"},
|
||||
],
|
||||
)
|
||||
|
||||
v1_app.add_middleware(SecurityHeadersMiddleware)
|
||||
v1_app.include_router(v1_router)
|
||||
@@ -70,7 +70,7 @@ class RunAgentRequest(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
def _create_ephemeral_session(user_id: str) -> ChatSession:
|
||||
def _create_ephemeral_session(user_id: str | None) -> ChatSession:
|
||||
"""Create an ephemeral session for stateless API requests."""
|
||||
return ChatSession.new(user_id)
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
"""
|
||||
V2 External API
|
||||
|
||||
This module provides the v2 external API for programmatic access to the AutoGPT Platform.
|
||||
"""
|
||||
|
||||
from .routes import v2_router
|
||||
|
||||
__all__ = ["v2_router"]
|
||||
@@ -1,82 +0,0 @@
|
||||
"""
|
||||
V2 External API Application
|
||||
|
||||
This module defines the FastAPI application for the v2 external API.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from backend.api.middleware.security import SecurityHeadersMiddleware
|
||||
|
||||
from .routes import v2_router
|
||||
|
||||
DESCRIPTION = """
|
||||
The v2 API provides comprehensive access to the AutoGPT Platform for building
|
||||
integrations, automations, and custom applications.
|
||||
|
||||
### Key Improvements over v1
|
||||
|
||||
- **Consistent naming**: Uses `graph_id`/`graph_version` consistently
|
||||
- **Better pagination**: All list endpoints support pagination
|
||||
- **Comprehensive coverage**: Access to library, runs, schedules, credits, and more
|
||||
- **Human-in-the-loop**: Review and approve agent decisions via the API
|
||||
|
||||
For authentication details and usage examples, see the
|
||||
[API Integration Guide](https://docs.agpt.co/platform/integrating/api-guide/).
|
||||
|
||||
### Pagination
|
||||
|
||||
List endpoints return paginated responses. Use `page` and `page_size` query
|
||||
parameters to navigate results. Maximum page size is 100 items.
|
||||
"""
|
||||
|
||||
v2_app = FastAPI(
|
||||
title="AutoGPT Platform External API",
|
||||
summary="External API for AutoGPT Platform integrations (v2)",
|
||||
description=DESCRIPTION,
|
||||
version="2.0.0",
|
||||
docs_url="/docs",
|
||||
redoc_url="/redoc",
|
||||
openapi_url="/openapi.json",
|
||||
openapi_tags=[
|
||||
{
|
||||
"name": "graphs",
|
||||
"description": "Create, update, and manage agent graphs",
|
||||
},
|
||||
{
|
||||
"name": "schedules",
|
||||
"description": "Manage scheduled graph executions",
|
||||
},
|
||||
{
|
||||
"name": "blocks",
|
||||
"description": "Discover available building blocks",
|
||||
},
|
||||
{
|
||||
"name": "marketplace",
|
||||
"description": "Browse agents and creators, manage submissions",
|
||||
},
|
||||
{
|
||||
"name": "library",
|
||||
"description": "Access your agent library and execute agents",
|
||||
},
|
||||
{
|
||||
"name": "runs",
|
||||
"description": "Monitor execution runs and human-in-the-loop reviews",
|
||||
},
|
||||
{
|
||||
"name": "credits",
|
||||
"description": "Check balance and view transaction history",
|
||||
},
|
||||
{
|
||||
"name": "integrations",
|
||||
"description": "Manage OAuth credentials for external services",
|
||||
},
|
||||
{
|
||||
"name": "files",
|
||||
"description": "Upload files for agent input",
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
v2_app.add_middleware(SecurityHeadersMiddleware)
|
||||
v2_app.include_router(v2_router)
|
||||
@@ -1,140 +0,0 @@
|
||||
"""
|
||||
V2 External API - Blocks Endpoints
|
||||
|
||||
Provides read-only access to available building blocks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Response, Security
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.data.block import get_blocks
|
||||
from backend.util.cache import cached
|
||||
from backend.util.json import dumps
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
blocks_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class BlockCost(BaseModel):
|
||||
"""Cost information for a block."""
|
||||
|
||||
cost_type: str = Field(description="Type of cost (e.g., 'per_call', 'per_token')")
|
||||
cost_filter: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Conditions for this cost"
|
||||
)
|
||||
cost_amount: int = Field(description="Cost amount in credits")
|
||||
|
||||
|
||||
class Block(BaseModel):
|
||||
"""A building block that can be used in graphs."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
description: str
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
input_schema: dict[str, Any]
|
||||
output_schema: dict[str, Any]
|
||||
costs: list[BlockCost] = Field(default_factory=list)
|
||||
disabled: bool = Field(default=False)
|
||||
|
||||
|
||||
class BlocksListResponse(BaseModel):
|
||||
"""Response for listing blocks."""
|
||||
|
||||
blocks: list[Block]
|
||||
total_count: int
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Internal Functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _compute_blocks_sync() -> str:
|
||||
"""
|
||||
Synchronous function to compute blocks data.
|
||||
This does the heavy lifting: instantiate 226+ blocks, compute costs, serialize.
|
||||
"""
|
||||
from backend.data.credit import get_block_cost
|
||||
|
||||
block_classes = get_blocks()
|
||||
result = []
|
||||
|
||||
for block_class in block_classes.values():
|
||||
block_instance = block_class()
|
||||
if not block_instance.disabled:
|
||||
costs = get_block_cost(block_instance)
|
||||
# Convert BlockCost BaseModel objects to dictionaries
|
||||
costs_dict = [
|
||||
cost.model_dump() if isinstance(cost, BaseModel) else cost
|
||||
for cost in costs
|
||||
]
|
||||
result.append({**block_instance.to_dict(), "costs": costs_dict})
|
||||
|
||||
return dumps(result)
|
||||
|
||||
|
||||
@cached(ttl_seconds=3600)
|
||||
async def _get_cached_blocks() -> str:
|
||||
"""
|
||||
Async cached function with thundering herd protection.
|
||||
On cache miss: runs heavy work in thread pool
|
||||
On cache hit: returns cached string immediately
|
||||
"""
|
||||
return await run_in_threadpool(_compute_blocks_sync)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@blocks_router.get(
|
||||
path="",
|
||||
summary="List available blocks",
|
||||
responses={
|
||||
200: {
|
||||
"description": "List of available building blocks",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"items": {"additionalProperties": True, "type": "object"},
|
||||
"type": "array",
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
async def list_blocks(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_BLOCK)
|
||||
),
|
||||
) -> Response:
|
||||
"""
|
||||
List all available building blocks that can be used in graphs.
|
||||
|
||||
Each block represents a specific capability (e.g., HTTP request, text processing,
|
||||
AI completion, etc.) that can be connected in a graph to create an agent.
|
||||
|
||||
The response includes input/output schemas for each block, as well as
|
||||
cost information for blocks that consume credits.
|
||||
"""
|
||||
content = await _get_cached_blocks()
|
||||
return Response(
|
||||
content=content,
|
||||
media_type="application/json",
|
||||
)
|
||||
@@ -1,36 +0,0 @@
|
||||
"""
|
||||
Common utilities for V2 External API
|
||||
"""
|
||||
|
||||
from typing import TypeVar
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Constants for pagination
|
||||
MAX_PAGE_SIZE = 100
|
||||
DEFAULT_PAGE_SIZE = 20
|
||||
|
||||
|
||||
class PaginationParams(BaseModel):
|
||||
"""Common pagination parameters."""
|
||||
|
||||
page: int = Field(default=1, ge=1, description="Page number (1-indexed)")
|
||||
page_size: int = Field(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Number of items per page (max {MAX_PAGE_SIZE})",
|
||||
)
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class PaginatedResponse(BaseModel):
|
||||
"""Generic paginated response wrapper."""
|
||||
|
||||
items: list
|
||||
total_count: int = Field(description="Total number of items across all pages")
|
||||
page: int = Field(description="Current page number (1-indexed)")
|
||||
page_size: int = Field(description="Number of items per page")
|
||||
total_pages: int = Field(description="Total number of pages")
|
||||
@@ -1,141 +0,0 @@
|
||||
"""
|
||||
V2 External API - Credits Endpoints
|
||||
|
||||
Provides access to credit balance and transaction history.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Query, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.data.credit import get_user_credit_model
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
credits_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class CreditBalance(BaseModel):
|
||||
"""User's credit balance."""
|
||||
|
||||
balance: int = Field(description="Current credit balance")
|
||||
|
||||
|
||||
class CreditTransaction(BaseModel):
|
||||
"""A credit transaction."""
|
||||
|
||||
transaction_key: str
|
||||
amount: int = Field(description="Transaction amount (positive or negative)")
|
||||
type: str = Field(description="One of: TOP_UP, USAGE, GRANT, REFUND")
|
||||
transaction_time: datetime
|
||||
running_balance: Optional[int] = Field(
|
||||
default=None, description="Balance after this transaction"
|
||||
)
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class CreditTransactionsResponse(BaseModel):
|
||||
"""Response for listing credit transactions."""
|
||||
|
||||
transactions: list[CreditTransaction]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@credits_router.get(
|
||||
path="",
|
||||
summary="Get credit balance",
|
||||
response_model=CreditBalance,
|
||||
)
|
||||
async def get_balance(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_CREDITS)
|
||||
),
|
||||
) -> CreditBalance:
|
||||
"""
|
||||
Get the current credit balance for the authenticated user.
|
||||
"""
|
||||
user_credit_model = await get_user_credit_model(auth.user_id)
|
||||
balance = await user_credit_model.get_credits(auth.user_id)
|
||||
|
||||
return CreditBalance(balance=balance)
|
||||
|
||||
|
||||
@credits_router.get(
|
||||
path="/transactions",
|
||||
summary="Get transaction history",
|
||||
response_model=CreditTransactionsResponse,
|
||||
)
|
||||
async def get_transactions(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_CREDITS)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
transaction_type: Optional[str] = Query(
|
||||
default=None,
|
||||
description="Filter by transaction type (TOP_UP, USAGE, GRANT, REFUND)",
|
||||
),
|
||||
) -> CreditTransactionsResponse:
|
||||
"""
|
||||
Get credit transaction history for the authenticated user.
|
||||
|
||||
Returns transactions sorted by most recent first.
|
||||
"""
|
||||
user_credit_model = await get_user_credit_model(auth.user_id)
|
||||
|
||||
history = await user_credit_model.get_transaction_history(
|
||||
user_id=auth.user_id,
|
||||
transaction_count_limit=page_size,
|
||||
transaction_type=transaction_type,
|
||||
)
|
||||
|
||||
transactions = [
|
||||
CreditTransaction(
|
||||
transaction_key=t.transaction_key,
|
||||
amount=t.amount,
|
||||
type=t.transaction_type.value,
|
||||
transaction_time=t.transaction_time,
|
||||
running_balance=t.running_balance,
|
||||
description=t.description,
|
||||
)
|
||||
for t in history.transactions
|
||||
]
|
||||
|
||||
# Note: The current credit module doesn't support true pagination,
|
||||
# so we're returning what we have
|
||||
total_count = len(transactions)
|
||||
total_pages = 1 # Without true pagination support
|
||||
|
||||
return CreditTransactionsResponse(
|
||||
transactions=transactions,
|
||||
total_count=total_count,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total_pages=total_pages,
|
||||
)
|
||||
@@ -1,132 +0,0 @@
|
||||
"""
|
||||
V2 External API - Files Endpoints
|
||||
|
||||
Provides file upload functionality for agent inputs.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, File, HTTPException, Query, Security, UploadFile
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.util.cloud_storage import get_cloud_storage_handler
|
||||
from backend.util.settings import Settings
|
||||
from backend.util.virus_scanner import scan_content_safe
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = Settings()
|
||||
|
||||
files_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class UploadFileResponse(BaseModel):
|
||||
"""Response after uploading a file."""
|
||||
|
||||
file_uri: str = Field(description="URI to reference the uploaded file in agents")
|
||||
file_name: str
|
||||
size: int = Field(description="File size in bytes")
|
||||
content_type: str
|
||||
expires_in_hours: int
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _create_file_size_error(size_bytes: int, max_size_mb: int) -> HTTPException:
|
||||
"""Create standardized file size error response."""
|
||||
return HTTPException(
|
||||
status_code=400,
|
||||
detail=f"File size ({size_bytes} bytes) exceeds the maximum allowed size of {max_size_mb}MB",
|
||||
)
|
||||
|
||||
|
||||
@files_router.post(
|
||||
path="/upload",
|
||||
summary="Upload a file",
|
||||
response_model=UploadFileResponse,
|
||||
)
|
||||
async def upload_file(
|
||||
file: UploadFile = File(...),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.UPLOAD_FILES)
|
||||
),
|
||||
provider: str = Query(
|
||||
default="gcs", description="Storage provider (gcs, s3, azure)"
|
||||
),
|
||||
expiration_hours: int = Query(
|
||||
default=24, ge=1, le=48, description="Hours until file expires (1-48)"
|
||||
),
|
||||
) -> UploadFileResponse:
|
||||
"""
|
||||
Upload a file to cloud storage for use with agents.
|
||||
|
||||
The returned `file_uri` can be used as input to agents that accept file inputs
|
||||
(e.g., FileStoreBlock, AgentFileInputBlock).
|
||||
|
||||
Files are automatically scanned for viruses before storage.
|
||||
"""
|
||||
# Check file size limit
|
||||
max_size_mb = settings.config.upload_file_size_limit_mb
|
||||
max_size_bytes = max_size_mb * 1024 * 1024
|
||||
|
||||
# Try to get file size from headers first
|
||||
if hasattr(file, "size") and file.size is not None and file.size > max_size_bytes:
|
||||
raise _create_file_size_error(file.size, max_size_mb)
|
||||
|
||||
# Read file content
|
||||
content = await file.read()
|
||||
content_size = len(content)
|
||||
|
||||
# Double-check file size after reading
|
||||
if content_size > max_size_bytes:
|
||||
raise _create_file_size_error(content_size, max_size_mb)
|
||||
|
||||
# Extract file info
|
||||
file_name = file.filename or "uploaded_file"
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
|
||||
# Virus scan the content
|
||||
await scan_content_safe(content, filename=file_name)
|
||||
|
||||
# Check if cloud storage is configured
|
||||
cloud_storage = await get_cloud_storage_handler()
|
||||
if not cloud_storage.config.gcs_bucket_name:
|
||||
# Fallback to base64 data URI when GCS is not configured
|
||||
base64_content = base64.b64encode(content).decode("utf-8")
|
||||
data_uri = f"data:{content_type};base64,{base64_content}"
|
||||
|
||||
return UploadFileResponse(
|
||||
file_uri=data_uri,
|
||||
file_name=file_name,
|
||||
size=content_size,
|
||||
content_type=content_type,
|
||||
expires_in_hours=expiration_hours,
|
||||
)
|
||||
|
||||
# Store in cloud storage
|
||||
storage_path = await cloud_storage.store_file(
|
||||
content=content,
|
||||
filename=file_name,
|
||||
provider=provider,
|
||||
expiration_hours=expiration_hours,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
return UploadFileResponse(
|
||||
file_uri=storage_path,
|
||||
file_name=file_name,
|
||||
size=content_size,
|
||||
content_type=content_type,
|
||||
expires_in_hours=expiration_hours,
|
||||
)
|
||||
@@ -1,445 +0,0 @@
|
||||
"""
|
||||
V2 External API - Graphs Endpoints
|
||||
|
||||
Provides endpoints for managing agent graphs (CRUD operations).
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.data import graph as graph_db
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.integrations.webhooks.graph_lifecycle_hooks import (
|
||||
on_graph_activate,
|
||||
on_graph_deactivate,
|
||||
)
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
from .models import (
|
||||
CreateGraphRequest,
|
||||
DeleteGraphResponse,
|
||||
GraphDetails,
|
||||
GraphLink,
|
||||
GraphMeta,
|
||||
GraphNode,
|
||||
GraphSettings,
|
||||
GraphsListResponse,
|
||||
SetActiveVersionRequest,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
graphs_router = APIRouter()
|
||||
|
||||
|
||||
def _convert_graph_meta(graph: graph_db.GraphMeta) -> GraphMeta:
|
||||
"""Convert internal GraphMeta to v2 API model."""
|
||||
return GraphMeta(
|
||||
id=graph.id,
|
||||
version=graph.version,
|
||||
is_active=graph.is_active,
|
||||
name=graph.name,
|
||||
description=graph.description,
|
||||
created_at=graph.created_at,
|
||||
input_schema=graph.input_schema,
|
||||
output_schema=graph.output_schema,
|
||||
)
|
||||
|
||||
|
||||
def _convert_graph_details(graph: graph_db.GraphModel) -> GraphDetails:
|
||||
"""Convert internal GraphModel to v2 API GraphDetails model."""
|
||||
return GraphDetails(
|
||||
id=graph.id,
|
||||
version=graph.version,
|
||||
is_active=graph.is_active,
|
||||
name=graph.name,
|
||||
description=graph.description,
|
||||
created_at=graph.created_at,
|
||||
input_schema=graph.input_schema,
|
||||
output_schema=graph.output_schema,
|
||||
nodes=[
|
||||
GraphNode(
|
||||
id=node.id,
|
||||
block_id=node.block_id,
|
||||
input_default=node.input_default,
|
||||
metadata=node.metadata,
|
||||
)
|
||||
for node in graph.nodes
|
||||
],
|
||||
links=[
|
||||
GraphLink(
|
||||
id=link.id,
|
||||
source_id=link.source_id,
|
||||
sink_id=link.sink_id,
|
||||
source_name=link.source_name,
|
||||
sink_name=link.sink_name,
|
||||
is_static=link.is_static,
|
||||
)
|
||||
for link in graph.links
|
||||
],
|
||||
credentials_input_schema=graph.credentials_input_schema,
|
||||
)
|
||||
|
||||
|
||||
@graphs_router.get(
|
||||
path="",
|
||||
summary="List user's graphs",
|
||||
response_model=GraphsListResponse,
|
||||
)
|
||||
async def list_graphs(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_GRAPH)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> GraphsListResponse:
|
||||
"""
|
||||
List all graphs owned by the authenticated user.
|
||||
|
||||
Returns a paginated list of graph metadata (not full graph details).
|
||||
"""
|
||||
graphs, pagination_info = await graph_db.list_graphs_paginated(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
filter_by="active",
|
||||
)
|
||||
return GraphsListResponse(
|
||||
graphs=[_convert_graph_meta(g) for g in graphs],
|
||||
total_count=pagination_info.total_items,
|
||||
page=pagination_info.current_page,
|
||||
page_size=pagination_info.page_size,
|
||||
total_pages=pagination_info.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@graphs_router.post(
|
||||
path="",
|
||||
summary="Create a new graph",
|
||||
response_model=GraphDetails,
|
||||
)
|
||||
async def create_graph(
|
||||
create_graph_request: CreateGraphRequest,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_GRAPH)
|
||||
),
|
||||
) -> GraphDetails:
|
||||
"""
|
||||
Create a new agent graph.
|
||||
|
||||
The graph will be validated and assigned a new ID. It will automatically
|
||||
be added to the user's library.
|
||||
"""
|
||||
# Import here to avoid circular imports
|
||||
from backend.api.features.library import db as library_db
|
||||
|
||||
# Convert v2 API Graph model to internal Graph model
|
||||
internal_graph = graph_db.Graph(
|
||||
id=create_graph_request.graph.id or "",
|
||||
version=create_graph_request.graph.version,
|
||||
is_active=create_graph_request.graph.is_active,
|
||||
name=create_graph_request.graph.name,
|
||||
description=create_graph_request.graph.description,
|
||||
nodes=[
|
||||
graph_db.Node(
|
||||
id=node.id,
|
||||
block_id=node.block_id,
|
||||
input_default=node.input_default,
|
||||
metadata=node.metadata,
|
||||
)
|
||||
for node in create_graph_request.graph.nodes
|
||||
],
|
||||
links=[
|
||||
graph_db.Link(
|
||||
id=link.id,
|
||||
source_id=link.source_id,
|
||||
sink_id=link.sink_id,
|
||||
source_name=link.source_name,
|
||||
sink_name=link.sink_name,
|
||||
is_static=link.is_static,
|
||||
)
|
||||
for link in create_graph_request.graph.links
|
||||
],
|
||||
)
|
||||
|
||||
graph = graph_db.make_graph_model(internal_graph, auth.user_id)
|
||||
graph.reassign_ids(user_id=auth.user_id, reassign_graph_id=True)
|
||||
graph.validate_graph(for_run=False)
|
||||
|
||||
await graph_db.create_graph(graph, user_id=auth.user_id)
|
||||
await library_db.create_library_agent(graph, user_id=auth.user_id)
|
||||
activated_graph = await on_graph_activate(graph, user_id=auth.user_id)
|
||||
|
||||
return _convert_graph_details(activated_graph)
|
||||
|
||||
|
||||
@graphs_router.get(
|
||||
path="/{graph_id}",
|
||||
summary="Get graph details",
|
||||
response_model=GraphDetails,
|
||||
)
|
||||
async def get_graph(
|
||||
graph_id: str,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_GRAPH)
|
||||
),
|
||||
version: int | None = Query(
|
||||
default=None,
|
||||
description="Specific version to retrieve (default: active version)",
|
||||
),
|
||||
) -> GraphDetails:
|
||||
"""
|
||||
Get detailed information about a specific graph.
|
||||
|
||||
By default returns the active version. Use the `version` query parameter
|
||||
to retrieve a specific version.
|
||||
"""
|
||||
graph = await graph_db.get_graph(
|
||||
graph_id,
|
||||
version,
|
||||
user_id=auth.user_id,
|
||||
include_subgraphs=True,
|
||||
)
|
||||
if not graph:
|
||||
raise HTTPException(status_code=404, detail=f"Graph #{graph_id} not found.")
|
||||
return _convert_graph_details(graph)
|
||||
|
||||
|
||||
@graphs_router.put(
|
||||
path="/{graph_id}",
|
||||
summary="Update graph (creates new version)",
|
||||
response_model=GraphDetails,
|
||||
)
|
||||
async def update_graph(
|
||||
graph_id: str,
|
||||
graph_request: CreateGraphRequest,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_GRAPH)
|
||||
),
|
||||
) -> GraphDetails:
|
||||
"""
|
||||
Update a graph by creating a new version.
|
||||
|
||||
This does not modify existing versions - it creates a new version with
|
||||
the provided content. The new version becomes the active version.
|
||||
"""
|
||||
# Import here to avoid circular imports
|
||||
from backend.api.features.library import db as library_db
|
||||
|
||||
graph_data = graph_request.graph
|
||||
if graph_data.id and graph_data.id != graph_id:
|
||||
raise HTTPException(400, detail="Graph ID does not match ID in URI")
|
||||
|
||||
existing_versions = await graph_db.get_graph_all_versions(
|
||||
graph_id, user_id=auth.user_id
|
||||
)
|
||||
if not existing_versions:
|
||||
raise HTTPException(404, detail=f"Graph #{graph_id} not found")
|
||||
|
||||
latest_version_number = max(g.version for g in existing_versions)
|
||||
|
||||
# Convert v2 API Graph model to internal Graph model
|
||||
internal_graph = graph_db.Graph(
|
||||
id=graph_id,
|
||||
version=latest_version_number + 1,
|
||||
is_active=graph_data.is_active,
|
||||
name=graph_data.name,
|
||||
description=graph_data.description,
|
||||
nodes=[
|
||||
graph_db.Node(
|
||||
id=node.id,
|
||||
block_id=node.block_id,
|
||||
input_default=node.input_default,
|
||||
metadata=node.metadata,
|
||||
)
|
||||
for node in graph_data.nodes
|
||||
],
|
||||
links=[
|
||||
graph_db.Link(
|
||||
id=link.id,
|
||||
source_id=link.source_id,
|
||||
sink_id=link.sink_id,
|
||||
source_name=link.source_name,
|
||||
sink_name=link.sink_name,
|
||||
is_static=link.is_static,
|
||||
)
|
||||
for link in graph_data.links
|
||||
],
|
||||
)
|
||||
|
||||
current_active_version = next((v for v in existing_versions if v.is_active), None)
|
||||
graph = graph_db.make_graph_model(internal_graph, auth.user_id)
|
||||
graph.reassign_ids(user_id=auth.user_id, reassign_graph_id=False)
|
||||
graph.validate_graph(for_run=False)
|
||||
|
||||
new_graph_version = await graph_db.create_graph(graph, user_id=auth.user_id)
|
||||
|
||||
if new_graph_version.is_active:
|
||||
await library_db.update_agent_version_in_library(
|
||||
auth.user_id, new_graph_version.id, new_graph_version.version
|
||||
)
|
||||
new_graph_version = await on_graph_activate(
|
||||
new_graph_version, user_id=auth.user_id
|
||||
)
|
||||
await graph_db.set_graph_active_version(
|
||||
graph_id=graph_id, version=new_graph_version.version, user_id=auth.user_id
|
||||
)
|
||||
if current_active_version:
|
||||
await on_graph_deactivate(current_active_version, user_id=auth.user_id)
|
||||
|
||||
new_graph_version_with_subgraphs = await graph_db.get_graph(
|
||||
graph_id,
|
||||
new_graph_version.version,
|
||||
user_id=auth.user_id,
|
||||
include_subgraphs=True,
|
||||
)
|
||||
assert new_graph_version_with_subgraphs
|
||||
return _convert_graph_details(new_graph_version_with_subgraphs)
|
||||
|
||||
|
||||
@graphs_router.delete(
|
||||
path="/{graph_id}",
|
||||
summary="Delete graph permanently",
|
||||
response_model=DeleteGraphResponse,
|
||||
)
|
||||
async def delete_graph(
|
||||
graph_id: str,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_GRAPH)
|
||||
),
|
||||
) -> DeleteGraphResponse:
|
||||
"""
|
||||
Permanently delete a graph and all its versions.
|
||||
|
||||
This action cannot be undone. All associated executions will remain
|
||||
but will reference a deleted graph.
|
||||
"""
|
||||
if active_version := await graph_db.get_graph(
|
||||
graph_id=graph_id, version=None, user_id=auth.user_id
|
||||
):
|
||||
await on_graph_deactivate(active_version, user_id=auth.user_id)
|
||||
|
||||
version_count = await graph_db.delete_graph(graph_id, user_id=auth.user_id)
|
||||
return DeleteGraphResponse(version_count=version_count)
|
||||
|
||||
|
||||
@graphs_router.get(
|
||||
path="/{graph_id}/versions",
|
||||
summary="List all graph versions",
|
||||
response_model=list[GraphDetails],
|
||||
)
|
||||
async def list_graph_versions(
|
||||
graph_id: str,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_GRAPH)
|
||||
),
|
||||
) -> list[GraphDetails]:
|
||||
"""
|
||||
Get all versions of a specific graph.
|
||||
|
||||
Returns a list of all versions, with the active version marked.
|
||||
"""
|
||||
graphs = await graph_db.get_graph_all_versions(graph_id, user_id=auth.user_id)
|
||||
if not graphs:
|
||||
raise HTTPException(status_code=404, detail=f"Graph #{graph_id} not found.")
|
||||
return [_convert_graph_details(g) for g in graphs]
|
||||
|
||||
|
||||
@graphs_router.put(
|
||||
path="/{graph_id}/versions/active",
|
||||
summary="Set active graph version",
|
||||
)
|
||||
async def set_active_version(
|
||||
graph_id: str,
|
||||
request_body: SetActiveVersionRequest,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_GRAPH)
|
||||
),
|
||||
) -> None:
|
||||
"""
|
||||
Set which version of a graph is the active version.
|
||||
|
||||
The active version is used when executing the graph without specifying
|
||||
a version number.
|
||||
"""
|
||||
# Import here to avoid circular imports
|
||||
from backend.api.features.library import db as library_db
|
||||
|
||||
new_active_version = request_body.active_graph_version
|
||||
new_active_graph = await graph_db.get_graph(
|
||||
graph_id, new_active_version, user_id=auth.user_id
|
||||
)
|
||||
if not new_active_graph:
|
||||
raise HTTPException(404, f"Graph #{graph_id} v{new_active_version} not found")
|
||||
|
||||
current_active_graph = await graph_db.get_graph(
|
||||
graph_id=graph_id,
|
||||
version=None,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
await on_graph_activate(new_active_graph, user_id=auth.user_id)
|
||||
await graph_db.set_graph_active_version(
|
||||
graph_id=graph_id,
|
||||
version=new_active_version,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
await library_db.update_agent_version_in_library(
|
||||
auth.user_id, new_active_graph.id, new_active_graph.version
|
||||
)
|
||||
|
||||
if current_active_graph and current_active_graph.version != new_active_version:
|
||||
await on_graph_deactivate(current_active_graph, user_id=auth.user_id)
|
||||
|
||||
|
||||
@graphs_router.patch(
|
||||
path="/{graph_id}/settings",
|
||||
summary="Update graph settings",
|
||||
response_model=GraphSettings,
|
||||
)
|
||||
async def update_graph_settings(
|
||||
graph_id: str,
|
||||
settings: GraphSettings,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_GRAPH)
|
||||
),
|
||||
) -> GraphSettings:
|
||||
"""
|
||||
Update settings for a graph.
|
||||
|
||||
Currently supports:
|
||||
- human_in_the_loop_safe_mode: Enable/disable safe mode for human-in-the-loop blocks
|
||||
"""
|
||||
# Import here to avoid circular imports
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.data.graph import GraphSettings as InternalGraphSettings
|
||||
|
||||
library_agent = await library_db.get_library_agent_by_graph_id(
|
||||
graph_id=graph_id, user_id=auth.user_id
|
||||
)
|
||||
if not library_agent:
|
||||
raise HTTPException(404, f"Graph #{graph_id} not found in user's library")
|
||||
|
||||
# Convert to internal model
|
||||
internal_settings = InternalGraphSettings(
|
||||
human_in_the_loop_safe_mode=settings.human_in_the_loop_safe_mode
|
||||
)
|
||||
|
||||
updated_agent = await library_db.update_library_agent_settings(
|
||||
user_id=auth.user_id,
|
||||
agent_id=library_agent.id,
|
||||
settings=internal_settings,
|
||||
)
|
||||
|
||||
return GraphSettings(
|
||||
human_in_the_loop_safe_mode=updated_agent.settings.human_in_the_loop_safe_mode
|
||||
)
|
||||
@@ -1,271 +0,0 @@
|
||||
"""
|
||||
V2 External API - Integrations Endpoints
|
||||
|
||||
Provides access to user's integration credentials.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.data import graph as graph_db
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.data.model import Credentials, OAuth2Credentials
|
||||
from backend.integrations.creds_manager import IntegrationCredentialsManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
integrations_router = APIRouter()
|
||||
creds_manager = IntegrationCredentialsManager()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Credential(BaseModel):
|
||||
"""A user's credential for an integration."""
|
||||
|
||||
id: str
|
||||
provider: str = Field(description="Integration provider name")
|
||||
title: Optional[str] = Field(
|
||||
default=None, description="User-assigned title for this credential"
|
||||
)
|
||||
scopes: list[str] = Field(default_factory=list, description="Granted scopes")
|
||||
|
||||
|
||||
class CredentialsListResponse(BaseModel):
|
||||
"""Response for listing credentials."""
|
||||
|
||||
credentials: list[Credential]
|
||||
|
||||
|
||||
class CredentialRequirement(BaseModel):
|
||||
"""A credential requirement for a graph or agent."""
|
||||
|
||||
provider: str = Field(description="Required provider name")
|
||||
required_scopes: list[str] = Field(
|
||||
default_factory=list, description="Required scopes"
|
||||
)
|
||||
matching_credentials: list[Credential] = Field(
|
||||
default_factory=list,
|
||||
description="User's credentials that match this requirement",
|
||||
)
|
||||
|
||||
|
||||
class CredentialRequirementsResponse(BaseModel):
|
||||
"""Response for listing credential requirements."""
|
||||
|
||||
requirements: list[CredentialRequirement]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Conversion Functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _convert_credential(cred: Credentials) -> Credential:
|
||||
"""Convert internal credential to v2 API model."""
|
||||
scopes: list[str] = []
|
||||
if isinstance(cred, OAuth2Credentials):
|
||||
scopes = cred.scopes or []
|
||||
|
||||
return Credential(
|
||||
id=cred.id,
|
||||
provider=cred.provider,
|
||||
title=cred.title,
|
||||
scopes=scopes,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@integrations_router.get(
|
||||
path="/credentials",
|
||||
summary="List all credentials",
|
||||
response_model=CredentialsListResponse,
|
||||
)
|
||||
async def list_credentials(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_INTEGRATIONS)
|
||||
),
|
||||
) -> CredentialsListResponse:
|
||||
"""
|
||||
List all integration credentials for the authenticated user.
|
||||
|
||||
This returns all OAuth credentials the user has connected, across
|
||||
all integration providers.
|
||||
"""
|
||||
credentials = await creds_manager.store.get_all_creds(auth.user_id)
|
||||
|
||||
return CredentialsListResponse(
|
||||
credentials=[_convert_credential(c) for c in credentials]
|
||||
)
|
||||
|
||||
|
||||
@integrations_router.get(
|
||||
path="/credentials/{provider}",
|
||||
summary="List credentials by provider",
|
||||
response_model=CredentialsListResponse,
|
||||
)
|
||||
async def list_credentials_by_provider(
|
||||
provider: str = Path(description="Provider name (e.g., 'github', 'google')"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_INTEGRATIONS)
|
||||
),
|
||||
) -> CredentialsListResponse:
|
||||
"""
|
||||
List integration credentials for a specific provider.
|
||||
"""
|
||||
all_credentials = await creds_manager.store.get_all_creds(auth.user_id)
|
||||
|
||||
# Filter by provider
|
||||
filtered = [c for c in all_credentials if c.provider.lower() == provider.lower()]
|
||||
|
||||
return CredentialsListResponse(
|
||||
credentials=[_convert_credential(c) for c in filtered]
|
||||
)
|
||||
|
||||
|
||||
@integrations_router.get(
|
||||
path="/graphs/{graph_id}/credentials",
|
||||
summary="List credentials matching graph requirements",
|
||||
response_model=CredentialRequirementsResponse,
|
||||
)
|
||||
async def list_graph_credential_requirements(
|
||||
graph_id: str = Path(description="Graph ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_INTEGRATIONS)
|
||||
),
|
||||
) -> CredentialRequirementsResponse:
|
||||
"""
|
||||
List credential requirements for a graph and matching user credentials.
|
||||
|
||||
This helps identify which credentials the user needs to provide
|
||||
when executing a graph.
|
||||
"""
|
||||
# Get the graph
|
||||
graph = await graph_db.get_graph(
|
||||
graph_id=graph_id,
|
||||
version=None, # Active version
|
||||
user_id=auth.user_id,
|
||||
include_subgraphs=True,
|
||||
)
|
||||
if not graph:
|
||||
raise HTTPException(status_code=404, detail=f"Graph #{graph_id} not found")
|
||||
|
||||
# Get the credentials input schema which contains provider requirements
|
||||
creds_schema = graph.credentials_input_schema
|
||||
all_credentials = await creds_manager.store.get_all_creds(auth.user_id)
|
||||
|
||||
requirements = []
|
||||
for field_name, field_schema in creds_schema.get("properties", {}).items():
|
||||
# Extract provider from schema
|
||||
# The schema structure varies, but typically has provider info
|
||||
providers = []
|
||||
if "anyOf" in field_schema:
|
||||
for option in field_schema["anyOf"]:
|
||||
if "provider" in option:
|
||||
providers.append(option["provider"])
|
||||
elif "provider" in field_schema:
|
||||
providers.append(field_schema["provider"])
|
||||
|
||||
for provider in providers:
|
||||
# Find matching credentials
|
||||
matching = [
|
||||
_convert_credential(c)
|
||||
for c in all_credentials
|
||||
if c.provider.lower() == provider.lower()
|
||||
]
|
||||
|
||||
requirements.append(
|
||||
CredentialRequirement(
|
||||
provider=provider,
|
||||
required_scopes=[], # Would need to extract from schema
|
||||
matching_credentials=matching,
|
||||
)
|
||||
)
|
||||
|
||||
return CredentialRequirementsResponse(requirements=requirements)
|
||||
|
||||
|
||||
@integrations_router.get(
|
||||
path="/library/{agent_id}/credentials",
|
||||
summary="List credentials matching library agent requirements",
|
||||
response_model=CredentialRequirementsResponse,
|
||||
)
|
||||
async def list_library_agent_credential_requirements(
|
||||
agent_id: str = Path(description="Library agent ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_INTEGRATIONS)
|
||||
),
|
||||
) -> CredentialRequirementsResponse:
|
||||
"""
|
||||
List credential requirements for a library agent and matching user credentials.
|
||||
|
||||
This helps identify which credentials the user needs to provide
|
||||
when executing an agent from their library.
|
||||
"""
|
||||
# Get the library agent
|
||||
try:
|
||||
library_agent = await library_db.get_library_agent(
|
||||
id=agent_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=404, detail=f"Agent #{agent_id} not found")
|
||||
|
||||
# Get the underlying graph
|
||||
graph = await graph_db.get_graph(
|
||||
graph_id=library_agent.graph_id,
|
||||
version=library_agent.graph_version,
|
||||
user_id=auth.user_id,
|
||||
include_subgraphs=True,
|
||||
)
|
||||
if not graph:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Graph for agent #{agent_id} not found",
|
||||
)
|
||||
|
||||
# Get the credentials input schema
|
||||
creds_schema = graph.credentials_input_schema
|
||||
all_credentials = await creds_manager.store.get_all_creds(auth.user_id)
|
||||
|
||||
requirements = []
|
||||
for field_name, field_schema in creds_schema.get("properties", {}).items():
|
||||
# Extract provider from schema
|
||||
providers = []
|
||||
if "anyOf" in field_schema:
|
||||
for option in field_schema["anyOf"]:
|
||||
if "provider" in option:
|
||||
providers.append(option["provider"])
|
||||
elif "provider" in field_schema:
|
||||
providers.append(field_schema["provider"])
|
||||
|
||||
for provider in providers:
|
||||
# Find matching credentials
|
||||
matching = [
|
||||
_convert_credential(c)
|
||||
for c in all_credentials
|
||||
if c.provider.lower() == provider.lower()
|
||||
]
|
||||
|
||||
requirements.append(
|
||||
CredentialRequirement(
|
||||
provider=provider,
|
||||
required_scopes=[],
|
||||
matching_credentials=matching,
|
||||
)
|
||||
)
|
||||
|
||||
return CredentialRequirementsResponse(requirements=requirements)
|
||||
@@ -1,247 +0,0 @@
|
||||
"""
|
||||
V2 External API - Library Endpoints
|
||||
|
||||
Provides access to the user's agent library and agent execution.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Query, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.api.features.library import model as library_model
|
||||
from backend.data import execution as execution_db
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.data.credit import get_user_credit_model
|
||||
from backend.executor import utils as execution_utils
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
from .models import (
|
||||
ExecuteAgentRequest,
|
||||
LibraryAgent,
|
||||
LibraryAgentsResponse,
|
||||
Run,
|
||||
RunsListResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
library_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Conversion Functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _convert_library_agent(agent: library_model.LibraryAgent) -> LibraryAgent:
|
||||
"""Convert internal LibraryAgent to v2 API model."""
|
||||
return LibraryAgent(
|
||||
id=agent.id,
|
||||
graph_id=agent.graph_id,
|
||||
graph_version=agent.graph_version,
|
||||
name=agent.name,
|
||||
description=agent.description,
|
||||
is_favorite=agent.is_favorite,
|
||||
can_access_graph=agent.can_access_graph,
|
||||
is_latest_version=agent.is_latest_version,
|
||||
image_url=agent.image_url,
|
||||
creator_name=agent.creator_name,
|
||||
input_schema=agent.input_schema,
|
||||
output_schema=agent.output_schema,
|
||||
created_at=agent.created_at,
|
||||
updated_at=agent.updated_at,
|
||||
)
|
||||
|
||||
|
||||
def _convert_execution_to_run(exec: execution_db.GraphExecutionMeta) -> Run:
|
||||
"""Convert internal execution to v2 API Run model."""
|
||||
return Run(
|
||||
id=exec.id,
|
||||
graph_id=exec.graph_id,
|
||||
graph_version=exec.graph_version,
|
||||
status=exec.status.value,
|
||||
started_at=exec.started_at,
|
||||
ended_at=exec.ended_at,
|
||||
inputs=exec.inputs,
|
||||
cost=exec.stats.cost if exec.stats else 0,
|
||||
duration=exec.stats.duration if exec.stats else 0,
|
||||
node_count=exec.stats.node_exec_count if exec.stats else 0,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@library_router.get(
|
||||
path="/agents",
|
||||
summary="List library agents",
|
||||
response_model=LibraryAgentsResponse,
|
||||
)
|
||||
async def list_library_agents(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_LIBRARY)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> LibraryAgentsResponse:
|
||||
"""
|
||||
List agents in the user's library.
|
||||
|
||||
The library contains agents the user has created or added from the marketplace.
|
||||
"""
|
||||
result = await library_db.list_library_agents(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return LibraryAgentsResponse(
|
||||
agents=[_convert_library_agent(a) for a in result.agents],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@library_router.get(
|
||||
path="/agents/favorites",
|
||||
summary="List favorite agents",
|
||||
response_model=LibraryAgentsResponse,
|
||||
)
|
||||
async def list_favorite_agents(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_LIBRARY)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> LibraryAgentsResponse:
|
||||
"""
|
||||
List favorite agents in the user's library.
|
||||
"""
|
||||
result = await library_db.list_favorite_library_agents(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return LibraryAgentsResponse(
|
||||
agents=[_convert_library_agent(a) for a in result.agents],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@library_router.post(
|
||||
path="/agents/{agent_id}/runs",
|
||||
summary="Execute an agent",
|
||||
response_model=Run,
|
||||
)
|
||||
async def execute_agent(
|
||||
request: ExecuteAgentRequest,
|
||||
agent_id: str = Path(description="Library agent ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.RUN_AGENT)
|
||||
),
|
||||
) -> Run:
|
||||
"""
|
||||
Execute an agent from the library.
|
||||
|
||||
This creates a new run with the provided inputs. The run executes
|
||||
asynchronously and you can poll the run status using GET /runs/{run_id}.
|
||||
"""
|
||||
# Check credit balance
|
||||
user_credit_model = await get_user_credit_model(auth.user_id)
|
||||
current_balance = await user_credit_model.get_credits(auth.user_id)
|
||||
if current_balance <= 0:
|
||||
raise HTTPException(
|
||||
status_code=402,
|
||||
detail="Insufficient balance to execute the agent. Please top up your account.",
|
||||
)
|
||||
|
||||
# Get the library agent to find the graph ID and version
|
||||
try:
|
||||
library_agent = await library_db.get_library_agent(
|
||||
id=agent_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=404, detail=f"Agent #{agent_id} not found")
|
||||
|
||||
try:
|
||||
result = await execution_utils.add_graph_execution(
|
||||
graph_id=library_agent.graph_id,
|
||||
user_id=auth.user_id,
|
||||
inputs=request.inputs,
|
||||
graph_version=library_agent.graph_version,
|
||||
graph_credentials_inputs=request.credentials_inputs,
|
||||
)
|
||||
|
||||
return _convert_execution_to_run(result)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to execute agent: {e}")
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@library_router.get(
|
||||
path="/agents/{agent_id}/runs",
|
||||
summary="List runs for an agent",
|
||||
response_model=RunsListResponse,
|
||||
)
|
||||
async def list_agent_runs(
|
||||
agent_id: str = Path(description="Library agent ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_LIBRARY)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> RunsListResponse:
|
||||
"""
|
||||
List execution runs for a specific agent.
|
||||
"""
|
||||
# Get the library agent to find the graph ID
|
||||
try:
|
||||
library_agent = await library_db.get_library_agent(
|
||||
id=agent_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=404, detail=f"Agent #{agent_id} not found")
|
||||
|
||||
result = await execution_db.get_graph_executions_paginated(
|
||||
graph_id=library_agent.graph_id,
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return RunsListResponse(
|
||||
runs=[_convert_execution_to_run(e) for e in result.executions],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
@@ -1,510 +0,0 @@
|
||||
"""
|
||||
V2 External API - Marketplace Endpoints
|
||||
|
||||
Provides access to the agent marketplace (store).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
from typing import Literal, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Query, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.api.features.store import cache as store_cache
|
||||
from backend.api.features.store import db as store_db
|
||||
from backend.api.features.store import model as store_model
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
marketplace_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class MarketplaceAgent(BaseModel):
|
||||
"""An agent available in the marketplace."""
|
||||
|
||||
slug: str
|
||||
name: str
|
||||
description: str
|
||||
sub_heading: str
|
||||
creator: str
|
||||
creator_avatar: str
|
||||
runs: int = Field(default=0, description="Number of times this agent has been run")
|
||||
rating: float = Field(default=0.0, description="Average rating")
|
||||
image_url: str = Field(default="")
|
||||
|
||||
|
||||
class MarketplaceAgentDetails(BaseModel):
|
||||
"""Detailed information about a marketplace agent."""
|
||||
|
||||
store_listing_version_id: str
|
||||
slug: str
|
||||
name: str
|
||||
description: str
|
||||
sub_heading: str
|
||||
instructions: Optional[str] = None
|
||||
creator: str
|
||||
creator_avatar: str
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
runs: int = Field(default=0)
|
||||
rating: float = Field(default=0.0)
|
||||
image_urls: list[str] = Field(default_factory=list)
|
||||
video_url: str = Field(default="")
|
||||
versions: list[str] = Field(default_factory=list, description="Available versions")
|
||||
agent_graph_versions: list[str] = Field(default_factory=list)
|
||||
agent_graph_id: str
|
||||
last_updated: datetime
|
||||
|
||||
|
||||
class MarketplaceAgentsResponse(BaseModel):
|
||||
"""Response for listing marketplace agents."""
|
||||
|
||||
agents: list[MarketplaceAgent]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class MarketplaceCreator(BaseModel):
|
||||
"""A creator on the marketplace."""
|
||||
|
||||
name: str
|
||||
username: str
|
||||
description: str
|
||||
avatar_url: str
|
||||
num_agents: int
|
||||
agent_rating: float
|
||||
agent_runs: int
|
||||
is_featured: bool = False
|
||||
|
||||
|
||||
class MarketplaceCreatorDetails(BaseModel):
|
||||
"""Detailed information about a marketplace creator."""
|
||||
|
||||
name: str
|
||||
username: str
|
||||
description: str
|
||||
avatar_url: str
|
||||
agent_rating: float
|
||||
agent_runs: int
|
||||
top_categories: list[str] = Field(default_factory=list)
|
||||
links: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class MarketplaceCreatorsResponse(BaseModel):
|
||||
"""Response for listing marketplace creators."""
|
||||
|
||||
creators: list[MarketplaceCreator]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class MarketplaceSubmission(BaseModel):
|
||||
"""A marketplace submission."""
|
||||
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
name: str
|
||||
sub_heading: str
|
||||
slug: str
|
||||
description: str
|
||||
instructions: Optional[str] = None
|
||||
image_urls: list[str] = Field(default_factory=list)
|
||||
date_submitted: datetime
|
||||
status: str = Field(description="One of: DRAFT, PENDING, APPROVED, REJECTED")
|
||||
runs: int = Field(default=0)
|
||||
rating: float = Field(default=0.0)
|
||||
store_listing_version_id: Optional[str] = None
|
||||
version: Optional[int] = None
|
||||
review_comments: Optional[str] = None
|
||||
reviewed_at: Optional[datetime] = None
|
||||
video_url: Optional[str] = None
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class SubmissionsListResponse(BaseModel):
|
||||
"""Response for listing submissions."""
|
||||
|
||||
submissions: list[MarketplaceSubmission]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class CreateSubmissionRequest(BaseModel):
|
||||
"""Request to create a marketplace submission."""
|
||||
|
||||
graph_id: str = Field(description="ID of the graph to submit")
|
||||
graph_version: int = Field(description="Version of the graph to submit")
|
||||
name: str = Field(description="Display name for the agent")
|
||||
slug: str = Field(description="URL-friendly identifier")
|
||||
description: str = Field(description="Full description")
|
||||
sub_heading: str = Field(description="Short tagline")
|
||||
image_urls: list[str] = Field(default_factory=list)
|
||||
video_url: Optional[str] = None
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Conversion Functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _convert_store_agent(agent: store_model.StoreAgent) -> MarketplaceAgent:
|
||||
"""Convert internal StoreAgent to v2 API model."""
|
||||
return MarketplaceAgent(
|
||||
slug=agent.slug,
|
||||
name=agent.agent_name,
|
||||
description=agent.description,
|
||||
sub_heading=agent.sub_heading,
|
||||
creator=agent.creator,
|
||||
creator_avatar=agent.creator_avatar,
|
||||
runs=agent.runs,
|
||||
rating=agent.rating,
|
||||
image_url=agent.agent_image,
|
||||
)
|
||||
|
||||
|
||||
def _convert_store_agent_details(
|
||||
agent: store_model.StoreAgentDetails,
|
||||
) -> MarketplaceAgentDetails:
|
||||
"""Convert internal StoreAgentDetails to v2 API model."""
|
||||
return MarketplaceAgentDetails(
|
||||
store_listing_version_id=agent.store_listing_version_id,
|
||||
slug=agent.slug,
|
||||
name=agent.agent_name,
|
||||
description=agent.description,
|
||||
sub_heading=agent.sub_heading,
|
||||
instructions=agent.instructions,
|
||||
creator=agent.creator,
|
||||
creator_avatar=agent.creator_avatar,
|
||||
categories=agent.categories,
|
||||
runs=agent.runs,
|
||||
rating=agent.rating,
|
||||
image_urls=agent.agent_image,
|
||||
video_url=agent.agent_video,
|
||||
versions=agent.versions,
|
||||
agent_graph_versions=agent.agentGraphVersions,
|
||||
agent_graph_id=agent.agentGraphId,
|
||||
last_updated=agent.last_updated,
|
||||
)
|
||||
|
||||
|
||||
def _convert_creator(creator: store_model.Creator) -> MarketplaceCreator:
|
||||
"""Convert internal Creator to v2 API model."""
|
||||
return MarketplaceCreator(
|
||||
name=creator.name,
|
||||
username=creator.username,
|
||||
description=creator.description,
|
||||
avatar_url=creator.avatar_url,
|
||||
num_agents=creator.num_agents,
|
||||
agent_rating=creator.agent_rating,
|
||||
agent_runs=creator.agent_runs,
|
||||
is_featured=creator.is_featured,
|
||||
)
|
||||
|
||||
|
||||
def _convert_creator_details(
|
||||
creator: store_model.CreatorDetails,
|
||||
) -> MarketplaceCreatorDetails:
|
||||
"""Convert internal CreatorDetails to v2 API model."""
|
||||
return MarketplaceCreatorDetails(
|
||||
name=creator.name,
|
||||
username=creator.username,
|
||||
description=creator.description,
|
||||
avatar_url=creator.avatar_url,
|
||||
agent_rating=creator.agent_rating,
|
||||
agent_runs=creator.agent_runs,
|
||||
top_categories=creator.top_categories,
|
||||
links=creator.links,
|
||||
)
|
||||
|
||||
|
||||
def _convert_submission(sub: store_model.StoreSubmission) -> MarketplaceSubmission:
|
||||
"""Convert internal StoreSubmission to v2 API model."""
|
||||
return MarketplaceSubmission(
|
||||
graph_id=sub.agent_id,
|
||||
graph_version=sub.agent_version,
|
||||
name=sub.name,
|
||||
sub_heading=sub.sub_heading,
|
||||
slug=sub.slug,
|
||||
description=sub.description,
|
||||
instructions=sub.instructions,
|
||||
image_urls=sub.image_urls,
|
||||
date_submitted=sub.date_submitted,
|
||||
status=sub.status.value,
|
||||
runs=sub.runs,
|
||||
rating=sub.rating,
|
||||
store_listing_version_id=sub.store_listing_version_id,
|
||||
version=sub.version,
|
||||
review_comments=sub.review_comments,
|
||||
reviewed_at=sub.reviewed_at,
|
||||
video_url=sub.video_url,
|
||||
categories=sub.categories,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints - Read (authenticated)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@marketplace_router.get(
|
||||
path="/agents",
|
||||
summary="List marketplace agents",
|
||||
response_model=MarketplaceAgentsResponse,
|
||||
)
|
||||
async def list_agents(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_STORE)
|
||||
),
|
||||
featured: bool = Query(default=False, description="Filter to featured agents only"),
|
||||
creator: Optional[str] = Query(
|
||||
default=None, description="Filter by creator username"
|
||||
),
|
||||
sorted_by: Optional[Literal["rating", "runs", "name", "updated_at"]] = Query(
|
||||
default=None, description="Sort field"
|
||||
),
|
||||
search_query: Optional[str] = Query(default=None, description="Search query"),
|
||||
category: Optional[str] = Query(default=None, description="Filter by category"),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> MarketplaceAgentsResponse:
|
||||
"""
|
||||
List agents available in the marketplace.
|
||||
|
||||
Supports filtering by featured status, creator, category, and search query.
|
||||
Results can be sorted by rating, runs, name, or update time.
|
||||
"""
|
||||
result = await store_cache._get_cached_store_agents(
|
||||
featured=featured,
|
||||
creator=creator,
|
||||
sorted_by=sorted_by,
|
||||
search_query=search_query,
|
||||
category=category,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return MarketplaceAgentsResponse(
|
||||
agents=[_convert_store_agent(a) for a in result.agents],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@marketplace_router.get(
|
||||
path="/agents/{username}/{agent_name}",
|
||||
summary="Get agent details",
|
||||
response_model=MarketplaceAgentDetails,
|
||||
)
|
||||
async def get_agent_details(
|
||||
username: str = Path(description="Creator username"),
|
||||
agent_name: str = Path(description="Agent slug/name"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_STORE)
|
||||
),
|
||||
) -> MarketplaceAgentDetails:
|
||||
"""
|
||||
Get detailed information about a specific marketplace agent.
|
||||
"""
|
||||
username = urllib.parse.unquote(username).lower()
|
||||
agent_name = urllib.parse.unquote(agent_name).lower()
|
||||
|
||||
agent = await store_cache._get_cached_agent_details(
|
||||
username=username, agent_name=agent_name
|
||||
)
|
||||
|
||||
return _convert_store_agent_details(agent)
|
||||
|
||||
|
||||
@marketplace_router.get(
|
||||
path="/creators",
|
||||
summary="List marketplace creators",
|
||||
response_model=MarketplaceCreatorsResponse,
|
||||
)
|
||||
async def list_creators(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_STORE)
|
||||
),
|
||||
featured: bool = Query(
|
||||
default=False, description="Filter to featured creators only"
|
||||
),
|
||||
search_query: Optional[str] = Query(default=None, description="Search query"),
|
||||
sorted_by: Optional[Literal["agent_rating", "agent_runs", "num_agents"]] = Query(
|
||||
default=None, description="Sort field"
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> MarketplaceCreatorsResponse:
|
||||
"""
|
||||
List creators on the marketplace.
|
||||
|
||||
Supports filtering by featured status and search query.
|
||||
Results can be sorted by rating, runs, or number of agents.
|
||||
"""
|
||||
result = await store_cache._get_cached_store_creators(
|
||||
featured=featured,
|
||||
search_query=search_query,
|
||||
sorted_by=sorted_by,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return MarketplaceCreatorsResponse(
|
||||
creators=[_convert_creator(c) for c in result.creators],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@marketplace_router.get(
|
||||
path="/creators/{username}",
|
||||
summary="Get creator details",
|
||||
response_model=MarketplaceCreatorDetails,
|
||||
)
|
||||
async def get_creator_details(
|
||||
username: str = Path(description="Creator username"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_STORE)
|
||||
),
|
||||
) -> MarketplaceCreatorDetails:
|
||||
"""
|
||||
Get detailed information about a specific marketplace creator.
|
||||
"""
|
||||
username = urllib.parse.unquote(username).lower()
|
||||
|
||||
creator = await store_cache._get_cached_creator_details(username=username)
|
||||
|
||||
return _convert_creator_details(creator)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints - Submissions (CRUD)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@marketplace_router.get(
|
||||
path="/submissions",
|
||||
summary="List my submissions",
|
||||
response_model=SubmissionsListResponse,
|
||||
)
|
||||
async def list_submissions(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_STORE)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> SubmissionsListResponse:
|
||||
"""
|
||||
List your marketplace submissions.
|
||||
|
||||
Returns all submissions you've created, including drafts, pending,
|
||||
approved, and rejected submissions.
|
||||
"""
|
||||
result = await store_db.get_store_submissions(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return SubmissionsListResponse(
|
||||
submissions=[_convert_submission(s) for s in result.submissions],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@marketplace_router.post(
|
||||
path="/submissions",
|
||||
summary="Create a submission",
|
||||
response_model=MarketplaceSubmission,
|
||||
)
|
||||
async def create_submission(
|
||||
request: CreateSubmissionRequest,
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_STORE)
|
||||
),
|
||||
) -> MarketplaceSubmission:
|
||||
"""
|
||||
Create a new marketplace submission.
|
||||
|
||||
This submits an agent for review to be published in the marketplace.
|
||||
The submission will be in PENDING status until reviewed by the team.
|
||||
"""
|
||||
submission = await store_db.create_store_submission(
|
||||
user_id=auth.user_id,
|
||||
agent_id=request.graph_id,
|
||||
agent_version=request.graph_version,
|
||||
slug=request.slug,
|
||||
name=request.name,
|
||||
sub_heading=request.sub_heading,
|
||||
description=request.description,
|
||||
image_urls=request.image_urls,
|
||||
video_url=request.video_url,
|
||||
categories=request.categories,
|
||||
)
|
||||
|
||||
return _convert_submission(submission)
|
||||
|
||||
|
||||
@marketplace_router.delete(
|
||||
path="/submissions/{submission_id}",
|
||||
summary="Delete a submission",
|
||||
)
|
||||
async def delete_submission(
|
||||
submission_id: str = Path(description="Submission ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_STORE)
|
||||
),
|
||||
) -> None:
|
||||
"""
|
||||
Delete a marketplace submission.
|
||||
|
||||
Only submissions in DRAFT status can be deleted.
|
||||
"""
|
||||
success = await store_db.delete_store_submission(
|
||||
user_id=auth.user_id,
|
||||
submission_id=submission_id,
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Submission #{submission_id} not found"
|
||||
)
|
||||
@@ -1,552 +0,0 @@
|
||||
"""
|
||||
V2 External API - Request and Response Models
|
||||
|
||||
This module defines all request and response models for the v2 external API.
|
||||
All models are self-contained and specific to the external API contract.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# ============================================================================
|
||||
# Common/Shared Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class PaginatedResponse(BaseModel):
|
||||
"""Base class for paginated responses."""
|
||||
|
||||
total_count: int = Field(description="Total number of items across all pages")
|
||||
page: int = Field(description="Current page number (1-indexed)")
|
||||
page_size: int = Field(description="Number of items per page")
|
||||
total_pages: int = Field(description="Total number of pages")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Graph Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class GraphLink(BaseModel):
|
||||
"""A link between two nodes in a graph."""
|
||||
|
||||
id: str
|
||||
source_id: str = Field(description="ID of the source node")
|
||||
sink_id: str = Field(description="ID of the target node")
|
||||
source_name: str = Field(description="Output pin name on source node")
|
||||
sink_name: str = Field(description="Input pin name on target node")
|
||||
is_static: bool = Field(
|
||||
default=False, description="Whether this link provides static data"
|
||||
)
|
||||
|
||||
|
||||
class GraphNode(BaseModel):
|
||||
"""A node in an agent graph."""
|
||||
|
||||
id: str
|
||||
block_id: str = Field(description="ID of the block type")
|
||||
input_default: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Default input values"
|
||||
)
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Node metadata (e.g., position)"
|
||||
)
|
||||
|
||||
|
||||
class Graph(BaseModel):
|
||||
"""Graph definition for creating or updating an agent."""
|
||||
|
||||
id: Optional[str] = Field(default=None, description="Graph ID (assigned by server)")
|
||||
version: int = Field(default=1, description="Graph version")
|
||||
is_active: bool = Field(default=True, description="Whether this version is active")
|
||||
name: str = Field(description="Graph name")
|
||||
description: str = Field(default="", description="Graph description")
|
||||
nodes: list[GraphNode] = Field(default_factory=list, description="List of nodes")
|
||||
links: list[GraphLink] = Field(
|
||||
default_factory=list, description="Links between nodes"
|
||||
)
|
||||
|
||||
|
||||
class GraphMeta(BaseModel):
|
||||
"""Graph metadata (summary information)."""
|
||||
|
||||
id: str
|
||||
version: int
|
||||
is_active: bool
|
||||
name: str
|
||||
description: str
|
||||
created_at: datetime
|
||||
input_schema: dict[str, Any] = Field(description="Input schema for the graph")
|
||||
output_schema: dict[str, Any] = Field(description="Output schema for the graph")
|
||||
|
||||
|
||||
class GraphDetails(GraphMeta):
|
||||
"""Full graph details including nodes and links."""
|
||||
|
||||
nodes: list[GraphNode]
|
||||
links: list[GraphLink]
|
||||
credentials_input_schema: dict[str, Any] = Field(
|
||||
description="Schema for required credentials"
|
||||
)
|
||||
|
||||
|
||||
class GraphSettings(BaseModel):
|
||||
"""Settings for a graph."""
|
||||
|
||||
human_in_the_loop_safe_mode: Optional[bool] = Field(
|
||||
default=None, description="Enable safe mode for human-in-the-loop blocks"
|
||||
)
|
||||
|
||||
|
||||
class CreateGraphRequest(BaseModel):
|
||||
"""Request to create a new graph."""
|
||||
|
||||
graph: Graph = Field(description="The graph definition")
|
||||
|
||||
|
||||
class SetActiveVersionRequest(BaseModel):
|
||||
"""Request to set the active graph version."""
|
||||
|
||||
active_graph_version: int = Field(description="Version number to set as active")
|
||||
|
||||
|
||||
class GraphsListResponse(PaginatedResponse):
|
||||
"""Response for listing graphs."""
|
||||
|
||||
graphs: list[GraphMeta]
|
||||
|
||||
|
||||
class DeleteGraphResponse(BaseModel):
|
||||
"""Response for deleting a graph."""
|
||||
|
||||
version_count: int = Field(description="Number of versions deleted")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Schedule Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Schedule(BaseModel):
|
||||
"""An execution schedule for a graph."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
cron: str = Field(description="Cron expression for the schedule")
|
||||
input_data: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Input data for scheduled executions"
|
||||
)
|
||||
is_enabled: bool = Field(default=True, description="Whether schedule is enabled")
|
||||
next_run_time: Optional[datetime] = Field(
|
||||
default=None, description="Next scheduled run time"
|
||||
)
|
||||
|
||||
|
||||
class CreateScheduleRequest(BaseModel):
|
||||
"""Request to create a schedule."""
|
||||
|
||||
name: str = Field(description="Display name for the schedule")
|
||||
cron: str = Field(description="Cron expression (e.g., '0 9 * * *' for 9am daily)")
|
||||
input_data: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Input data for scheduled executions"
|
||||
)
|
||||
credentials_inputs: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Credentials for the schedule"
|
||||
)
|
||||
graph_version: Optional[int] = Field(
|
||||
default=None, description="Graph version (default: active version)"
|
||||
)
|
||||
timezone: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Timezone for schedule (e.g., 'America/New_York')",
|
||||
)
|
||||
|
||||
|
||||
class SchedulesListResponse(PaginatedResponse):
|
||||
"""Response for listing schedules."""
|
||||
|
||||
schedules: list[Schedule]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Block Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class BlockCost(BaseModel):
|
||||
"""Cost information for a block."""
|
||||
|
||||
cost_type: str = Field(description="Type of cost (e.g., 'per_call', 'per_token')")
|
||||
cost_filter: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Conditions for this cost"
|
||||
)
|
||||
cost_amount: int = Field(description="Cost amount in credits")
|
||||
|
||||
|
||||
class Block(BaseModel):
|
||||
"""A building block that can be used in graphs."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
description: str
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
input_schema: dict[str, Any]
|
||||
output_schema: dict[str, Any]
|
||||
costs: list[BlockCost] = Field(default_factory=list)
|
||||
|
||||
|
||||
class BlocksListResponse(BaseModel):
|
||||
"""Response for listing blocks."""
|
||||
|
||||
blocks: list[Block]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Marketplace Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class MarketplaceAgent(BaseModel):
|
||||
"""An agent available in the marketplace."""
|
||||
|
||||
slug: str
|
||||
agent_name: str
|
||||
agent_image: str
|
||||
creator: str
|
||||
creator_avatar: str
|
||||
sub_heading: str
|
||||
description: str
|
||||
runs: int = Field(default=0, description="Number of times this agent has been run")
|
||||
rating: float = Field(default=0.0, description="Average rating")
|
||||
|
||||
|
||||
class MarketplaceAgentDetails(BaseModel):
|
||||
"""Detailed information about a marketplace agent."""
|
||||
|
||||
store_listing_version_id: str
|
||||
slug: str
|
||||
agent_name: str
|
||||
agent_video: str
|
||||
agent_output_demo: str
|
||||
agent_image: list[str]
|
||||
creator: str
|
||||
creator_avatar: str
|
||||
sub_heading: str
|
||||
description: str
|
||||
instructions: Optional[str] = None
|
||||
categories: list[str]
|
||||
runs: int
|
||||
rating: float
|
||||
versions: list[str]
|
||||
agent_graph_versions: list[str]
|
||||
agent_graph_id: str
|
||||
last_updated: datetime
|
||||
recommended_schedule_cron: Optional[str] = None
|
||||
|
||||
|
||||
class MarketplaceCreator(BaseModel):
|
||||
"""A creator on the marketplace."""
|
||||
|
||||
name: str
|
||||
username: str
|
||||
description: str
|
||||
avatar_url: str
|
||||
num_agents: int
|
||||
agent_rating: float
|
||||
agent_runs: int
|
||||
is_featured: bool = False
|
||||
|
||||
|
||||
class MarketplaceAgentsResponse(PaginatedResponse):
|
||||
"""Response for listing marketplace agents."""
|
||||
|
||||
agents: list[MarketplaceAgent]
|
||||
|
||||
|
||||
class MarketplaceCreatorsResponse(PaginatedResponse):
|
||||
"""Response for listing marketplace creators."""
|
||||
|
||||
creators: list[MarketplaceCreator]
|
||||
|
||||
|
||||
# Submission models
|
||||
class MarketplaceSubmission(BaseModel):
|
||||
"""A marketplace submission."""
|
||||
|
||||
agent_id: str
|
||||
agent_version: int
|
||||
name: str
|
||||
sub_heading: str
|
||||
slug: str
|
||||
description: str
|
||||
instructions: Optional[str] = None
|
||||
image_urls: list[str] = Field(default_factory=list)
|
||||
date_submitted: datetime
|
||||
status: str = Field(description="One of: DRAFT, PENDING, APPROVED, REJECTED")
|
||||
runs: int
|
||||
rating: float
|
||||
store_listing_version_id: Optional[str] = None
|
||||
version: Optional[int] = None
|
||||
|
||||
# Review fields
|
||||
review_comments: Optional[str] = None
|
||||
reviewed_at: Optional[datetime] = None
|
||||
|
||||
# Additional optional fields
|
||||
video_url: Optional[str] = None
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class CreateSubmissionRequest(BaseModel):
|
||||
"""Request to create a marketplace submission."""
|
||||
|
||||
agent_id: str = Field(description="ID of the graph to submit")
|
||||
agent_version: int = Field(description="Version of the graph to submit")
|
||||
name: str = Field(description="Display name for the agent")
|
||||
slug: str = Field(description="URL-friendly identifier")
|
||||
description: str = Field(description="Full description")
|
||||
sub_heading: str = Field(description="Short tagline")
|
||||
image_urls: list[str] = Field(default_factory=list)
|
||||
video_url: Optional[str] = None
|
||||
categories: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class UpdateSubmissionRequest(BaseModel):
|
||||
"""Request to update a marketplace submission."""
|
||||
|
||||
name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
sub_heading: Optional[str] = None
|
||||
image_urls: Optional[list[str]] = None
|
||||
video_url: Optional[str] = None
|
||||
categories: Optional[list[str]] = None
|
||||
|
||||
|
||||
class SubmissionsListResponse(PaginatedResponse):
|
||||
"""Response for listing submissions."""
|
||||
|
||||
submissions: list[MarketplaceSubmission]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Library Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class LibraryAgent(BaseModel):
|
||||
"""An agent in the user's library."""
|
||||
|
||||
id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
name: str
|
||||
description: str
|
||||
is_favorite: bool = False
|
||||
can_access_graph: bool = False
|
||||
is_latest_version: bool = False
|
||||
image_url: Optional[str] = None
|
||||
creator_name: str
|
||||
input_schema: dict[str, Any] = Field(description="Input schema for the agent")
|
||||
output_schema: dict[str, Any] = Field(description="Output schema for the agent")
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class LibraryAgentsResponse(PaginatedResponse):
|
||||
"""Response for listing library agents."""
|
||||
|
||||
agents: list[LibraryAgent]
|
||||
|
||||
|
||||
class ExecuteAgentRequest(BaseModel):
|
||||
"""Request to execute an agent."""
|
||||
|
||||
inputs: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Input values for the agent"
|
||||
)
|
||||
credentials_inputs: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Credentials for the agent"
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Run Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Run(BaseModel):
|
||||
"""An execution run."""
|
||||
|
||||
id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
status: str = Field(
|
||||
description="One of: INCOMPLETE, QUEUED, RUNNING, COMPLETED, TERMINATED, FAILED, REVIEW"
|
||||
)
|
||||
started_at: datetime
|
||||
ended_at: Optional[datetime] = None
|
||||
inputs: Optional[dict[str, Any]] = None
|
||||
cost: int = Field(default=0, description="Cost in credits")
|
||||
duration: float = Field(default=0, description="Duration in seconds")
|
||||
node_count: int = Field(default=0, description="Number of nodes executed")
|
||||
|
||||
|
||||
class RunDetails(Run):
|
||||
"""Detailed information about a run including node executions."""
|
||||
|
||||
outputs: Optional[dict[str, list[Any]]] = None
|
||||
node_executions: list[dict[str, Any]] = Field(
|
||||
default_factory=list, description="Individual node execution results"
|
||||
)
|
||||
|
||||
|
||||
class RunsListResponse(PaginatedResponse):
|
||||
"""Response for listing runs."""
|
||||
|
||||
runs: list[Run]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Run Review Models (Human-in-the-loop)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class PendingReview(BaseModel):
|
||||
"""A pending human-in-the-loop review."""
|
||||
|
||||
id: str # node_exec_id
|
||||
run_id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
payload: Any = Field(description="Data to be reviewed")
|
||||
instructions: Optional[str] = Field(
|
||||
default=None, description="Instructions for the reviewer"
|
||||
)
|
||||
editable: bool = Field(
|
||||
default=True, description="Whether the reviewer can edit the data"
|
||||
)
|
||||
status: str = Field(description="One of: WAITING, APPROVED, REJECTED")
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class PendingReviewsResponse(PaginatedResponse):
|
||||
"""Response for listing pending reviews."""
|
||||
|
||||
reviews: list[PendingReview]
|
||||
|
||||
|
||||
class ReviewDecision(BaseModel):
|
||||
"""Decision for a single review item."""
|
||||
|
||||
node_exec_id: str = Field(description="Node execution ID (review ID)")
|
||||
approved: bool = Field(description="Whether to approve the data")
|
||||
edited_payload: Optional[Any] = Field(
|
||||
default=None, description="Modified payload data (if editing)"
|
||||
)
|
||||
message: Optional[str] = Field(
|
||||
default=None, description="Optional message from reviewer", max_length=2000
|
||||
)
|
||||
|
||||
|
||||
class SubmitReviewsRequest(BaseModel):
|
||||
"""Request to submit review responses for all pending reviews of an execution."""
|
||||
|
||||
reviews: list[ReviewDecision] = Field(
|
||||
description="All review decisions for the execution"
|
||||
)
|
||||
|
||||
|
||||
class SubmitReviewsResponse(BaseModel):
|
||||
"""Response after submitting reviews."""
|
||||
|
||||
run_id: str
|
||||
approved_count: int = Field(description="Number of reviews approved")
|
||||
rejected_count: int = Field(description="Number of reviews rejected")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Credit Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class CreditBalance(BaseModel):
|
||||
"""User's credit balance."""
|
||||
|
||||
balance: int = Field(description="Current credit balance")
|
||||
|
||||
|
||||
class CreditTransaction(BaseModel):
|
||||
"""A credit transaction."""
|
||||
|
||||
transaction_key: str
|
||||
amount: int
|
||||
transaction_type: str = Field(description="Transaction type")
|
||||
transaction_time: datetime
|
||||
running_balance: int
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
class CreditTransactionsResponse(PaginatedResponse):
|
||||
"""Response for listing credit transactions."""
|
||||
|
||||
transactions: list[CreditTransaction]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Integration Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Credential(BaseModel):
|
||||
"""A user's credential for an integration."""
|
||||
|
||||
id: str
|
||||
provider: str = Field(description="Integration provider name")
|
||||
title: Optional[str] = Field(
|
||||
default=None, description="User-assigned title for this credential"
|
||||
)
|
||||
scopes: list[str] = Field(default_factory=list, description="Granted scopes")
|
||||
|
||||
|
||||
class CredentialsListResponse(BaseModel):
|
||||
"""Response for listing credentials."""
|
||||
|
||||
credentials: list[Credential]
|
||||
|
||||
|
||||
class CredentialRequirement(BaseModel):
|
||||
"""A credential requirement for a graph or agent."""
|
||||
|
||||
provider: str = Field(description="Required provider name")
|
||||
required_scopes: list[str] = Field(
|
||||
default_factory=list, description="Required scopes"
|
||||
)
|
||||
matching_credentials: list[Credential] = Field(
|
||||
default_factory=list,
|
||||
description="User's credentials that match this requirement",
|
||||
)
|
||||
|
||||
|
||||
class CredentialRequirementsResponse(BaseModel):
|
||||
"""Response for listing credential requirements."""
|
||||
|
||||
requirements: list[CredentialRequirement]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# File Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class UploadFileResponse(BaseModel):
|
||||
"""Response after uploading a file."""
|
||||
|
||||
file_uri: str = Field(description="URI to reference the uploaded file")
|
||||
file_name: str
|
||||
size: int = Field(description="File size in bytes")
|
||||
content_type: str
|
||||
expires_in_hours: int
|
||||
@@ -1,35 +0,0 @@
|
||||
"""
|
||||
V2 External API Routes
|
||||
|
||||
This module defines the main v2 router that aggregates all v2 API endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from .blocks import blocks_router
|
||||
from .credits import credits_router
|
||||
from .files import files_router
|
||||
from .graphs import graphs_router
|
||||
from .integrations import integrations_router
|
||||
from .library import library_router
|
||||
from .marketplace import marketplace_router
|
||||
from .runs import runs_router
|
||||
from .schedules import graph_schedules_router, schedules_router
|
||||
|
||||
v2_router = APIRouter()
|
||||
|
||||
# Include all sub-routers
|
||||
v2_router.include_router(graphs_router, prefix="/graphs", tags=["graphs"])
|
||||
v2_router.include_router(graph_schedules_router, prefix="/graphs", tags=["schedules"])
|
||||
v2_router.include_router(schedules_router, prefix="/schedules", tags=["schedules"])
|
||||
v2_router.include_router(blocks_router, prefix="/blocks", tags=["blocks"])
|
||||
v2_router.include_router(
|
||||
marketplace_router, prefix="/marketplace", tags=["marketplace"]
|
||||
)
|
||||
v2_router.include_router(library_router, prefix="/library", tags=["library"])
|
||||
v2_router.include_router(runs_router, prefix="/runs", tags=["runs"])
|
||||
v2_router.include_router(credits_router, prefix="/credits", tags=["credits"])
|
||||
v2_router.include_router(
|
||||
integrations_router, prefix="/integrations", tags=["integrations"]
|
||||
)
|
||||
v2_router.include_router(files_router, prefix="/files", tags=["files"])
|
||||
@@ -1,451 +0,0 @@
|
||||
"""
|
||||
V2 External API - Runs Endpoints
|
||||
|
||||
Provides access to execution runs and human-in-the-loop reviews.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Query, Security
|
||||
from prisma.enums import APIKeyPermission, ReviewStatus
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.api.features.executions.review.model import (
|
||||
PendingHumanReviewModel,
|
||||
SafeJsonData,
|
||||
)
|
||||
from backend.data import execution as execution_db
|
||||
from backend.data import human_review as review_db
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.executor import utils as execution_utils
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
runs_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Run(BaseModel):
|
||||
"""An execution run."""
|
||||
|
||||
id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
status: str = Field(
|
||||
description="One of: INCOMPLETE, QUEUED, RUNNING, COMPLETED, TERMINATED, FAILED, REVIEW"
|
||||
)
|
||||
started_at: datetime
|
||||
ended_at: Optional[datetime] = None
|
||||
inputs: Optional[dict[str, Any]] = None
|
||||
cost: int = Field(default=0, description="Cost in credits")
|
||||
duration: float = Field(default=0, description="Duration in seconds")
|
||||
node_count: int = Field(default=0, description="Number of nodes executed")
|
||||
|
||||
|
||||
class RunDetails(Run):
|
||||
"""Detailed information about a run including outputs and node executions."""
|
||||
|
||||
outputs: Optional[dict[str, list[Any]]] = None
|
||||
node_executions: list[dict[str, Any]] = Field(
|
||||
default_factory=list, description="Individual node execution results"
|
||||
)
|
||||
|
||||
|
||||
class RunsListResponse(BaseModel):
|
||||
"""Response for listing runs."""
|
||||
|
||||
runs: list[Run]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class PendingReview(BaseModel):
|
||||
"""A pending human-in-the-loop review."""
|
||||
|
||||
id: str # node_exec_id
|
||||
run_id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
payload: SafeJsonData = Field(description="Data to be reviewed")
|
||||
instructions: Optional[str] = Field(
|
||||
default=None, description="Instructions for the reviewer"
|
||||
)
|
||||
editable: bool = Field(
|
||||
default=True, description="Whether the reviewer can edit the data"
|
||||
)
|
||||
status: str = Field(description="One of: WAITING, APPROVED, REJECTED")
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class PendingReviewsResponse(BaseModel):
|
||||
"""Response for listing pending reviews."""
|
||||
|
||||
reviews: list[PendingReview]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class ReviewDecision(BaseModel):
|
||||
"""Decision for a single review item."""
|
||||
|
||||
node_exec_id: str = Field(description="Node execution ID (review ID)")
|
||||
approved: bool = Field(description="Whether to approve the data")
|
||||
edited_payload: Optional[SafeJsonData] = Field(
|
||||
default=None, description="Modified payload data (if editing)"
|
||||
)
|
||||
message: Optional[str] = Field(
|
||||
default=None, description="Optional message from reviewer", max_length=2000
|
||||
)
|
||||
|
||||
|
||||
class SubmitReviewsRequest(BaseModel):
|
||||
"""Request to submit review responses for all pending reviews of an execution."""
|
||||
|
||||
reviews: list[ReviewDecision] = Field(
|
||||
description="All review decisions for the execution"
|
||||
)
|
||||
|
||||
|
||||
class SubmitReviewsResponse(BaseModel):
|
||||
"""Response after submitting reviews."""
|
||||
|
||||
run_id: str
|
||||
approved_count: int = Field(description="Number of reviews approved")
|
||||
rejected_count: int = Field(description="Number of reviews rejected")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Conversion Functions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _convert_execution_to_run(exec: execution_db.GraphExecutionMeta) -> Run:
|
||||
"""Convert internal execution to v2 API Run model."""
|
||||
return Run(
|
||||
id=exec.id,
|
||||
graph_id=exec.graph_id,
|
||||
graph_version=exec.graph_version,
|
||||
status=exec.status.value,
|
||||
started_at=exec.started_at,
|
||||
ended_at=exec.ended_at,
|
||||
inputs=exec.inputs,
|
||||
cost=exec.stats.cost if exec.stats else 0,
|
||||
duration=exec.stats.duration if exec.stats else 0,
|
||||
node_count=exec.stats.node_exec_count if exec.stats else 0,
|
||||
)
|
||||
|
||||
|
||||
def _convert_execution_to_run_details(
|
||||
exec: execution_db.GraphExecutionWithNodes,
|
||||
) -> RunDetails:
|
||||
"""Convert internal execution with nodes to v2 API RunDetails model."""
|
||||
return RunDetails(
|
||||
id=exec.id,
|
||||
graph_id=exec.graph_id,
|
||||
graph_version=exec.graph_version,
|
||||
status=exec.status.value,
|
||||
started_at=exec.started_at,
|
||||
ended_at=exec.ended_at,
|
||||
inputs=exec.inputs,
|
||||
outputs=exec.outputs,
|
||||
cost=exec.stats.cost if exec.stats else 0,
|
||||
duration=exec.stats.duration if exec.stats else 0,
|
||||
node_count=exec.stats.node_exec_count if exec.stats else 0,
|
||||
node_executions=[
|
||||
{
|
||||
"node_id": node.node_id,
|
||||
"status": node.status.value,
|
||||
"input_data": node.input_data,
|
||||
"output_data": node.output_data,
|
||||
"started_at": node.start_time,
|
||||
"ended_at": node.end_time,
|
||||
}
|
||||
for node in exec.node_executions
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _convert_pending_review(review: PendingHumanReviewModel) -> PendingReview:
|
||||
"""Convert internal PendingHumanReviewModel to v2 API PendingReview model."""
|
||||
return PendingReview(
|
||||
id=review.node_exec_id,
|
||||
run_id=review.graph_exec_id,
|
||||
graph_id=review.graph_id,
|
||||
graph_version=review.graph_version,
|
||||
payload=review.payload,
|
||||
instructions=review.instructions,
|
||||
editable=review.editable,
|
||||
status=review.status.value,
|
||||
created_at=review.created_at,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints - Runs
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@runs_router.get(
|
||||
path="",
|
||||
summary="List all runs",
|
||||
response_model=RunsListResponse,
|
||||
)
|
||||
async def list_runs(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_RUN)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> RunsListResponse:
|
||||
"""
|
||||
List all execution runs for the authenticated user.
|
||||
|
||||
Returns runs across all agents, sorted by most recent first.
|
||||
"""
|
||||
result = await execution_db.get_graph_executions_paginated(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
return RunsListResponse(
|
||||
runs=[_convert_execution_to_run(e) for e in result.executions],
|
||||
total_count=result.pagination.total_items,
|
||||
page=result.pagination.current_page,
|
||||
page_size=result.pagination.page_size,
|
||||
total_pages=result.pagination.total_pages,
|
||||
)
|
||||
|
||||
|
||||
@runs_router.get(
|
||||
path="/{run_id}",
|
||||
summary="Get run details",
|
||||
response_model=RunDetails,
|
||||
)
|
||||
async def get_run(
|
||||
run_id: str = Path(description="Run ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_RUN)
|
||||
),
|
||||
) -> RunDetails:
|
||||
"""
|
||||
Get detailed information about a specific run.
|
||||
|
||||
Includes outputs and individual node execution results.
|
||||
"""
|
||||
result = await execution_db.get_graph_execution(
|
||||
user_id=auth.user_id,
|
||||
execution_id=run_id,
|
||||
include_node_executions=True,
|
||||
)
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail=f"Run #{run_id} not found")
|
||||
|
||||
return _convert_execution_to_run_details(result)
|
||||
|
||||
|
||||
@runs_router.post(
|
||||
path="/{run_id}/stop",
|
||||
summary="Stop a run",
|
||||
)
|
||||
async def stop_run(
|
||||
run_id: str = Path(description="Run ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_RUN)
|
||||
),
|
||||
) -> Run:
|
||||
"""
|
||||
Stop a running execution.
|
||||
|
||||
Only runs in QUEUED or RUNNING status can be stopped.
|
||||
"""
|
||||
# Verify the run exists and belongs to the user
|
||||
exec = await execution_db.get_graph_execution(
|
||||
user_id=auth.user_id,
|
||||
execution_id=run_id,
|
||||
)
|
||||
if not exec:
|
||||
raise HTTPException(status_code=404, detail=f"Run #{run_id} not found")
|
||||
|
||||
# Stop the execution
|
||||
await execution_utils.stop_graph_execution(
|
||||
graph_exec_id=run_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
# Fetch updated execution
|
||||
updated_exec = await execution_db.get_graph_execution(
|
||||
user_id=auth.user_id,
|
||||
execution_id=run_id,
|
||||
)
|
||||
|
||||
if not updated_exec:
|
||||
raise HTTPException(status_code=404, detail=f"Run #{run_id} not found")
|
||||
|
||||
return _convert_execution_to_run(updated_exec)
|
||||
|
||||
|
||||
@runs_router.delete(
|
||||
path="/{run_id}",
|
||||
summary="Delete a run",
|
||||
)
|
||||
async def delete_run(
|
||||
run_id: str = Path(description="Run ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_RUN)
|
||||
),
|
||||
) -> None:
|
||||
"""
|
||||
Delete an execution run.
|
||||
|
||||
This marks the run as deleted. The data may still be retained for
|
||||
some time for recovery purposes.
|
||||
"""
|
||||
await execution_db.delete_graph_execution(
|
||||
graph_exec_id=run_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints - Reviews (Human-in-the-loop)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@runs_router.get(
|
||||
path="/reviews",
|
||||
summary="List all pending reviews",
|
||||
response_model=PendingReviewsResponse,
|
||||
)
|
||||
async def list_pending_reviews(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_RUN_REVIEW)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> PendingReviewsResponse:
|
||||
"""
|
||||
List all pending human-in-the-loop reviews.
|
||||
|
||||
These are blocks that require human approval or input before the
|
||||
agent can continue execution.
|
||||
"""
|
||||
reviews = await review_db.get_pending_reviews_for_user(
|
||||
user_id=auth.user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
# Note: get_pending_reviews_for_user returns list directly, not a paginated result
|
||||
# We compute pagination info based on results
|
||||
total_count = len(reviews)
|
||||
total_pages = max(1, (total_count + page_size - 1) // page_size)
|
||||
|
||||
return PendingReviewsResponse(
|
||||
reviews=[_convert_pending_review(r) for r in reviews],
|
||||
total_count=total_count,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total_pages=total_pages,
|
||||
)
|
||||
|
||||
|
||||
@runs_router.get(
|
||||
path="/{run_id}/reviews",
|
||||
summary="List reviews for a run",
|
||||
response_model=list[PendingReview],
|
||||
)
|
||||
async def list_run_reviews(
|
||||
run_id: str = Path(description="Run ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_RUN_REVIEW)
|
||||
),
|
||||
) -> list[PendingReview]:
|
||||
"""
|
||||
List all human-in-the-loop reviews for a specific run.
|
||||
"""
|
||||
reviews = await review_db.get_pending_reviews_for_execution(
|
||||
graph_exec_id=run_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
|
||||
return [_convert_pending_review(r) for r in reviews]
|
||||
|
||||
|
||||
@runs_router.post(
|
||||
path="/{run_id}/reviews",
|
||||
summary="Submit review responses for a run",
|
||||
response_model=SubmitReviewsResponse,
|
||||
)
|
||||
async def submit_reviews(
|
||||
request: SubmitReviewsRequest,
|
||||
run_id: str = Path(description="Run ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_RUN_REVIEW)
|
||||
),
|
||||
) -> SubmitReviewsResponse:
|
||||
"""
|
||||
Submit responses to all pending human-in-the-loop reviews for a run.
|
||||
|
||||
All pending reviews for the execution must be included. Approving
|
||||
a review will allow the agent to continue; rejecting will terminate
|
||||
execution at that point.
|
||||
"""
|
||||
# Build review decisions dict for process_all_reviews_for_execution
|
||||
review_decisions: dict[
|
||||
str, tuple[ReviewStatus, SafeJsonData | None, str | None]
|
||||
] = {}
|
||||
|
||||
for decision in request.reviews:
|
||||
status = ReviewStatus.APPROVED if decision.approved else ReviewStatus.REJECTED
|
||||
review_decisions[decision.node_exec_id] = (
|
||||
status,
|
||||
decision.edited_payload,
|
||||
decision.message,
|
||||
)
|
||||
|
||||
try:
|
||||
results = await review_db.process_all_reviews_for_execution(
|
||||
user_id=auth.user_id,
|
||||
review_decisions=review_decisions,
|
||||
)
|
||||
|
||||
approved_count = sum(
|
||||
1 for r in results.values() if r.status == ReviewStatus.APPROVED
|
||||
)
|
||||
rejected_count = sum(
|
||||
1 for r in results.values() if r.status == ReviewStatus.REJECTED
|
||||
)
|
||||
|
||||
return SubmitReviewsResponse(
|
||||
run_id=run_id,
|
||||
approved_count=approved_count,
|
||||
rejected_count=rejected_count,
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
@@ -1,250 +0,0 @@
|
||||
"""
|
||||
V2 External API - Schedules Endpoints
|
||||
|
||||
Provides endpoints for managing execution schedules.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Path, Query, Security
|
||||
from prisma.enums import APIKeyPermission
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.api.external.middleware import require_permission
|
||||
from backend.data import graph as graph_db
|
||||
from backend.data.auth.base import APIAuthorizationInfo
|
||||
from backend.data.user import get_user_by_id
|
||||
from backend.executor import scheduler
|
||||
from backend.util.clients import get_scheduler_client
|
||||
from backend.util.timezone_utils import get_user_timezone_or_utc
|
||||
|
||||
from .common import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
schedules_router = APIRouter()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Request/Response Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class Schedule(BaseModel):
|
||||
"""An execution schedule for a graph."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
cron: str = Field(description="Cron expression for the schedule")
|
||||
input_data: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Input data for scheduled executions"
|
||||
)
|
||||
next_run_time: Optional[datetime] = Field(
|
||||
default=None, description="Next scheduled run time"
|
||||
)
|
||||
is_enabled: bool = Field(default=True, description="Whether schedule is enabled")
|
||||
|
||||
|
||||
class SchedulesListResponse(BaseModel):
|
||||
"""Response for listing schedules."""
|
||||
|
||||
schedules: list[Schedule]
|
||||
total_count: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class CreateScheduleRequest(BaseModel):
|
||||
"""Request to create a schedule."""
|
||||
|
||||
name: str = Field(description="Display name for the schedule")
|
||||
cron: str = Field(description="Cron expression (e.g., '0 9 * * *' for 9am daily)")
|
||||
input_data: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Input data for scheduled executions"
|
||||
)
|
||||
credentials_inputs: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Credentials for the schedule"
|
||||
)
|
||||
graph_version: Optional[int] = Field(
|
||||
default=None, description="Graph version (default: active version)"
|
||||
)
|
||||
timezone: Optional[str] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Timezone for schedule (e.g., 'America/New_York'). "
|
||||
"Defaults to user's timezone."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _convert_schedule(job: scheduler.GraphExecutionJobInfo) -> Schedule:
|
||||
"""Convert internal schedule job info to v2 API model."""
|
||||
# Parse the ISO format string to datetime
|
||||
next_run = datetime.fromisoformat(job.next_run_time) if job.next_run_time else None
|
||||
|
||||
return Schedule(
|
||||
id=job.id,
|
||||
name=job.name or "",
|
||||
graph_id=job.graph_id,
|
||||
graph_version=job.graph_version,
|
||||
cron=job.cron,
|
||||
input_data=job.input_data,
|
||||
next_run_time=next_run,
|
||||
is_enabled=True, # All returned schedules are enabled
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@schedules_router.get(
|
||||
path="",
|
||||
summary="List all user schedules",
|
||||
response_model=SchedulesListResponse,
|
||||
)
|
||||
async def list_all_schedules(
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_SCHEDULE)
|
||||
),
|
||||
page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
|
||||
page_size: int = Query(
|
||||
default=DEFAULT_PAGE_SIZE,
|
||||
ge=1,
|
||||
le=MAX_PAGE_SIZE,
|
||||
description=f"Items per page (max {MAX_PAGE_SIZE})",
|
||||
),
|
||||
) -> SchedulesListResponse:
|
||||
"""
|
||||
List all schedules for the authenticated user across all graphs.
|
||||
"""
|
||||
schedules = await get_scheduler_client().get_execution_schedules(
|
||||
user_id=auth.user_id
|
||||
)
|
||||
converted = [_convert_schedule(s) for s in schedules]
|
||||
|
||||
# Manual pagination (scheduler doesn't support pagination natively)
|
||||
total_count = len(converted)
|
||||
total_pages = (total_count + page_size - 1) // page_size if total_count > 0 else 1
|
||||
start = (page - 1) * page_size
|
||||
end = start + page_size
|
||||
paginated = converted[start:end]
|
||||
|
||||
return SchedulesListResponse(
|
||||
schedules=paginated,
|
||||
total_count=total_count,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total_pages=total_pages,
|
||||
)
|
||||
|
||||
|
||||
@schedules_router.delete(
|
||||
path="/{schedule_id}",
|
||||
summary="Delete a schedule",
|
||||
)
|
||||
async def delete_schedule(
|
||||
schedule_id: str = Path(description="Schedule ID to delete"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_SCHEDULE)
|
||||
),
|
||||
) -> None:
|
||||
"""
|
||||
Delete an execution schedule.
|
||||
"""
|
||||
try:
|
||||
await get_scheduler_client().delete_schedule(
|
||||
schedule_id=schedule_id,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
except Exception as e:
|
||||
if "not found" in str(e).lower():
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Schedule #{schedule_id} not found"
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Graph-specific Schedule Endpoints (nested under /graphs)
|
||||
# These are included in the graphs router via include_router
|
||||
# ============================================================================
|
||||
|
||||
graph_schedules_router = APIRouter()
|
||||
|
||||
|
||||
@graph_schedules_router.get(
|
||||
path="/{graph_id}/schedules",
|
||||
summary="List schedules for a graph",
|
||||
response_model=list[Schedule],
|
||||
)
|
||||
async def list_graph_schedules(
|
||||
graph_id: str = Path(description="Graph ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.READ_SCHEDULE)
|
||||
),
|
||||
) -> list[Schedule]:
|
||||
"""
|
||||
List all schedules for a specific graph.
|
||||
"""
|
||||
schedules = await get_scheduler_client().get_execution_schedules(
|
||||
user_id=auth.user_id,
|
||||
graph_id=graph_id,
|
||||
)
|
||||
return [_convert_schedule(s) for s in schedules]
|
||||
|
||||
|
||||
@graph_schedules_router.post(
|
||||
path="/{graph_id}/schedules",
|
||||
summary="Create a schedule for a graph",
|
||||
response_model=Schedule,
|
||||
)
|
||||
async def create_graph_schedule(
|
||||
request: CreateScheduleRequest,
|
||||
graph_id: str = Path(description="Graph ID"),
|
||||
auth: APIAuthorizationInfo = Security(
|
||||
require_permission(APIKeyPermission.WRITE_SCHEDULE)
|
||||
),
|
||||
) -> Schedule:
|
||||
"""
|
||||
Create a new execution schedule for a graph.
|
||||
|
||||
The schedule will execute the graph at times matching the cron expression,
|
||||
using the provided input data.
|
||||
"""
|
||||
graph = await graph_db.get_graph(
|
||||
graph_id=graph_id,
|
||||
version=request.graph_version,
|
||||
user_id=auth.user_id,
|
||||
)
|
||||
if not graph:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Graph #{graph_id} v{request.graph_version} not found.",
|
||||
)
|
||||
|
||||
# Determine timezone
|
||||
if request.timezone:
|
||||
user_timezone = request.timezone
|
||||
else:
|
||||
user = await get_user_by_id(auth.user_id)
|
||||
user_timezone = get_user_timezone_or_utc(user.timezone if user else None)
|
||||
|
||||
result = await get_scheduler_client().add_execution_schedule(
|
||||
user_id=auth.user_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph.version,
|
||||
name=request.name,
|
||||
cron=request.cron,
|
||||
input_data=request.input_data,
|
||||
input_credentials=request.credentials_inputs,
|
||||
user_timezone=user_timezone,
|
||||
)
|
||||
|
||||
return _convert_schedule(result)
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Configuration management for chat system."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import Field, field_validator
|
||||
from pydantic_settings import BaseSettings
|
||||
@@ -11,11 +12,7 @@ class ChatConfig(BaseSettings):
|
||||
|
||||
# OpenAI API Configuration
|
||||
model: str = Field(
|
||||
default="anthropic/claude-opus-4.5", description="Default model to use"
|
||||
)
|
||||
title_model: str = Field(
|
||||
default="openai/gpt-4o-mini",
|
||||
description="Model to use for generating session titles (should be fast/cheap)",
|
||||
default="qwen/qwen3-235b-a22b-2507", description="Default model to use"
|
||||
)
|
||||
api_key: str | None = Field(default=None, description="OpenAI API key")
|
||||
base_url: str | None = Field(
|
||||
@@ -26,6 +23,12 @@ class ChatConfig(BaseSettings):
|
||||
# Session TTL Configuration - 12 hours
|
||||
session_ttl: int = Field(default=43200, description="Session TTL in seconds")
|
||||
|
||||
# System Prompt Configuration
|
||||
system_prompt_path: str = Field(
|
||||
default="prompts/chat_system.md",
|
||||
description="Path to system prompt file relative to chat module",
|
||||
)
|
||||
|
||||
# Streaming Configuration
|
||||
max_context_messages: int = Field(
|
||||
default=50, ge=1, le=200, description="Maximum context messages"
|
||||
@@ -38,13 +41,6 @@ class ChatConfig(BaseSettings):
|
||||
default=3, description="Maximum number of agent schedules"
|
||||
)
|
||||
|
||||
# Langfuse Prompt Management Configuration
|
||||
# Note: Langfuse credentials are in Settings().secrets (settings.py)
|
||||
langfuse_prompt_name: str = Field(
|
||||
default="CoPilot Prompt",
|
||||
description="Name of the prompt in Langfuse to fetch",
|
||||
)
|
||||
|
||||
@field_validator("api_key", mode="before")
|
||||
@classmethod
|
||||
def get_api_key(cls, v):
|
||||
@@ -76,11 +72,43 @@ class ChatConfig(BaseSettings):
|
||||
v = "https://openrouter.ai/api/v1"
|
||||
return v
|
||||
|
||||
# Prompt paths for different contexts
|
||||
PROMPT_PATHS: dict[str, str] = {
|
||||
"default": "prompts/chat_system.md",
|
||||
"onboarding": "prompts/onboarding_system.md",
|
||||
}
|
||||
def get_system_prompt(self, **template_vars) -> str:
|
||||
"""Load and render the system prompt from file.
|
||||
|
||||
Args:
|
||||
**template_vars: Variables to substitute in the template
|
||||
|
||||
Returns:
|
||||
Rendered system prompt string
|
||||
|
||||
"""
|
||||
# Get the path relative to this module
|
||||
module_dir = Path(__file__).parent
|
||||
prompt_path = module_dir / self.system_prompt_path
|
||||
|
||||
# Check for .j2 extension first (Jinja2 template)
|
||||
j2_path = Path(str(prompt_path) + ".j2")
|
||||
if j2_path.exists():
|
||||
try:
|
||||
from jinja2 import Template
|
||||
|
||||
template = Template(j2_path.read_text())
|
||||
return template.render(**template_vars)
|
||||
except ImportError:
|
||||
# Jinja2 not installed, fall back to reading as plain text
|
||||
return j2_path.read_text()
|
||||
|
||||
# Check for markdown file
|
||||
if prompt_path.exists():
|
||||
content = prompt_path.read_text()
|
||||
|
||||
# Simple variable substitution if Jinja2 is not available
|
||||
for key, value in template_vars.items():
|
||||
placeholder = f"{{{key}}}"
|
||||
content = content.replace(placeholder, str(value))
|
||||
|
||||
return content
|
||||
raise FileNotFoundError(f"System prompt file not found: {prompt_path}")
|
||||
|
||||
class Config:
|
||||
"""Pydantic config."""
|
||||
|
||||
@@ -1,249 +0,0 @@
|
||||
"""Database operations for chat sessions."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any, cast
|
||||
|
||||
from prisma.models import ChatMessage as PrismaChatMessage
|
||||
from prisma.models import ChatSession as PrismaChatSession
|
||||
from prisma.types import (
|
||||
ChatMessageCreateInput,
|
||||
ChatSessionCreateInput,
|
||||
ChatSessionUpdateInput,
|
||||
ChatSessionWhereInput,
|
||||
)
|
||||
|
||||
from backend.data.db import transaction
|
||||
from backend.util.json import SafeJson
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def get_chat_session(session_id: str) -> PrismaChatSession | None:
|
||||
"""Get a chat session by ID from the database."""
|
||||
session = await PrismaChatSession.prisma().find_unique(
|
||||
where={"id": session_id},
|
||||
include={"Messages": True},
|
||||
)
|
||||
if session and session.Messages:
|
||||
# Sort messages by sequence in Python - Prisma Python client doesn't support
|
||||
# order_by in include clauses (unlike Prisma JS), so we sort after fetching
|
||||
session.Messages.sort(key=lambda m: m.sequence)
|
||||
return session
|
||||
|
||||
|
||||
async def create_chat_session(
|
||||
session_id: str,
|
||||
user_id: str,
|
||||
) -> PrismaChatSession:
|
||||
"""Create a new chat session in the database."""
|
||||
data = ChatSessionCreateInput(
|
||||
id=session_id,
|
||||
userId=user_id,
|
||||
credentials=SafeJson({}),
|
||||
successfulAgentRuns=SafeJson({}),
|
||||
successfulAgentSchedules=SafeJson({}),
|
||||
)
|
||||
return await PrismaChatSession.prisma().create(
|
||||
data=data,
|
||||
include={"Messages": True},
|
||||
)
|
||||
|
||||
|
||||
async def update_chat_session(
|
||||
session_id: str,
|
||||
credentials: dict[str, Any] | None = None,
|
||||
successful_agent_runs: dict[str, Any] | None = None,
|
||||
successful_agent_schedules: dict[str, Any] | None = None,
|
||||
total_prompt_tokens: int | None = None,
|
||||
total_completion_tokens: int | None = None,
|
||||
title: str | None = None,
|
||||
) -> PrismaChatSession | None:
|
||||
"""Update a chat session's metadata."""
|
||||
data: ChatSessionUpdateInput = {"updatedAt": datetime.now(UTC)}
|
||||
|
||||
if credentials is not None:
|
||||
data["credentials"] = SafeJson(credentials)
|
||||
if successful_agent_runs is not None:
|
||||
data["successfulAgentRuns"] = SafeJson(successful_agent_runs)
|
||||
if successful_agent_schedules is not None:
|
||||
data["successfulAgentSchedules"] = SafeJson(successful_agent_schedules)
|
||||
if total_prompt_tokens is not None:
|
||||
data["totalPromptTokens"] = total_prompt_tokens
|
||||
if total_completion_tokens is not None:
|
||||
data["totalCompletionTokens"] = total_completion_tokens
|
||||
if title is not None:
|
||||
data["title"] = title
|
||||
|
||||
session = await PrismaChatSession.prisma().update(
|
||||
where={"id": session_id},
|
||||
data=data,
|
||||
include={"Messages": True},
|
||||
)
|
||||
if session and session.Messages:
|
||||
# Sort in Python - Prisma Python doesn't support order_by in include clauses
|
||||
session.Messages.sort(key=lambda m: m.sequence)
|
||||
return session
|
||||
|
||||
|
||||
async def add_chat_message(
|
||||
session_id: str,
|
||||
role: str,
|
||||
sequence: int,
|
||||
content: str | None = None,
|
||||
name: str | None = None,
|
||||
tool_call_id: str | None = None,
|
||||
refusal: str | None = None,
|
||||
tool_calls: list[dict[str, Any]] | None = None,
|
||||
function_call: dict[str, Any] | None = None,
|
||||
) -> PrismaChatMessage:
|
||||
"""Add a message to a chat session."""
|
||||
# Build input dict dynamically rather than using ChatMessageCreateInput directly
|
||||
# because Prisma's TypedDict validation rejects optional fields set to None.
|
||||
# We only include fields that have values, then cast at the end.
|
||||
data: dict[str, Any] = {
|
||||
"Session": {"connect": {"id": session_id}},
|
||||
"role": role,
|
||||
"sequence": sequence,
|
||||
}
|
||||
|
||||
# Add optional string fields
|
||||
if content is not None:
|
||||
data["content"] = content
|
||||
if name is not None:
|
||||
data["name"] = name
|
||||
if tool_call_id is not None:
|
||||
data["toolCallId"] = tool_call_id
|
||||
if refusal is not None:
|
||||
data["refusal"] = refusal
|
||||
|
||||
# Add optional JSON fields only when they have values
|
||||
if tool_calls is not None:
|
||||
data["toolCalls"] = SafeJson(tool_calls)
|
||||
if function_call is not None:
|
||||
data["functionCall"] = SafeJson(function_call)
|
||||
|
||||
# Run message create and session timestamp update in parallel for lower latency
|
||||
_, message = await asyncio.gather(
|
||||
PrismaChatSession.prisma().update(
|
||||
where={"id": session_id},
|
||||
data={"updatedAt": datetime.now(UTC)},
|
||||
),
|
||||
PrismaChatMessage.prisma().create(data=cast(ChatMessageCreateInput, data)),
|
||||
)
|
||||
return message
|
||||
|
||||
|
||||
async def add_chat_messages_batch(
|
||||
session_id: str,
|
||||
messages: list[dict[str, Any]],
|
||||
start_sequence: int,
|
||||
) -> list[PrismaChatMessage]:
|
||||
"""Add multiple messages to a chat session in a batch.
|
||||
|
||||
Uses a transaction for atomicity - if any message creation fails,
|
||||
the entire batch is rolled back.
|
||||
"""
|
||||
if not messages:
|
||||
return []
|
||||
|
||||
created_messages = []
|
||||
|
||||
async with transaction() as tx:
|
||||
for i, msg in enumerate(messages):
|
||||
# Build input dict dynamically rather than using ChatMessageCreateInput
|
||||
# directly because Prisma's TypedDict validation rejects optional fields
|
||||
# set to None. We only include fields that have values, then cast.
|
||||
data: dict[str, Any] = {
|
||||
"Session": {"connect": {"id": session_id}},
|
||||
"role": msg["role"],
|
||||
"sequence": start_sequence + i,
|
||||
}
|
||||
|
||||
# Add optional string fields
|
||||
if msg.get("content") is not None:
|
||||
data["content"] = msg["content"]
|
||||
if msg.get("name") is not None:
|
||||
data["name"] = msg["name"]
|
||||
if msg.get("tool_call_id") is not None:
|
||||
data["toolCallId"] = msg["tool_call_id"]
|
||||
if msg.get("refusal") is not None:
|
||||
data["refusal"] = msg["refusal"]
|
||||
|
||||
# Add optional JSON fields only when they have values
|
||||
if msg.get("tool_calls") is not None:
|
||||
data["toolCalls"] = SafeJson(msg["tool_calls"])
|
||||
if msg.get("function_call") is not None:
|
||||
data["functionCall"] = SafeJson(msg["function_call"])
|
||||
|
||||
created = await PrismaChatMessage.prisma(tx).create(
|
||||
data=cast(ChatMessageCreateInput, data)
|
||||
)
|
||||
created_messages.append(created)
|
||||
|
||||
# Update session's updatedAt timestamp within the same transaction.
|
||||
# Note: Token usage (total_prompt_tokens, total_completion_tokens) is updated
|
||||
# separately via update_chat_session() after streaming completes.
|
||||
await PrismaChatSession.prisma(tx).update(
|
||||
where={"id": session_id},
|
||||
data={"updatedAt": datetime.now(UTC)},
|
||||
)
|
||||
|
||||
return created_messages
|
||||
|
||||
|
||||
async def get_user_chat_sessions(
|
||||
user_id: str,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> list[PrismaChatSession]:
|
||||
"""Get chat sessions for a user, ordered by most recent."""
|
||||
return await PrismaChatSession.prisma().find_many(
|
||||
where={"userId": user_id},
|
||||
order={"updatedAt": "desc"},
|
||||
take=limit,
|
||||
skip=offset,
|
||||
)
|
||||
|
||||
|
||||
async def get_user_session_count(user_id: str) -> int:
|
||||
"""Get the total number of chat sessions for a user."""
|
||||
return await PrismaChatSession.prisma().count(where={"userId": user_id})
|
||||
|
||||
|
||||
async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
|
||||
"""Delete a chat session and all its messages.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to delete.
|
||||
user_id: If provided, validates that the session belongs to this user
|
||||
before deletion. This prevents unauthorized deletion of other
|
||||
users' sessions.
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False otherwise.
|
||||
"""
|
||||
try:
|
||||
# Build typed where clause with optional user_id validation
|
||||
where_clause: ChatSessionWhereInput = {"id": session_id}
|
||||
if user_id is not None:
|
||||
where_clause["userId"] = user_id
|
||||
|
||||
result = await PrismaChatSession.prisma().delete_many(where=where_clause)
|
||||
if result == 0:
|
||||
logger.warning(
|
||||
f"No session deleted for {session_id} "
|
||||
f"(user_id validation: {user_id is not None})"
|
||||
)
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete chat session {session_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def get_chat_session_message_count(session_id: str) -> int:
|
||||
"""Get the number of messages in a chat session."""
|
||||
count = await PrismaChatMessage.prisma().count(where={"sessionId": session_id})
|
||||
return count
|
||||
@@ -1,9 +1,6 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from weakref import WeakValueDictionary
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionAssistantMessageParam,
|
||||
@@ -19,63 +16,17 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||
ChatCompletionMessageToolCallParam,
|
||||
Function,
|
||||
)
|
||||
from prisma.models import ChatMessage as PrismaChatMessage
|
||||
from prisma.models import ChatSession as PrismaChatSession
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.util import json
|
||||
from backend.util.exceptions import DatabaseError, RedisError
|
||||
from backend.util.exceptions import RedisError
|
||||
|
||||
from . import db as chat_db
|
||||
from .config import ChatConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
config = ChatConfig()
|
||||
|
||||
|
||||
def _parse_json_field(value: str | dict | list | None, default: Any = None) -> Any:
|
||||
"""Parse a JSON field that may be stored as string or already parsed."""
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, str):
|
||||
return json.loads(value)
|
||||
return value
|
||||
|
||||
|
||||
# Redis cache key prefix for chat sessions
|
||||
CHAT_SESSION_CACHE_PREFIX = "chat:session:"
|
||||
|
||||
|
||||
def _get_session_cache_key(session_id: str) -> str:
|
||||
"""Get the Redis cache key for a chat session."""
|
||||
return f"{CHAT_SESSION_CACHE_PREFIX}{session_id}"
|
||||
|
||||
|
||||
# Session-level locks to prevent race conditions during concurrent upserts.
|
||||
# Uses WeakValueDictionary to automatically garbage collect locks when no longer referenced,
|
||||
# preventing unbounded memory growth while maintaining lock semantics for active sessions.
|
||||
# Invalidation: Locks are auto-removed by GC when no coroutine holds a reference (after
|
||||
# async with lock: completes). Explicit cleanup also occurs in delete_chat_session().
|
||||
_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
|
||||
_session_locks_mutex = asyncio.Lock()
|
||||
|
||||
|
||||
async def _get_session_lock(session_id: str) -> asyncio.Lock:
|
||||
"""Get or create a lock for a specific session to prevent concurrent upserts.
|
||||
|
||||
Uses WeakValueDictionary for automatic cleanup: locks are garbage collected
|
||||
when no coroutine holds a reference to them, preventing memory leaks from
|
||||
unbounded growth of session locks.
|
||||
"""
|
||||
async with _session_locks_mutex:
|
||||
lock = _session_locks.get(session_id)
|
||||
if lock is None:
|
||||
lock = asyncio.Lock()
|
||||
_session_locks[session_id] = lock
|
||||
return lock
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
|
||||
role: str
|
||||
content: str | None = None
|
||||
@@ -94,8 +45,7 @@ class Usage(BaseModel):
|
||||
|
||||
class ChatSession(BaseModel):
|
||||
session_id: str
|
||||
user_id: str
|
||||
title: str | None = None
|
||||
user_id: str | None
|
||||
messages: list[ChatMessage]
|
||||
usage: list[Usage]
|
||||
credentials: dict[str, dict] = {} # Map of provider -> credential metadata
|
||||
@@ -105,11 +55,10 @@ class ChatSession(BaseModel):
|
||||
successful_agent_schedules: dict[str, int] = {}
|
||||
|
||||
@staticmethod
|
||||
def new(user_id: str) -> "ChatSession":
|
||||
def new(user_id: str | None) -> "ChatSession":
|
||||
return ChatSession(
|
||||
session_id=str(uuid.uuid4()),
|
||||
user_id=user_id,
|
||||
title=None,
|
||||
messages=[],
|
||||
usage=[],
|
||||
credentials={},
|
||||
@@ -117,61 +66,6 @@ class ChatSession(BaseModel):
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def from_db(
|
||||
prisma_session: PrismaChatSession,
|
||||
prisma_messages: list[PrismaChatMessage] | None = None,
|
||||
) -> "ChatSession":
|
||||
"""Convert Prisma models to Pydantic ChatSession."""
|
||||
messages = []
|
||||
if prisma_messages:
|
||||
for msg in prisma_messages:
|
||||
messages.append(
|
||||
ChatMessage(
|
||||
role=msg.role,
|
||||
content=msg.content,
|
||||
name=msg.name,
|
||||
tool_call_id=msg.toolCallId,
|
||||
refusal=msg.refusal,
|
||||
tool_calls=_parse_json_field(msg.toolCalls),
|
||||
function_call=_parse_json_field(msg.functionCall),
|
||||
)
|
||||
)
|
||||
|
||||
# Parse JSON fields from Prisma
|
||||
credentials = _parse_json_field(prisma_session.credentials, default={})
|
||||
successful_agent_runs = _parse_json_field(
|
||||
prisma_session.successfulAgentRuns, default={}
|
||||
)
|
||||
successful_agent_schedules = _parse_json_field(
|
||||
prisma_session.successfulAgentSchedules, default={}
|
||||
)
|
||||
|
||||
# Calculate usage from token counts
|
||||
usage = []
|
||||
if prisma_session.totalPromptTokens or prisma_session.totalCompletionTokens:
|
||||
usage.append(
|
||||
Usage(
|
||||
prompt_tokens=prisma_session.totalPromptTokens or 0,
|
||||
completion_tokens=prisma_session.totalCompletionTokens or 0,
|
||||
total_tokens=(prisma_session.totalPromptTokens or 0)
|
||||
+ (prisma_session.totalCompletionTokens or 0),
|
||||
)
|
||||
)
|
||||
|
||||
return ChatSession(
|
||||
session_id=prisma_session.id,
|
||||
user_id=prisma_session.userId,
|
||||
title=prisma_session.title,
|
||||
messages=messages,
|
||||
usage=usage,
|
||||
credentials=credentials,
|
||||
started_at=prisma_session.createdAt,
|
||||
updated_at=prisma_session.updatedAt,
|
||||
successful_agent_runs=successful_agent_runs,
|
||||
successful_agent_schedules=successful_agent_schedules,
|
||||
)
|
||||
|
||||
def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
|
||||
messages = []
|
||||
for message in self.messages:
|
||||
@@ -261,337 +155,50 @@ class ChatSession(BaseModel):
|
||||
return messages
|
||||
|
||||
|
||||
async def _get_session_from_cache(session_id: str) -> ChatSession | None:
|
||||
"""Get a chat session from Redis cache."""
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async def get_chat_session(
|
||||
session_id: str,
|
||||
user_id: str | None,
|
||||
) -> ChatSession | None:
|
||||
"""Get a chat session by ID."""
|
||||
redis_key = f"chat:session:{session_id}"
|
||||
async_redis = await get_redis_async()
|
||||
|
||||
raw_session: bytes | None = await async_redis.get(redis_key)
|
||||
|
||||
if raw_session is None:
|
||||
logger.warning(f"Session {session_id} not found in Redis")
|
||||
return None
|
||||
|
||||
try:
|
||||
session = ChatSession.model_validate_json(raw_session)
|
||||
logger.info(
|
||||
f"Loading session {session_id} from cache: "
|
||||
f"message_count={len(session.messages)}, "
|
||||
f"roles={[m.role for m in session.messages]}"
|
||||
)
|
||||
return session
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to deserialize session {session_id}: {e}", exc_info=True)
|
||||
raise RedisError(f"Corrupted session data for {session_id}") from e
|
||||
|
||||
|
||||
async def _cache_session(session: ChatSession) -> None:
|
||||
"""Cache a chat session in Redis."""
|
||||
redis_key = _get_session_cache_key(session.session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())
|
||||
|
||||
|
||||
async def _get_session_from_db(session_id: str) -> ChatSession | None:
|
||||
"""Get a chat session from the database."""
|
||||
prisma_session = await chat_db.get_chat_session(session_id)
|
||||
if not prisma_session:
|
||||
return None
|
||||
|
||||
messages = prisma_session.Messages
|
||||
logger.info(
|
||||
f"Loading session {session_id} from DB: "
|
||||
f"has_messages={messages is not None}, "
|
||||
f"message_count={len(messages) if messages else 0}, "
|
||||
f"roles={[m.role for m in messages] if messages else []}"
|
||||
)
|
||||
|
||||
return ChatSession.from_db(prisma_session, messages)
|
||||
|
||||
|
||||
async def _save_session_to_db(
|
||||
session: ChatSession, existing_message_count: int
|
||||
) -> None:
|
||||
"""Save or update a chat session in the database."""
|
||||
# Check if session exists in DB
|
||||
existing = await chat_db.get_chat_session(session.session_id)
|
||||
|
||||
if not existing:
|
||||
# Create new session
|
||||
await chat_db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
existing_message_count = 0
|
||||
|
||||
# Calculate total tokens from usage
|
||||
total_prompt = sum(u.prompt_tokens for u in session.usage)
|
||||
total_completion = sum(u.completion_tokens for u in session.usage)
|
||||
|
||||
# Update session metadata
|
||||
await chat_db.update_chat_session(
|
||||
session_id=session.session_id,
|
||||
credentials=session.credentials,
|
||||
successful_agent_runs=session.successful_agent_runs,
|
||||
successful_agent_schedules=session.successful_agent_schedules,
|
||||
total_prompt_tokens=total_prompt,
|
||||
total_completion_tokens=total_completion,
|
||||
)
|
||||
|
||||
# Add new messages (only those after existing count)
|
||||
new_messages = session.messages[existing_message_count:]
|
||||
if new_messages:
|
||||
messages_data = []
|
||||
for msg in new_messages:
|
||||
messages_data.append(
|
||||
{
|
||||
"role": msg.role,
|
||||
"content": msg.content,
|
||||
"name": msg.name,
|
||||
"tool_call_id": msg.tool_call_id,
|
||||
"refusal": msg.refusal,
|
||||
"tool_calls": msg.tool_calls,
|
||||
"function_call": msg.function_call,
|
||||
}
|
||||
)
|
||||
logger.info(
|
||||
f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
|
||||
f"roles={[m['role'] for m in messages_data]}, "
|
||||
f"start_sequence={existing_message_count}"
|
||||
)
|
||||
await chat_db.add_chat_messages_batch(
|
||||
session_id=session.session_id,
|
||||
messages=messages_data,
|
||||
start_sequence=existing_message_count,
|
||||
)
|
||||
|
||||
|
||||
async def get_chat_session(
|
||||
session_id: str,
|
||||
user_id: str | None = None,
|
||||
) -> ChatSession | None:
|
||||
"""Get a chat session by ID.
|
||||
|
||||
Checks Redis cache first, falls back to database if not found.
|
||||
Caches database results back to Redis.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to fetch.
|
||||
user_id: If provided, validates that the session belongs to this user.
|
||||
If None, ownership is not validated (admin/system access).
|
||||
"""
|
||||
# Try cache first
|
||||
try:
|
||||
session = await _get_session_from_cache(session_id)
|
||||
if session:
|
||||
# Verify user ownership if user_id was provided for validation
|
||||
if user_id is not None and session.user_id != user_id:
|
||||
logger.warning(
|
||||
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
|
||||
)
|
||||
return None
|
||||
return session
|
||||
except RedisError:
|
||||
logger.warning(f"Cache error for session {session_id}, trying database")
|
||||
except Exception as e:
|
||||
logger.warning(f"Unexpected cache error for session {session_id}: {e}")
|
||||
|
||||
# Fall back to database
|
||||
logger.info(f"Session {session_id} not in cache, checking database")
|
||||
session = await _get_session_from_db(session_id)
|
||||
|
||||
if session is None:
|
||||
logger.warning(f"Session {session_id} not found in cache or database")
|
||||
return None
|
||||
|
||||
# Verify user ownership if user_id was provided for validation
|
||||
if user_id is not None and session.user_id != user_id:
|
||||
if session.user_id is not None and session.user_id != user_id:
|
||||
logger.warning(
|
||||
f"Session {session_id} user id mismatch: {session.user_id} != {user_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
# Cache the session from DB
|
||||
try:
|
||||
await _cache_session(session)
|
||||
logger.info(f"Cached session {session_id} from database")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cache session {session_id}: {e}")
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def upsert_chat_session(
|
||||
session: ChatSession,
|
||||
) -> ChatSession:
|
||||
"""Update a chat session in both cache and database.
|
||||
"""Update a chat session with the given messages."""
|
||||
|
||||
Uses session-level locking to prevent race conditions when concurrent
|
||||
operations (e.g., background title update and main stream handler)
|
||||
attempt to upsert the same session simultaneously.
|
||||
redis_key = f"chat:session:{session.session_id}"
|
||||
|
||||
Raises:
|
||||
DatabaseError: If the database write fails. The cache is still updated
|
||||
as a best-effort optimization, but the error is propagated to ensure
|
||||
callers are aware of the persistence failure.
|
||||
RedisError: If the cache write fails (after successful DB write).
|
||||
"""
|
||||
# Acquire session-specific lock to prevent concurrent upserts
|
||||
lock = await _get_session_lock(session.session_id)
|
||||
async_redis = await get_redis_async()
|
||||
resp = await async_redis.setex(
|
||||
redis_key, config.session_ttl, session.model_dump_json()
|
||||
)
|
||||
|
||||
async with lock:
|
||||
# Get existing message count from DB for incremental saves
|
||||
existing_message_count = await chat_db.get_chat_session_message_count(
|
||||
session.session_id
|
||||
if not resp:
|
||||
raise RedisError(
|
||||
f"Failed to persist chat session {session.session_id} to Redis: {resp}"
|
||||
)
|
||||
|
||||
db_error: Exception | None = None
|
||||
|
||||
# Save to database (primary storage)
|
||||
try:
|
||||
await _save_session_to_db(session, existing_message_count)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to save session {session.session_id} to database: {e}"
|
||||
)
|
||||
db_error = e
|
||||
|
||||
# Save to cache (best-effort, even if DB failed)
|
||||
try:
|
||||
await _cache_session(session)
|
||||
except Exception as e:
|
||||
# If DB succeeded but cache failed, raise cache error
|
||||
if db_error is None:
|
||||
raise RedisError(
|
||||
f"Failed to persist chat session {session.session_id} to Redis: {e}"
|
||||
) from e
|
||||
# If both failed, log cache error but raise DB error (more critical)
|
||||
logger.warning(
|
||||
f"Cache write also failed for session {session.session_id}: {e}"
|
||||
)
|
||||
|
||||
# Propagate DB error after attempting cache (prevents data loss)
|
||||
if db_error is not None:
|
||||
raise DatabaseError(
|
||||
f"Failed to persist chat session {session.session_id} to database"
|
||||
) from db_error
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def create_chat_session(user_id: str) -> ChatSession:
|
||||
"""Create a new chat session and persist it.
|
||||
|
||||
Raises:
|
||||
DatabaseError: If the database write fails. We fail fast to ensure
|
||||
callers never receive a non-persisted session that only exists
|
||||
in cache (which would be lost when the cache expires).
|
||||
"""
|
||||
session = ChatSession.new(user_id)
|
||||
|
||||
# Create in database first - fail fast if this fails
|
||||
try:
|
||||
await chat_db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create session {session.session_id} in database: {e}")
|
||||
raise DatabaseError(
|
||||
f"Failed to create chat session {session.session_id} in database"
|
||||
) from e
|
||||
|
||||
# Cache the session (best-effort optimization, DB is source of truth)
|
||||
try:
|
||||
await _cache_session(session)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cache new session {session.session_id}: {e}")
|
||||
|
||||
return session
|
||||
|
||||
|
||||
async def get_user_sessions(
|
||||
user_id: str,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[ChatSession], int]:
|
||||
"""Get chat sessions for a user from the database with total count.
|
||||
|
||||
Returns:
|
||||
A tuple of (sessions, total_count) where total_count is the overall
|
||||
number of sessions for the user (not just the current page).
|
||||
"""
|
||||
prisma_sessions = await chat_db.get_user_chat_sessions(user_id, limit, offset)
|
||||
total_count = await chat_db.get_user_session_count(user_id)
|
||||
|
||||
sessions = []
|
||||
for prisma_session in prisma_sessions:
|
||||
# Convert without messages for listing (lighter weight)
|
||||
sessions.append(ChatSession.from_db(prisma_session, None))
|
||||
|
||||
return sessions, total_count
|
||||
|
||||
|
||||
async def delete_chat_session(session_id: str, user_id: str | None = None) -> bool:
|
||||
"""Delete a chat session from both cache and database.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to delete.
|
||||
user_id: If provided, validates that the session belongs to this user
|
||||
before deletion. This prevents unauthorized deletion.
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False otherwise.
|
||||
"""
|
||||
# Delete from database first (with optional user_id validation)
|
||||
# This confirms ownership before invalidating cache
|
||||
deleted = await chat_db.delete_chat_session(session_id, user_id)
|
||||
|
||||
if not deleted:
|
||||
return False
|
||||
|
||||
# Only invalidate cache and clean up lock after DB confirms deletion
|
||||
try:
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.delete(redis_key)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete session {session_id} from cache: {e}")
|
||||
|
||||
# Clean up session lock (belt-and-suspenders with WeakValueDictionary)
|
||||
async with _session_locks_mutex:
|
||||
_session_locks.pop(session_id, None)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
async def update_session_title(session_id: str, title: str) -> bool:
|
||||
"""Update only the title of a chat session.
|
||||
|
||||
This is a lightweight operation that doesn't touch messages, avoiding
|
||||
race conditions with concurrent message updates. Use this for background
|
||||
title generation instead of upsert_chat_session.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to update.
|
||||
title: The new title to set.
|
||||
|
||||
Returns:
|
||||
True if updated successfully, False otherwise.
|
||||
"""
|
||||
try:
|
||||
result = await chat_db.update_chat_session(session_id=session_id, title=title)
|
||||
if result is None:
|
||||
logger.warning(f"Session {session_id} not found for title update")
|
||||
return False
|
||||
|
||||
# Invalidate cache so next fetch gets updated title
|
||||
try:
|
||||
redis_key = _get_session_cache_key(session_id)
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.delete(redis_key)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to update title for session {session_id}: {e}")
|
||||
return False
|
||||
|
||||
@@ -43,9 +43,9 @@ async def test_chatsession_serialization_deserialization():
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_chatsession_redis_storage(setup_test_user, test_user_id):
|
||||
async def test_chatsession_redis_storage():
|
||||
|
||||
s = ChatSession.new(user_id=test_user_id)
|
||||
s = ChatSession.new(user_id=None)
|
||||
s.messages = messages
|
||||
|
||||
s = await upsert_chat_session(s)
|
||||
@@ -59,61 +59,12 @@ async def test_chatsession_redis_storage(setup_test_user, test_user_id):
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_chatsession_redis_storage_user_id_mismatch(
|
||||
setup_test_user, test_user_id
|
||||
):
|
||||
async def test_chatsession_redis_storage_user_id_mismatch():
|
||||
|
||||
s = ChatSession.new(user_id=test_user_id)
|
||||
s = ChatSession.new(user_id="abc123")
|
||||
s.messages = messages
|
||||
s = await upsert_chat_session(s)
|
||||
|
||||
s2 = await get_chat_session(s.session_id, "different_user_id")
|
||||
s2 = await get_chat_session(s.session_id, None)
|
||||
|
||||
assert s2 is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_chatsession_db_storage(setup_test_user, test_user_id):
|
||||
"""Test that messages are correctly saved to and loaded from DB (not cache)."""
|
||||
from backend.data.redis_client import get_redis_async
|
||||
|
||||
# Create session with messages including assistant message
|
||||
s = ChatSession.new(user_id=test_user_id)
|
||||
s.messages = messages # Contains user, assistant, and tool messages
|
||||
assert s.session_id is not None, "Session id is not set"
|
||||
# Upsert to save to both cache and DB
|
||||
s = await upsert_chat_session(s)
|
||||
|
||||
# Clear the Redis cache to force DB load
|
||||
redis_key = f"chat:session:{s.session_id}"
|
||||
async_redis = await get_redis_async()
|
||||
await async_redis.delete(redis_key)
|
||||
|
||||
# Load from DB (cache was cleared)
|
||||
s2 = await get_chat_session(
|
||||
session_id=s.session_id,
|
||||
user_id=s.user_id,
|
||||
)
|
||||
|
||||
assert s2 is not None, "Session not found after loading from DB"
|
||||
assert len(s2.messages) == len(
|
||||
s.messages
|
||||
), f"Message count mismatch: expected {len(s.messages)}, got {len(s2.messages)}"
|
||||
|
||||
# Verify all roles are present
|
||||
roles = [m.role for m in s2.messages]
|
||||
assert "user" in roles, f"User message missing. Roles found: {roles}"
|
||||
assert "assistant" in roles, f"Assistant message missing. Roles found: {roles}"
|
||||
assert "tool" in roles, f"Tool message missing. Roles found: {roles}"
|
||||
|
||||
# Verify message content
|
||||
for orig, loaded in zip(s.messages, s2.messages):
|
||||
assert orig.role == loaded.role, f"Role mismatch: {orig.role} != {loaded.role}"
|
||||
assert (
|
||||
orig.content == loaded.content
|
||||
), f"Content mismatch for {orig.role}: {orig.content} != {loaded.content}"
|
||||
if orig.tool_calls:
|
||||
assert (
|
||||
loaded.tool_calls is not None
|
||||
), f"Tool calls missing for {orig.role} message"
|
||||
assert len(orig.tool_calls) == len(loaded.tool_calls)
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
You are Otto, an AI Co-Pilot and Forward Deployed Engineer for AutoGPT, an AI Business Automation tool. Your mission is to help users quickly find and set up AutoGPT agents to solve their business problems.
|
||||
|
||||
Here are the functions available to you:
|
||||
|
||||
<functions>
|
||||
1. **find_agent** - Search for agents that solve the user's problem
|
||||
2. **run_agent** - Run or schedule an agent (automatically handles setup)
|
||||
</functions>
|
||||
|
||||
## HOW run_agent WORKS
|
||||
|
||||
The `run_agent` tool automatically handles the entire setup flow:
|
||||
|
||||
1. **First call** (no inputs) → Returns available inputs so user can decide what values to use
|
||||
2. **Credentials check** → If missing, UI automatically prompts user to add them (you don't need to mention this)
|
||||
3. **Execution** → Runs when you provide `inputs` OR set `use_defaults=true`
|
||||
|
||||
Parameters:
|
||||
- `username_agent_slug` (required): Agent identifier like "creator/agent-name"
|
||||
- `inputs`: Object with input values for the agent
|
||||
- `use_defaults`: Set to `true` to run with default values (only after user confirms)
|
||||
- `schedule_name` + `cron`: For scheduled execution
|
||||
|
||||
## WORKFLOW
|
||||
|
||||
1. **find_agent** - Search for agents that solve the user's problem
|
||||
2. **run_agent** (first call, no inputs) - Get available inputs for the agent
|
||||
3. **Ask user** what values they want to use OR if they want to use defaults
|
||||
4. **run_agent** (second call) - Either with `inputs={...}` or `use_defaults=true`
|
||||
|
||||
## YOUR APPROACH
|
||||
|
||||
**Step 1: Understand the Problem**
|
||||
- Ask maximum 1-2 targeted questions
|
||||
- Focus on: What business problem are they solving?
|
||||
- Move quickly to searching for solutions
|
||||
|
||||
**Step 2: Find Agents**
|
||||
- Use `find_agent` immediately with relevant keywords
|
||||
- Suggest the best option from search results
|
||||
- Explain briefly how it solves their problem
|
||||
|
||||
**Step 3: Get Agent Inputs**
|
||||
- Call `run_agent(username_agent_slug="creator/agent-name")` without inputs
|
||||
- This returns the available inputs (required and optional)
|
||||
- Present these to the user and ask what values they want
|
||||
|
||||
**Step 4: Run with User's Choice**
|
||||
- If user provides values: `run_agent(username_agent_slug="...", inputs={...})`
|
||||
- If user says "use defaults": `run_agent(username_agent_slug="...", use_defaults=true)`
|
||||
- On success, share the agent link with the user
|
||||
|
||||
**For Scheduled Execution:**
|
||||
- Add `schedule_name` and `cron` parameters
|
||||
- Example: `run_agent(username_agent_slug="...", inputs={...}, schedule_name="Daily Report", cron="0 9 * * *")`
|
||||
|
||||
## FUNCTION CALL FORMAT
|
||||
|
||||
To call a function, use this exact format:
|
||||
`<function_call>function_name(parameter="value")</function_call>`
|
||||
|
||||
Examples:
|
||||
- `<function_call>find_agent(query="social media automation")</function_call>`
|
||||
- `<function_call>run_agent(username_agent_slug="creator/agent-name")</function_call>` (get inputs)
|
||||
- `<function_call>run_agent(username_agent_slug="creator/agent-name", inputs={"topic": "AI news"})</function_call>`
|
||||
- `<function_call>run_agent(username_agent_slug="creator/agent-name", use_defaults=true)</function_call>`
|
||||
|
||||
## KEY RULES
|
||||
|
||||
**What You DON'T Do:**
|
||||
- Don't help with login (frontend handles this)
|
||||
- Don't mention or explain credentials to the user (frontend handles this automatically)
|
||||
- Don't run agents without first showing available inputs to the user
|
||||
- Don't use `use_defaults=true` without user explicitly confirming
|
||||
- Don't write responses longer than 3 sentences
|
||||
|
||||
**What You DO:**
|
||||
- Always call run_agent first without inputs to see what's available
|
||||
- Ask user what values they want OR if they want to use defaults
|
||||
- Keep all responses to maximum 3 sentences
|
||||
- Include the agent link in your response after successful execution
|
||||
|
||||
**Error Handling:**
|
||||
- Authentication needed → "Please sign in via the interface"
|
||||
- Credentials missing → The UI handles this automatically. Focus on asking the user about input values instead.
|
||||
|
||||
## RESPONSE STRUCTURE
|
||||
|
||||
Before responding, wrap your analysis in <thinking> tags to systematically plan your approach:
|
||||
- Extract the key business problem or request from the user's message
|
||||
- Determine what function call (if any) you need to make next
|
||||
- Plan your response to stay under the 3-sentence maximum
|
||||
|
||||
Example interaction:
|
||||
```
|
||||
User: "Run the AI news agent for me"
|
||||
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news")</function_call>
|
||||
[Tool returns: Agent accepts inputs - Required: topic. Optional: num_articles (default: 5)]
|
||||
Otto: The AI News agent needs a topic. What topic would you like news about, or should I use the defaults?
|
||||
User: "Use defaults"
|
||||
Otto: <function_call>run_agent(username_agent_slug="autogpt/ai-news", use_defaults=true)</function_call>
|
||||
```
|
||||
|
||||
KEEP ANSWERS TO 3 SENTENCES
|
||||
@@ -1,10 +1,3 @@
|
||||
"""
|
||||
Response models for Vercel AI SDK UI Stream Protocol.
|
||||
|
||||
This module implements the AI SDK UI Stream Protocol (v1) for streaming chat responses.
|
||||
See: https://ai-sdk.dev/docs/ai-sdk-ui/stream-protocol
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
@@ -12,133 +5,97 @@ from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ResponseType(str, Enum):
|
||||
"""Types of streaming responses following AI SDK protocol."""
|
||||
"""Types of streaming responses."""
|
||||
|
||||
# Message lifecycle
|
||||
START = "start"
|
||||
FINISH = "finish"
|
||||
|
||||
# Text streaming
|
||||
TEXT_START = "text-start"
|
||||
TEXT_DELTA = "text-delta"
|
||||
TEXT_END = "text-end"
|
||||
|
||||
# Tool interaction
|
||||
TOOL_INPUT_START = "tool-input-start"
|
||||
TOOL_INPUT_AVAILABLE = "tool-input-available"
|
||||
TOOL_OUTPUT_AVAILABLE = "tool-output-available"
|
||||
|
||||
# Other
|
||||
TEXT_CHUNK = "text_chunk"
|
||||
TEXT_ENDED = "text_ended"
|
||||
TOOL_CALL = "tool_call"
|
||||
TOOL_CALL_START = "tool_call_start"
|
||||
TOOL_RESPONSE = "tool_response"
|
||||
ERROR = "error"
|
||||
USAGE = "usage"
|
||||
STREAM_END = "stream_end"
|
||||
|
||||
|
||||
class StreamBaseResponse(BaseModel):
|
||||
"""Base response model for all streaming responses."""
|
||||
|
||||
type: ResponseType
|
||||
timestamp: str | None = None
|
||||
|
||||
def to_sse(self) -> str:
|
||||
"""Convert to SSE format."""
|
||||
return f"data: {self.model_dump_json()}\n\n"
|
||||
|
||||
|
||||
# ========== Message Lifecycle ==========
|
||||
class StreamTextChunk(StreamBaseResponse):
|
||||
"""Streaming text content from the assistant."""
|
||||
|
||||
type: ResponseType = ResponseType.TEXT_CHUNK
|
||||
content: str = Field(..., description="Text content chunk")
|
||||
|
||||
|
||||
class StreamStart(StreamBaseResponse):
|
||||
"""Start of a new message."""
|
||||
|
||||
type: ResponseType = ResponseType.START
|
||||
messageId: str = Field(..., description="Unique message ID")
|
||||
|
||||
|
||||
class StreamFinish(StreamBaseResponse):
|
||||
"""End of message/stream."""
|
||||
|
||||
type: ResponseType = ResponseType.FINISH
|
||||
|
||||
|
||||
# ========== Text Streaming ==========
|
||||
|
||||
|
||||
class StreamTextStart(StreamBaseResponse):
|
||||
"""Start of a text block."""
|
||||
|
||||
type: ResponseType = ResponseType.TEXT_START
|
||||
id: str = Field(..., description="Text block ID")
|
||||
|
||||
|
||||
class StreamTextDelta(StreamBaseResponse):
|
||||
"""Streaming text content delta."""
|
||||
|
||||
type: ResponseType = ResponseType.TEXT_DELTA
|
||||
id: str = Field(..., description="Text block ID")
|
||||
delta: str = Field(..., description="Text content delta")
|
||||
|
||||
|
||||
class StreamTextEnd(StreamBaseResponse):
|
||||
"""End of a text block."""
|
||||
|
||||
type: ResponseType = ResponseType.TEXT_END
|
||||
id: str = Field(..., description="Text block ID")
|
||||
|
||||
|
||||
# ========== Tool Interaction ==========
|
||||
|
||||
|
||||
class StreamToolInputStart(StreamBaseResponse):
|
||||
class StreamToolCallStart(StreamBaseResponse):
|
||||
"""Tool call started notification."""
|
||||
|
||||
type: ResponseType = ResponseType.TOOL_INPUT_START
|
||||
toolCallId: str = Field(..., description="Unique tool call ID")
|
||||
toolName: str = Field(..., description="Name of the tool being called")
|
||||
type: ResponseType = ResponseType.TOOL_CALL_START
|
||||
tool_name: str = Field(..., description="Name of the tool that was executed")
|
||||
tool_id: str = Field(..., description="Unique tool call ID")
|
||||
|
||||
|
||||
class StreamToolInputAvailable(StreamBaseResponse):
|
||||
"""Tool input is ready for execution."""
|
||||
class StreamToolCall(StreamBaseResponse):
|
||||
"""Tool invocation notification."""
|
||||
|
||||
type: ResponseType = ResponseType.TOOL_INPUT_AVAILABLE
|
||||
toolCallId: str = Field(..., description="Unique tool call ID")
|
||||
toolName: str = Field(..., description="Name of the tool being called")
|
||||
input: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Tool input arguments"
|
||||
type: ResponseType = ResponseType.TOOL_CALL
|
||||
tool_id: str = Field(..., description="Unique tool call ID")
|
||||
tool_name: str = Field(..., description="Name of the tool being called")
|
||||
arguments: dict[str, Any] = Field(
|
||||
default_factory=dict, description="Tool arguments"
|
||||
)
|
||||
|
||||
|
||||
class StreamToolOutputAvailable(StreamBaseResponse):
|
||||
class StreamToolExecutionResult(StreamBaseResponse):
|
||||
"""Tool execution result."""
|
||||
|
||||
type: ResponseType = ResponseType.TOOL_OUTPUT_AVAILABLE
|
||||
toolCallId: str = Field(..., description="Tool call ID this responds to")
|
||||
output: str | dict[str, Any] = Field(..., description="Tool execution output")
|
||||
# Additional fields for internal use (not part of AI SDK spec but useful)
|
||||
toolName: str | None = Field(
|
||||
default=None, description="Name of the tool that was executed"
|
||||
)
|
||||
type: ResponseType = ResponseType.TOOL_RESPONSE
|
||||
tool_id: str = Field(..., description="Tool call ID this responds to")
|
||||
tool_name: str = Field(..., description="Name of the tool that was executed")
|
||||
result: str | dict[str, Any] = Field(..., description="Tool execution result")
|
||||
success: bool = Field(
|
||||
default=True, description="Whether the tool execution succeeded"
|
||||
)
|
||||
|
||||
|
||||
# ========== Other ==========
|
||||
|
||||
|
||||
class StreamUsage(StreamBaseResponse):
|
||||
"""Token usage statistics."""
|
||||
|
||||
type: ResponseType = ResponseType.USAGE
|
||||
promptTokens: int = Field(..., description="Number of prompt tokens")
|
||||
completionTokens: int = Field(..., description="Number of completion tokens")
|
||||
totalTokens: int = Field(..., description="Total number of tokens")
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
total_tokens: int
|
||||
|
||||
|
||||
class StreamError(StreamBaseResponse):
|
||||
"""Error response."""
|
||||
|
||||
type: ResponseType = ResponseType.ERROR
|
||||
errorText: str = Field(..., description="Error message text")
|
||||
message: str = Field(..., description="Error message")
|
||||
code: str | None = Field(default=None, description="Error code")
|
||||
details: dict[str, Any] | None = Field(
|
||||
default=None, description="Additional error details"
|
||||
)
|
||||
|
||||
|
||||
class StreamTextEnded(StreamBaseResponse):
|
||||
"""Text streaming completed marker."""
|
||||
|
||||
type: ResponseType = ResponseType.TEXT_ENDED
|
||||
|
||||
|
||||
class StreamEnd(StreamBaseResponse):
|
||||
"""End of stream marker."""
|
||||
|
||||
type: ResponseType = ResponseType.STREAM_END
|
||||
summary: dict[str, Any] | None = Field(
|
||||
default=None, description="Stream summary statistics"
|
||||
)
|
||||
|
||||
@@ -13,25 +13,12 @@ from backend.util.exceptions import NotFoundError
|
||||
|
||||
from . import service as chat_service
|
||||
from .config import ChatConfig
|
||||
from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions
|
||||
|
||||
config = ChatConfig()
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _validate_and_get_session(
|
||||
session_id: str,
|
||||
user_id: str | None,
|
||||
) -> ChatSession:
|
||||
"""Validate session exists and belongs to user."""
|
||||
session = await get_chat_session(session_id, user_id)
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found.")
|
||||
return session
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
tags=["chat"],
|
||||
)
|
||||
@@ -39,14 +26,6 @@ router = APIRouter(
|
||||
# ========== Request/Response Models ==========
|
||||
|
||||
|
||||
class StreamChatRequest(BaseModel):
|
||||
"""Request model for streaming chat with optional context."""
|
||||
|
||||
message: str
|
||||
is_user_message: bool = True
|
||||
context: dict[str, str] | None = None # {url: str, content: str}
|
||||
|
||||
|
||||
class CreateSessionResponse(BaseModel):
|
||||
"""Response model containing information on a newly created chat session."""
|
||||
|
||||
@@ -65,77 +44,22 @@ class SessionDetailResponse(BaseModel):
|
||||
messages: list[dict]
|
||||
|
||||
|
||||
class SessionSummaryResponse(BaseModel):
|
||||
"""Response model for a session summary (without messages)."""
|
||||
|
||||
id: str
|
||||
created_at: str
|
||||
updated_at: str
|
||||
title: str | None = None
|
||||
|
||||
|
||||
class ListSessionsResponse(BaseModel):
|
||||
"""Response model for listing chat sessions."""
|
||||
|
||||
sessions: list[SessionSummaryResponse]
|
||||
total: int
|
||||
|
||||
|
||||
# ========== Routes ==========
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions",
|
||||
dependencies=[Security(auth.requires_user)],
|
||||
)
|
||||
async def list_sessions(
|
||||
user_id: Annotated[str, Security(auth.get_user_id)],
|
||||
limit: int = Query(default=50, ge=1, le=100),
|
||||
offset: int = Query(default=0, ge=0),
|
||||
) -> ListSessionsResponse:
|
||||
"""
|
||||
List chat sessions for the authenticated user.
|
||||
|
||||
Returns a paginated list of chat sessions belonging to the current user,
|
||||
ordered by most recently updated.
|
||||
|
||||
Args:
|
||||
user_id: The authenticated user's ID.
|
||||
limit: Maximum number of sessions to return (1-100).
|
||||
offset: Number of sessions to skip for pagination.
|
||||
|
||||
Returns:
|
||||
ListSessionsResponse: List of session summaries and total count.
|
||||
"""
|
||||
sessions, total_count = await get_user_sessions(user_id, limit, offset)
|
||||
|
||||
return ListSessionsResponse(
|
||||
sessions=[
|
||||
SessionSummaryResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
updated_at=session.updated_at.isoformat(),
|
||||
title=session.title,
|
||||
)
|
||||
for session in sessions
|
||||
],
|
||||
total=total_count,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/sessions",
|
||||
)
|
||||
async def create_session(
|
||||
user_id: Annotated[str, Depends(auth.get_user_id)],
|
||||
user_id: Annotated[str | None, Depends(auth.get_user_id)],
|
||||
) -> CreateSessionResponse:
|
||||
"""
|
||||
Create a new chat session.
|
||||
|
||||
Initiates a new chat session for the authenticated user.
|
||||
Initiates a new chat session for either an authenticated or anonymous user.
|
||||
|
||||
Args:
|
||||
user_id: The authenticated user ID parsed from the JWT (required).
|
||||
user_id: The optional authenticated user ID parsed from the JWT. If missing, creates an anonymous session.
|
||||
|
||||
Returns:
|
||||
CreateSessionResponse: Details of the created session.
|
||||
@@ -143,15 +67,15 @@ async def create_session(
|
||||
"""
|
||||
logger.info(
|
||||
f"Creating session with user_id: "
|
||||
f"...{user_id[-8:] if len(user_id) > 8 else '<redacted>'}"
|
||||
f"...{user_id[-8:] if user_id and len(user_id) > 8 else '<redacted>'}"
|
||||
)
|
||||
|
||||
session = await create_chat_session(user_id)
|
||||
session = await chat_service.create_chat_session(user_id)
|
||||
|
||||
return CreateSessionResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
user_id=session.user_id,
|
||||
user_id=session.user_id or None,
|
||||
)
|
||||
|
||||
|
||||
@@ -175,88 +99,29 @@ async def get_session(
|
||||
SessionDetailResponse: Details for the requested session; raises NotFoundError if not found.
|
||||
|
||||
"""
|
||||
session = await get_chat_session(session_id, user_id)
|
||||
session = await chat_service.get_session(session_id, user_id)
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found")
|
||||
|
||||
messages = [message.model_dump() for message in session.messages]
|
||||
logger.info(
|
||||
f"Returning session {session_id}: "
|
||||
f"message_count={len(messages)}, "
|
||||
f"roles={[m.get('role') for m in messages]}"
|
||||
)
|
||||
|
||||
return SessionDetailResponse(
|
||||
id=session.session_id,
|
||||
created_at=session.started_at.isoformat(),
|
||||
updated_at=session.updated_at.isoformat(),
|
||||
user_id=session.user_id or None,
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/sessions/{session_id}/stream",
|
||||
)
|
||||
async def stream_chat_post(
|
||||
session_id: str,
|
||||
request: StreamChatRequest,
|
||||
user_id: str | None = Depends(auth.get_user_id),
|
||||
):
|
||||
"""
|
||||
Stream chat responses for a session (POST with context support).
|
||||
|
||||
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
|
||||
- Text fragments as they are generated
|
||||
- Tool call UI elements (if invoked)
|
||||
- Tool execution results
|
||||
|
||||
Args:
|
||||
session_id: The chat session identifier to associate with the streamed messages.
|
||||
request: Request body containing message, is_user_message, and optional context.
|
||||
user_id: Optional authenticated user ID.
|
||||
Returns:
|
||||
StreamingResponse: SSE-formatted response chunks.
|
||||
|
||||
"""
|
||||
session = await _validate_and_get_session(session_id, user_id)
|
||||
|
||||
async def event_generator() -> AsyncGenerator[str, None]:
|
||||
async for chunk in chat_service.stream_chat_completion(
|
||||
session_id,
|
||||
request.message,
|
||||
is_user_message=request.is_user_message,
|
||||
user_id=user_id,
|
||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
||||
context=request.context,
|
||||
):
|
||||
yield chunk.to_sse()
|
||||
# AI SDK protocol termination
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
||||
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
|
||||
},
|
||||
messages=[message.model_dump() for message in session.messages],
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/sessions/{session_id}/stream",
|
||||
)
|
||||
async def stream_chat_get(
|
||||
async def stream_chat(
|
||||
session_id: str,
|
||||
message: Annotated[str, Query(min_length=1, max_length=10000)],
|
||||
user_id: str | None = Depends(auth.get_user_id),
|
||||
is_user_message: bool = Query(default=True),
|
||||
):
|
||||
"""
|
||||
Stream chat responses for a session (GET - legacy endpoint).
|
||||
Stream chat responses for a session.
|
||||
|
||||
Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
|
||||
- Text fragments as they are generated
|
||||
@@ -272,7 +137,14 @@ async def stream_chat_get(
|
||||
StreamingResponse: SSE-formatted response chunks.
|
||||
|
||||
"""
|
||||
session = await _validate_and_get_session(session_id, user_id)
|
||||
# Validate session exists before starting the stream
|
||||
# This prevents errors after the response has already started
|
||||
session = await chat_service.get_session(session_id, user_id)
|
||||
|
||||
if not session:
|
||||
raise NotFoundError(f"Session {session_id} not found. ")
|
||||
if session.user_id is None and user_id is not None:
|
||||
session = await chat_service.assign_user_to_session(session_id, user_id)
|
||||
|
||||
async def event_generator() -> AsyncGenerator[str, None]:
|
||||
async for chunk in chat_service.stream_chat_completion(
|
||||
@@ -283,8 +155,6 @@ async def stream_chat_get(
|
||||
session=session, # Pass pre-fetched session to avoid double-fetch
|
||||
):
|
||||
yield chunk.to_sse()
|
||||
# AI SDK protocol termination
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
@@ -293,7 +163,6 @@ async def stream_chat_get(
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no", # Disable nginx buffering
|
||||
"x-vercel-ai-ui-message-stream": "v1", # AI SDK protocol header
|
||||
},
|
||||
)
|
||||
|
||||
@@ -332,28 +201,16 @@ async def health_check() -> dict:
|
||||
"""
|
||||
Health check endpoint for the chat service.
|
||||
|
||||
Performs a full cycle test of session creation and retrieval. Should always return healthy
|
||||
Performs a full cycle test of session creation, assignment, and retrieval. Should always return healthy
|
||||
if the service and data layer are operational.
|
||||
|
||||
Returns:
|
||||
dict: A status dictionary indicating health, service name, and API version.
|
||||
|
||||
"""
|
||||
from backend.data.user import get_or_create_user
|
||||
|
||||
# Ensure health check user exists (required for FK constraint)
|
||||
health_check_user_id = "health-check-user"
|
||||
await get_or_create_user(
|
||||
{
|
||||
"sub": health_check_user_id,
|
||||
"email": "health-check@system.local",
|
||||
"user_metadata": {"name": "Health Check User"},
|
||||
}
|
||||
)
|
||||
|
||||
# Create and retrieve session to verify full data layer
|
||||
session = await create_chat_session(health_check_user_id)
|
||||
await get_chat_session(session.session_id, health_check_user_id)
|
||||
session = await chat_service.create_chat_session(None)
|
||||
await chat_service.assign_user_to_session(session.session_id, "test_user")
|
||||
await chat_service.get_session(session.session_id, "test_user")
|
||||
|
||||
return {
|
||||
"status": "healthy",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,19 +4,18 @@ from os import getenv
|
||||
import pytest
|
||||
|
||||
from . import service as chat_service
|
||||
from .model import create_chat_session, get_chat_session, upsert_chat_session
|
||||
from .response_model import (
|
||||
StreamEnd,
|
||||
StreamError,
|
||||
StreamFinish,
|
||||
StreamTextDelta,
|
||||
StreamToolOutputAvailable,
|
||||
StreamTextChunk,
|
||||
StreamToolExecutionResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_stream_chat_completion(setup_test_user, test_user_id):
|
||||
async def test_stream_chat_completion():
|
||||
"""
|
||||
Test the stream_chat_completion function.
|
||||
"""
|
||||
@@ -24,7 +23,7 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
|
||||
if not api_key:
|
||||
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
|
||||
|
||||
session = await create_chat_session(test_user_id)
|
||||
session = await chat_service.create_chat_session()
|
||||
|
||||
has_errors = False
|
||||
has_ended = False
|
||||
@@ -35,9 +34,9 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
|
||||
logger.info(chunk)
|
||||
if isinstance(chunk, StreamError):
|
||||
has_errors = True
|
||||
if isinstance(chunk, StreamTextDelta):
|
||||
assistant_message += chunk.delta
|
||||
if isinstance(chunk, StreamFinish):
|
||||
if isinstance(chunk, StreamTextChunk):
|
||||
assistant_message += chunk.content
|
||||
if isinstance(chunk, StreamEnd):
|
||||
has_ended = True
|
||||
|
||||
assert has_ended, "Chat completion did not end"
|
||||
@@ -46,7 +45,7 @@ async def test_stream_chat_completion(setup_test_user, test_user_id):
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user_id):
|
||||
async def test_stream_chat_completion_with_tool_calls():
|
||||
"""
|
||||
Test the stream_chat_completion function.
|
||||
"""
|
||||
@@ -54,8 +53,8 @@ async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user
|
||||
if not api_key:
|
||||
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
|
||||
|
||||
session = await create_chat_session(test_user_id)
|
||||
session = await upsert_chat_session(session)
|
||||
session = await chat_service.create_chat_session()
|
||||
session = await chat_service.upsert_chat_session(session)
|
||||
|
||||
has_errors = False
|
||||
has_ended = False
|
||||
@@ -69,14 +68,14 @@ async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user
|
||||
if isinstance(chunk, StreamError):
|
||||
has_errors = True
|
||||
|
||||
if isinstance(chunk, StreamFinish):
|
||||
if isinstance(chunk, StreamEnd):
|
||||
has_ended = True
|
||||
if isinstance(chunk, StreamToolOutputAvailable):
|
||||
if isinstance(chunk, StreamToolExecutionResult):
|
||||
had_tool_calls = True
|
||||
|
||||
assert has_ended, "Chat completion did not end"
|
||||
assert not has_errors, "Error occurred while streaming chat completion"
|
||||
assert had_tool_calls, "Tool calls did not occur"
|
||||
session = await get_chat_session(session.session_id)
|
||||
session = await chat_service.get_session(session.session_id)
|
||||
assert session, "Session not found"
|
||||
assert session.usage, "Usage is empty"
|
||||
|
||||
@@ -4,32 +4,21 @@ from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .add_understanding import AddUnderstandingTool
|
||||
from .agent_output import AgentOutputTool
|
||||
from .base import BaseTool
|
||||
from .find_agent import FindAgentTool
|
||||
from .find_library_agent import FindLibraryAgentTool
|
||||
from .run_agent import RunAgentTool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from backend.api.features.chat.response_model import StreamToolOutputAvailable
|
||||
from backend.api.features.chat.response_model import StreamToolExecutionResult
|
||||
|
||||
# Single source of truth for all tools
|
||||
TOOL_REGISTRY: dict[str, BaseTool] = {
|
||||
"add_understanding": AddUnderstandingTool(),
|
||||
"find_agent": FindAgentTool(),
|
||||
"find_library_agent": FindLibraryAgentTool(),
|
||||
"run_agent": RunAgentTool(),
|
||||
"agent_output": AgentOutputTool(),
|
||||
}
|
||||
# Initialize tool instances
|
||||
find_agent_tool = FindAgentTool()
|
||||
run_agent_tool = RunAgentTool()
|
||||
|
||||
# Export individual tool instances for backwards compatibility
|
||||
find_agent_tool = TOOL_REGISTRY["find_agent"]
|
||||
run_agent_tool = TOOL_REGISTRY["run_agent"]
|
||||
|
||||
# Generated from registry for OpenAI API
|
||||
# Export tools as OpenAI format
|
||||
tools: list[ChatCompletionToolParam] = [
|
||||
tool.as_openai_tool() for tool in TOOL_REGISTRY.values()
|
||||
find_agent_tool.as_openai_tool(),
|
||||
run_agent_tool.as_openai_tool(),
|
||||
]
|
||||
|
||||
|
||||
@@ -39,9 +28,14 @@ async def execute_tool(
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
tool_call_id: str,
|
||||
) -> "StreamToolOutputAvailable":
|
||||
"""Execute a tool by name."""
|
||||
tool = TOOL_REGISTRY.get(tool_name)
|
||||
if not tool:
|
||||
) -> "StreamToolExecutionResult":
|
||||
|
||||
tool_map: dict[str, BaseTool] = {
|
||||
"find_agent": find_agent_tool,
|
||||
"run_agent": run_agent_tool,
|
||||
}
|
||||
if tool_name not in tool_map:
|
||||
raise ValueError(f"Tool {tool_name} not found")
|
||||
return await tool.execute(user_id, session, tool_call_id, **parameters)
|
||||
return await tool_map[tool_name].execute(
|
||||
user_id, session, tool_call_id, **parameters
|
||||
)
|
||||
|
||||
@@ -3,7 +3,6 @@ from datetime import UTC, datetime
|
||||
from os import getenv
|
||||
|
||||
import pytest
|
||||
from prisma.types import ProfileCreateInput
|
||||
from pydantic import SecretStr
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
@@ -18,7 +17,7 @@ from backend.data.user import get_or_create_user
|
||||
from backend.integrations.credentials_store import IntegrationCredentialsStore
|
||||
|
||||
|
||||
def make_session(user_id: str):
|
||||
def make_session(user_id: str | None = None):
|
||||
return ChatSession(
|
||||
session_id=str(uuid.uuid4()),
|
||||
user_id=user_id,
|
||||
@@ -50,13 +49,13 @@ async def setup_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
)
|
||||
|
||||
# 2. Create a test graph with agent input -> agent output
|
||||
@@ -173,13 +172,13 @@ async def setup_llm_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile for LLM tests",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile for LLM tests",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
)
|
||||
|
||||
# 2. Create test OpenAI credentials for the user
|
||||
@@ -333,13 +332,13 @@ async def setup_firecrawl_test_data():
|
||||
# 1b. Create a profile with username for the user (required for store agent lookup)
|
||||
username = user.email.split("@")[0]
|
||||
await prisma.profile.create(
|
||||
data=ProfileCreateInput(
|
||||
userId=user.id,
|
||||
username=username,
|
||||
name=f"Test User {username}",
|
||||
description="Test user profile for Firecrawl tests",
|
||||
links=[], # Required field - empty array for test profiles
|
||||
)
|
||||
data={
|
||||
"userId": user.id,
|
||||
"username": username,
|
||||
"name": f"Test User {username}",
|
||||
"description": "Test user profile for Firecrawl tests",
|
||||
"links": [], # Required field - empty array for test profiles
|
||||
}
|
||||
)
|
||||
|
||||
# NOTE: We deliberately do NOT create Firecrawl credentials for this user
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
"""Tool for capturing user business understanding incrementally."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.data.understanding import (
|
||||
BusinessUnderstandingInput,
|
||||
upsert_business_understanding,
|
||||
)
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import ErrorResponse, ToolResponseBase, UnderstandingUpdatedResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AddUnderstandingTool(BaseTool):
|
||||
"""Tool for capturing user's business understanding incrementally."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "add_understanding"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Capture and store information about the user's business context,
|
||||
workflows, pain points, and automation goals. Call this tool whenever the user
|
||||
shares information about their business. Each call incrementally adds to the
|
||||
existing understanding - you don't need to provide all fields at once.
|
||||
|
||||
Use this to build a comprehensive profile that helps recommend better agents
|
||||
and automations for the user's specific needs."""
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
# Auto-generate from Pydantic model schema
|
||||
schema = BusinessUnderstandingInput.model_json_schema()
|
||||
properties = {}
|
||||
for field_name, field_schema in schema.get("properties", {}).items():
|
||||
prop: dict[str, Any] = {"description": field_schema.get("description", "")}
|
||||
# Handle anyOf for Optional types
|
||||
if "anyOf" in field_schema:
|
||||
for option in field_schema["anyOf"]:
|
||||
if option.get("type") != "null":
|
||||
prop["type"] = option.get("type", "string")
|
||||
if "items" in option:
|
||||
prop["items"] = option["items"]
|
||||
break
|
||||
else:
|
||||
prop["type"] = field_schema.get("type", "string")
|
||||
if "items" in field_schema:
|
||||
prop["items"] = field_schema["items"]
|
||||
properties[field_name] = prop
|
||||
return {"type": "object", "properties": properties, "required": []}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
"""Requires authentication to store user-specific data."""
|
||||
return True
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
"""
|
||||
Capture and store business understanding incrementally.
|
||||
|
||||
Each call merges new data with existing understanding:
|
||||
- String fields are overwritten if provided
|
||||
- List fields are appended (with deduplication)
|
||||
"""
|
||||
session_id = session.session_id
|
||||
|
||||
if not user_id:
|
||||
return ErrorResponse(
|
||||
message="Authentication required to save business understanding.",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Check if any data was provided
|
||||
if not any(v is not None for v in kwargs.values()):
|
||||
return ErrorResponse(
|
||||
message="Please provide at least one field to update.",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Build input model from kwargs (only include fields defined in the model)
|
||||
valid_fields = set(BusinessUnderstandingInput.model_fields.keys())
|
||||
input_data = BusinessUnderstandingInput(
|
||||
**{k: v for k, v in kwargs.items() if k in valid_fields}
|
||||
)
|
||||
|
||||
# Track which fields were updated
|
||||
updated_fields = [
|
||||
k for k, v in kwargs.items() if k in valid_fields and v is not None
|
||||
]
|
||||
|
||||
# Upsert with merge
|
||||
understanding = await upsert_business_understanding(user_id, input_data)
|
||||
|
||||
# Build current understanding summary (filter out empty values)
|
||||
current_understanding = {
|
||||
k: v
|
||||
for k, v in understanding.model_dump(
|
||||
exclude={"id", "user_id", "created_at", "updated_at"}
|
||||
).items()
|
||||
if v is not None and v != [] and v != ""
|
||||
}
|
||||
|
||||
return UnderstandingUpdatedResponse(
|
||||
message=f"Updated understanding with: {', '.join(updated_fields)}. "
|
||||
"I now have a better picture of your business context.",
|
||||
session_id=session_id,
|
||||
updated_fields=updated_fields,
|
||||
current_understanding=current_understanding,
|
||||
)
|
||||
@@ -1,446 +0,0 @@
|
||||
"""Tool for retrieving agent execution outputs from user's library."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.api.features.library.model import LibraryAgent
|
||||
from backend.data import execution as execution_db
|
||||
from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import (
|
||||
AgentOutputResponse,
|
||||
ErrorResponse,
|
||||
ExecutionOutputInfo,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
from .utils import fetch_graph_from_store_slug
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AgentOutputInput(BaseModel):
|
||||
"""Input parameters for the agent_output tool."""
|
||||
|
||||
agent_name: str = ""
|
||||
library_agent_id: str = ""
|
||||
store_slug: str = ""
|
||||
execution_id: str = ""
|
||||
run_time: str = "latest"
|
||||
|
||||
@field_validator(
|
||||
"agent_name",
|
||||
"library_agent_id",
|
||||
"store_slug",
|
||||
"execution_id",
|
||||
"run_time",
|
||||
mode="before",
|
||||
)
|
||||
@classmethod
|
||||
def strip_strings(cls, v: Any) -> Any:
|
||||
"""Strip whitespace from string fields."""
|
||||
return v.strip() if isinstance(v, str) else v
|
||||
|
||||
|
||||
def parse_time_expression(
|
||||
time_expr: str | None,
|
||||
) -> tuple[datetime | None, datetime | None]:
|
||||
"""
|
||||
Parse time expression into datetime range (start, end).
|
||||
|
||||
Supports: "latest", "yesterday", "today", "last week", "last 7 days",
|
||||
"last month", "last 30 days", ISO date "YYYY-MM-DD", ISO datetime.
|
||||
"""
|
||||
if not time_expr or time_expr.lower() == "latest":
|
||||
return None, None
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
expr = time_expr.lower().strip()
|
||||
|
||||
# Relative time expressions lookup
|
||||
relative_times: dict[str, tuple[datetime, datetime]] = {
|
||||
"yesterday": (today_start - timedelta(days=1), today_start),
|
||||
"today": (today_start, now),
|
||||
"last week": (now - timedelta(days=7), now),
|
||||
"last 7 days": (now - timedelta(days=7), now),
|
||||
"last month": (now - timedelta(days=30), now),
|
||||
"last 30 days": (now - timedelta(days=30), now),
|
||||
}
|
||||
if expr in relative_times:
|
||||
return relative_times[expr]
|
||||
|
||||
# Try ISO date format (YYYY-MM-DD)
|
||||
date_match = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", expr)
|
||||
if date_match:
|
||||
try:
|
||||
year, month, day = map(int, date_match.groups())
|
||||
start = datetime(year, month, day, 0, 0, 0, tzinfo=timezone.utc)
|
||||
return start, start + timedelta(days=1)
|
||||
except ValueError:
|
||||
# Invalid date components (e.g., month=13, day=32)
|
||||
pass
|
||||
|
||||
# Try ISO datetime
|
||||
try:
|
||||
parsed = datetime.fromisoformat(expr.replace("Z", "+00:00"))
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed - timedelta(hours=1), parsed + timedelta(hours=1)
|
||||
except ValueError:
|
||||
return None, None
|
||||
|
||||
|
||||
class AgentOutputTool(BaseTool):
|
||||
"""Tool for retrieving execution outputs from user's library agents."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "agent_output"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Retrieve execution outputs from agents in the user's library.
|
||||
|
||||
Identify the agent using one of:
|
||||
- agent_name: Fuzzy search in user's library
|
||||
- library_agent_id: Exact library agent ID
|
||||
- store_slug: Marketplace format 'username/agent-name'
|
||||
|
||||
Select which run to retrieve using:
|
||||
- execution_id: Specific execution ID
|
||||
- run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
|
||||
"""
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_name": {
|
||||
"type": "string",
|
||||
"description": "Agent name to search for in user's library (fuzzy match)",
|
||||
},
|
||||
"library_agent_id": {
|
||||
"type": "string",
|
||||
"description": "Exact library agent ID",
|
||||
},
|
||||
"store_slug": {
|
||||
"type": "string",
|
||||
"description": "Marketplace identifier: 'username/agent-slug'",
|
||||
},
|
||||
"execution_id": {
|
||||
"type": "string",
|
||||
"description": "Specific execution ID to retrieve",
|
||||
},
|
||||
"run_time": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
async def _resolve_agent(
|
||||
self,
|
||||
user_id: str,
|
||||
agent_name: str | None,
|
||||
library_agent_id: str | None,
|
||||
store_slug: str | None,
|
||||
) -> tuple[LibraryAgent | None, str | None]:
|
||||
"""
|
||||
Resolve agent from provided identifiers.
|
||||
Returns (library_agent, error_message).
|
||||
"""
|
||||
# Priority 1: Exact library agent ID
|
||||
if library_agent_id:
|
||||
try:
|
||||
agent = await library_db.get_library_agent(library_agent_id, user_id)
|
||||
return agent, None
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get library agent by ID: {e}")
|
||||
return None, f"Library agent '{library_agent_id}' not found"
|
||||
|
||||
# Priority 2: Store slug (username/agent-name)
|
||||
if store_slug and "/" in store_slug:
|
||||
username, agent_slug = store_slug.split("/", 1)
|
||||
graph, _ = await fetch_graph_from_store_slug(username, agent_slug)
|
||||
if not graph:
|
||||
return None, f"Agent '{store_slug}' not found in marketplace"
|
||||
|
||||
# Find in user's library by graph_id
|
||||
agent = await library_db.get_library_agent_by_graph_id(user_id, graph.id)
|
||||
if not agent:
|
||||
return (
|
||||
None,
|
||||
f"Agent '{store_slug}' is not in your library. "
|
||||
"Add it first to see outputs.",
|
||||
)
|
||||
return agent, None
|
||||
|
||||
# Priority 3: Fuzzy name search in library
|
||||
if agent_name:
|
||||
try:
|
||||
response = await library_db.list_library_agents(
|
||||
user_id=user_id,
|
||||
search_term=agent_name,
|
||||
page_size=5,
|
||||
)
|
||||
if not response.agents:
|
||||
return (
|
||||
None,
|
||||
f"No agents matching '{agent_name}' found in your library",
|
||||
)
|
||||
|
||||
# Return best match (first result from search)
|
||||
return response.agents[0], None
|
||||
except Exception as e:
|
||||
logger.error(f"Error searching library agents: {e}")
|
||||
return None, f"Error searching for agent: {e}"
|
||||
|
||||
return (
|
||||
None,
|
||||
"Please specify an agent name, library_agent_id, or store_slug",
|
||||
)
|
||||
|
||||
async def _get_execution(
|
||||
self,
|
||||
user_id: str,
|
||||
graph_id: str,
|
||||
execution_id: str | None,
|
||||
time_start: datetime | None,
|
||||
time_end: datetime | None,
|
||||
) -> tuple[GraphExecution | None, list[GraphExecutionMeta], str | None]:
|
||||
"""
|
||||
Fetch execution(s) based on filters.
|
||||
Returns (single_execution, available_executions_meta, error_message).
|
||||
"""
|
||||
# If specific execution_id provided, fetch it directly
|
||||
if execution_id:
|
||||
execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=execution_id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
if not execution:
|
||||
return None, [], f"Execution '{execution_id}' not found"
|
||||
return execution, [], None
|
||||
|
||||
# Get completed executions with time filters
|
||||
executions = await execution_db.get_graph_executions(
|
||||
graph_id=graph_id,
|
||||
user_id=user_id,
|
||||
statuses=[ExecutionStatus.COMPLETED],
|
||||
created_time_gte=time_start,
|
||||
created_time_lte=time_end,
|
||||
limit=10,
|
||||
)
|
||||
|
||||
if not executions:
|
||||
return None, [], None # No error, just no executions
|
||||
|
||||
# If only one execution, fetch full details
|
||||
if len(executions) == 1:
|
||||
full_execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=executions[0].id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
return full_execution, [], None
|
||||
|
||||
# Multiple executions - return latest with full details, plus list of available
|
||||
full_execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=executions[0].id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
return full_execution, executions, None
|
||||
|
||||
def _build_response(
|
||||
self,
|
||||
agent: LibraryAgent,
|
||||
execution: GraphExecution | None,
|
||||
available_executions: list[GraphExecutionMeta],
|
||||
session_id: str | None,
|
||||
) -> AgentOutputResponse:
|
||||
"""Build the response based on execution data."""
|
||||
library_agent_link = f"/library/agents/{agent.id}"
|
||||
|
||||
if not execution:
|
||||
return AgentOutputResponse(
|
||||
message=f"No completed executions found for agent '{agent.name}'",
|
||||
session_id=session_id,
|
||||
agent_name=agent.name,
|
||||
agent_id=agent.graph_id,
|
||||
library_agent_id=agent.id,
|
||||
library_agent_link=library_agent_link,
|
||||
total_executions=0,
|
||||
)
|
||||
|
||||
execution_info = ExecutionOutputInfo(
|
||||
execution_id=execution.id,
|
||||
status=execution.status.value,
|
||||
started_at=execution.started_at,
|
||||
ended_at=execution.ended_at,
|
||||
outputs=dict(execution.outputs),
|
||||
inputs_summary=execution.inputs if execution.inputs else None,
|
||||
)
|
||||
|
||||
available_list = None
|
||||
if len(available_executions) > 1:
|
||||
available_list = [
|
||||
{
|
||||
"id": e.id,
|
||||
"status": e.status.value,
|
||||
"started_at": e.started_at.isoformat() if e.started_at else None,
|
||||
}
|
||||
for e in available_executions[:5]
|
||||
]
|
||||
|
||||
message = f"Found execution outputs for agent '{agent.name}'"
|
||||
if len(available_executions) > 1:
|
||||
message += (
|
||||
f". Showing latest of {len(available_executions)} matching executions."
|
||||
)
|
||||
|
||||
return AgentOutputResponse(
|
||||
message=message,
|
||||
session_id=session_id,
|
||||
agent_name=agent.name,
|
||||
agent_id=agent.graph_id,
|
||||
library_agent_id=agent.id,
|
||||
library_agent_link=library_agent_link,
|
||||
execution=execution_info,
|
||||
available_executions=available_list,
|
||||
total_executions=len(available_executions) if available_executions else 1,
|
||||
)
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
"""Execute the agent_output tool."""
|
||||
session_id = session.session_id
|
||||
|
||||
# Parse and validate input
|
||||
try:
|
||||
input_data = AgentOutputInput(**kwargs)
|
||||
except Exception as e:
|
||||
logger.error(f"Invalid input: {e}")
|
||||
return ErrorResponse(
|
||||
message="Invalid input parameters",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Ensure user_id is present (should be guaranteed by requires_auth)
|
||||
if not user_id:
|
||||
return ErrorResponse(
|
||||
message="User authentication required",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Check if at least one identifier is provided
|
||||
if not any(
|
||||
[
|
||||
input_data.agent_name,
|
||||
input_data.library_agent_id,
|
||||
input_data.store_slug,
|
||||
input_data.execution_id,
|
||||
]
|
||||
):
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
"Please specify at least one of: agent_name, "
|
||||
"library_agent_id, store_slug, or execution_id"
|
||||
),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# If only execution_id provided, we need to find the agent differently
|
||||
if (
|
||||
input_data.execution_id
|
||||
and not input_data.agent_name
|
||||
and not input_data.library_agent_id
|
||||
and not input_data.store_slug
|
||||
):
|
||||
# Fetch execution directly to get graph_id
|
||||
execution = await execution_db.get_graph_execution(
|
||||
user_id=user_id,
|
||||
execution_id=input_data.execution_id,
|
||||
include_node_executions=False,
|
||||
)
|
||||
if not execution:
|
||||
return ErrorResponse(
|
||||
message=f"Execution '{input_data.execution_id}' not found",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Find library agent by graph_id
|
||||
agent = await library_db.get_library_agent_by_graph_id(
|
||||
user_id, execution.graph_id
|
||||
)
|
||||
if not agent:
|
||||
return NoResultsResponse(
|
||||
message=(
|
||||
f"Execution found but agent not in your library. "
|
||||
f"Graph ID: {execution.graph_id}"
|
||||
),
|
||||
session_id=session_id,
|
||||
suggestions=["Add the agent to your library to see more details"],
|
||||
)
|
||||
|
||||
return self._build_response(agent, execution, [], session_id)
|
||||
|
||||
# Resolve agent from identifiers
|
||||
agent, error = await self._resolve_agent(
|
||||
user_id=user_id,
|
||||
agent_name=input_data.agent_name or None,
|
||||
library_agent_id=input_data.library_agent_id or None,
|
||||
store_slug=input_data.store_slug or None,
|
||||
)
|
||||
|
||||
if error or not agent:
|
||||
return NoResultsResponse(
|
||||
message=error or "Agent not found",
|
||||
session_id=session_id,
|
||||
suggestions=[
|
||||
"Check the agent name or ID",
|
||||
"Make sure the agent is in your library",
|
||||
],
|
||||
)
|
||||
|
||||
# Parse time expression
|
||||
time_start, time_end = parse_time_expression(input_data.run_time)
|
||||
|
||||
# Fetch execution(s)
|
||||
execution, available_executions, exec_error = await self._get_execution(
|
||||
user_id=user_id,
|
||||
graph_id=agent.graph_id,
|
||||
execution_id=input_data.execution_id or None,
|
||||
time_start=time_start,
|
||||
time_end=time_end,
|
||||
)
|
||||
|
||||
if exec_error:
|
||||
return ErrorResponse(
|
||||
message=exec_error,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
return self._build_response(agent, execution, available_executions, session_id)
|
||||
@@ -1,151 +0,0 @@
|
||||
"""Shared agent search functionality for find_agent and find_library_agent tools."""
|
||||
|
||||
import logging
|
||||
from typing import Literal
|
||||
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.api.features.store import db as store_db
|
||||
from backend.util.exceptions import DatabaseError, NotFoundError
|
||||
|
||||
from .models import (
|
||||
AgentInfo,
|
||||
AgentsFoundResponse,
|
||||
ErrorResponse,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SearchSource = Literal["marketplace", "library"]
|
||||
|
||||
|
||||
async def search_agents(
|
||||
query: str,
|
||||
source: SearchSource,
|
||||
session_id: str | None,
|
||||
user_id: str | None = None,
|
||||
) -> ToolResponseBase:
|
||||
"""
|
||||
Search for agents in marketplace or user library.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
source: "marketplace" or "library"
|
||||
session_id: Chat session ID
|
||||
user_id: User ID (required for library search)
|
||||
|
||||
Returns:
|
||||
AgentsFoundResponse, NoResultsResponse, or ErrorResponse
|
||||
"""
|
||||
if not query:
|
||||
return ErrorResponse(
|
||||
message="Please provide a search query", session_id=session_id
|
||||
)
|
||||
|
||||
if source == "library" and not user_id:
|
||||
return ErrorResponse(
|
||||
message="User authentication required to search library",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
agents: list[AgentInfo] = []
|
||||
try:
|
||||
if source == "marketplace":
|
||||
logger.info(f"Searching marketplace for: {query}")
|
||||
results = await store_db.get_store_agents(search_query=query, page_size=5)
|
||||
for agent in results.agents:
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=f"{agent.creator}/{agent.slug}",
|
||||
name=agent.agent_name,
|
||||
description=agent.description or "",
|
||||
source="marketplace",
|
||||
in_library=False,
|
||||
creator=agent.creator,
|
||||
category="general",
|
||||
rating=agent.rating,
|
||||
runs=agent.runs,
|
||||
is_featured=False,
|
||||
)
|
||||
)
|
||||
else: # library
|
||||
logger.info(f"Searching user library for: {query}")
|
||||
results = await library_db.list_library_agents(
|
||||
user_id=user_id, # type: ignore[arg-type]
|
||||
search_term=query,
|
||||
page_size=10,
|
||||
)
|
||||
for agent in results.agents:
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=agent.id,
|
||||
name=agent.name,
|
||||
description=agent.description or "",
|
||||
source="library",
|
||||
in_library=True,
|
||||
creator=agent.creator_name,
|
||||
status=agent.status.value,
|
||||
can_access_graph=agent.can_access_graph,
|
||||
has_external_trigger=agent.has_external_trigger,
|
||||
new_output=agent.new_output,
|
||||
graph_id=agent.graph_id,
|
||||
)
|
||||
)
|
||||
logger.info(f"Found {len(agents)} agents in {source}")
|
||||
except NotFoundError:
|
||||
pass
|
||||
except DatabaseError as e:
|
||||
logger.error(f"Error searching {source}: {e}", exc_info=True)
|
||||
return ErrorResponse(
|
||||
message=f"Failed to search {source}. Please try again.",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
if not agents:
|
||||
suggestions = (
|
||||
[
|
||||
"Try more general terms",
|
||||
"Browse categories in the marketplace",
|
||||
"Check spelling",
|
||||
]
|
||||
if source == "marketplace"
|
||||
else [
|
||||
"Try different keywords",
|
||||
"Use find_agent to search the marketplace",
|
||||
"Check your library at /library",
|
||||
]
|
||||
)
|
||||
no_results_msg = (
|
||||
f"No agents found matching '{query}'. Try different keywords or browse the marketplace."
|
||||
if source == "marketplace"
|
||||
else f"No agents matching '{query}' found in your library."
|
||||
)
|
||||
return NoResultsResponse(
|
||||
message=no_results_msg, session_id=session_id, suggestions=suggestions
|
||||
)
|
||||
|
||||
title = f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} "
|
||||
title += (
|
||||
f"for '{query}'"
|
||||
if source == "marketplace"
|
||||
else f"in your library for '{query}'"
|
||||
)
|
||||
|
||||
message = (
|
||||
"Now you have found some options for the user to choose from. "
|
||||
"You can add a link to a recommended agent at: /marketplace/agent/agent_id "
|
||||
"Please ask the user if they would like to use any of these agents."
|
||||
if source == "marketplace"
|
||||
else "Found agents in the user's library. You can provide a link to view an agent at: "
|
||||
"/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute."
|
||||
)
|
||||
|
||||
return AgentsFoundResponse(
|
||||
message=message,
|
||||
title=title,
|
||||
agents=agents,
|
||||
count=len(agents),
|
||||
session_id=session_id,
|
||||
)
|
||||
@@ -6,7 +6,7 @@ from typing import Any
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.chat.response_model import StreamToolOutputAvailable
|
||||
from backend.api.features.chat.response_model import StreamToolExecutionResult
|
||||
|
||||
from .models import ErrorResponse, NeedLoginResponse, ToolResponseBase
|
||||
|
||||
@@ -53,7 +53,7 @@ class BaseTool:
|
||||
session: ChatSession,
|
||||
tool_call_id: str,
|
||||
**kwargs,
|
||||
) -> StreamToolOutputAvailable:
|
||||
) -> StreamToolExecutionResult:
|
||||
"""Execute the tool with authentication check.
|
||||
|
||||
Args:
|
||||
@@ -69,10 +69,10 @@ class BaseTool:
|
||||
logger.error(
|
||||
f"Attempted tool call for {self.name} but user not authenticated"
|
||||
)
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=NeedLoginResponse(
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=NeedLoginResponse(
|
||||
message=f"Please sign in to use {self.name}",
|
||||
session_id=session.session_id,
|
||||
).model_dump_json(),
|
||||
@@ -81,17 +81,17 @@ class BaseTool:
|
||||
|
||||
try:
|
||||
result = await self._execute(user_id, session, **kwargs)
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=result.model_dump_json(),
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=result.model_dump_json(),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in {self.name}: {e}", exc_info=True)
|
||||
return StreamToolOutputAvailable(
|
||||
toolCallId=tool_call_id,
|
||||
toolName=self.name,
|
||||
output=ErrorResponse(
|
||||
return StreamToolExecutionResult(
|
||||
tool_id=tool_call_id,
|
||||
tool_name=self.name,
|
||||
result=ErrorResponse(
|
||||
message=f"An error occurred while executing {self.name}",
|
||||
error=str(e),
|
||||
session_id=session.session_id,
|
||||
|
||||
@@ -1,16 +1,26 @@
|
||||
"""Tool for discovering agents from marketplace."""
|
||||
"""Tool for discovering agents from marketplace and user library."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.store import db as store_db
|
||||
from backend.util.exceptions import DatabaseError, NotFoundError
|
||||
|
||||
from .agent_search import search_agents
|
||||
from .base import BaseTool
|
||||
from .models import ToolResponseBase
|
||||
from .models import (
|
||||
AgentCarouselResponse,
|
||||
AgentInfo,
|
||||
ErrorResponse,
|
||||
NoResultsResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FindAgentTool(BaseTool):
|
||||
"""Tool for discovering agents from the marketplace."""
|
||||
"""Tool for discovering agents based on user needs."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@@ -36,11 +46,84 @@ class FindAgentTool(BaseTool):
|
||||
}
|
||||
|
||||
async def _execute(
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs,
|
||||
) -> ToolResponseBase:
|
||||
return await search_agents(
|
||||
query=kwargs.get("query", "").strip(),
|
||||
source="marketplace",
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
"""Search for agents in the marketplace.
|
||||
|
||||
Args:
|
||||
user_id: User ID (may be anonymous)
|
||||
session_id: Chat session ID
|
||||
query: Search query
|
||||
|
||||
Returns:
|
||||
AgentCarouselResponse: List of agents found in the marketplace
|
||||
NoResultsResponse: No agents found in the marketplace
|
||||
ErrorResponse: Error message
|
||||
"""
|
||||
query = kwargs.get("query", "").strip()
|
||||
session_id = session.session_id
|
||||
if not query:
|
||||
return ErrorResponse(
|
||||
message="Please provide a search query",
|
||||
session_id=session_id,
|
||||
)
|
||||
agents = []
|
||||
try:
|
||||
logger.info(f"Searching marketplace for: {query}")
|
||||
store_results = await store_db.get_store_agents(
|
||||
search_query=query,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
logger.info(f"Find agents tool found {len(store_results.agents)} agents")
|
||||
for agent in store_results.agents:
|
||||
agent_id = f"{agent.creator}/{agent.slug}"
|
||||
logger.info(f"Building agent ID = {agent_id}")
|
||||
agents.append(
|
||||
AgentInfo(
|
||||
id=agent_id,
|
||||
name=agent.agent_name,
|
||||
description=agent.description or "",
|
||||
source="marketplace",
|
||||
in_library=False,
|
||||
creator=agent.creator,
|
||||
category="general",
|
||||
rating=agent.rating,
|
||||
runs=agent.runs,
|
||||
is_featured=False,
|
||||
),
|
||||
)
|
||||
except NotFoundError:
|
||||
pass
|
||||
except DatabaseError as e:
|
||||
logger.error(f"Error searching agents: {e}", exc_info=True)
|
||||
return ErrorResponse(
|
||||
message="Failed to search for agents. Please try again.",
|
||||
error=str(e),
|
||||
session_id=session_id,
|
||||
)
|
||||
if not agents:
|
||||
return NoResultsResponse(
|
||||
message=f"No agents found matching '{query}'. Try different keywords or browse the marketplace. If you have 3 consecutive find_agent tool calls results and found no agents. Please stop trying and ask the user if there is anything else you can help with.",
|
||||
session_id=session_id,
|
||||
suggestions=[
|
||||
"Try more general terms",
|
||||
"Browse categories in the marketplace",
|
||||
"Check spelling",
|
||||
],
|
||||
)
|
||||
|
||||
# Return formatted carousel
|
||||
title = (
|
||||
f"Found {len(agents)} agent{'s' if len(agents) != 1 else ''} for '{query}'"
|
||||
)
|
||||
return AgentCarouselResponse(
|
||||
message="Now you have found some options for the user to choose from. You can add a link to a recommended agent at: /marketplace/agent/agent_id Please ask the user if they would like to use any of these agents. If they do, please call the get_agent_details tool for this agent.",
|
||||
title=title,
|
||||
agents=agents,
|
||||
count=len(agents),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
"""Tool for searching agents in the user's library."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_search import search_agents
|
||||
from .base import BaseTool
|
||||
from .models import ToolResponseBase
|
||||
|
||||
|
||||
class FindLibraryAgentTool(BaseTool):
|
||||
"""Tool for searching agents in the user's library."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "find_library_agent"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Search for agents in the user's library. Use this to find agents "
|
||||
"the user has already added to their library, including agents they "
|
||||
"created or added from the marketplace."
|
||||
)
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query to find agents by name or description.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
async def _execute(
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
) -> ToolResponseBase:
|
||||
return await search_agents(
|
||||
query=kwargs.get("query", "").strip(),
|
||||
source="library",
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Pydantic models for tool responses."""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
@@ -12,15 +11,14 @@ from backend.data.model import CredentialsMetaInput
|
||||
class ResponseType(str, Enum):
|
||||
"""Types of tool responses."""
|
||||
|
||||
AGENTS_FOUND = "agents_found"
|
||||
AGENT_CAROUSEL = "agent_carousel"
|
||||
AGENT_DETAILS = "agent_details"
|
||||
SETUP_REQUIREMENTS = "setup_requirements"
|
||||
EXECUTION_STARTED = "execution_started"
|
||||
NEED_LOGIN = "need_login"
|
||||
ERROR = "error"
|
||||
NO_RESULTS = "no_results"
|
||||
AGENT_OUTPUT = "agent_output"
|
||||
UNDERSTANDING_UPDATED = "understanding_updated"
|
||||
SUCCESS = "success"
|
||||
|
||||
|
||||
# Base response model
|
||||
@@ -53,14 +51,14 @@ class AgentInfo(BaseModel):
|
||||
graph_id: str | None = None
|
||||
|
||||
|
||||
class AgentsFoundResponse(ToolResponseBase):
|
||||
class AgentCarouselResponse(ToolResponseBase):
|
||||
"""Response for find_agent tool."""
|
||||
|
||||
type: ResponseType = ResponseType.AGENTS_FOUND
|
||||
type: ResponseType = ResponseType.AGENT_CAROUSEL
|
||||
title: str = "Available Agents"
|
||||
agents: list[AgentInfo]
|
||||
count: int
|
||||
name: str = "agents_found"
|
||||
name: str = "agent_carousel"
|
||||
|
||||
|
||||
class NoResultsResponse(ToolResponseBase):
|
||||
@@ -175,37 +173,3 @@ class ErrorResponse(ToolResponseBase):
|
||||
type: ResponseType = ResponseType.ERROR
|
||||
error: str | None = None
|
||||
details: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# Agent output models
|
||||
class ExecutionOutputInfo(BaseModel):
|
||||
"""Summary of a single execution's outputs."""
|
||||
|
||||
execution_id: str
|
||||
status: str
|
||||
started_at: datetime | None = None
|
||||
ended_at: datetime | None = None
|
||||
outputs: dict[str, list[Any]]
|
||||
inputs_summary: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class AgentOutputResponse(ToolResponseBase):
|
||||
"""Response for agent_output tool."""
|
||||
|
||||
type: ResponseType = ResponseType.AGENT_OUTPUT
|
||||
agent_name: str
|
||||
agent_id: str
|
||||
library_agent_id: str | None = None
|
||||
library_agent_link: str | None = None
|
||||
execution: ExecutionOutputInfo | None = None
|
||||
available_executions: list[dict[str, Any]] | None = None
|
||||
total_executions: int = 0
|
||||
|
||||
|
||||
# Business understanding models
|
||||
class UnderstandingUpdatedResponse(ToolResponseBase):
|
||||
"""Response for add_understanding tool."""
|
||||
|
||||
type: ResponseType = ResponseType.UNDERSTANDING_UPDATED
|
||||
updated_fields: list[str] = Field(default_factory=list)
|
||||
current_understanding: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from backend.api.features.chat.config import ChatConfig
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.library import db as library_db
|
||||
from backend.data.graph import GraphModel
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
from backend.data.user import get_user_by_id
|
||||
@@ -58,7 +57,6 @@ class RunAgentInput(BaseModel):
|
||||
"""Input parameters for the run_agent tool."""
|
||||
|
||||
username_agent_slug: str = ""
|
||||
library_agent_id: str = ""
|
||||
inputs: dict[str, Any] = Field(default_factory=dict)
|
||||
use_defaults: bool = False
|
||||
schedule_name: str = ""
|
||||
@@ -66,12 +64,7 @@ class RunAgentInput(BaseModel):
|
||||
timezone: str = "UTC"
|
||||
|
||||
@field_validator(
|
||||
"username_agent_slug",
|
||||
"library_agent_id",
|
||||
"schedule_name",
|
||||
"cron",
|
||||
"timezone",
|
||||
mode="before",
|
||||
"username_agent_slug", "schedule_name", "cron", "timezone", mode="before"
|
||||
)
|
||||
@classmethod
|
||||
def strip_strings(cls, v: Any) -> Any:
|
||||
@@ -97,7 +90,7 @@ class RunAgentTool(BaseTool):
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return """Run or schedule an agent from the marketplace or user's library.
|
||||
return """Run or schedule an agent from the marketplace.
|
||||
|
||||
The tool automatically handles the setup flow:
|
||||
- Returns missing inputs if required fields are not provided
|
||||
@@ -105,10 +98,6 @@ class RunAgentTool(BaseTool):
|
||||
- Executes immediately if all requirements are met
|
||||
- Schedules execution if cron expression is provided
|
||||
|
||||
Identify the agent using either:
|
||||
- username_agent_slug: Marketplace format 'username/agent-name'
|
||||
- library_agent_id: ID of an agent in the user's library
|
||||
|
||||
For scheduled execution, provide: schedule_name, cron, and optionally timezone."""
|
||||
|
||||
@property
|
||||
@@ -120,10 +109,6 @@ class RunAgentTool(BaseTool):
|
||||
"type": "string",
|
||||
"description": "Agent identifier in format 'username/agent-name'",
|
||||
},
|
||||
"library_agent_id": {
|
||||
"type": "string",
|
||||
"description": "Library agent ID from user's library",
|
||||
},
|
||||
"inputs": {
|
||||
"type": "object",
|
||||
"description": "Input values for the agent",
|
||||
@@ -146,7 +131,7 @@ class RunAgentTool(BaseTool):
|
||||
"description": "IANA timezone for schedule (default: UTC)",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
"required": ["username_agent_slug"],
|
||||
}
|
||||
|
||||
@property
|
||||
@@ -164,16 +149,10 @@ class RunAgentTool(BaseTool):
|
||||
params = RunAgentInput(**kwargs)
|
||||
session_id = session.session_id
|
||||
|
||||
# Validate at least one identifier is provided
|
||||
has_slug = params.username_agent_slug and "/" in params.username_agent_slug
|
||||
has_library_id = bool(params.library_agent_id)
|
||||
|
||||
if not has_slug and not has_library_id:
|
||||
# Validate agent slug format
|
||||
if not params.username_agent_slug or "/" not in params.username_agent_slug:
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
"Please provide either a username_agent_slug "
|
||||
"(format 'username/agent-name') or a library_agent_id"
|
||||
),
|
||||
message="Please provide an agent slug in format 'username/agent-name'",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
@@ -188,41 +167,13 @@ class RunAgentTool(BaseTool):
|
||||
is_schedule = bool(params.schedule_name or params.cron)
|
||||
|
||||
try:
|
||||
# Step 1: Fetch agent details
|
||||
graph: GraphModel | None = None
|
||||
library_agent = None
|
||||
|
||||
# Priority: library_agent_id if provided
|
||||
if has_library_id:
|
||||
library_agent = await library_db.get_library_agent(
|
||||
params.library_agent_id, user_id
|
||||
)
|
||||
if not library_agent:
|
||||
return ErrorResponse(
|
||||
message=f"Library agent '{params.library_agent_id}' not found",
|
||||
session_id=session_id,
|
||||
)
|
||||
# Get the graph from the library agent
|
||||
from backend.data.graph import get_graph
|
||||
|
||||
graph = await get_graph(
|
||||
library_agent.graph_id,
|
||||
library_agent.graph_version,
|
||||
user_id=user_id,
|
||||
)
|
||||
else:
|
||||
# Fetch from marketplace slug
|
||||
username, agent_name = params.username_agent_slug.split("/", 1)
|
||||
graph, _ = await fetch_graph_from_store_slug(username, agent_name)
|
||||
# Step 1: Fetch agent details (always happens first)
|
||||
username, agent_name = params.username_agent_slug.split("/", 1)
|
||||
graph, store_agent = await fetch_graph_from_store_slug(username, agent_name)
|
||||
|
||||
if not graph:
|
||||
identifier = (
|
||||
params.library_agent_id
|
||||
if has_library_id
|
||||
else params.username_agent_slug
|
||||
)
|
||||
return ErrorResponse(
|
||||
message=f"Agent '{identifier}' not found",
|
||||
message=f"Agent '{params.username_agent_slug}' not found in marketplace",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import uuid
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import orjson
|
||||
import pytest
|
||||
@@ -18,17 +17,6 @@ setup_test_data = setup_test_data
|
||||
setup_firecrawl_test_data = setup_firecrawl_test_data
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def mock_embedding_functions():
|
||||
"""Mock embedding functions for all tests to avoid database/API dependencies."""
|
||||
with patch(
|
||||
"backend.api.features.store.db.ensure_embedding",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(scope="session")
|
||||
async def test_run_agent(setup_test_data):
|
||||
"""Test that the run_agent tool successfully executes an approved agent"""
|
||||
@@ -58,11 +46,11 @@ async def test_run_agent(setup_test_data):
|
||||
|
||||
# Verify the response
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert hasattr(response, "result")
|
||||
# Parse the result JSON to verify the execution started
|
||||
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert "execution_id" in result_data
|
||||
assert "graph_id" in result_data
|
||||
assert result_data["graph_id"] == graph.id
|
||||
@@ -98,11 +86,11 @@ async def test_run_agent_missing_inputs(setup_test_data):
|
||||
|
||||
# Verify that we get an error response
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert hasattr(response, "result")
|
||||
# The tool should return an ErrorResponse when setup info indicates not ready
|
||||
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert "message" in result_data
|
||||
|
||||
|
||||
@@ -130,10 +118,10 @@ async def test_run_agent_invalid_agent_id(setup_test_data):
|
||||
|
||||
# Verify that we get an error response
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert hasattr(response, "result")
|
||||
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
assert "message" in result_data
|
||||
# Should get an error about failed setup or not found
|
||||
assert any(
|
||||
@@ -170,12 +158,12 @@ async def test_run_agent_with_llm_credentials(setup_llm_test_data):
|
||||
|
||||
# Verify the response
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert hasattr(response, "result")
|
||||
|
||||
# Parse the result JSON to verify the execution started
|
||||
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should successfully start execution since credentials are available
|
||||
assert "execution_id" in result_data
|
||||
@@ -207,9 +195,9 @@ async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_da
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should return agent_details type showing available inputs
|
||||
assert result_data.get("type") == "agent_details"
|
||||
@@ -242,9 +230,9 @@ async def test_run_agent_with_use_defaults(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should execute successfully
|
||||
assert "execution_id" in result_data
|
||||
@@ -272,9 +260,9 @@ async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should return setup_requirements type with missing credentials
|
||||
assert result_data.get("type") == "setup_requirements"
|
||||
@@ -304,9 +292,9 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should return error
|
||||
assert result_data.get("type") == "error"
|
||||
@@ -317,10 +305,9 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
|
||||
async def test_run_agent_unauthenticated():
|
||||
"""Test that run_agent returns need_login for unauthenticated users."""
|
||||
tool = RunAgentTool()
|
||||
# Session has a user_id (session owner), but we test tool execution without user_id
|
||||
session = make_session(user_id="test-session-owner")
|
||||
session = make_session(user_id=None)
|
||||
|
||||
# Execute without user_id to test unauthenticated behavior
|
||||
# Execute without user_id
|
||||
response = await tool.execute(
|
||||
user_id=None,
|
||||
session_id=str(uuid.uuid4()),
|
||||
@@ -331,9 +318,9 @@ async def test_run_agent_unauthenticated():
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Base tool returns need_login type for unauthenticated users
|
||||
assert result_data.get("type") == "need_login"
|
||||
@@ -363,9 +350,9 @@ async def test_run_agent_schedule_without_cron(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should return error about missing cron
|
||||
assert result_data.get("type") == "error"
|
||||
@@ -395,9 +382,9 @@ async def test_run_agent_schedule_without_name(setup_test_data):
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
assert hasattr(response, "result")
|
||||
assert isinstance(response.result, str)
|
||||
result_data = orjson.loads(response.result)
|
||||
|
||||
# Should return error about missing schedule_name
|
||||
assert result_data.get("type") == "error"
|
||||
|
||||
@@ -35,7 +35,11 @@ from backend.data.model import (
|
||||
OAuth2Credentials,
|
||||
UserIntegrations,
|
||||
)
|
||||
from backend.data.onboarding import OnboardingStep, complete_onboarding_step
|
||||
from backend.data.onboarding import (
|
||||
OnboardingStep,
|
||||
complete_onboarding_step,
|
||||
increment_runs,
|
||||
)
|
||||
from backend.data.user import get_user_integrations
|
||||
from backend.executor.utils import add_graph_execution
|
||||
from backend.integrations.ayrshare import AyrshareClient, SocialPlatform
|
||||
@@ -171,7 +175,6 @@ async def callback(
|
||||
f"Successfully processed OAuth callback for user {user_id} "
|
||||
f"and provider {provider.value}"
|
||||
)
|
||||
|
||||
return CredentialsMetaResponse(
|
||||
id=credentials.id,
|
||||
provider=credentials.provider,
|
||||
@@ -190,7 +193,6 @@ async def list_credentials(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> list[CredentialsMetaResponse]:
|
||||
credentials = await creds_manager.store.get_all_creds(user_id)
|
||||
|
||||
return [
|
||||
CredentialsMetaResponse(
|
||||
id=cred.id,
|
||||
@@ -213,7 +215,6 @@ async def list_credentials_by_provider(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> list[CredentialsMetaResponse]:
|
||||
credentials = await creds_manager.store.get_creds_by_provider(user_id, provider)
|
||||
|
||||
return [
|
||||
CredentialsMetaResponse(
|
||||
id=cred.id,
|
||||
@@ -377,6 +378,7 @@ async def webhook_ingress_generic(
|
||||
return
|
||||
|
||||
await complete_onboarding_step(user_id, OnboardingStep.TRIGGER_WEBHOOK)
|
||||
await increment_runs(user_id)
|
||||
|
||||
# Execute all triggers concurrently for better performance
|
||||
tasks = []
|
||||
@@ -829,18 +831,6 @@ async def list_providers() -> List[str]:
|
||||
return all_providers
|
||||
|
||||
|
||||
@router.get("/providers/system", response_model=List[str])
|
||||
async def list_system_providers() -> List[str]:
|
||||
"""
|
||||
Get a list of providers that have platform credits (system credentials) available.
|
||||
|
||||
These providers can be used without the user providing their own API keys.
|
||||
"""
|
||||
from backend.integrations.credentials_store import SYSTEM_PROVIDERS
|
||||
|
||||
return list(SYSTEM_PROVIDERS)
|
||||
|
||||
|
||||
@router.get("/providers/names", response_model=ProviderNamesResponse)
|
||||
async def get_provider_names() -> ProviderNamesResponse:
|
||||
"""
|
||||
|
||||
@@ -489,7 +489,7 @@ async def update_agent_version_in_library(
|
||||
agent_graph_version: int,
|
||||
) -> library_model.LibraryAgent:
|
||||
"""
|
||||
Updates the agent version in the library for any agent owned by the user.
|
||||
Updates the agent version in the library if useGraphIsActiveVersion is True.
|
||||
|
||||
Args:
|
||||
user_id: Owner of the LibraryAgent.
|
||||
@@ -498,31 +498,20 @@ async def update_agent_version_in_library(
|
||||
|
||||
Raises:
|
||||
DatabaseError: If there's an error with the update.
|
||||
NotFoundError: If no library agent is found for this user and agent.
|
||||
"""
|
||||
logger.debug(
|
||||
f"Updating agent version in library for user #{user_id}, "
|
||||
f"agent #{agent_graph_id} v{agent_graph_version}"
|
||||
)
|
||||
async with transaction() as tx:
|
||||
library_agent = await prisma.models.LibraryAgent.prisma(tx).find_first_or_raise(
|
||||
try:
|
||||
library_agent = await prisma.models.LibraryAgent.prisma().find_first_or_raise(
|
||||
where={
|
||||
"userId": user_id,
|
||||
"agentGraphId": agent_graph_id,
|
||||
"useGraphIsActiveVersion": True,
|
||||
},
|
||||
)
|
||||
|
||||
# Delete any conflicting LibraryAgent for the target version
|
||||
await prisma.models.LibraryAgent.prisma(tx).delete_many(
|
||||
where={
|
||||
"userId": user_id,
|
||||
"agentGraphId": agent_graph_id,
|
||||
"agentGraphVersion": agent_graph_version,
|
||||
"id": {"not": library_agent.id},
|
||||
}
|
||||
)
|
||||
|
||||
lib = await prisma.models.LibraryAgent.prisma(tx).update(
|
||||
lib = await prisma.models.LibraryAgent.prisma().update(
|
||||
where={"id": library_agent.id},
|
||||
data={
|
||||
"AgentGraph": {
|
||||
@@ -536,13 +525,13 @@ async def update_agent_version_in_library(
|
||||
},
|
||||
include={"AgentGraph": True},
|
||||
)
|
||||
if lib is None:
|
||||
raise NotFoundError(f"Library agent {library_agent.id} not found")
|
||||
|
||||
if lib is None:
|
||||
raise NotFoundError(
|
||||
f"Failed to update library agent for {agent_graph_id} v{agent_graph_version}"
|
||||
)
|
||||
|
||||
return library_model.LibraryAgent.from_db(lib)
|
||||
return library_model.LibraryAgent.from_db(lib)
|
||||
except prisma.errors.PrismaError as e:
|
||||
logger.error(f"Database error updating agent version in library: {e}")
|
||||
raise DatabaseError("Failed to update agent version in library") from e
|
||||
|
||||
|
||||
async def update_library_agent(
|
||||
@@ -836,7 +825,6 @@ async def add_store_agent_to_library(
|
||||
}
|
||||
},
|
||||
"isCreatedByUser": False,
|
||||
"useGraphIsActiveVersion": False,
|
||||
"settings": SafeJson(
|
||||
_initialize_graph_settings(graph_model).model_dump()
|
||||
),
|
||||
|
||||
@@ -48,7 +48,6 @@ class LibraryAgent(pydantic.BaseModel):
|
||||
id: str
|
||||
graph_id: str
|
||||
graph_version: int
|
||||
owner_user_id: str # ID of user who owns/created this agent graph
|
||||
|
||||
image_url: str | None
|
||||
|
||||
@@ -164,7 +163,6 @@ class LibraryAgent(pydantic.BaseModel):
|
||||
id=agent.id,
|
||||
graph_id=agent.agentGraphId,
|
||||
graph_version=agent.agentGraphVersion,
|
||||
owner_user_id=agent.userId,
|
||||
image_url=agent.imageUrl,
|
||||
creator_name=creator_name,
|
||||
creator_image_url=creator_image_url,
|
||||
|
||||
@@ -8,6 +8,7 @@ from backend.data.execution import GraphExecutionMeta
|
||||
from backend.data.graph import get_graph
|
||||
from backend.data.integrations import get_webhook
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
from backend.data.onboarding import increment_runs
|
||||
from backend.executor.utils import add_graph_execution, make_node_credentials_input_map
|
||||
from backend.integrations.creds_manager import IntegrationCredentialsManager
|
||||
from backend.integrations.webhooks import get_webhook_manager
|
||||
@@ -402,6 +403,8 @@ async def execute_preset(
|
||||
merged_node_input = preset.inputs | inputs
|
||||
merged_credential_inputs = preset.credentials | credential_inputs
|
||||
|
||||
await increment_runs(user_id)
|
||||
|
||||
return await add_graph_execution(
|
||||
user_id=user_id,
|
||||
graph_id=preset.graph_id,
|
||||
|
||||
@@ -42,7 +42,6 @@ async def test_get_library_agents_success(
|
||||
id="test-agent-1",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 1",
|
||||
description="Test Description 1",
|
||||
image_url=None,
|
||||
@@ -65,7 +64,6 @@ async def test_get_library_agents_success(
|
||||
id="test-agent-2",
|
||||
graph_id="test-agent-2",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 2",
|
||||
description="Test Description 2",
|
||||
image_url=None,
|
||||
@@ -140,7 +138,6 @@ async def test_get_favorite_library_agents_success(
|
||||
id="test-agent-1",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Favorite Agent 1",
|
||||
description="Test Favorite Description 1",
|
||||
image_url=None,
|
||||
@@ -208,7 +205,6 @@ def test_add_agent_to_library_success(
|
||||
id="test-library-agent-id",
|
||||
graph_id="test-agent-1",
|
||||
graph_version=1,
|
||||
owner_user_id=test_user_id,
|
||||
name="Test Agent 1",
|
||||
description="Test Description 1",
|
||||
image_url=None,
|
||||
|
||||
@@ -1,431 +0,0 @@
|
||||
"""
|
||||
Content Type Handlers for Unified Embeddings
|
||||
|
||||
Pluggable system for different content sources (store agents, blocks, docs).
|
||||
Each handler knows how to fetch and process its content type for embedding.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.data.db import query_raw_with_schema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContentItem:
|
||||
"""Represents a piece of content to be embedded."""
|
||||
|
||||
content_id: str # Unique identifier (DB ID or file path)
|
||||
content_type: ContentType
|
||||
searchable_text: str # Combined text for embedding
|
||||
metadata: dict[str, Any] # Content-specific metadata
|
||||
user_id: str | None = None # For user-scoped content
|
||||
|
||||
|
||||
class ContentHandler(ABC):
|
||||
"""Base handler for fetching and processing content for embeddings."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def content_type(self) -> ContentType:
|
||||
"""The ContentType this handler manages."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""
|
||||
Fetch items that don't have embeddings yet.
|
||||
|
||||
Args:
|
||||
batch_size: Maximum number of items to return
|
||||
|
||||
Returns:
|
||||
List of ContentItem objects ready for embedding
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""
|
||||
Get statistics about embedding coverage.
|
||||
|
||||
Returns:
|
||||
Dict with keys: total, with_embeddings, without_embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class StoreAgentHandler(ContentHandler):
|
||||
"""Handler for marketplace store agent listings."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.STORE_AGENT
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch approved store listings without embeddings."""
|
||||
from backend.api.features.store.embeddings import build_searchable_text
|
||||
|
||||
missing = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT
|
||||
slv.id,
|
||||
slv.name,
|
||||
slv.description,
|
||||
slv."subHeading",
|
||||
slv.categories
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
LEFT JOIN {schema_prefix}"UnifiedContentEmbedding" uce
|
||||
ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
AND uce."contentId" IS NULL
|
||||
LIMIT $1
|
||||
""",
|
||||
batch_size,
|
||||
)
|
||||
|
||||
return [
|
||||
ContentItem(
|
||||
content_id=row["id"],
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text=build_searchable_text(
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
sub_heading=row["subHeading"],
|
||||
categories=row["categories"] or [],
|
||||
),
|
||||
metadata={
|
||||
"name": row["name"],
|
||||
"categories": row["categories"] or [],
|
||||
},
|
||||
user_id=None, # Store agents are public
|
||||
)
|
||||
for row in missing
|
||||
]
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about store agent embedding coverage."""
|
||||
# Count approved versions
|
||||
approved_result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {schema_prefix}"StoreListingVersion"
|
||||
WHERE "submissionStatus" = 'APPROVED'
|
||||
AND "isDeleted" = false
|
||||
"""
|
||||
)
|
||||
total_approved = approved_result[0]["count"] if approved_result else 0
|
||||
|
||||
# Count versions with embeddings
|
||||
embedded_result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
JOIN {schema_prefix}"UnifiedContentEmbedding" uce ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
"""
|
||||
)
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_approved,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_approved - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
class BlockHandler(ContentHandler):
|
||||
"""Handler for block definitions (Python classes)."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.BLOCK
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch blocks without embeddings."""
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
# Get all available blocks
|
||||
all_blocks = get_blocks()
|
||||
|
||||
# Check which ones have embeddings
|
||||
if not all_blocks:
|
||||
return []
|
||||
|
||||
block_ids = list(all_blocks.keys())
|
||||
|
||||
# Query for existing embeddings
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])
|
||||
existing_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT "contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*block_ids,
|
||||
)
|
||||
|
||||
existing_ids = {row["contentId"] for row in existing_result}
|
||||
missing_blocks = [
|
||||
(block_id, block_cls)
|
||||
for block_id, block_cls in all_blocks.items()
|
||||
if block_id not in existing_ids
|
||||
]
|
||||
|
||||
# Convert to ContentItem
|
||||
items = []
|
||||
for block_id, block_cls in missing_blocks[:batch_size]:
|
||||
try:
|
||||
block_instance = block_cls()
|
||||
|
||||
# Build searchable text from block metadata
|
||||
parts = []
|
||||
if hasattr(block_instance, "name") and block_instance.name:
|
||||
parts.append(block_instance.name)
|
||||
if (
|
||||
hasattr(block_instance, "description")
|
||||
and block_instance.description
|
||||
):
|
||||
parts.append(block_instance.description)
|
||||
if hasattr(block_instance, "categories") and block_instance.categories:
|
||||
# Convert BlockCategory enum to strings
|
||||
parts.append(
|
||||
" ".join(str(cat.value) for cat in block_instance.categories)
|
||||
)
|
||||
|
||||
# Add input/output schema info
|
||||
if hasattr(block_instance, "input_schema"):
|
||||
schema = block_instance.input_schema
|
||||
if hasattr(schema, "model_json_schema"):
|
||||
schema_dict = schema.model_json_schema()
|
||||
if "properties" in schema_dict:
|
||||
for prop_name, prop_info in schema_dict[
|
||||
"properties"
|
||||
].items():
|
||||
if "description" in prop_info:
|
||||
parts.append(
|
||||
f"{prop_name}: {prop_info['description']}"
|
||||
)
|
||||
|
||||
searchable_text = " ".join(parts)
|
||||
|
||||
# Convert categories set of enums to list of strings for JSON serialization
|
||||
categories = getattr(block_instance, "categories", set())
|
||||
categories_list = (
|
||||
[cat.value for cat in categories] if categories else []
|
||||
)
|
||||
|
||||
items.append(
|
||||
ContentItem(
|
||||
content_id=block_id,
|
||||
content_type=ContentType.BLOCK,
|
||||
searchable_text=searchable_text,
|
||||
metadata={
|
||||
"name": getattr(block_instance, "name", ""),
|
||||
"categories": categories_list,
|
||||
},
|
||||
user_id=None, # Blocks are public
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process block {block_id}: {e}")
|
||||
continue
|
||||
|
||||
return items
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about block embedding coverage."""
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
all_blocks = get_blocks()
|
||||
total_blocks = len(all_blocks)
|
||||
|
||||
if total_blocks == 0:
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
block_ids = list(all_blocks.keys())
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])
|
||||
|
||||
embedded_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*block_ids,
|
||||
)
|
||||
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_blocks,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_blocks - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
class DocumentationHandler(ContentHandler):
|
||||
"""Handler for documentation files (.md/.mdx)."""
|
||||
|
||||
@property
|
||||
def content_type(self) -> ContentType:
|
||||
return ContentType.DOCUMENTATION
|
||||
|
||||
def _get_docs_root(self) -> Path:
|
||||
"""Get the documentation root directory."""
|
||||
# content_handlers.py is at: backend/backend/api/features/store/content_handlers.py
|
||||
# Need to go up to project root then into docs/
|
||||
# In container: /app/autogpt_platform/backend/backend/api/features/store -> /app/docs
|
||||
# In development: /repo/autogpt_platform/backend/backend/api/features/store -> /repo/docs
|
||||
this_file = Path(
|
||||
__file__
|
||||
) # .../backend/backend/api/features/store/content_handlers.py
|
||||
project_root = (
|
||||
this_file.parent.parent.parent.parent.parent.parent.parent
|
||||
) # -> /app or /repo
|
||||
docs_root = project_root / "docs"
|
||||
return docs_root
|
||||
|
||||
def _extract_title_and_content(self, file_path: Path) -> tuple[str, str]:
|
||||
"""Extract title and content from markdown file."""
|
||||
try:
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
|
||||
# Try to extract title from first # heading
|
||||
lines = content.split("\n")
|
||||
title = ""
|
||||
body_lines = []
|
||||
|
||||
for line in lines:
|
||||
if line.startswith("# ") and not title:
|
||||
title = line[2:].strip()
|
||||
else:
|
||||
body_lines.append(line)
|
||||
|
||||
# If no title found, use filename
|
||||
if not title:
|
||||
title = file_path.stem.replace("-", " ").replace("_", " ").title()
|
||||
|
||||
body = "\n".join(body_lines)
|
||||
|
||||
return title, body
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read {file_path}: {e}")
|
||||
return file_path.stem, ""
|
||||
|
||||
async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
|
||||
"""Fetch documentation files without embeddings."""
|
||||
docs_root = self._get_docs_root()
|
||||
|
||||
if not docs_root.exists():
|
||||
logger.warning(f"Documentation root not found: {docs_root}")
|
||||
return []
|
||||
|
||||
# Find all .md and .mdx files
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))
|
||||
|
||||
# Get relative paths for content IDs
|
||||
doc_paths = [str(doc.relative_to(docs_root)) for doc in all_docs]
|
||||
|
||||
if not doc_paths:
|
||||
return []
|
||||
|
||||
# Check which ones have embeddings
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(doc_paths))])
|
||||
existing_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT "contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'DOCUMENTATION'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*doc_paths,
|
||||
)
|
||||
|
||||
existing_ids = {row["contentId"] for row in existing_result}
|
||||
missing_docs = [
|
||||
(doc_path, doc_file)
|
||||
for doc_path, doc_file in zip(doc_paths, all_docs)
|
||||
if doc_path not in existing_ids
|
||||
]
|
||||
|
||||
# Convert to ContentItem
|
||||
items = []
|
||||
for doc_path, doc_file in missing_docs[:batch_size]:
|
||||
try:
|
||||
title, content = self._extract_title_and_content(doc_file)
|
||||
|
||||
# Build searchable text
|
||||
searchable_text = f"{title} {content}"
|
||||
|
||||
items.append(
|
||||
ContentItem(
|
||||
content_id=doc_path,
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
searchable_text=searchable_text,
|
||||
metadata={
|
||||
"title": title,
|
||||
"path": doc_path,
|
||||
},
|
||||
user_id=None, # Documentation is public
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process doc {doc_path}: {e}")
|
||||
continue
|
||||
|
||||
return items
|
||||
|
||||
async def get_stats(self) -> dict[str, int]:
|
||||
"""Get statistics about documentation embedding coverage."""
|
||||
docs_root = self._get_docs_root()
|
||||
|
||||
if not docs_root.exists():
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
# Count all .md and .mdx files
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))
|
||||
total_docs = len(all_docs)
|
||||
|
||||
if total_docs == 0:
|
||||
return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}
|
||||
|
||||
doc_paths = [str(doc.relative_to(docs_root)) for doc in all_docs]
|
||||
placeholders = ",".join([f"${i+1}" for i in range(len(doc_paths))])
|
||||
|
||||
embedded_result = await query_raw_with_schema(
|
||||
f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = 'DOCUMENTATION'::{{schema_prefix}}"ContentType"
|
||||
AND "contentId" = ANY(ARRAY[{placeholders}])
|
||||
""",
|
||||
*doc_paths,
|
||||
)
|
||||
|
||||
with_embeddings = embedded_result[0]["count"] if embedded_result else 0
|
||||
|
||||
return {
|
||||
"total": total_docs,
|
||||
"with_embeddings": with_embeddings,
|
||||
"without_embeddings": total_docs - with_embeddings,
|
||||
}
|
||||
|
||||
|
||||
# Content handler registry
|
||||
CONTENT_HANDLERS: dict[ContentType, ContentHandler] = {
|
||||
ContentType.STORE_AGENT: StoreAgentHandler(),
|
||||
ContentType.BLOCK: BlockHandler(),
|
||||
ContentType.DOCUMENTATION: DocumentationHandler(),
|
||||
}
|
||||
@@ -1,215 +0,0 @@
|
||||
"""
|
||||
Integration tests for content handlers using real DB.
|
||||
|
||||
Run with: poetry run pytest backend/api/features/store/content_handlers_integration_test.py -xvs
|
||||
|
||||
These tests use the real database but mock OpenAI calls.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.api.features.store.content_handlers import (
|
||||
CONTENT_HANDLERS,
|
||||
BlockHandler,
|
||||
DocumentationHandler,
|
||||
StoreAgentHandler,
|
||||
)
|
||||
from backend.api.features.store.embeddings import (
|
||||
EMBEDDING_DIM,
|
||||
backfill_all_content_types,
|
||||
ensure_content_embedding,
|
||||
get_embedding_stats,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_real_db():
|
||||
"""Test StoreAgentHandler with real database queries."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Get stats from real DB
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list (may be empty if all have embeddings)
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None
|
||||
assert item.content_type.value == "STORE_AGENT"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_real_db():
|
||||
"""Test BlockHandler with real database queries."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Get stats from real DB
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0 # Should have at least some blocks
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None # Should be block UUID
|
||||
assert item.content_type.value == "BLOCK"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_real_fs():
|
||||
"""Test DocumentationHandler with real filesystem."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Get stats from real filesystem
|
||||
stats = await handler.get_stats()
|
||||
|
||||
# Stats should have correct structure
|
||||
assert "total" in stats
|
||||
assert "with_embeddings" in stats
|
||||
assert "without_embeddings" in stats
|
||||
assert stats["total"] >= 0
|
||||
assert stats["with_embeddings"] >= 0
|
||||
assert stats["without_embeddings"] >= 0
|
||||
|
||||
# Get missing items (max 1 to keep test fast)
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
# Items should be list
|
||||
assert isinstance(items, list)
|
||||
|
||||
if items:
|
||||
item = items[0]
|
||||
assert item.content_id is not None # Should be relative path
|
||||
assert item.content_type.value == "DOCUMENTATION"
|
||||
assert item.searchable_text != ""
|
||||
assert item.user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_stats_all_types():
|
||||
"""Test get_embedding_stats aggregates all content types."""
|
||||
stats = await get_embedding_stats()
|
||||
|
||||
# Should have structure with by_type and totals
|
||||
assert "by_type" in stats
|
||||
assert "totals" in stats
|
||||
|
||||
# Check each content type is present
|
||||
by_type = stats["by_type"]
|
||||
assert "STORE_AGENT" in by_type
|
||||
assert "BLOCK" in by_type
|
||||
assert "DOCUMENTATION" in by_type
|
||||
|
||||
# Check totals are aggregated
|
||||
totals = stats["totals"]
|
||||
assert totals["total"] >= 0
|
||||
assert totals["with_embeddings"] >= 0
|
||||
assert totals["without_embeddings"] >= 0
|
||||
assert "coverage_percent" in totals
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
async def test_ensure_content_embedding_blocks(mock_generate):
|
||||
"""Test creating embeddings for blocks (mocked OpenAI)."""
|
||||
# Mock OpenAI to return fake embedding
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
# Get one block without embedding
|
||||
handler = BlockHandler()
|
||||
items = await handler.get_missing_items(batch_size=1)
|
||||
|
||||
if not items:
|
||||
pytest.skip("No blocks without embeddings")
|
||||
|
||||
item = items[0]
|
||||
|
||||
# Try to create embedding (OpenAI mocked)
|
||||
result = await ensure_content_embedding(
|
||||
content_type=item.content_type,
|
||||
content_id=item.content_id,
|
||||
searchable_text=item.searchable_text,
|
||||
metadata=item.metadata,
|
||||
user_id=item.user_id,
|
||||
)
|
||||
|
||||
# Should succeed with mocked OpenAI
|
||||
assert result is True
|
||||
mock_generate.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
async def test_backfill_all_content_types_dry_run(mock_generate):
|
||||
"""Test backfill_all_content_types processes all handlers in order."""
|
||||
# Mock OpenAI to return fake embedding
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
# Run backfill with batch_size=1 to process max 1 per type
|
||||
result = await backfill_all_content_types(batch_size=1)
|
||||
|
||||
# Should have results for all content types
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
|
||||
by_type = result["by_type"]
|
||||
assert "BLOCK" in by_type
|
||||
assert "STORE_AGENT" in by_type
|
||||
assert "DOCUMENTATION" in by_type
|
||||
|
||||
# Each type should have correct structure
|
||||
for content_type, type_result in by_type.items():
|
||||
assert "processed" in type_result
|
||||
assert "success" in type_result
|
||||
assert "failed" in type_result
|
||||
|
||||
# Totals should aggregate
|
||||
totals = result["totals"]
|
||||
assert totals["processed"] >= 0
|
||||
assert totals["success"] >= 0
|
||||
assert totals["failed"] >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_content_handler_registry():
|
||||
"""Test all handlers are registered in correct order."""
|
||||
from prisma.enums import ContentType
|
||||
|
||||
# All three types should be registered
|
||||
assert ContentType.STORE_AGENT in CONTENT_HANDLERS
|
||||
assert ContentType.BLOCK in CONTENT_HANDLERS
|
||||
assert ContentType.DOCUMENTATION in CONTENT_HANDLERS
|
||||
|
||||
# Check handler types
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)
|
||||
@@ -1,324 +0,0 @@
|
||||
"""
|
||||
E2E tests for content handlers (blocks, store agents, documentation).
|
||||
|
||||
Tests the full flow: discovering content → generating embeddings → storing.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store.content_handlers import (
|
||||
CONTENT_HANDLERS,
|
||||
BlockHandler,
|
||||
DocumentationHandler,
|
||||
StoreAgentHandler,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_get_missing_items(mocker):
|
||||
"""Test StoreAgentHandler fetches approved agents without embeddings."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Mock database query
|
||||
mock_missing = [
|
||||
{
|
||||
"id": "agent-1",
|
||||
"name": "Test Agent",
|
||||
"description": "A test agent",
|
||||
"subHeading": "Test heading",
|
||||
"categories": ["AI", "Testing"],
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_missing,
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "agent-1"
|
||||
assert items[0].content_type == ContentType.STORE_AGENT
|
||||
assert "Test Agent" in items[0].searchable_text
|
||||
assert "A test agent" in items[0].searchable_text
|
||||
assert items[0].metadata["name"] == "Test Agent"
|
||||
assert items[0].user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_agent_handler_get_stats(mocker):
|
||||
"""Test StoreAgentHandler returns correct stats."""
|
||||
handler = StoreAgentHandler()
|
||||
|
||||
# Mock approved count query
|
||||
mock_approved = [{"count": 50}]
|
||||
# Mock embedded count query
|
||||
mock_embedded = [{"count": 30}]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
side_effect=[mock_approved, mock_embedded],
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 50
|
||||
assert stats["with_embeddings"] == 30
|
||||
assert stats["without_embeddings"] == 20
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_get_missing_items(mocker):
|
||||
"""Test BlockHandler discovers blocks without embeddings."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock get_blocks to return test blocks
|
||||
mock_block_class = MagicMock()
|
||||
mock_block_instance = MagicMock()
|
||||
mock_block_instance.name = "Calculator Block"
|
||||
mock_block_instance.description = "Performs calculations"
|
||||
mock_block_instance.categories = [MagicMock(value="MATH")]
|
||||
mock_block_instance.input_schema.model_json_schema.return_value = {
|
||||
"properties": {"expression": {"description": "Math expression to evaluate"}}
|
||||
}
|
||||
mock_block_class.return_value = mock_block_instance
|
||||
|
||||
mock_blocks = {"block-uuid-1": mock_block_class}
|
||||
|
||||
# Mock existing embeddings query (no embeddings exist)
|
||||
mock_existing = []
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_existing,
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "block-uuid-1"
|
||||
assert items[0].content_type == ContentType.BLOCK
|
||||
assert "Calculator Block" in items[0].searchable_text
|
||||
assert "Performs calculations" in items[0].searchable_text
|
||||
assert "MATH" in items[0].searchable_text
|
||||
assert "expression: Math expression" in items[0].searchable_text
|
||||
assert items[0].user_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_get_stats(mocker):
|
||||
"""Test BlockHandler returns correct stats."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock get_blocks
|
||||
mock_blocks = {
|
||||
"block-1": MagicMock(),
|
||||
"block-2": MagicMock(),
|
||||
"block-3": MagicMock(),
|
||||
}
|
||||
|
||||
# Mock embedded count query (2 blocks have embeddings)
|
||||
mock_embedded = [{"count": 2}]
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_embedded,
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 3
|
||||
assert stats["with_embeddings"] == 2
|
||||
assert stats["without_embeddings"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_get_missing_items(tmp_path, mocker):
|
||||
"""Test DocumentationHandler discovers docs without embeddings."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Create temporary docs directory with test files
|
||||
docs_root = tmp_path / "docs"
|
||||
docs_root.mkdir()
|
||||
|
||||
(docs_root / "guide.md").write_text("# Getting Started\n\nThis is a guide.")
|
||||
(docs_root / "api.mdx").write_text("# API Reference\n\nAPI documentation.")
|
||||
|
||||
# Mock _get_docs_root to return temp dir
|
||||
with patch.object(handler, "_get_docs_root", return_value=docs_root):
|
||||
# Mock existing embeddings query (no embeddings exist)
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 2
|
||||
|
||||
# Check guide.md
|
||||
guide_item = next(
|
||||
(item for item in items if item.content_id == "guide.md"), None
|
||||
)
|
||||
assert guide_item is not None
|
||||
assert guide_item.content_type == ContentType.DOCUMENTATION
|
||||
assert "Getting Started" in guide_item.searchable_text
|
||||
assert "This is a guide" in guide_item.searchable_text
|
||||
assert guide_item.metadata["title"] == "Getting Started"
|
||||
assert guide_item.user_id is None
|
||||
|
||||
# Check api.mdx
|
||||
api_item = next(
|
||||
(item for item in items if item.content_id == "api.mdx"), None
|
||||
)
|
||||
assert api_item is not None
|
||||
assert "API Reference" in api_item.searchable_text
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_get_stats(tmp_path, mocker):
|
||||
"""Test DocumentationHandler returns correct stats."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Create temporary docs directory
|
||||
docs_root = tmp_path / "docs"
|
||||
docs_root.mkdir()
|
||||
(docs_root / "doc1.md").write_text("# Doc 1")
|
||||
(docs_root / "doc2.md").write_text("# Doc 2")
|
||||
(docs_root / "doc3.mdx").write_text("# Doc 3")
|
||||
|
||||
# Mock embedded count query (1 doc has embedding)
|
||||
mock_embedded = [{"count": 1}]
|
||||
|
||||
with patch.object(handler, "_get_docs_root", return_value=docs_root):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=mock_embedded,
|
||||
):
|
||||
stats = await handler.get_stats()
|
||||
|
||||
assert stats["total"] == 3
|
||||
assert stats["with_embeddings"] == 1
|
||||
assert stats["without_embeddings"] == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_title_extraction(tmp_path):
|
||||
"""Test DocumentationHandler extracts title from markdown heading."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Test with heading
|
||||
doc_with_heading = tmp_path / "with_heading.md"
|
||||
doc_with_heading.write_text("# My Title\n\nContent here")
|
||||
title, content = handler._extract_title_and_content(doc_with_heading)
|
||||
assert title == "My Title"
|
||||
assert "# My Title" not in content
|
||||
assert "Content here" in content
|
||||
|
||||
# Test without heading
|
||||
doc_without_heading = tmp_path / "no-heading.md"
|
||||
doc_without_heading.write_text("Just content, no heading")
|
||||
title, content = handler._extract_title_and_content(doc_without_heading)
|
||||
assert title == "No Heading" # Uses filename
|
||||
assert "Just content" in content
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_content_handlers_registry():
|
||||
"""Test all content types are registered."""
|
||||
assert ContentType.STORE_AGENT in CONTENT_HANDLERS
|
||||
assert ContentType.BLOCK in CONTENT_HANDLERS
|
||||
assert ContentType.DOCUMENTATION in CONTENT_HANDLERS
|
||||
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
|
||||
assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_handles_missing_attributes():
|
||||
"""Test BlockHandler gracefully handles blocks with missing attributes."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock block with minimal attributes
|
||||
mock_block_class = MagicMock()
|
||||
mock_block_instance = MagicMock()
|
||||
mock_block_instance.name = "Minimal Block"
|
||||
# No description, categories, or schema
|
||||
del mock_block_instance.description
|
||||
del mock_block_instance.categories
|
||||
del mock_block_instance.input_schema
|
||||
mock_block_class.return_value = mock_block_instance
|
||||
|
||||
mock_blocks = {"block-minimal": mock_block_class}
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
assert len(items) == 1
|
||||
assert items[0].searchable_text == "Minimal Block"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_block_handler_skips_failed_blocks():
|
||||
"""Test BlockHandler skips blocks that fail to instantiate."""
|
||||
handler = BlockHandler()
|
||||
|
||||
# Mock one good block and one bad block
|
||||
good_block = MagicMock()
|
||||
good_instance = MagicMock()
|
||||
good_instance.name = "Good Block"
|
||||
good_instance.description = "Works fine"
|
||||
good_instance.categories = []
|
||||
good_block.return_value = good_instance
|
||||
|
||||
bad_block = MagicMock()
|
||||
bad_block.side_effect = Exception("Instantiation failed")
|
||||
|
||||
mock_blocks = {"good-block": good_block, "bad-block": bad_block}
|
||||
|
||||
with patch(
|
||||
"backend.data.block.get_blocks",
|
||||
return_value=mock_blocks,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.content_handlers.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
|
||||
# Should only get the good block
|
||||
assert len(items) == 1
|
||||
assert items[0].content_id == "good-block"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_documentation_handler_missing_docs_directory():
|
||||
"""Test DocumentationHandler handles missing docs directory gracefully."""
|
||||
handler = DocumentationHandler()
|
||||
|
||||
# Mock _get_docs_root to return non-existent path
|
||||
fake_path = Path("/nonexistent/docs")
|
||||
with patch.object(handler, "_get_docs_root", return_value=fake_path):
|
||||
items = await handler.get_missing_items(batch_size=10)
|
||||
assert items == []
|
||||
|
||||
stats = await handler.get_stats()
|
||||
assert stats["total"] == 0
|
||||
assert stats["with_embeddings"] == 0
|
||||
assert stats["without_embeddings"] == 0
|
||||
@@ -1,7 +1,8 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import typing
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Literal
|
||||
from typing import Literal
|
||||
|
||||
import fastapi
|
||||
import prisma.enums
|
||||
@@ -9,7 +10,7 @@ import prisma.errors
|
||||
import prisma.models
|
||||
import prisma.types
|
||||
|
||||
from backend.data.db import transaction
|
||||
from backend.data.db import query_raw_with_schema, transaction
|
||||
from backend.data.graph import (
|
||||
GraphMeta,
|
||||
GraphModel,
|
||||
@@ -29,8 +30,6 @@ from backend.util.settings import Settings
|
||||
|
||||
from . import exceptions as store_exceptions
|
||||
from . import model as store_model
|
||||
from .embeddings import ensure_embedding
|
||||
from .hybrid_search import hybrid_search
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = Settings()
|
||||
@@ -51,77 +50,128 @@ async def get_store_agents(
|
||||
page_size: int = 20,
|
||||
) -> store_model.StoreAgentsResponse:
|
||||
"""
|
||||
Get PUBLIC store agents from the StoreAgent view.
|
||||
|
||||
Search behavior:
|
||||
- With search_query: Uses hybrid search (semantic + lexical)
|
||||
- Fallback: If embeddings unavailable, gracefully degrades to lexical-only
|
||||
- Rationale: User-facing endpoint prioritizes availability over accuracy
|
||||
|
||||
Note: Admin operations (approval) use fail-fast to prevent inconsistent state.
|
||||
Get PUBLIC store agents from the StoreAgent view
|
||||
"""
|
||||
logger.debug(
|
||||
f"Getting store agents. featured={featured}, creators={creators}, sorted_by={sorted_by}, search={search_query}, category={category}, page={page}"
|
||||
)
|
||||
|
||||
search_used_hybrid = False
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
agents: list[dict[str, Any]] = []
|
||||
total = 0
|
||||
total_pages = 0
|
||||
|
||||
try:
|
||||
# If search_query is provided, use hybrid search (embeddings + tsvector)
|
||||
# If search_query is provided, use full-text search
|
||||
if search_query:
|
||||
# Try hybrid search combining semantic and lexical signals
|
||||
# Falls back to lexical-only if OpenAI unavailable (user-facing, high SLA)
|
||||
try:
|
||||
agents, total = await hybrid_search(
|
||||
query=search_query,
|
||||
featured=featured,
|
||||
creators=creators,
|
||||
category=category,
|
||||
sorted_by="relevance", # Use hybrid scoring for relevance
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
search_used_hybrid = True
|
||||
except Exception as e:
|
||||
# Log error but fall back to lexical search for better UX
|
||||
logger.error(
|
||||
f"Hybrid search failed (likely OpenAI unavailable), "
|
||||
f"falling back to lexical search: {e}"
|
||||
)
|
||||
# search_used_hybrid remains False, will use fallback path below
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Convert hybrid search results (dict format) if hybrid succeeded
|
||||
if search_used_hybrid:
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in agents:
|
||||
try:
|
||||
store_agent = store_model.StoreAgent(
|
||||
slug=agent["slug"],
|
||||
agent_name=agent["agent_name"],
|
||||
agent_image=(
|
||||
agent["agent_image"][0] if agent["agent_image"] else ""
|
||||
),
|
||||
creator=agent["creator_username"] or "Needs Profile",
|
||||
creator_avatar=agent["creator_avatar"] or "",
|
||||
sub_heading=agent["sub_heading"],
|
||||
description=agent["description"],
|
||||
runs=agent["runs"],
|
||||
rating=agent["rating"],
|
||||
)
|
||||
store_agents.append(store_agent)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error parsing Store agent from hybrid search results: {e}"
|
||||
)
|
||||
continue
|
||||
# Whitelist allowed order_by columns
|
||||
ALLOWED_ORDER_BY = {
|
||||
"rating": "rating DESC, rank DESC",
|
||||
"runs": "runs DESC, rank DESC",
|
||||
"name": "agent_name ASC, rank ASC",
|
||||
"updated_at": "updated_at DESC, rank DESC",
|
||||
}
|
||||
|
||||
if not search_used_hybrid:
|
||||
# Fallback path - use basic search or no search
|
||||
# Validate and get order clause
|
||||
if sorted_by and sorted_by in ALLOWED_ORDER_BY:
|
||||
order_by_clause = ALLOWED_ORDER_BY[sorted_by]
|
||||
else:
|
||||
order_by_clause = "updated_at DESC, rank DESC"
|
||||
|
||||
# Build WHERE conditions and parameters list
|
||||
where_parts: list[str] = []
|
||||
params: list[typing.Any] = [search_query] # $1 - search term
|
||||
param_index = 2 # Start at $2 for next parameter
|
||||
|
||||
# Always filter for available agents
|
||||
where_parts.append("is_available = true")
|
||||
|
||||
if featured:
|
||||
where_parts.append("featured = true")
|
||||
|
||||
if creators and creators:
|
||||
# Use ANY with array parameter
|
||||
where_parts.append(f"creator_username = ANY(${param_index})")
|
||||
params.append(creators)
|
||||
param_index += 1
|
||||
|
||||
if category and category:
|
||||
where_parts.append(f"${param_index} = ANY(categories)")
|
||||
params.append(category)
|
||||
param_index += 1
|
||||
|
||||
sql_where_clause: str = " AND ".join(where_parts) if where_parts else "1=1"
|
||||
|
||||
# Add pagination params
|
||||
params.extend([page_size, offset])
|
||||
limit_param = f"${param_index}"
|
||||
offset_param = f"${param_index + 1}"
|
||||
|
||||
# Execute full-text search query with parameterized values
|
||||
sql_query = f"""
|
||||
SELECT
|
||||
slug,
|
||||
agent_name,
|
||||
agent_image,
|
||||
creator_username,
|
||||
creator_avatar,
|
||||
sub_heading,
|
||||
description,
|
||||
runs,
|
||||
rating,
|
||||
categories,
|
||||
featured,
|
||||
is_available,
|
||||
updated_at,
|
||||
ts_rank_cd(search, query) AS rank
|
||||
FROM {{schema_prefix}}"StoreAgent",
|
||||
plainto_tsquery('english', $1) AS query
|
||||
WHERE {sql_where_clause}
|
||||
AND search @@ query
|
||||
ORDER BY {order_by_clause}
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
# Count query for pagination - only uses search term parameter
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) as count
|
||||
FROM {{schema_prefix}}"StoreAgent",
|
||||
plainto_tsquery('english', $1) AS query
|
||||
WHERE {sql_where_clause}
|
||||
AND search @@ query
|
||||
"""
|
||||
|
||||
# Execute both queries with parameters
|
||||
agents = await query_raw_with_schema(sql_query, *params)
|
||||
|
||||
# For count, use params without pagination (last 2 params)
|
||||
count_params = params[:-2]
|
||||
count_result = await query_raw_with_schema(count_query, *count_params)
|
||||
|
||||
total = count_result[0]["count"] if count_result else 0
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
# Convert raw results to StoreAgent models
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in agents:
|
||||
try:
|
||||
store_agent = store_model.StoreAgent(
|
||||
slug=agent["slug"],
|
||||
agent_name=agent["agent_name"],
|
||||
agent_image=(
|
||||
agent["agent_image"][0] if agent["agent_image"] else ""
|
||||
),
|
||||
creator=agent["creator_username"] or "Needs Profile",
|
||||
creator_avatar=agent["creator_avatar"] or "",
|
||||
sub_heading=agent["sub_heading"],
|
||||
description=agent["description"],
|
||||
runs=agent["runs"],
|
||||
rating=agent["rating"],
|
||||
)
|
||||
store_agents.append(store_agent)
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing Store agent from search results: {e}")
|
||||
continue
|
||||
|
||||
else:
|
||||
# Non-search query path (original logic)
|
||||
where_clause: prisma.types.StoreAgentWhereInput = {"is_available": True}
|
||||
if featured:
|
||||
where_clause["featured"] = featured
|
||||
@@ -130,14 +180,6 @@ async def get_store_agents(
|
||||
if category:
|
||||
where_clause["categories"] = {"has": category}
|
||||
|
||||
# Add basic text search if search_query provided but hybrid failed
|
||||
if search_query:
|
||||
where_clause["OR"] = [
|
||||
{"agent_name": {"contains": search_query, "mode": "insensitive"}},
|
||||
{"sub_heading": {"contains": search_query, "mode": "insensitive"}},
|
||||
{"description": {"contains": search_query, "mode": "insensitive"}},
|
||||
]
|
||||
|
||||
order_by = []
|
||||
if sorted_by == "rating":
|
||||
order_by.append({"rating": "desc"})
|
||||
@@ -146,7 +188,7 @@ async def get_store_agents(
|
||||
elif sorted_by == "name":
|
||||
order_by.append({"agent_name": "asc"})
|
||||
|
||||
db_agents = await prisma.models.StoreAgent.prisma().find_many(
|
||||
agents = await prisma.models.StoreAgent.prisma().find_many(
|
||||
where=where_clause,
|
||||
order=order_by,
|
||||
skip=(page - 1) * page_size,
|
||||
@@ -157,7 +199,7 @@ async def get_store_agents(
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
store_agents: list[store_model.StoreAgent] = []
|
||||
for agent in db_agents:
|
||||
for agent in agents:
|
||||
try:
|
||||
# Create the StoreAgent object safely
|
||||
store_agent = store_model.StoreAgent(
|
||||
@@ -572,7 +614,6 @@ async def get_store_submissions(
|
||||
submission_models = []
|
||||
for sub in submissions:
|
||||
submission_model = store_model.StoreSubmission(
|
||||
listing_id=sub.listing_id,
|
||||
agent_id=sub.agent_id,
|
||||
agent_version=sub.agent_version,
|
||||
name=sub.name,
|
||||
@@ -626,48 +667,35 @@ async def delete_store_submission(
|
||||
submission_id: str,
|
||||
) -> bool:
|
||||
"""
|
||||
Delete a store submission version as the submitting user.
|
||||
Delete a store listing submission as the submitting user.
|
||||
|
||||
Args:
|
||||
user_id: ID of the authenticated user
|
||||
submission_id: StoreListingVersion ID to delete
|
||||
submission_id: ID of the submission to be deleted
|
||||
|
||||
Returns:
|
||||
bool: True if successfully deleted
|
||||
bool: True if the submission was successfully deleted, False otherwise
|
||||
"""
|
||||
logger.debug(f"Deleting store submission {submission_id} for user {user_id}")
|
||||
|
||||
try:
|
||||
# Find the submission version with ownership check
|
||||
version = await prisma.models.StoreListingVersion.prisma().find_first(
|
||||
where={"id": submission_id}, include={"StoreListing": True}
|
||||
# Verify the submission belongs to this user
|
||||
submission = await prisma.models.StoreListing.prisma().find_first(
|
||||
where={"agentGraphId": submission_id, "owningUserId": user_id}
|
||||
)
|
||||
|
||||
if (
|
||||
not version
|
||||
or not version.StoreListing
|
||||
or version.StoreListing.owningUserId != user_id
|
||||
):
|
||||
raise store_exceptions.SubmissionNotFoundError("Submission not found")
|
||||
|
||||
# Prevent deletion of approved submissions
|
||||
if version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
"Cannot delete approved submissions"
|
||||
if not submission:
|
||||
logger.warning(f"Submission not found for user {user_id}: {submission_id}")
|
||||
raise store_exceptions.SubmissionNotFoundError(
|
||||
f"Submission not found for this user. User ID: {user_id}, Submission ID: {submission_id}"
|
||||
)
|
||||
|
||||
# Delete the version
|
||||
await prisma.models.StoreListingVersion.prisma().delete(
|
||||
where={"id": version.id}
|
||||
)
|
||||
# Delete the submission
|
||||
await prisma.models.StoreListing.prisma().delete(where={"id": submission.id})
|
||||
|
||||
# Clean up empty listing if this was the last version
|
||||
remaining = await prisma.models.StoreListingVersion.prisma().count(
|
||||
where={"storeListingId": version.storeListingId}
|
||||
logger.debug(
|
||||
f"Successfully deleted submission {submission_id} for user {user_id}"
|
||||
)
|
||||
if remaining == 0:
|
||||
await prisma.models.StoreListing.prisma().delete(
|
||||
where={"id": version.storeListingId}
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -731,15 +759,9 @@ async def create_store_submission(
|
||||
logger.warning(
|
||||
f"Agent not found for user {user_id}: {agent_id} v{agent_version}"
|
||||
)
|
||||
# Provide more user-friendly error message when agent_id is empty
|
||||
if not agent_id or agent_id.strip() == "":
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
"No agent selected. Please select an agent before submitting to the store."
|
||||
)
|
||||
else:
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
raise store_exceptions.AgentNotFoundError(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
|
||||
# Check if listing already exists for this agent
|
||||
existing_listing = await prisma.models.StoreListing.prisma().find_first(
|
||||
@@ -811,7 +833,6 @@ async def create_store_submission(
|
||||
logger.debug(f"Created store listing for agent {agent_id}")
|
||||
# Return submission details
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=agent_id,
|
||||
agent_version=agent_version,
|
||||
name=name,
|
||||
@@ -923,56 +944,81 @@ async def edit_store_submission(
|
||||
# Currently we are not allowing user to update the agent associated with a submission
|
||||
# If we allow it in future, then we need a check here to verify the agent belongs to this user.
|
||||
|
||||
# Only allow editing of PENDING submissions
|
||||
if current_version.submissionStatus != prisma.enums.SubmissionStatus.PENDING:
|
||||
# Check if we can edit this submission
|
||||
if current_version.submissionStatus == prisma.enums.SubmissionStatus.REJECTED:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
f"Cannot edit a {current_version.submissionStatus.value.lower()} submission. Only pending submissions can be edited."
|
||||
"Cannot edit a rejected submission"
|
||||
)
|
||||
|
||||
# For APPROVED submissions, we need to create a new version
|
||||
if current_version.submissionStatus == prisma.enums.SubmissionStatus.APPROVED:
|
||||
# Create a new version for the existing listing
|
||||
return await create_store_version(
|
||||
user_id=user_id,
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
store_listing_id=current_version.storeListingId,
|
||||
name=name,
|
||||
video_url=video_url,
|
||||
agent_output_demo_url=agent_output_demo_url,
|
||||
image_urls=image_urls,
|
||||
description=description,
|
||||
sub_heading=sub_heading,
|
||||
categories=categories,
|
||||
changes_summary=changes_summary,
|
||||
recommended_schedule_cron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
)
|
||||
|
||||
# For PENDING submissions, we can update the existing version
|
||||
# Update the existing version
|
||||
updated_version = await prisma.models.StoreListingVersion.prisma().update(
|
||||
where={"id": store_listing_version_id},
|
||||
data=prisma.types.StoreListingVersionUpdateInput(
|
||||
elif current_version.submissionStatus == prisma.enums.SubmissionStatus.PENDING:
|
||||
# Update the existing version
|
||||
updated_version = await prisma.models.StoreListingVersion.prisma().update(
|
||||
where={"id": store_listing_version_id},
|
||||
data=prisma.types.StoreListingVersionUpdateInput(
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
|
||||
)
|
||||
|
||||
if not updated_version:
|
||||
raise DatabaseError("Failed to update store listing version")
|
||||
return store_model.StoreSubmission(
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
sub_heading=sub_heading,
|
||||
slug=current_version.StoreListing.slug,
|
||||
description=description,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
instructions=instructions,
|
||||
),
|
||||
)
|
||||
image_urls=image_urls,
|
||||
date_submitted=updated_version.submittedAt or updated_version.createdAt,
|
||||
status=updated_version.submissionStatus,
|
||||
runs=0,
|
||||
rating=0.0,
|
||||
store_listing_version_id=updated_version.id,
|
||||
changes_summary=changes_summary,
|
||||
video_url=video_url,
|
||||
categories=categories,
|
||||
version=updated_version.version,
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Updated existing version {store_listing_version_id} for agent {current_version.agentGraphId}"
|
||||
)
|
||||
|
||||
if not updated_version:
|
||||
raise DatabaseError("Failed to update store listing version")
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=current_version.StoreListing.id,
|
||||
agent_id=current_version.agentGraphId,
|
||||
agent_version=current_version.agentGraphVersion,
|
||||
name=name,
|
||||
sub_heading=sub_heading,
|
||||
slug=current_version.StoreListing.slug,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
image_urls=image_urls,
|
||||
date_submitted=updated_version.submittedAt or updated_version.createdAt,
|
||||
status=updated_version.submissionStatus,
|
||||
runs=0,
|
||||
rating=0.0,
|
||||
store_listing_version_id=updated_version.id,
|
||||
changes_summary=changes_summary,
|
||||
video_url=video_url,
|
||||
categories=categories,
|
||||
version=updated_version.version,
|
||||
)
|
||||
else:
|
||||
raise store_exceptions.InvalidOperationError(
|
||||
f"Cannot edit submission with status: {current_version.submissionStatus}"
|
||||
)
|
||||
|
||||
except (
|
||||
store_exceptions.SubmissionNotFoundError,
|
||||
@@ -1051,78 +1097,38 @@ async def create_store_version(
|
||||
f"Agent not found for this user. User ID: {user_id}, Agent ID: {agent_id}, Version: {agent_version}"
|
||||
)
|
||||
|
||||
# Check if there's already a PENDING submission for this agent (any version)
|
||||
existing_pending_submission = (
|
||||
await prisma.models.StoreListingVersion.prisma().find_first(
|
||||
where=prisma.types.StoreListingVersionWhereInput(
|
||||
storeListingId=store_listing_id,
|
||||
agentGraphId=agent_id,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
isDeleted=False,
|
||||
)
|
||||
# Get the latest version number
|
||||
latest_version = listing.Versions[0] if listing.Versions else None
|
||||
|
||||
next_version = (latest_version.version + 1) if latest_version else 1
|
||||
|
||||
# Create a new version for the existing listing
|
||||
new_version = await prisma.models.StoreListingVersion.prisma().create(
|
||||
data=prisma.types.StoreListingVersionCreateInput(
|
||||
version=next_version,
|
||||
agentGraphId=agent_id,
|
||||
agentGraphVersion=agent_version,
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
submittedAt=datetime.now(),
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
storeListingId=store_listing_id,
|
||||
)
|
||||
)
|
||||
|
||||
# Handle existing pending submission and create new one atomically
|
||||
async with transaction() as tx:
|
||||
# Get the latest version number first
|
||||
latest_listing = await prisma.models.StoreListing.prisma(tx).find_first(
|
||||
where=prisma.types.StoreListingWhereInput(
|
||||
id=store_listing_id, owningUserId=user_id
|
||||
),
|
||||
include={"Versions": {"order_by": {"version": "desc"}, "take": 1}},
|
||||
)
|
||||
|
||||
if not latest_listing:
|
||||
raise store_exceptions.ListingNotFoundError(
|
||||
f"Store listing not found. User ID: {user_id}, Listing ID: {store_listing_id}"
|
||||
)
|
||||
|
||||
latest_version = (
|
||||
latest_listing.Versions[0] if latest_listing.Versions else None
|
||||
)
|
||||
next_version = (latest_version.version + 1) if latest_version else 1
|
||||
|
||||
# If there's an existing pending submission, delete it atomically before creating new one
|
||||
if existing_pending_submission:
|
||||
logger.info(
|
||||
f"Found existing PENDING submission for agent {agent_id} (was v{existing_pending_submission.agentGraphVersion}, now v{agent_version}), replacing existing submission instead of creating duplicate"
|
||||
)
|
||||
await prisma.models.StoreListingVersion.prisma(tx).delete(
|
||||
where={"id": existing_pending_submission.id}
|
||||
)
|
||||
logger.debug(
|
||||
f"Deleted existing pending submission {existing_pending_submission.id}"
|
||||
)
|
||||
|
||||
# Create a new version for the existing listing
|
||||
new_version = await prisma.models.StoreListingVersion.prisma(tx).create(
|
||||
data=prisma.types.StoreListingVersionCreateInput(
|
||||
version=next_version,
|
||||
agentGraphId=agent_id,
|
||||
agentGraphVersion=agent_version,
|
||||
name=name,
|
||||
videoUrl=video_url,
|
||||
agentOutputDemoUrl=agent_output_demo_url,
|
||||
imageUrls=image_urls,
|
||||
description=description,
|
||||
instructions=instructions,
|
||||
categories=categories,
|
||||
subHeading=sub_heading,
|
||||
submissionStatus=prisma.enums.SubmissionStatus.PENDING,
|
||||
submittedAt=datetime.now(),
|
||||
changesSummary=changes_summary,
|
||||
recommendedScheduleCron=recommended_schedule_cron,
|
||||
storeListingId=store_listing_id,
|
||||
)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Created new version for listing {store_listing_id} of agent {agent_id}"
|
||||
)
|
||||
# Return submission details
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=agent_id,
|
||||
agent_version=agent_version,
|
||||
name=name,
|
||||
@@ -1535,7 +1541,7 @@ async def review_store_submission(
|
||||
)
|
||||
|
||||
# Update the AgentGraph with store listing data
|
||||
await prisma.models.AgentGraph.prisma(tx).update(
|
||||
await prisma.models.AgentGraph.prisma().update(
|
||||
where={
|
||||
"graphVersionId": {
|
||||
"id": store_listing_version.agentGraphId,
|
||||
@@ -1550,23 +1556,6 @@ async def review_store_submission(
|
||||
},
|
||||
)
|
||||
|
||||
# Generate embedding for approved listing (blocking - admin operation)
|
||||
# Inside transaction: if embedding fails, entire transaction rolls back
|
||||
embedding_success = await ensure_embedding(
|
||||
version_id=store_listing_version_id,
|
||||
name=store_listing_version.name,
|
||||
description=store_listing_version.description,
|
||||
sub_heading=store_listing_version.subHeading,
|
||||
categories=store_listing_version.categories or [],
|
||||
tx=tx,
|
||||
)
|
||||
if not embedding_success:
|
||||
raise ValueError(
|
||||
f"Failed to generate embedding for listing {store_listing_version_id}. "
|
||||
"This is likely due to OpenAI API being unavailable. "
|
||||
"Please try again later or contact support if the issue persists."
|
||||
)
|
||||
|
||||
await prisma.models.StoreListing.prisma(tx).update(
|
||||
where={"id": store_listing_version.StoreListing.id},
|
||||
data={
|
||||
@@ -1719,12 +1708,15 @@ async def review_store_submission(
|
||||
|
||||
# Convert to Pydantic model for consistency
|
||||
return store_model.StoreSubmission(
|
||||
listing_id=(submission.StoreListing.id if submission.StoreListing else ""),
|
||||
agent_id=submission.agentGraphId,
|
||||
agent_version=submission.agentGraphVersion,
|
||||
name=submission.name,
|
||||
sub_heading=submission.subHeading,
|
||||
slug=(submission.StoreListing.slug if submission.StoreListing else ""),
|
||||
slug=(
|
||||
submission.StoreListing.slug
|
||||
if hasattr(submission, "storeListing") and submission.StoreListing
|
||||
else ""
|
||||
),
|
||||
description=submission.description,
|
||||
instructions=submission.instructions,
|
||||
image_urls=submission.imageUrls or [],
|
||||
@@ -1826,7 +1818,9 @@ async def get_admin_listings_with_versions(
|
||||
where = prisma.types.StoreListingWhereInput(**where_dict)
|
||||
include = prisma.types.StoreListingInclude(
|
||||
Versions=prisma.types.FindManyStoreListingVersionArgsFromStoreListing(
|
||||
order_by={"version": "desc"}
|
||||
order_by=prisma.types._StoreListingVersion_version_OrderByInput(
|
||||
version="desc"
|
||||
)
|
||||
),
|
||||
OwningUser=True,
|
||||
)
|
||||
@@ -1851,7 +1845,6 @@ async def get_admin_listings_with_versions(
|
||||
# If we have versions, turn them into StoreSubmission models
|
||||
for version in listing.Versions or []:
|
||||
version_model = store_model.StoreSubmission(
|
||||
listing_id=listing.id,
|
||||
agent_id=version.agentGraphId,
|
||||
agent_version=version.agentGraphVersion,
|
||||
name=version.name,
|
||||
|
||||
@@ -1,962 +0,0 @@
|
||||
"""
|
||||
Unified Content Embeddings Service
|
||||
|
||||
Handles generation and storage of OpenAI embeddings for all content types
|
||||
(store listings, blocks, documentation, library agents) to enable semantic/hybrid search.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import prisma
|
||||
from prisma.enums import ContentType
|
||||
from tiktoken import encoding_for_model
|
||||
|
||||
from backend.api.features.store.content_handlers import CONTENT_HANDLERS
|
||||
from backend.data.db import execute_raw_with_schema, query_raw_with_schema
|
||||
from backend.util.clients import get_openai_client
|
||||
from backend.util.json import dumps
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# OpenAI embedding model configuration
|
||||
EMBEDDING_MODEL = "text-embedding-3-small"
|
||||
# Embedding dimension for the model above
|
||||
# text-embedding-3-small: 1536, text-embedding-3-large: 3072
|
||||
EMBEDDING_DIM = 1536
|
||||
# OpenAI embedding token limit (8,191 with 1 token buffer for safety)
|
||||
EMBEDDING_MAX_TOKENS = 8191
|
||||
|
||||
|
||||
def build_searchable_text(
|
||||
name: str,
|
||||
description: str,
|
||||
sub_heading: str,
|
||||
categories: list[str],
|
||||
) -> str:
|
||||
"""
|
||||
Build searchable text from listing version fields.
|
||||
|
||||
Combines relevant fields into a single string for embedding.
|
||||
"""
|
||||
parts = []
|
||||
|
||||
# Name is important - include it
|
||||
if name:
|
||||
parts.append(name)
|
||||
|
||||
# Sub-heading provides context
|
||||
if sub_heading:
|
||||
parts.append(sub_heading)
|
||||
|
||||
# Description is the main content
|
||||
if description:
|
||||
parts.append(description)
|
||||
|
||||
# Categories help with semantic matching
|
||||
if categories:
|
||||
parts.append(" ".join(categories))
|
||||
|
||||
return " ".join(parts)
|
||||
|
||||
|
||||
async def generate_embedding(text: str) -> list[float] | None:
|
||||
"""
|
||||
Generate embedding for text using OpenAI API.
|
||||
|
||||
Returns None if embedding generation fails.
|
||||
Fail-fast: no retries to maintain consistency with approval flow.
|
||||
"""
|
||||
try:
|
||||
client = get_openai_client()
|
||||
if not client:
|
||||
logger.error("openai_internal_api_key not set, cannot generate embedding")
|
||||
return None
|
||||
|
||||
# Truncate text to token limit using tiktoken
|
||||
# Character-based truncation is insufficient because token ratios vary by content type
|
||||
enc = encoding_for_model(EMBEDDING_MODEL)
|
||||
tokens = enc.encode(text)
|
||||
if len(tokens) > EMBEDDING_MAX_TOKENS:
|
||||
tokens = tokens[:EMBEDDING_MAX_TOKENS]
|
||||
truncated_text = enc.decode(tokens)
|
||||
logger.info(
|
||||
f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
|
||||
)
|
||||
else:
|
||||
truncated_text = text
|
||||
|
||||
start_time = time.time()
|
||||
response = await client.embeddings.create(
|
||||
model=EMBEDDING_MODEL,
|
||||
input=truncated_text,
|
||||
)
|
||||
latency_ms = (time.time() - start_time) * 1000
|
||||
|
||||
embedding = response.data[0].embedding
|
||||
logger.info(
|
||||
f"Generated embedding: {len(embedding)} dims, "
|
||||
f"{len(tokens)} tokens, {latency_ms:.0f}ms"
|
||||
)
|
||||
return embedding
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate embedding: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def store_embedding(
|
||||
version_id: str,
|
||||
embedding: list[float],
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Store embedding in the database.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
DEPRECATED: Use ensure_embedding() instead (includes searchable_text).
|
||||
"""
|
||||
return await store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=version_id,
|
||||
embedding=embedding,
|
||||
searchable_text="", # Empty for backward compat; ensure_embedding() populates this
|
||||
metadata=None,
|
||||
user_id=None, # Store agents are public
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
|
||||
async def store_content_embedding(
|
||||
content_type: ContentType,
|
||||
content_id: str,
|
||||
embedding: list[float],
|
||||
searchable_text: str,
|
||||
metadata: dict | None = None,
|
||||
user_id: str | None = None,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Store embedding in the unified content embeddings table.
|
||||
|
||||
New function for unified content embedding storage.
|
||||
Uses raw SQL since Prisma doesn't natively support pgvector.
|
||||
"""
|
||||
try:
|
||||
client = tx if tx else prisma.get_client()
|
||||
|
||||
# Convert embedding to PostgreSQL vector format
|
||||
embedding_str = embedding_to_vector_string(embedding)
|
||||
metadata_json = dumps(metadata or {})
|
||||
|
||||
# Upsert the embedding
|
||||
# WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
|
||||
"id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
|
||||
)
|
||||
VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
|
||||
ON CONFLICT ("contentType", "contentId", "userId")
|
||||
DO UPDATE SET
|
||||
"embedding" = $4::vector,
|
||||
"searchableText" = $5,
|
||||
"metadata" = $6::jsonb,
|
||||
"updatedAt" = NOW()
|
||||
WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
|
||||
AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
|
||||
AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
embedding_str,
|
||||
searchable_text,
|
||||
metadata_json,
|
||||
client=client,
|
||||
set_public_search_path=True,
|
||||
)
|
||||
|
||||
logger.info(f"Stored embedding for {content_type}:{content_id}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def get_embedding(version_id: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Returns dict with storeListingVersionId, embedding, timestamps or None if not found.
|
||||
"""
|
||||
result = await get_content_embedding(
|
||||
ContentType.STORE_AGENT, version_id, user_id=None
|
||||
)
|
||||
if result:
|
||||
# Transform to old format for backward compatibility
|
||||
return {
|
||||
"storeListingVersionId": result["contentId"],
|
||||
"embedding": result["embedding"],
|
||||
"createdAt": result["createdAt"],
|
||||
"updatedAt": result["updatedAt"],
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
async def get_content_embedding(
|
||||
content_type: ContentType, content_id: str, user_id: str | None = None
|
||||
) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for any content type.
|
||||
|
||||
New function for unified content embedding retrieval.
|
||||
Returns dict with contentType, contentId, embedding, timestamps or None if not found.
|
||||
"""
|
||||
try:
|
||||
result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT
|
||||
"contentType",
|
||||
"contentId",
|
||||
"userId",
|
||||
"embedding"::text as "embedding",
|
||||
"searchableText",
|
||||
"metadata",
|
||||
"createdAt",
|
||||
"updatedAt"
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
set_public_search_path=True,
|
||||
)
|
||||
|
||||
if result and len(result) > 0:
|
||||
return result[0]
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def ensure_embedding(
|
||||
version_id: str,
|
||||
name: str,
|
||||
description: str,
|
||||
sub_heading: str,
|
||||
categories: list[str],
|
||||
force: bool = False,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Ensure an embedding exists for the listing version.
|
||||
|
||||
Creates embedding if missing. Use force=True to regenerate.
|
||||
Backward-compatible wrapper for store listings.
|
||||
|
||||
Args:
|
||||
version_id: The StoreListingVersion ID
|
||||
name: Agent name
|
||||
description: Agent description
|
||||
sub_heading: Agent sub-heading
|
||||
categories: Agent categories
|
||||
force: Force regeneration even if embedding exists
|
||||
tx: Optional transaction client
|
||||
|
||||
Returns:
|
||||
True if embedding exists/was created, False on failure
|
||||
"""
|
||||
try:
|
||||
# Check if embedding already exists
|
||||
if not force:
|
||||
existing = await get_embedding(version_id)
|
||||
if existing and existing.get("embedding"):
|
||||
logger.debug(f"Embedding for version {version_id} already exists")
|
||||
return True
|
||||
|
||||
# Build searchable text for embedding
|
||||
searchable_text = build_searchable_text(
|
||||
name, description, sub_heading, categories
|
||||
)
|
||||
|
||||
# Generate new embedding
|
||||
embedding = await generate_embedding(searchable_text)
|
||||
if embedding is None:
|
||||
logger.warning(f"Could not generate embedding for version {version_id}")
|
||||
return False
|
||||
|
||||
# Store the embedding with metadata using new function
|
||||
metadata = {
|
||||
"name": name,
|
||||
"subHeading": sub_heading,
|
||||
"categories": categories,
|
||||
}
|
||||
return await store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=version_id,
|
||||
embedding=embedding,
|
||||
searchable_text=searchable_text,
|
||||
metadata=metadata,
|
||||
user_id=None, # Store agents are public
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def delete_embedding(version_id: str) -> bool:
|
||||
"""
|
||||
Delete embedding for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Note: This is usually handled automatically by CASCADE delete,
|
||||
but provided for manual cleanup if needed.
|
||||
"""
|
||||
return await delete_content_embedding(ContentType.STORE_AGENT, version_id)
|
||||
|
||||
|
||||
async def delete_content_embedding(
|
||||
content_type: ContentType, content_id: str, user_id: str | None = None
|
||||
) -> bool:
|
||||
"""
|
||||
Delete embedding for any content type.
|
||||
|
||||
New function for unified content embedding deletion.
|
||||
Note: This is usually handled automatically by CASCADE delete,
|
||||
but provided for manual cleanup if needed.
|
||||
|
||||
Args:
|
||||
content_type: The type of content (STORE_AGENT, LIBRARY_AGENT, etc.)
|
||||
content_id: The unique identifier for the content
|
||||
user_id: Optional user ID. For public content (STORE_AGENT, BLOCK), pass None.
|
||||
For user-scoped content (LIBRARY_AGENT), pass the user's ID to avoid
|
||||
deleting embeddings belonging to other users.
|
||||
|
||||
Returns:
|
||||
True if deletion succeeded, False otherwise
|
||||
"""
|
||||
try:
|
||||
client = prisma.get_client()
|
||||
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
AND "contentId" = $2
|
||||
AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
client=client,
|
||||
)
|
||||
|
||||
user_str = f" (user: {user_id})" if user_id else ""
|
||||
logger.info(f"Deleted embedding for {content_type}:{content_id}{user_str}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete embedding for {content_type}:{content_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def get_embedding_stats() -> dict[str, Any]:
|
||||
"""
|
||||
Get statistics about embedding coverage for all content types.
|
||||
|
||||
Returns stats per content type and overall totals.
|
||||
"""
|
||||
try:
|
||||
stats_by_type = {}
|
||||
total_items = 0
|
||||
total_with_embeddings = 0
|
||||
total_without_embeddings = 0
|
||||
|
||||
# Aggregate stats from all handlers
|
||||
for content_type, handler in CONTENT_HANDLERS.items():
|
||||
try:
|
||||
stats = await handler.get_stats()
|
||||
stats_by_type[content_type.value] = {
|
||||
"total": stats["total"],
|
||||
"with_embeddings": stats["with_embeddings"],
|
||||
"without_embeddings": stats["without_embeddings"],
|
||||
"coverage_percent": (
|
||||
round(stats["with_embeddings"] / stats["total"] * 100, 1)
|
||||
if stats["total"] > 0
|
||||
else 0
|
||||
),
|
||||
}
|
||||
|
||||
total_items += stats["total"]
|
||||
total_with_embeddings += stats["with_embeddings"]
|
||||
total_without_embeddings += stats["without_embeddings"]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get stats for {content_type.value}: {e}")
|
||||
stats_by_type[content_type.value] = {
|
||||
"total": 0,
|
||||
"with_embeddings": 0,
|
||||
"without_embeddings": 0,
|
||||
"coverage_percent": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": stats_by_type,
|
||||
"totals": {
|
||||
"total": total_items,
|
||||
"with_embeddings": total_with_embeddings,
|
||||
"without_embeddings": total_without_embeddings,
|
||||
"coverage_percent": (
|
||||
round(total_with_embeddings / total_items * 100, 1)
|
||||
if total_items > 0
|
||||
else 0
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get embedding stats: {e}")
|
||||
return {
|
||||
"by_type": {},
|
||||
"totals": {
|
||||
"total": 0,
|
||||
"with_embeddings": 0,
|
||||
"without_embeddings": 0,
|
||||
"coverage_percent": 0,
|
||||
},
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
|
||||
async def backfill_missing_embeddings(batch_size: int = 10) -> dict[str, Any]:
|
||||
"""
|
||||
Generate embeddings for approved listings that don't have them.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing usage.
|
||||
This now delegates to backfill_all_content_types() to process all content types.
|
||||
|
||||
Args:
|
||||
batch_size: Number of embeddings to generate per content type
|
||||
|
||||
Returns:
|
||||
Dict with success/failure counts aggregated across all content types
|
||||
"""
|
||||
# Delegate to the new generic backfill system
|
||||
result = await backfill_all_content_types(batch_size)
|
||||
|
||||
# Return in the old format for backward compatibility
|
||||
return result["totals"]
|
||||
|
||||
|
||||
async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
|
||||
"""
|
||||
Generate embeddings for all content types using registered handlers.
|
||||
|
||||
Processes content types in order: BLOCK → STORE_AGENT → DOCUMENTATION.
|
||||
This ensures foundational content (blocks) are searchable first.
|
||||
|
||||
Args:
|
||||
batch_size: Number of embeddings to generate per content type
|
||||
|
||||
Returns:
|
||||
Dict with stats per content type and overall totals
|
||||
"""
|
||||
results_by_type = {}
|
||||
total_processed = 0
|
||||
total_success = 0
|
||||
total_failed = 0
|
||||
|
||||
# Process content types in explicit order
|
||||
processing_order = [
|
||||
ContentType.BLOCK,
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
for content_type in processing_order:
|
||||
handler = CONTENT_HANDLERS.get(content_type)
|
||||
if not handler:
|
||||
logger.warning(f"No handler registered for {content_type.value}")
|
||||
continue
|
||||
try:
|
||||
logger.info(f"Processing {content_type.value} content type...")
|
||||
|
||||
# Get missing items from handler
|
||||
missing_items = await handler.get_missing_items(batch_size)
|
||||
|
||||
if not missing_items:
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": 0,
|
||||
"success": 0,
|
||||
"failed": 0,
|
||||
"message": "No missing embeddings",
|
||||
}
|
||||
continue
|
||||
|
||||
# Process embeddings concurrently for better performance
|
||||
embedding_tasks = [
|
||||
ensure_content_embedding(
|
||||
content_type=item.content_type,
|
||||
content_id=item.content_id,
|
||||
searchable_text=item.searchable_text,
|
||||
metadata=item.metadata,
|
||||
user_id=item.user_id,
|
||||
)
|
||||
for item in missing_items
|
||||
]
|
||||
|
||||
results = await asyncio.gather(*embedding_tasks, return_exceptions=True)
|
||||
|
||||
success = sum(1 for result in results if result is True)
|
||||
failed = len(results) - success
|
||||
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": len(missing_items),
|
||||
"success": success,
|
||||
"failed": failed,
|
||||
"message": f"Backfilled {success} embeddings, {failed} failed",
|
||||
}
|
||||
|
||||
total_processed += len(missing_items)
|
||||
total_success += success
|
||||
total_failed += failed
|
||||
|
||||
logger.info(
|
||||
f"{content_type.value}: processed {len(missing_items)}, "
|
||||
f"success {success}, failed {failed}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process {content_type.value}: {e}")
|
||||
results_by_type[content_type.value] = {
|
||||
"processed": 0,
|
||||
"success": 0,
|
||||
"failed": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": results_by_type,
|
||||
"totals": {
|
||||
"processed": total_processed,
|
||||
"success": total_success,
|
||||
"failed": total_failed,
|
||||
"message": f"Overall: {total_success} succeeded, {total_failed} failed",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async def embed_query(query: str) -> list[float] | None:
|
||||
"""
|
||||
Generate embedding for a search query.
|
||||
|
||||
Same as generate_embedding but with clearer intent.
|
||||
"""
|
||||
return await generate_embedding(query)
|
||||
|
||||
|
||||
def embedding_to_vector_string(embedding: list[float]) -> str:
|
||||
"""Convert embedding list to PostgreSQL vector string format."""
|
||||
return "[" + ",".join(str(x) for x in embedding) + "]"
|
||||
|
||||
|
||||
async def ensure_content_embedding(
|
||||
content_type: ContentType,
|
||||
content_id: str,
|
||||
searchable_text: str,
|
||||
metadata: dict | None = None,
|
||||
user_id: str | None = None,
|
||||
force: bool = False,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Ensure an embedding exists for any content type.
|
||||
|
||||
Generic function for creating embeddings for store agents, blocks, docs, etc.
|
||||
|
||||
Args:
|
||||
content_type: ContentType enum value (STORE_AGENT, BLOCK, etc.)
|
||||
content_id: Unique identifier for the content
|
||||
searchable_text: Combined text for embedding generation
|
||||
metadata: Optional metadata to store with embedding
|
||||
force: Force regeneration even if embedding exists
|
||||
tx: Optional transaction client
|
||||
|
||||
Returns:
|
||||
True if embedding exists/was created, False on failure
|
||||
"""
|
||||
try:
|
||||
# Check if embedding already exists
|
||||
if not force:
|
||||
existing = await get_content_embedding(content_type, content_id, user_id)
|
||||
if existing and existing.get("embedding"):
|
||||
logger.debug(
|
||||
f"Embedding for {content_type}:{content_id} already exists"
|
||||
)
|
||||
return True
|
||||
|
||||
# Generate new embedding
|
||||
embedding = await generate_embedding(searchable_text)
|
||||
if embedding is None:
|
||||
logger.warning(
|
||||
f"Could not generate embedding for {content_type}:{content_id}"
|
||||
)
|
||||
return False
|
||||
|
||||
# Store the embedding
|
||||
return await store_content_embedding(
|
||||
content_type=content_type,
|
||||
content_id=content_id,
|
||||
embedding=embedding,
|
||||
searchable_text=searchable_text,
|
||||
metadata=metadata or {},
|
||||
user_id=user_id,
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def cleanup_orphaned_embeddings() -> dict[str, Any]:
|
||||
"""
|
||||
Clean up embeddings for content that no longer exists or is no longer valid.
|
||||
|
||||
Compares current content with embeddings in database and removes orphaned records:
|
||||
- STORE_AGENT: Removes embeddings for rejected/deleted store listings
|
||||
- BLOCK: Removes embeddings for blocks no longer registered
|
||||
- DOCUMENTATION: Removes embeddings for deleted doc files
|
||||
|
||||
Returns:
|
||||
Dict with cleanup statistics per content type
|
||||
"""
|
||||
results_by_type = {}
|
||||
total_deleted = 0
|
||||
|
||||
# Cleanup orphaned embeddings for all content types
|
||||
cleanup_types = [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
for content_type in cleanup_types:
|
||||
try:
|
||||
handler = CONTENT_HANDLERS.get(content_type)
|
||||
if not handler:
|
||||
logger.warning(f"No handler registered for {content_type}")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": "No handler registered",
|
||||
}
|
||||
continue
|
||||
|
||||
# Get all current content IDs from handler
|
||||
if content_type == ContentType.STORE_AGENT:
|
||||
# Get IDs of approved store listing versions from non-deleted listings
|
||||
valid_agents = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT slv.id
|
||||
FROM {schema_prefix}"StoreListingVersion" slv
|
||||
JOIN {schema_prefix}"StoreListing" sl ON slv."storeListingId" = sl.id
|
||||
WHERE slv."submissionStatus" = 'APPROVED'
|
||||
AND slv."isDeleted" = false
|
||||
AND sl."isDeleted" = false
|
||||
""",
|
||||
)
|
||||
current_ids = {row["id"] for row in valid_agents}
|
||||
elif content_type == ContentType.BLOCK:
|
||||
from backend.data.block import get_blocks
|
||||
|
||||
current_ids = set(get_blocks().keys())
|
||||
elif content_type == ContentType.DOCUMENTATION:
|
||||
from pathlib import Path
|
||||
|
||||
# embeddings.py is at: backend/backend/api/features/store/embeddings.py
|
||||
# Need to go up to project root then into docs/
|
||||
this_file = Path(__file__)
|
||||
project_root = (
|
||||
this_file.parent.parent.parent.parent.parent.parent.parent
|
||||
)
|
||||
docs_root = project_root / "docs"
|
||||
if docs_root.exists():
|
||||
all_docs = list(docs_root.rglob("*.md")) + list(
|
||||
docs_root.rglob("*.mdx")
|
||||
)
|
||||
current_ids = {str(doc.relative_to(docs_root)) for doc in all_docs}
|
||||
else:
|
||||
current_ids = set()
|
||||
else:
|
||||
# Skip unknown content types to avoid accidental deletion
|
||||
logger.warning(
|
||||
f"Skipping cleanup for unknown content type: {content_type}"
|
||||
)
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": "Unknown content type - skipped for safety",
|
||||
}
|
||||
continue
|
||||
|
||||
# Get all embedding IDs from database
|
||||
db_embeddings = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT "contentId"
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
""",
|
||||
content_type,
|
||||
)
|
||||
|
||||
db_ids = {row["contentId"] for row in db_embeddings}
|
||||
|
||||
# Find orphaned embeddings (in DB but not in current content)
|
||||
orphaned_ids = db_ids - current_ids
|
||||
|
||||
if not orphaned_ids:
|
||||
logger.info(f"{content_type.value}: No orphaned embeddings found")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"message": "No orphaned embeddings",
|
||||
}
|
||||
continue
|
||||
|
||||
# Delete orphaned embeddings in batch for better performance
|
||||
orphaned_list = list(orphaned_ids)
|
||||
try:
|
||||
await execute_raw_with_schema(
|
||||
"""
|
||||
DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType"
|
||||
AND "contentId" = ANY($2::text[])
|
||||
""",
|
||||
content_type,
|
||||
orphaned_list,
|
||||
)
|
||||
deleted = len(orphaned_list)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to batch delete orphaned embeddings: {e}")
|
||||
deleted = 0
|
||||
|
||||
logger.info(
|
||||
f"{content_type.value}: Deleted {deleted}/{len(orphaned_ids)} orphaned embeddings"
|
||||
)
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": deleted,
|
||||
"orphaned": len(orphaned_ids),
|
||||
"message": f"Deleted {deleted} orphaned embeddings",
|
||||
}
|
||||
|
||||
total_deleted += deleted
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to cleanup {content_type.value}: {e}")
|
||||
results_by_type[content_type.value] = {
|
||||
"deleted": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
return {
|
||||
"by_type": results_by_type,
|
||||
"totals": {
|
||||
"deleted": total_deleted,
|
||||
"message": f"Deleted {total_deleted} orphaned embeddings",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async def semantic_search(
|
||||
query: str,
|
||||
content_types: list[ContentType] | None = None,
|
||||
user_id: str | None = None,
|
||||
limit: int = 20,
|
||||
min_similarity: float = 0.5,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Semantic search across content types using embeddings.
|
||||
|
||||
Performs vector similarity search on UnifiedContentEmbedding table.
|
||||
Used directly for blocks/docs/library agents, or as the semantic component
|
||||
within hybrid_search for store agents.
|
||||
|
||||
If embedding generation fails, falls back to lexical search on searchableText.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
content_types: List of ContentType to search. Defaults to [BLOCK, STORE_AGENT, DOCUMENTATION]
|
||||
user_id: Optional user ID for searching private content (library agents)
|
||||
limit: Maximum number of results to return (default: 20)
|
||||
min_similarity: Minimum cosine similarity threshold (0-1, default: 0.5)
|
||||
|
||||
Returns:
|
||||
List of search results with the following structure:
|
||||
[
|
||||
{
|
||||
"content_id": str,
|
||||
"content_type": str, # "BLOCK", "STORE_AGENT", "DOCUMENTATION", or "LIBRARY_AGENT"
|
||||
"searchable_text": str,
|
||||
"metadata": dict,
|
||||
"similarity": float, # Cosine similarity score (0-1)
|
||||
},
|
||||
...
|
||||
]
|
||||
|
||||
Examples:
|
||||
# Search blocks only
|
||||
results = await semantic_search("calculate", content_types=[ContentType.BLOCK])
|
||||
|
||||
# Search blocks and documentation
|
||||
results = await semantic_search(
|
||||
"how to use API",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION]
|
||||
)
|
||||
|
||||
# Search all public content (default)
|
||||
results = await semantic_search("AI agent")
|
||||
|
||||
# Search user's library agents
|
||||
results = await semantic_search(
|
||||
"my custom agent",
|
||||
content_types=[ContentType.LIBRARY_AGENT],
|
||||
user_id="user123"
|
||||
)
|
||||
"""
|
||||
# Default to searching all public content types
|
||||
if content_types is None:
|
||||
content_types = [
|
||||
ContentType.BLOCK,
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
# Validate inputs
|
||||
if not content_types:
|
||||
return [] # Empty content_types would cause invalid SQL (IN ())
|
||||
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return []
|
||||
|
||||
if limit < 1:
|
||||
limit = 1
|
||||
if limit > 100:
|
||||
limit = 100
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
if query_embedding is not None:
|
||||
# Semantic search with embeddings
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
|
||||
# Build params in order: limit, then user_id (if provided), then content types
|
||||
params: list[Any] = [limit]
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
user_filter = 'AND "userId" = ${}'.format(len(params) + 1)
|
||||
params.append(user_id)
|
||||
|
||||
# Add content type parameters and build placeholders dynamically
|
||||
content_type_start_idx = len(params) + 1
|
||||
content_type_placeholders = ", ".join(
|
||||
f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
|
||||
for i in range(len(content_types))
|
||||
)
|
||||
params.extend([ct.value for ct in content_types])
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
"contentId" as content_id,
|
||||
"contentType" as content_type,
|
||||
"searchableText" as searchable_text,
|
||||
metadata,
|
||||
1 - (embedding <=> '{embedding_str}'::vector) as similarity
|
||||
FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" IN ({content_type_placeholders})
|
||||
{user_filter}
|
||||
AND 1 - (embedding <=> '{embedding_str}'::vector) >= ${len(params) + 1}
|
||||
ORDER BY similarity DESC
|
||||
LIMIT $1
|
||||
"""
|
||||
params.append(min_similarity)
|
||||
|
||||
try:
|
||||
results = await query_raw_with_schema(
|
||||
sql, *params, set_public_search_path=True
|
||||
)
|
||||
return [
|
||||
{
|
||||
"content_id": row["content_id"],
|
||||
"content_type": row["content_type"],
|
||||
"searchable_text": row["searchable_text"],
|
||||
"metadata": row["metadata"],
|
||||
"similarity": float(row["similarity"]),
|
||||
}
|
||||
for row in results
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Semantic search failed: {e}")
|
||||
# Fall through to lexical search below
|
||||
|
||||
# Fallback to lexical search if embeddings unavailable
|
||||
logger.warning("Falling back to lexical search (embeddings unavailable)")
|
||||
|
||||
params_lexical: list[Any] = [limit]
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
user_filter = 'AND "userId" = ${}'.format(len(params_lexical) + 1)
|
||||
params_lexical.append(user_id)
|
||||
|
||||
# Add content type parameters and build placeholders dynamically
|
||||
content_type_start_idx = len(params_lexical) + 1
|
||||
content_type_placeholders_lexical = ", ".join(
|
||||
f'${content_type_start_idx + i}::{{{{schema_prefix}}}}"ContentType"'
|
||||
for i in range(len(content_types))
|
||||
)
|
||||
params_lexical.extend([ct.value for ct in content_types])
|
||||
|
||||
sql_lexical = f"""
|
||||
SELECT
|
||||
"contentId" as content_id,
|
||||
"contentType" as content_type,
|
||||
"searchableText" as searchable_text,
|
||||
metadata,
|
||||
0.0 as similarity
|
||||
FROM {{{{schema_prefix}}}}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" IN ({content_type_placeholders_lexical})
|
||||
{user_filter}
|
||||
AND "searchableText" ILIKE ${len(params_lexical) + 1}
|
||||
ORDER BY "updatedAt" DESC
|
||||
LIMIT $1
|
||||
"""
|
||||
params_lexical.append(f"%{query}%")
|
||||
|
||||
try:
|
||||
results = await query_raw_with_schema(
|
||||
sql_lexical, *params_lexical, set_public_search_path=True
|
||||
)
|
||||
return [
|
||||
{
|
||||
"content_id": row["content_id"],
|
||||
"content_type": row["content_type"],
|
||||
"searchable_text": row["searchable_text"],
|
||||
"metadata": row["metadata"],
|
||||
"similarity": 0.0, # Lexical search doesn't provide similarity
|
||||
}
|
||||
for row in results
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Lexical search failed: {e}")
|
||||
return []
|
||||
@@ -1,666 +0,0 @@
|
||||
"""
|
||||
End-to-end database tests for embeddings and hybrid search.
|
||||
|
||||
These tests hit the actual database to verify SQL queries work correctly.
|
||||
Tests cover:
|
||||
1. Embedding storage (store_content_embedding)
|
||||
2. Embedding retrieval (get_content_embedding)
|
||||
3. Embedding deletion (delete_content_embedding)
|
||||
4. Unified hybrid search across content types
|
||||
5. Store agent hybrid search
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import AsyncGenerator
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
from backend.api.features.store.embeddings import EMBEDDING_DIM
|
||||
from backend.api.features.store.hybrid_search import (
|
||||
hybrid_search,
|
||||
unified_hybrid_search,
|
||||
)
|
||||
|
||||
# ============================================================================
|
||||
# Test Fixtures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_content_id() -> str:
|
||||
"""Generate unique content ID for test isolation."""
|
||||
return f"test-content-{uuid.uuid4()}"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_user_id() -> str:
|
||||
"""Generate unique user ID for test isolation."""
|
||||
return f"test-user-{uuid.uuid4()}"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_embedding() -> list[float]:
|
||||
"""Generate a mock embedding vector."""
|
||||
# Create a normalized embedding vector
|
||||
import math
|
||||
|
||||
raw = [float(i % 10) / 10.0 for i in range(EMBEDDING_DIM)]
|
||||
# Normalize to unit length (required for cosine similarity)
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def similar_embedding() -> list[float]:
|
||||
"""Generate an embedding similar to mock_embedding."""
|
||||
import math
|
||||
|
||||
# Similar but slightly different values
|
||||
raw = [float(i % 10) / 10.0 + 0.01 for i in range(EMBEDDING_DIM)]
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def different_embedding() -> list[float]:
|
||||
"""Generate an embedding very different from mock_embedding."""
|
||||
import math
|
||||
|
||||
# Reversed pattern to be maximally different
|
||||
raw = [float((EMBEDDING_DIM - i) % 10) / 10.0 for i in range(EMBEDDING_DIM)]
|
||||
magnitude = math.sqrt(sum(x * x for x in raw))
|
||||
return [x / magnitude for x in raw]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def cleanup_embeddings(
|
||||
server,
|
||||
) -> AsyncGenerator[list[tuple[ContentType, str, str | None]], None]:
|
||||
"""
|
||||
Fixture that tracks created embeddings and cleans them up after tests.
|
||||
|
||||
Yields a list to which tests can append (content_type, content_id, user_id) tuples.
|
||||
"""
|
||||
created_embeddings: list[tuple[ContentType, str, str | None]] = []
|
||||
yield created_embeddings
|
||||
|
||||
# Cleanup all created embeddings
|
||||
for content_type, content_id, user_id in created_embeddings:
|
||||
try:
|
||||
await embeddings.delete_content_embedding(content_type, content_id, user_id)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# store_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_store_agent(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for STORE_AGENT content type."""
|
||||
# Track for cleanup
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="AI assistant for productivity tasks",
|
||||
metadata={"name": "Test Agent", "categories": ["productivity"]},
|
||||
user_id=None, # Store agents are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
# Verify it was stored
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentId"] == test_content_id
|
||||
assert stored["contentType"] == "STORE_AGENT"
|
||||
assert stored["searchableText"] == "AI assistant for productivity tasks"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_block(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for BLOCK content type."""
|
||||
cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="HTTP request block for API calls",
|
||||
metadata={"name": "HTTP Request Block"},
|
||||
user_id=None, # Blocks are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentType"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_documentation(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test storing embedding for DOCUMENTATION content type."""
|
||||
cleanup_embeddings.append((ContentType.DOCUMENTATION, test_content_id, None))
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Getting started guide for AutoGPT platform",
|
||||
metadata={"title": "Getting Started", "url": "/docs/getting-started"},
|
||||
user_id=None, # Docs are public
|
||||
)
|
||||
|
||||
assert result is True
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.DOCUMENTATION, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["contentType"] == "DOCUMENTATION"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_content_embedding_upsert(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test that storing embedding twice updates instead of duplicates."""
|
||||
cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))
|
||||
|
||||
# Store first time
|
||||
result1 = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Original text",
|
||||
metadata={"version": 1},
|
||||
user_id=None,
|
||||
)
|
||||
assert result1 is True
|
||||
|
||||
# Store again with different text (upsert)
|
||||
result2 = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Updated text",
|
||||
metadata={"version": 2},
|
||||
user_id=None,
|
||||
)
|
||||
assert result2 is True
|
||||
|
||||
# Verify only one record with updated text
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
assert stored["searchableText"] == "Updated text"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# get_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_content_embedding_not_found(server):
|
||||
"""Test retrieving non-existent embedding returns None."""
|
||||
result = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, "non-existent-id", user_id=None
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_content_embedding_with_metadata(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test that metadata is correctly stored and retrieved."""
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))
|
||||
|
||||
metadata = {
|
||||
"name": "Test Agent",
|
||||
"subHeading": "A test agent",
|
||||
"categories": ["ai", "productivity"],
|
||||
"customField": 123,
|
||||
}
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="test",
|
||||
metadata=metadata,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT, test_content_id, user_id=None
|
||||
)
|
||||
|
||||
assert stored is not None
|
||||
assert stored["metadata"]["name"] == "Test Agent"
|
||||
assert stored["metadata"]["categories"] == ["ai", "productivity"]
|
||||
assert stored["metadata"]["customField"] == 123
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# delete_content_embedding Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_delete_content_embedding(
|
||||
server,
|
||||
test_content_id: str,
|
||||
mock_embedding: list[float],
|
||||
):
|
||||
"""Test deleting embedding removes it from database."""
|
||||
# Store embedding
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=test_content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="To be deleted",
|
||||
metadata=None,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify it exists
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is not None
|
||||
|
||||
# Delete it
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert result is True
|
||||
|
||||
# Verify it's gone
|
||||
stored = await embeddings.get_content_embedding(
|
||||
ContentType.BLOCK, test_content_id, user_id=None
|
||||
)
|
||||
assert stored is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_delete_content_embedding_not_found(server):
|
||||
"""Test deleting non-existent embedding doesn't error."""
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.BLOCK, "non-existent-id", user_id=None
|
||||
)
|
||||
# Should succeed even if nothing to delete
|
||||
assert result is True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# unified_hybrid_search Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_finds_matching_content(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search finds content matching the query."""
|
||||
# Create unique content IDs
|
||||
agent_id = f"test-agent-{uuid.uuid4()}"
|
||||
block_id = f"test-block-{uuid.uuid4()}"
|
||||
doc_id = f"test-doc-{uuid.uuid4()}"
|
||||
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
|
||||
cleanup_embeddings.append((ContentType.BLOCK, block_id, None))
|
||||
cleanup_embeddings.append((ContentType.DOCUMENTATION, doc_id, None))
|
||||
|
||||
# Store embeddings for different content types
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=agent_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="AI writing assistant for blog posts",
|
||||
metadata={"name": "Writing Assistant"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=block_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="Text generation block for creative writing",
|
||||
metadata={"name": "Text Generator"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.DOCUMENTATION,
|
||||
content_id=doc_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="How to use writing blocks in AutoGPT",
|
||||
metadata={"title": "Writing Guide"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search for "writing" - should find all three
|
||||
results, total = await unified_hybrid_search(
|
||||
query="writing",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should find at least our test content (may find others too)
|
||||
content_ids = [r["content_id"] for r in results]
|
||||
assert agent_id in content_ids or total >= 1 # Lexical search should find it
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_filter_by_content_type(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search can filter by content type."""
|
||||
agent_id = f"test-agent-{uuid.uuid4()}"
|
||||
block_id = f"test-block-{uuid.uuid4()}"
|
||||
|
||||
cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
|
||||
cleanup_embeddings.append((ContentType.BLOCK, block_id, None))
|
||||
|
||||
# Store both types with same searchable text
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=agent_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="unique_search_term_xyz123",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=block_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="unique_search_term_xyz123",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search only for BLOCK type
|
||||
results, total = await unified_hybrid_search(
|
||||
query="unique_search_term_xyz123",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# All results should be BLOCK type
|
||||
for r in results:
|
||||
assert r["content_type"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_empty_query(server):
|
||||
"""Test unified search with empty query returns empty results."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_pagination(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search pagination works correctly."""
|
||||
# Create multiple items
|
||||
content_ids = []
|
||||
for i in range(5):
|
||||
content_id = f"test-pagination-{uuid.uuid4()}"
|
||||
content_ids.append(content_id)
|
||||
cleanup_embeddings.append((ContentType.BLOCK, content_id, None))
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text=f"pagination test item number {i}",
|
||||
metadata={"index": i},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Get first page
|
||||
page1_results, total1 = await unified_hybrid_search(
|
||||
query="pagination test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=2,
|
||||
)
|
||||
|
||||
# Get second page
|
||||
page2_results, total2 = await unified_hybrid_search(
|
||||
query="pagination test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=2,
|
||||
page_size=2,
|
||||
)
|
||||
|
||||
# Total should be consistent
|
||||
assert total1 == total2
|
||||
|
||||
# Pages should have different content (if we have enough results)
|
||||
if len(page1_results) > 0 and len(page2_results) > 0:
|
||||
page1_ids = {r["content_id"] for r in page1_results}
|
||||
page2_ids = {r["content_id"] for r in page2_results}
|
||||
# No overlap between pages
|
||||
assert page1_ids.isdisjoint(page2_ids)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_unified_hybrid_search_min_score_filtering(
|
||||
server,
|
||||
mock_embedding: list[float],
|
||||
cleanup_embeddings: list,
|
||||
):
|
||||
"""Test unified search respects min_score threshold."""
|
||||
content_id = f"test-minscore-{uuid.uuid4()}"
|
||||
cleanup_embeddings.append((ContentType.BLOCK, content_id, None))
|
||||
|
||||
await embeddings.store_content_embedding(
|
||||
content_type=ContentType.BLOCK,
|
||||
content_id=content_id,
|
||||
embedding=mock_embedding,
|
||||
searchable_text="completely unrelated content about bananas",
|
||||
metadata={},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Search with very high min_score - should filter out low relevance
|
||||
results_high, _ = await unified_hybrid_search(
|
||||
query="quantum computing algorithms",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_score=0.9, # Very high threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Search with low min_score
|
||||
results_low, _ = await unified_hybrid_search(
|
||||
query="quantum computing algorithms",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_score=0.01, # Very low threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# High threshold should have fewer or equal results
|
||||
assert len(results_high) <= len(results_low)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# hybrid_search (Store Agents) Tests
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_store_agents_sql_valid(server):
|
||||
"""Test that hybrid_search SQL executes without errors."""
|
||||
# This test verifies the SQL is syntactically correct
|
||||
# even if no results are found
|
||||
results, total = await hybrid_search(
|
||||
query="test agent",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise - verifies SQL is valid
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
assert total >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_with_filters(server):
|
||||
"""Test hybrid_search with various filter options."""
|
||||
# Test with all filter types
|
||||
results, total = await hybrid_search(
|
||||
query="productivity",
|
||||
featured=True,
|
||||
creators=["test-creator"],
|
||||
category="productivity",
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Should not raise - verifies filter SQL is valid
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_hybrid_search_pagination(server):
|
||||
"""Test hybrid_search pagination."""
|
||||
# Page 1
|
||||
results1, total1 = await hybrid_search(
|
||||
query="agent",
|
||||
page=1,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
# Page 2
|
||||
results2, total2 = await hybrid_search(
|
||||
query="agent",
|
||||
page=2,
|
||||
page_size=5,
|
||||
)
|
||||
|
||||
# Verify SQL executes without error
|
||||
assert isinstance(results1, list)
|
||||
assert isinstance(results2, list)
|
||||
assert isinstance(total1, int)
|
||||
assert isinstance(total2, int)
|
||||
|
||||
# If page 1 has results, total should be > 0
|
||||
# Note: total from page 2 may be 0 if no results on that page (COUNT(*) OVER limitation)
|
||||
if results1:
|
||||
assert total1 > 0
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SQL Validity Tests (verify queries don't break)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_all_content_types_searchable(server):
|
||||
"""Test that all content types can be searched without SQL errors."""
|
||||
for content_type in [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]:
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[content_type],
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_multiple_content_types_searchable(server):
|
||||
"""Test searching multiple content types at once."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_search_all_content_types_default(server):
|
||||
"""Test searching all content types (default behavior)."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=None, # Should search all
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
assert isinstance(results, list)
|
||||
assert isinstance(total, int)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
@@ -1,315 +0,0 @@
|
||||
"""
|
||||
Integration tests for embeddings with schema handling.
|
||||
|
||||
These tests verify that embeddings operations work correctly across different database schemas.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
from backend.api.features.store.embeddings import EMBEDDING_DIM
|
||||
|
||||
# Schema prefix tests removed - functionality moved to db.raw_with_schema() helper
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_store_content_embedding_with_schema():
|
||||
"""Test storing embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
searchable_text="test text",
|
||||
metadata={"test": "data"},
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.execute_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.execute_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_get_content_embedding_with_schema():
|
||||
"""Test retrieving embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_client.query_raw.return_value = [
|
||||
{
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-id",
|
||||
"userId": None,
|
||||
"embedding": "[0.1, 0.2]",
|
||||
"searchableText": "test",
|
||||
"metadata": {},
|
||||
"createdAt": "2024-01-01",
|
||||
"updatedAt": "2024-01-01",
|
||||
}
|
||||
]
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.get_content_embedding(
|
||||
ContentType.STORE_AGENT,
|
||||
"test-id",
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.query_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.query_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is not None
|
||||
assert result["contentId"] == "test-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_delete_content_embedding_with_schema():
|
||||
"""Test deleting embeddings with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.delete_content_embedding(
|
||||
ContentType.STORE_AGENT,
|
||||
"test-id",
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_client.execute_raw.called
|
||||
|
||||
# Get the SQL query that was executed
|
||||
call_args = mock_client.execute_raw.call_args
|
||||
sql_query = call_args[0][0]
|
||||
|
||||
# Verify schema prefix is in the query
|
||||
assert '"platform"."UnifiedContentEmbedding"' in sql_query
|
||||
|
||||
# Verify result
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_get_embedding_stats_with_schema():
|
||||
"""Test embedding statistics with proper schema handling via content handlers."""
|
||||
# Mock handler to return stats
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_stats = AsyncMock(
|
||||
return_value={
|
||||
"total": 100,
|
||||
"with_embeddings": 80,
|
||||
"without_embeddings": 20,
|
||||
}
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.get_embedding_stats()
|
||||
|
||||
# Verify handler was called
|
||||
mock_handler.get_stats.assert_called_once()
|
||||
|
||||
# Verify new result structure
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
assert result["totals"]["total"] == 100
|
||||
assert result["totals"]["with_embeddings"] == 80
|
||||
assert result["totals"]["without_embeddings"] == 20
|
||||
assert result["totals"]["coverage_percent"] == 80.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backfill_missing_embeddings_with_schema():
|
||||
"""Test backfilling embeddings via content handlers."""
|
||||
from backend.api.features.store.content_handlers import ContentItem
|
||||
|
||||
# Create mock content item
|
||||
mock_item = ContentItem(
|
||||
content_id="version-1",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Test Agent Test description",
|
||||
metadata={"name": "Test Agent"},
|
||||
)
|
||||
|
||||
# Mock handler
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=[mock_item])
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding",
|
||||
return_value=[0.1] * EMBEDDING_DIM,
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding",
|
||||
return_value=True,
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=10)
|
||||
|
||||
# Verify handler was called
|
||||
mock_handler.get_missing_items.assert_called_once_with(10)
|
||||
|
||||
# Verify results
|
||||
assert result["processed"] == 1
|
||||
assert result["success"] == 1
|
||||
assert result["failed"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_ensure_content_embedding_with_schema():
|
||||
"""Test ensuring embeddings exist with proper schema handling."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_content_embedding"
|
||||
) as mock_get:
|
||||
# Simulate no existing embedding
|
||||
mock_get.return_value = None
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding"
|
||||
) as mock_generate:
|
||||
mock_generate.return_value = [0.1] * EMBEDDING_DIM
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding"
|
||||
) as mock_store:
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.ensure_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
searchable_text="test text",
|
||||
metadata={"test": "data"},
|
||||
user_id=None,
|
||||
force=False,
|
||||
)
|
||||
|
||||
# Verify the flow
|
||||
assert mock_get.called
|
||||
assert mock_generate.called
|
||||
assert mock_store.called
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backward_compatibility_store_embedding():
|
||||
"""Test backward compatibility wrapper for store_embedding."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.store_content_embedding"
|
||||
) as mock_store:
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
tx=None,
|
||||
)
|
||||
|
||||
# Verify it calls the new function with correct parameters
|
||||
assert mock_store.called
|
||||
call_args = mock_store.call_args
|
||||
|
||||
assert call_args[1]["content_type"] == ContentType.STORE_AGENT
|
||||
assert call_args[1]["content_id"] == "test-version-id"
|
||||
assert call_args[1]["user_id"] is None
|
||||
assert result is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_backward_compatibility_get_embedding():
|
||||
"""Test backward compatibility wrapper for get_embedding."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_content_embedding"
|
||||
) as mock_get:
|
||||
mock_get.return_value = {
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-version-id",
|
||||
"embedding": "[0.1, 0.2]",
|
||||
"createdAt": "2024-01-01",
|
||||
"updatedAt": "2024-01-01",
|
||||
}
|
||||
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
# Verify it calls the new function
|
||||
assert mock_get.called
|
||||
|
||||
# Verify it transforms to old format
|
||||
assert result is not None
|
||||
assert result["storeListingVersionId"] == "test-version-id"
|
||||
assert "embedding" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_schema_handling_error_cases():
|
||||
"""Test error handling in schema-aware operations."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch("prisma.get_client") as mock_get_client:
|
||||
mock_client = AsyncMock()
|
||||
mock_client.execute_raw.side_effect = Exception("Database error")
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1] * EMBEDDING_DIM,
|
||||
searchable_text="test",
|
||||
metadata=None,
|
||||
user_id=None,
|
||||
)
|
||||
|
||||
# Should return False on error, not raise
|
||||
assert result is False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
@@ -1,407 +0,0 @@
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import prisma
|
||||
import pytest
|
||||
from prisma import Prisma
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def setup_prisma():
|
||||
"""Setup Prisma client for tests."""
|
||||
try:
|
||||
Prisma()
|
||||
except prisma.errors.ClientAlreadyRegisteredError:
|
||||
pass
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text():
|
||||
"""Test searchable text building from listing fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="AI Assistant",
|
||||
description="A helpful AI assistant for productivity",
|
||||
sub_heading="Boost your productivity",
|
||||
categories=["AI", "Productivity"],
|
||||
)
|
||||
|
||||
expected = "AI Assistant Boost your productivity A helpful AI assistant for productivity AI Productivity"
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text_empty_fields():
|
||||
"""Test searchable text building with empty fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="", description="Test description", sub_heading="", categories=[]
|
||||
)
|
||||
|
||||
assert result == "Test description"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_success():
|
||||
"""Test successful embedding generation."""
|
||||
# Mock OpenAI response
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.data = [MagicMock()]
|
||||
mock_response.data[0].embedding = [0.1, 0.2, 0.3] * 512 # 1536 dimensions
|
||||
|
||||
# Use AsyncMock for async embeddings.create method
|
||||
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is not None
|
||||
assert len(result) == embeddings.EMBEDDING_DIM
|
||||
assert result[0] == 0.1
|
||||
|
||||
mock_client.embeddings.create.assert_called_once_with(
|
||||
model="text-embedding-3-small", input="test text"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_no_api_key():
|
||||
"""Test embedding generation without API key."""
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = None
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_api_error():
|
||||
"""Test embedding generation with API error."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.embeddings.create = AsyncMock(side_effect=Exception("API Error"))
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_text_truncation():
|
||||
"""Test that long text is properly truncated using tiktoken."""
|
||||
from tiktoken import encoding_for_model
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.data = [MagicMock()]
|
||||
mock_response.data[0].embedding = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Use AsyncMock for async embeddings.create method
|
||||
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
# Create text that will exceed 8191 tokens
|
||||
# Use varied characters to ensure token-heavy text: each word is ~1 token
|
||||
words = [f"word{i}" for i in range(10000)]
|
||||
long_text = " ".join(words) # ~10000 tokens
|
||||
|
||||
await embeddings.generate_embedding(long_text)
|
||||
|
||||
# Verify text was truncated to 8191 tokens
|
||||
call_args = mock_client.embeddings.create.call_args
|
||||
truncated_text = call_args.kwargs["input"]
|
||||
|
||||
# Count actual tokens in truncated text
|
||||
enc = encoding_for_model("text-embedding-3-small")
|
||||
actual_tokens = len(enc.encode(truncated_text))
|
||||
|
||||
# Should be at or just under 8191 tokens
|
||||
assert actual_tokens <= 8191
|
||||
# Should be close to the limit (not over-truncated)
|
||||
assert actual_tokens >= 8100
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_embedding_success(mocker):
|
||||
"""Test successful embedding storage."""
|
||||
mock_client = mocker.AsyncMock()
|
||||
mock_client.execute_raw = mocker.AsyncMock()
|
||||
|
||||
embedding = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id", embedding=embedding, tx=mock_client
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# execute_raw is called twice: once for SET search_path, once for INSERT
|
||||
assert mock_client.execute_raw.call_count == 2
|
||||
|
||||
# First call: SET search_path
|
||||
first_call_args = mock_client.execute_raw.call_args_list[0][0]
|
||||
assert "SET search_path" in first_call_args[0]
|
||||
|
||||
# Second call: INSERT query with the actual data
|
||||
second_call_args = mock_client.execute_raw.call_args_list[1][0]
|
||||
assert "test-version-id" in second_call_args
|
||||
assert "[0.1,0.2,0.3]" in second_call_args
|
||||
assert None in second_call_args # userId should be None for store agents
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_store_embedding_database_error(mocker):
|
||||
"""Test embedding storage with database error."""
|
||||
mock_client = mocker.AsyncMock()
|
||||
mock_client.execute_raw.side_effect = Exception("Database error")
|
||||
|
||||
embedding = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.store_embedding(
|
||||
version_id="test-version-id", embedding=embedding, tx=mock_client
|
||||
)
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_success():
|
||||
"""Test successful embedding retrieval."""
|
||||
mock_result = [
|
||||
{
|
||||
"contentType": "STORE_AGENT",
|
||||
"contentId": "test-version-id",
|
||||
"userId": None,
|
||||
"embedding": "[0.1,0.2,0.3]",
|
||||
"searchableText": "Test text",
|
||||
"metadata": {},
|
||||
"createdAt": "2024-01-01T00:00:00Z",
|
||||
"updatedAt": "2024-01-01T00:00:00Z",
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_result,
|
||||
):
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
assert result is not None
|
||||
assert result["storeListingVersionId"] == "test-version-id"
|
||||
assert result["embedding"] == "[0.1,0.2,0.3]"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_not_found():
|
||||
"""Test embedding retrieval when not found."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=[],
|
||||
):
|
||||
result = await embeddings.get_embedding("test-version-id")
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.store_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_already_exists(mock_get, mock_store, mock_generate):
|
||||
"""Test ensure_embedding when embedding already exists."""
|
||||
mock_get.return_value = {"embedding": "[0.1,0.2,0.3]"}
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is True
|
||||
mock_generate.assert_not_called()
|
||||
mock_store.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.store_content_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
|
||||
"""Test ensure_embedding creating new embedding."""
|
||||
mock_get.return_value = None
|
||||
mock_generate.return_value = [0.1, 0.2, 0.3]
|
||||
mock_store.return_value = True
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is True
|
||||
mock_generate.assert_called_once_with("Test Test heading Test description test")
|
||||
mock_store.assert_called_once_with(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id="test-id",
|
||||
embedding=[0.1, 0.2, 0.3],
|
||||
searchable_text="Test Test heading Test description test",
|
||||
metadata={"name": "Test", "subHeading": "Test heading", "categories": ["test"]},
|
||||
user_id=None,
|
||||
tx=None,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.generate_embedding")
|
||||
@patch("backend.api.features.store.embeddings.get_embedding")
|
||||
async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
|
||||
"""Test ensure_embedding when generation fails."""
|
||||
mock_get.return_value = None
|
||||
mock_generate.return_value = None
|
||||
|
||||
result = await embeddings.ensure_embedding(
|
||||
version_id="test-id",
|
||||
name="Test",
|
||||
description="Test description",
|
||||
sub_heading="Test heading",
|
||||
categories=["test"],
|
||||
)
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_get_embedding_stats():
|
||||
"""Test embedding statistics retrieval."""
|
||||
# Mock handler stats for each content type
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_stats = AsyncMock(
|
||||
return_value={
|
||||
"total": 100,
|
||||
"with_embeddings": 75,
|
||||
"without_embeddings": 25,
|
||||
}
|
||||
)
|
||||
|
||||
# Patch the CONTENT_HANDLERS where it's used (in embeddings module)
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.get_embedding_stats()
|
||||
|
||||
assert "by_type" in result
|
||||
assert "totals" in result
|
||||
assert result["totals"]["total"] == 100
|
||||
assert result["totals"]["with_embeddings"] == 75
|
||||
assert result["totals"]["without_embeddings"] == 25
|
||||
assert result["totals"]["coverage_percent"] == 75.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@patch("backend.api.features.store.embeddings.store_content_embedding")
|
||||
async def test_backfill_missing_embeddings_success(mock_store):
|
||||
"""Test backfill with successful embedding generation."""
|
||||
# Mock ContentItem from handlers
|
||||
from backend.api.features.store.content_handlers import ContentItem
|
||||
|
||||
mock_items = [
|
||||
ContentItem(
|
||||
content_id="version-1",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Agent 1 Description 1",
|
||||
metadata={"name": "Agent 1"},
|
||||
),
|
||||
ContentItem(
|
||||
content_id="version-2",
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
searchable_text="Agent 2 Description 2",
|
||||
metadata={"name": "Agent 2"},
|
||||
),
|
||||
]
|
||||
|
||||
# Mock handler to return missing items
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=mock_items)
|
||||
|
||||
# Mock store_content_embedding to succeed for first, fail for second
|
||||
mock_store.side_effect = [True, False]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding",
|
||||
return_value=[0.1] * embeddings.EMBEDDING_DIM,
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=5)
|
||||
|
||||
assert result["processed"] == 2
|
||||
assert result["success"] == 1
|
||||
assert result["failed"] == 1
|
||||
assert mock_store.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_backfill_missing_embeddings_no_missing():
|
||||
"""Test backfill when no embeddings are missing."""
|
||||
# Mock handler to return no missing items
|
||||
mock_handler = MagicMock()
|
||||
mock_handler.get_missing_items = AsyncMock(return_value=[])
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.CONTENT_HANDLERS",
|
||||
{ContentType.STORE_AGENT: mock_handler},
|
||||
):
|
||||
result = await embeddings.backfill_missing_embeddings(batch_size=5)
|
||||
|
||||
assert result["processed"] == 0
|
||||
assert result["success"] == 0
|
||||
assert result["failed"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_embedding_to_vector_string():
|
||||
"""Test embedding to PostgreSQL vector string conversion."""
|
||||
embedding = [0.1, 0.2, 0.3, -0.4]
|
||||
result = embeddings.embedding_to_vector_string(embedding)
|
||||
assert result == "[0.1,0.2,0.3,-0.4]"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_embed_query():
|
||||
"""Test embed_query function (alias for generate_embedding)."""
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.generate_embedding"
|
||||
) as mock_generate:
|
||||
mock_generate.return_value = [0.1, 0.2, 0.3]
|
||||
|
||||
result = await embeddings.embed_query("test query")
|
||||
|
||||
assert result == [0.1, 0.2, 0.3]
|
||||
mock_generate.assert_called_once_with("test query")
|
||||
@@ -1,625 +0,0 @@
|
||||
"""
|
||||
Unified Hybrid Search
|
||||
|
||||
Combines semantic (embedding) search with lexical (tsvector) search
|
||||
for improved relevance across all content types (agents, blocks, docs).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Literal
|
||||
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store.embeddings import (
|
||||
EMBEDDING_DIM,
|
||||
embed_query,
|
||||
embedding_to_vector_string,
|
||||
)
|
||||
from backend.data.db import query_raw_with_schema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnifiedSearchWeights:
|
||||
"""Weights for unified search (no popularity signal)."""
|
||||
|
||||
semantic: float = 0.40 # Embedding cosine similarity
|
||||
lexical: float = 0.40 # tsvector ts_rank_cd score
|
||||
category: float = 0.10 # Category match boost (for types that have categories)
|
||||
recency: float = 0.10 # Newer content ranked higher
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate weights are non-negative and sum to approximately 1.0."""
|
||||
total = self.semantic + self.lexical + self.category + self.recency
|
||||
|
||||
if any(
|
||||
w < 0 for w in [self.semantic, self.lexical, self.category, self.recency]
|
||||
):
|
||||
raise ValueError("All weights must be non-negative")
|
||||
|
||||
if not (0.99 <= total <= 1.01):
|
||||
raise ValueError(f"Weights must sum to ~1.0, got {total:.3f}")
|
||||
|
||||
|
||||
# Default weights for unified search
|
||||
DEFAULT_UNIFIED_WEIGHTS = UnifiedSearchWeights()
|
||||
|
||||
# Minimum relevance score thresholds
|
||||
DEFAULT_MIN_SCORE = 0.15 # For unified search (more permissive)
|
||||
DEFAULT_STORE_AGENT_MIN_SCORE = 0.20 # For store agent search (original threshold)
|
||||
|
||||
|
||||
async def unified_hybrid_search(
|
||||
query: str,
|
||||
content_types: list[ContentType] | None = None,
|
||||
category: str | None = None,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
weights: UnifiedSearchWeights | None = None,
|
||||
min_score: float | None = None,
|
||||
user_id: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""
|
||||
Unified hybrid search across all content types.
|
||||
|
||||
Searches UnifiedContentEmbedding using both semantic (vector) and lexical (tsvector) signals.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
content_types: List of content types to search. Defaults to all public types.
|
||||
category: Filter by category (for content types that support it)
|
||||
page: Page number (1-indexed)
|
||||
page_size: Results per page
|
||||
weights: Custom weights for search signals
|
||||
min_score: Minimum relevance score threshold (0-1)
|
||||
user_id: User ID for searching private content (library agents)
|
||||
|
||||
Returns:
|
||||
Tuple of (results list, total count)
|
||||
"""
|
||||
# Validate inputs
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return [], 0
|
||||
|
||||
if page < 1:
|
||||
page = 1
|
||||
if page_size < 1:
|
||||
page_size = 1
|
||||
if page_size > 100:
|
||||
page_size = 100
|
||||
|
||||
if content_types is None:
|
||||
content_types = [
|
||||
ContentType.STORE_AGENT,
|
||||
ContentType.BLOCK,
|
||||
ContentType.DOCUMENTATION,
|
||||
]
|
||||
|
||||
if weights is None:
|
||||
weights = DEFAULT_UNIFIED_WEIGHTS
|
||||
if min_score is None:
|
||||
min_score = DEFAULT_MIN_SCORE
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
# Graceful degradation if embedding unavailable
|
||||
if query_embedding is None or not query_embedding:
|
||||
logger.warning(
|
||||
"Failed to generate query embedding - falling back to lexical-only search. "
|
||||
"Check that openai_internal_api_key is configured and OpenAI API is accessible."
|
||||
)
|
||||
query_embedding = [0.0] * EMBEDDING_DIM
|
||||
# Redistribute semantic weight to lexical
|
||||
total_non_semantic = weights.lexical + weights.category + weights.recency
|
||||
if total_non_semantic > 0:
|
||||
factor = 1.0 / total_non_semantic
|
||||
weights = UnifiedSearchWeights(
|
||||
semantic=0.0,
|
||||
lexical=weights.lexical * factor,
|
||||
category=weights.category * factor,
|
||||
recency=weights.recency * factor,
|
||||
)
|
||||
else:
|
||||
weights = UnifiedSearchWeights(
|
||||
semantic=0.0, lexical=1.0, category=0.0, recency=0.0
|
||||
)
|
||||
|
||||
# Build parameters
|
||||
params: list[Any] = []
|
||||
param_idx = 1
|
||||
|
||||
# Query for lexical search
|
||||
params.append(query)
|
||||
query_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Query lowercase for category matching
|
||||
params.append(query.lower())
|
||||
query_lower_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Embedding
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
params.append(embedding_str)
|
||||
embedding_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Content types
|
||||
content_type_values = [ct.value for ct in content_types]
|
||||
params.append(content_type_values)
|
||||
content_types_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# User ID filter (for private content)
|
||||
user_filter = ""
|
||||
if user_id is not None:
|
||||
params.append(user_id)
|
||||
user_filter = f'AND (uce."userId" = ${param_idx} OR uce."userId" IS NULL)'
|
||||
param_idx += 1
|
||||
else:
|
||||
user_filter = 'AND uce."userId" IS NULL'
|
||||
|
||||
# Weights
|
||||
params.append(weights.semantic)
|
||||
w_semantic = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.lexical)
|
||||
w_lexical = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.category)
|
||||
w_category = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.recency)
|
||||
w_recency = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Min score
|
||||
params.append(min_score)
|
||||
min_score_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Pagination
|
||||
params.append(page_size)
|
||||
limit_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(offset)
|
||||
offset_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Unified search query on UnifiedContentEmbedding
|
||||
sql_query = f"""
|
||||
WITH candidates AS (
|
||||
-- Lexical matches (uses GIN index on search column)
|
||||
SELECT uce.id, uce."contentType", uce."contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||
{user_filter}
|
||||
AND uce.search @@ plainto_tsquery('english', {query_param})
|
||||
|
||||
UNION
|
||||
|
||||
-- Semantic matches (uses HNSW index on embedding)
|
||||
(
|
||||
SELECT uce.id, uce."contentType", uce."contentId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
|
||||
{user_filter}
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
)
|
||||
),
|
||||
search_scores AS (
|
||||
SELECT
|
||||
uce."contentType" as content_type,
|
||||
uce."contentId" as content_id,
|
||||
uce."searchableText" as searchable_text,
|
||||
uce.metadata,
|
||||
uce."updatedAt" as updated_at,
|
||||
-- Semantic score: cosine similarity (1 - distance)
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score: ts_rank_cd
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match from metadata
|
||||
CASE
|
||||
WHEN uce.metadata ? 'categories' AND EXISTS (
|
||||
SELECT 1 FROM jsonb_array_elements_text(uce.metadata->'categories') cat
|
||||
WHERE LOWER(cat) LIKE '%' || {query_lower_param} || '%'
|
||||
)
|
||||
THEN 1.0
|
||||
ELSE 0.0
|
||||
END as category_score,
|
||||
-- Recency score: linear decay over 90 days
|
||||
GREATEST(0, 1 - EXTRACT(EPOCH FROM (NOW() - uce."updatedAt")) / (90 * 24 * 3600)) as recency_score
|
||||
FROM candidates c
|
||||
INNER JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce ON c.id = uce.id
|
||||
),
|
||||
max_lexical AS (
|
||||
SELECT GREATEST(MAX(lexical_raw), 0.001) as max_val FROM search_scores
|
||||
),
|
||||
normalized AS (
|
||||
SELECT
|
||||
ss.*,
|
||||
ss.lexical_raw / ml.max_val as lexical_score
|
||||
FROM search_scores ss
|
||||
CROSS JOIN max_lexical ml
|
||||
),
|
||||
scored AS (
|
||||
SELECT
|
||||
content_type,
|
||||
content_id,
|
||||
searchable_text,
|
||||
metadata,
|
||||
updated_at,
|
||||
semantic_score,
|
||||
lexical_score,
|
||||
category_score,
|
||||
recency_score,
|
||||
(
|
||||
{w_semantic} * semantic_score +
|
||||
{w_lexical} * lexical_score +
|
||||
{w_category} * category_score +
|
||||
{w_recency} * recency_score
|
||||
) as combined_score
|
||||
FROM normalized
|
||||
),
|
||||
filtered AS (
|
||||
SELECT
|
||||
*,
|
||||
COUNT(*) OVER () as total_count
|
||||
FROM scored
|
||||
WHERE combined_score >= {min_score_param}
|
||||
)
|
||||
SELECT * FROM filtered
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
results = await query_raw_with_schema(
|
||||
sql_query, *params, set_public_search_path=True
|
||||
)
|
||||
|
||||
total = results[0]["total_count"] if results else 0
|
||||
|
||||
# Clean up results
|
||||
for result in results:
|
||||
result.pop("total_count", None)
|
||||
|
||||
logger.info(f"Unified hybrid search: {len(results)} results, {total} total")
|
||||
|
||||
return results, total
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Store Agent specific search (with full metadata)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class StoreAgentSearchWeights:
|
||||
"""Weights for store agent search including popularity."""
|
||||
|
||||
semantic: float = 0.30
|
||||
lexical: float = 0.30
|
||||
category: float = 0.20
|
||||
recency: float = 0.10
|
||||
popularity: float = 0.10
|
||||
|
||||
def __post_init__(self):
|
||||
total = (
|
||||
self.semantic
|
||||
+ self.lexical
|
||||
+ self.category
|
||||
+ self.recency
|
||||
+ self.popularity
|
||||
)
|
||||
if any(
|
||||
w < 0
|
||||
for w in [
|
||||
self.semantic,
|
||||
self.lexical,
|
||||
self.category,
|
||||
self.recency,
|
||||
self.popularity,
|
||||
]
|
||||
):
|
||||
raise ValueError("All weights must be non-negative")
|
||||
if not (0.99 <= total <= 1.01):
|
||||
raise ValueError(f"Weights must sum to ~1.0, got {total:.3f}")
|
||||
|
||||
|
||||
DEFAULT_STORE_AGENT_WEIGHTS = StoreAgentSearchWeights()
|
||||
|
||||
|
||||
async def hybrid_search(
|
||||
query: str,
|
||||
featured: bool = False,
|
||||
creators: list[str] | None = None,
|
||||
category: str | None = None,
|
||||
sorted_by: (
|
||||
Literal["relevance", "rating", "runs", "name", "updated_at"] | None
|
||||
) = None,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
weights: StoreAgentSearchWeights | None = None,
|
||||
min_score: float | None = None,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""
|
||||
Hybrid search for store agents with full metadata.
|
||||
|
||||
Uses UnifiedContentEmbedding for search, joins to StoreAgent for metadata.
|
||||
"""
|
||||
query = query.strip()
|
||||
if not query:
|
||||
return [], 0
|
||||
|
||||
if page < 1:
|
||||
page = 1
|
||||
if page_size < 1:
|
||||
page_size = 1
|
||||
if page_size > 100:
|
||||
page_size = 100
|
||||
|
||||
if weights is None:
|
||||
weights = DEFAULT_STORE_AGENT_WEIGHTS
|
||||
if min_score is None:
|
||||
min_score = (
|
||||
DEFAULT_STORE_AGENT_MIN_SCORE # Use original threshold for store agents
|
||||
)
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Generate query embedding
|
||||
query_embedding = await embed_query(query)
|
||||
|
||||
# Graceful degradation
|
||||
if query_embedding is None or not query_embedding:
|
||||
logger.warning(
|
||||
"Failed to generate query embedding - falling back to lexical-only search."
|
||||
)
|
||||
query_embedding = [0.0] * EMBEDDING_DIM
|
||||
total_non_semantic = (
|
||||
weights.lexical + weights.category + weights.recency + weights.popularity
|
||||
)
|
||||
if total_non_semantic > 0:
|
||||
factor = 1.0 / total_non_semantic
|
||||
weights = StoreAgentSearchWeights(
|
||||
semantic=0.0,
|
||||
lexical=weights.lexical * factor,
|
||||
category=weights.category * factor,
|
||||
recency=weights.recency * factor,
|
||||
popularity=weights.popularity * factor,
|
||||
)
|
||||
else:
|
||||
weights = StoreAgentSearchWeights(
|
||||
semantic=0.0, lexical=1.0, category=0.0, recency=0.0, popularity=0.0
|
||||
)
|
||||
|
||||
# Build parameters
|
||||
params: list[Any] = []
|
||||
param_idx = 1
|
||||
|
||||
params.append(query)
|
||||
query_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(query.lower())
|
||||
query_lower_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
embedding_str = embedding_to_vector_string(query_embedding)
|
||||
params.append(embedding_str)
|
||||
embedding_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Build WHERE clause for StoreAgent filters
|
||||
where_parts = ["sa.is_available = true"]
|
||||
|
||||
if featured:
|
||||
where_parts.append("sa.featured = true")
|
||||
|
||||
if creators:
|
||||
params.append(creators)
|
||||
where_parts.append(f"sa.creator_username = ANY(${param_idx})")
|
||||
param_idx += 1
|
||||
|
||||
if category:
|
||||
params.append(category)
|
||||
where_parts.append(f"${param_idx} = ANY(sa.categories)")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(where_parts)
|
||||
|
||||
# Weights
|
||||
params.append(weights.semantic)
|
||||
w_semantic = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.lexical)
|
||||
w_lexical = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.category)
|
||||
w_category = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.recency)
|
||||
w_recency = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(weights.popularity)
|
||||
w_popularity = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(min_score)
|
||||
min_score_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(page_size)
|
||||
limit_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
params.append(offset)
|
||||
offset_param = f"${param_idx}"
|
||||
param_idx += 1
|
||||
|
||||
# Query using UnifiedContentEmbedding for search, StoreAgent for metadata
|
||||
sql_query = f"""
|
||||
WITH candidates AS (
|
||||
-- Lexical matches via UnifiedContentEmbedding.search
|
||||
SELECT uce."contentId" as "storeListingVersionId"
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON uce."contentId" = sa."storeListingVersionId"
|
||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
AND uce."userId" IS NULL
|
||||
AND uce.search @@ plainto_tsquery('english', {query_param})
|
||||
AND {where_clause}
|
||||
|
||||
UNION
|
||||
|
||||
-- Semantic matches via UnifiedContentEmbedding.embedding
|
||||
SELECT uce."contentId" as "storeListingVersionId"
|
||||
FROM (
|
||||
SELECT uce."contentId", uce.embedding
|
||||
FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON uce."contentId" = sa."storeListingVersionId"
|
||||
WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
AND uce."userId" IS NULL
|
||||
AND {where_clause}
|
||||
ORDER BY uce.embedding <=> {embedding_param}::vector
|
||||
LIMIT 200
|
||||
) uce
|
||||
),
|
||||
search_scores AS (
|
||||
SELECT
|
||||
sa.slug,
|
||||
sa.agent_name,
|
||||
sa.agent_image,
|
||||
sa.creator_username,
|
||||
sa.creator_avatar,
|
||||
sa.sub_heading,
|
||||
sa.description,
|
||||
sa.runs,
|
||||
sa.rating,
|
||||
sa.categories,
|
||||
sa.featured,
|
||||
sa.is_available,
|
||||
sa.updated_at,
|
||||
-- Semantic score
|
||||
COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
|
||||
-- Lexical score (raw, will normalize)
|
||||
COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
|
||||
-- Category match
|
||||
CASE
|
||||
WHEN EXISTS (
|
||||
SELECT 1 FROM unnest(sa.categories) cat
|
||||
WHERE LOWER(cat) LIKE '%' || {query_lower_param} || '%'
|
||||
)
|
||||
THEN 1.0
|
||||
ELSE 0.0
|
||||
END as category_score,
|
||||
-- Recency
|
||||
GREATEST(0, 1 - EXTRACT(EPOCH FROM (NOW() - sa.updated_at)) / (90 * 24 * 3600)) as recency_score,
|
||||
-- Popularity (raw)
|
||||
sa.runs as popularity_raw
|
||||
FROM candidates c
|
||||
INNER JOIN {{schema_prefix}}"StoreAgent" sa
|
||||
ON c."storeListingVersionId" = sa."storeListingVersionId"
|
||||
INNER JOIN {{schema_prefix}}"UnifiedContentEmbedding" uce
|
||||
ON sa."storeListingVersionId" = uce."contentId"
|
||||
AND uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
|
||||
),
|
||||
max_vals AS (
|
||||
SELECT
|
||||
GREATEST(MAX(lexical_raw), 0.001) as max_lexical,
|
||||
GREATEST(MAX(popularity_raw), 1) as max_popularity
|
||||
FROM search_scores
|
||||
),
|
||||
normalized AS (
|
||||
SELECT
|
||||
ss.*,
|
||||
ss.lexical_raw / mv.max_lexical as lexical_score,
|
||||
CASE
|
||||
WHEN ss.popularity_raw > 0
|
||||
THEN LN(1 + ss.popularity_raw) / LN(1 + mv.max_popularity)
|
||||
ELSE 0
|
||||
END as popularity_score
|
||||
FROM search_scores ss
|
||||
CROSS JOIN max_vals mv
|
||||
),
|
||||
scored AS (
|
||||
SELECT
|
||||
slug,
|
||||
agent_name,
|
||||
agent_image,
|
||||
creator_username,
|
||||
creator_avatar,
|
||||
sub_heading,
|
||||
description,
|
||||
runs,
|
||||
rating,
|
||||
categories,
|
||||
featured,
|
||||
is_available,
|
||||
updated_at,
|
||||
semantic_score,
|
||||
lexical_score,
|
||||
category_score,
|
||||
recency_score,
|
||||
popularity_score,
|
||||
(
|
||||
{w_semantic} * semantic_score +
|
||||
{w_lexical} * lexical_score +
|
||||
{w_category} * category_score +
|
||||
{w_recency} * recency_score +
|
||||
{w_popularity} * popularity_score
|
||||
) as combined_score
|
||||
FROM normalized
|
||||
),
|
||||
filtered AS (
|
||||
SELECT *, COUNT(*) OVER () as total_count
|
||||
FROM scored
|
||||
WHERE combined_score >= {min_score_param}
|
||||
)
|
||||
SELECT * FROM filtered
|
||||
ORDER BY combined_score DESC
|
||||
LIMIT {limit_param} OFFSET {offset_param}
|
||||
"""
|
||||
|
||||
results = await query_raw_with_schema(
|
||||
sql_query, *params, set_public_search_path=True
|
||||
)
|
||||
|
||||
total = results[0]["total_count"] if results else 0
|
||||
|
||||
for result in results:
|
||||
result.pop("total_count", None)
|
||||
|
||||
logger.info(f"Hybrid search (store agents): {len(results)} results, {total} total")
|
||||
|
||||
return results, total
|
||||
|
||||
|
||||
async def hybrid_search_simple(
|
||||
query: str,
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""Simplified hybrid search for store agents."""
|
||||
return await hybrid_search(query=query, page=page, page_size=page_size)
|
||||
|
||||
|
||||
# Backward compatibility alias - HybridSearchWeights maps to StoreAgentSearchWeights
|
||||
# for existing code that expects the popularity parameter
|
||||
HybridSearchWeights = StoreAgentSearchWeights
|
||||
@@ -1,667 +0,0 @@
|
||||
"""
|
||||
Integration tests for hybrid search with schema handling.
|
||||
|
||||
These tests verify that hybrid search works correctly across different database schemas.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
from backend.api.features.store.hybrid_search import (
|
||||
HybridSearchWeights,
|
||||
UnifiedSearchWeights,
|
||||
hybrid_search,
|
||||
unified_hybrid_search,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_schema_handling():
|
||||
"""Test that hybrid search correctly handles database schema prefixes."""
|
||||
# Test with a mock query to ensure schema handling works
|
||||
query = "test agent"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Mock the query result
|
||||
mock_query.return_value = [
|
||||
{
|
||||
"slug": "test/agent",
|
||||
"agent_name": "Test Agent",
|
||||
"agent_image": "test.png",
|
||||
"creator_username": "test",
|
||||
"creator_avatar": "avatar.png",
|
||||
"sub_heading": "Test sub-heading",
|
||||
"description": "Test description",
|
||||
"runs": 10,
|
||||
"rating": 4.5,
|
||||
"categories": ["test"],
|
||||
"featured": False,
|
||||
"is_available": True,
|
||||
"updated_at": "2024-01-01T00:00:00Z",
|
||||
"combined_score": 0.8,
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.6,
|
||||
"category_score": 0.5,
|
||||
"recency_score": 0.4,
|
||||
"total_count": 1,
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM # Mock embedding
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query=query,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the query was called
|
||||
assert mock_query.called
|
||||
# Verify the SQL template uses schema_prefix placeholder
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
assert "{schema_prefix}" in sql_template
|
||||
|
||||
# Verify results
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
assert results[0]["slug"] == "test/agent"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_public_schema():
|
||||
"""Test hybrid search when using public schema (no prefix needed)."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "public"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the mock was set up correctly
|
||||
assert mock_schema.return_value == "public"
|
||||
|
||||
# Results should work even with empty results
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_custom_schema():
|
||||
"""Test hybrid search when using custom schema (e.g., 'platform')."""
|
||||
with patch("backend.data.db.get_database_schema") as mock_schema:
|
||||
mock_schema.return_value = "platform"
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify the mock was set up correctly
|
||||
assert mock_schema.return_value == "platform"
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_without_embeddings():
|
||||
"""Test hybrid search gracefully degrades when embeddings are unavailable."""
|
||||
# Mock database to return some results
|
||||
mock_results = [
|
||||
{
|
||||
"slug": "test-agent",
|
||||
"agent_name": "Test Agent",
|
||||
"agent_image": "test.png",
|
||||
"creator_username": "creator",
|
||||
"creator_avatar": "avatar.png",
|
||||
"sub_heading": "Test heading",
|
||||
"description": "Test description",
|
||||
"runs": 100,
|
||||
"rating": 4.5,
|
||||
"categories": ["AI"],
|
||||
"featured": False,
|
||||
"is_available": True,
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.0, # Zero because no embedding
|
||||
"lexical_score": 0.5,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.1,
|
||||
"popularity_score": 0.2,
|
||||
"combined_score": 0.3,
|
||||
"total_count": 1,
|
||||
}
|
||||
]
|
||||
|
||||
with patch("backend.api.features.store.hybrid_search.embed_query") as mock_embed:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Simulate embedding failure
|
||||
mock_embed.return_value = None
|
||||
mock_query.return_value = mock_results
|
||||
|
||||
# Should NOT raise - graceful degradation
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify it returns results even without embeddings
|
||||
assert len(results) == 1
|
||||
assert results[0]["slug"] == "test-agent"
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_with_filters():
|
||||
"""Test hybrid search with various filters."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test with featured filter
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
featured=True,
|
||||
creators=["user1", "user2"],
|
||||
category="productivity",
|
||||
page=1,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Verify filters were applied in the query
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:] # Skip SQL template
|
||||
|
||||
# Should have query, query_lower, creators array, category
|
||||
assert len(params) >= 4
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_weights():
|
||||
"""Test hybrid search with custom weights."""
|
||||
custom_weights = HybridSearchWeights(
|
||||
semantic=0.5,
|
||||
lexical=0.3,
|
||||
category=0.1,
|
||||
recency=0.1,
|
||||
popularity=0.0,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
weights=custom_weights,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify custom weights were used in the query
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:] # Get all parameters passed
|
||||
|
||||
# Check that SQL uses parameterized weights (not f-string interpolation)
|
||||
assert "$" in sql_template # Verify parameterization is used
|
||||
|
||||
# Check that custom weights are in the params
|
||||
assert 0.5 in params # semantic weight
|
||||
assert 0.3 in params # lexical weight
|
||||
assert 0.1 in params # category and recency weights
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_min_score_filtering():
|
||||
"""Test hybrid search minimum score threshold."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Return results with varying scores
|
||||
mock_query.return_value = [
|
||||
{
|
||||
"slug": "high-score/agent",
|
||||
"agent_name": "High Score Agent",
|
||||
"combined_score": 0.8,
|
||||
"total_count": 1,
|
||||
# ... other fields
|
||||
}
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test with custom min_score
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
min_score=0.5, # High threshold
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify min_score was applied in query
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:] # Get all parameters
|
||||
|
||||
# Check that SQL uses parameterized min_score
|
||||
assert "combined_score >=" in sql_template
|
||||
assert "$" in sql_template # Verify parameterization
|
||||
|
||||
# Check that custom min_score is in the params
|
||||
assert 0.5 in params
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_pagination():
|
||||
"""Test hybrid search pagination."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
mock_query.return_value = []
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Test page 2 with page_size 10
|
||||
results, total = await hybrid_search(
|
||||
query="test",
|
||||
page=2,
|
||||
page_size=10,
|
||||
)
|
||||
|
||||
# Verify pagination parameters
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0]
|
||||
|
||||
# Last two params should be LIMIT and OFFSET
|
||||
limit = params[-2]
|
||||
offset = params[-1]
|
||||
|
||||
assert limit == 10 # page_size
|
||||
assert offset == 10 # (page - 1) * page_size = (2 - 1) * 10
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_hybrid_search_error_handling():
|
||||
"""Test hybrid search error handling."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
# Simulate database error
|
||||
mock_query.side_effect = Exception("Database connection error")
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
# Should raise exception
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
await hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert "Database connection error" in str(exc_info.value)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Unified Hybrid Search Tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_basic():
|
||||
"""Test basic unified hybrid search across all content types."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "STORE_AGENT",
|
||||
"content_id": "agent-1",
|
||||
"searchable_text": "Test Agent Description",
|
||||
"metadata": {"name": "Test Agent"},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.5,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.6,
|
||||
"total_count": 2,
|
||||
},
|
||||
{
|
||||
"content_type": "BLOCK",
|
||||
"content_id": "block-1",
|
||||
"searchable_text": "Test Block Description",
|
||||
"metadata": {"name": "Test Block"},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.6,
|
||||
"lexical_score": 0.7,
|
||||
"category_score": 0.4,
|
||||
"recency_score": 0.2,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 2,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert len(results) == 2
|
||||
assert total == 2
|
||||
assert results[0]["content_type"] == "STORE_AGENT"
|
||||
assert results[1]["content_type"] == "BLOCK"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_filter_by_content_type():
|
||||
"""Test unified search filtering by specific content types."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "BLOCK",
|
||||
"content_id": "block-1",
|
||||
"searchable_text": "Test Block",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify content_types parameter was passed correctly
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:]
|
||||
# The content types should be in the params as a list
|
||||
assert ["BLOCK"] in params
|
||||
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_with_user_id():
|
||||
"""Test unified search with user_id for private content."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "STORE_AGENT",
|
||||
"content_id": "agent-1",
|
||||
"searchable_text": "My Private Agent",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.7,
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.3,
|
||||
"combined_score": 0.6,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
user_id="user-123",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify SQL contains user_id filter
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
params = call_args[0][1:]
|
||||
|
||||
assert 'uce."userId"' in sql_template
|
||||
assert "user-123" in params
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_custom_weights():
|
||||
"""Test unified search with custom weights."""
|
||||
custom_weights = UnifiedSearchWeights(
|
||||
semantic=0.6,
|
||||
lexical=0.2,
|
||||
category=0.1,
|
||||
recency=0.1,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
weights=custom_weights,
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
# Verify custom weights are in parameters
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0][1:]
|
||||
|
||||
assert 0.6 in params # semantic weight
|
||||
assert 0.2 in params # lexical weight
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_graceful_degradation():
|
||||
"""Test unified search gracefully degrades when embeddings unavailable."""
|
||||
mock_results = [
|
||||
{
|
||||
"content_type": "DOCUMENTATION",
|
||||
"content_id": "doc-1",
|
||||
"searchable_text": "API Documentation",
|
||||
"metadata": {},
|
||||
"updated_at": "2025-01-01T00:00:00Z",
|
||||
"semantic_score": 0.0, # Zero because no embedding
|
||||
"lexical_score": 0.8,
|
||||
"category_score": 0.0,
|
||||
"recency_score": 0.2,
|
||||
"combined_score": 0.5,
|
||||
"total_count": 1,
|
||||
},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = mock_results
|
||||
mock_embed.return_value = None # Embedding failure
|
||||
|
||||
# Should NOT raise - graceful degradation
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert total == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_empty_query():
|
||||
"""Test unified search with empty query returns empty results."""
|
||||
results, total = await unified_hybrid_search(
|
||||
query="",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_pagination():
|
||||
"""Test unified search pagination."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
results, total = await unified_hybrid_search(
|
||||
query="test",
|
||||
page=3,
|
||||
page_size=15,
|
||||
)
|
||||
|
||||
# Verify pagination parameters (last two params are LIMIT and OFFSET)
|
||||
call_args = mock_query.call_args
|
||||
params = call_args[0]
|
||||
|
||||
limit = params[-2]
|
||||
offset = params[-1]
|
||||
|
||||
assert limit == 15 # page_size
|
||||
assert offset == 30 # (page - 1) * page_size = (3 - 1) * 15
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
@pytest.mark.integration
|
||||
async def test_unified_hybrid_search_schema_prefix():
|
||||
"""Test unified search uses schema_prefix placeholder."""
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.query_raw_with_schema"
|
||||
) as mock_query:
|
||||
with patch(
|
||||
"backend.api.features.store.hybrid_search.embed_query"
|
||||
) as mock_embed:
|
||||
mock_query.return_value = []
|
||||
mock_embed.return_value = [0.1] * embeddings.EMBEDDING_DIM
|
||||
|
||||
await unified_hybrid_search(
|
||||
query="test",
|
||||
page=1,
|
||||
page_size=20,
|
||||
)
|
||||
|
||||
call_args = mock_query.call_args
|
||||
sql_template = call_args[0][0]
|
||||
|
||||
# Verify schema_prefix placeholder is used for table references
|
||||
assert "{schema_prefix}" in sql_template
|
||||
assert '"UnifiedContentEmbedding"' in sql_template
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s"])
|
||||
@@ -110,7 +110,6 @@ class Profile(pydantic.BaseModel):
|
||||
|
||||
|
||||
class StoreSubmission(pydantic.BaseModel):
|
||||
listing_id: str
|
||||
agent_id: str
|
||||
agent_version: int
|
||||
name: str
|
||||
@@ -165,12 +164,8 @@ class StoreListingsWithVersionsResponse(pydantic.BaseModel):
|
||||
|
||||
|
||||
class StoreSubmissionRequest(pydantic.BaseModel):
|
||||
agent_id: str = pydantic.Field(
|
||||
..., min_length=1, description="Agent ID cannot be empty"
|
||||
)
|
||||
agent_version: int = pydantic.Field(
|
||||
..., gt=0, description="Agent version must be greater than 0"
|
||||
)
|
||||
agent_id: str
|
||||
agent_version: int
|
||||
slug: str
|
||||
name: str
|
||||
sub_heading: str
|
||||
@@ -221,23 +216,3 @@ class ReviewSubmissionRequest(pydantic.BaseModel):
|
||||
is_approved: bool
|
||||
comments: str # External comments visible to creator
|
||||
internal_comments: str | None = None # Private admin notes
|
||||
|
||||
|
||||
class UnifiedSearchResult(pydantic.BaseModel):
|
||||
"""A single result from unified hybrid search across all content types."""
|
||||
|
||||
content_type: str # STORE_AGENT, BLOCK, DOCUMENTATION
|
||||
content_id: str
|
||||
searchable_text: str
|
||||
metadata: dict | None = None
|
||||
updated_at: datetime.datetime | None = None
|
||||
combined_score: float | None = None
|
||||
semantic_score: float | None = None
|
||||
lexical_score: float | None = None
|
||||
|
||||
|
||||
class UnifiedSearchResponse(pydantic.BaseModel):
|
||||
"""Response model for unified search across all content types."""
|
||||
|
||||
results: list[UnifiedSearchResult]
|
||||
pagination: Pagination
|
||||
|
||||
@@ -138,7 +138,6 @@ def test_creator_details():
|
||||
|
||||
def test_store_submission():
|
||||
submission = store_model.StoreSubmission(
|
||||
listing_id="listing123",
|
||||
agent_id="agent123",
|
||||
agent_version=1,
|
||||
sub_heading="Test subheading",
|
||||
@@ -160,7 +159,6 @@ def test_store_submissions_response():
|
||||
response = store_model.StoreSubmissionsResponse(
|
||||
submissions=[
|
||||
store_model.StoreSubmission(
|
||||
listing_id="listing123",
|
||||
agent_id="agent123",
|
||||
agent_version=1,
|
||||
sub_heading="Test subheading",
|
||||
|
||||
@@ -7,15 +7,12 @@ from typing import Literal
|
||||
import autogpt_libs.auth
|
||||
import fastapi
|
||||
import fastapi.responses
|
||||
import prisma.enums
|
||||
|
||||
import backend.data.graph
|
||||
import backend.util.json
|
||||
from backend.util.models import Pagination
|
||||
|
||||
from . import cache as store_cache
|
||||
from . import db as store_db
|
||||
from . import hybrid_search as store_hybrid_search
|
||||
from . import image_gen as store_image_gen
|
||||
from . import media as store_media
|
||||
from . import model as store_model
|
||||
@@ -149,102 +146,6 @@ async def get_agents(
|
||||
return agents
|
||||
|
||||
|
||||
##############################################
|
||||
############### Search Endpoints #############
|
||||
##############################################
|
||||
|
||||
|
||||
@router.get(
|
||||
"/search",
|
||||
summary="Unified search across all content types",
|
||||
tags=["store", "public"],
|
||||
response_model=store_model.UnifiedSearchResponse,
|
||||
)
|
||||
async def unified_search(
|
||||
query: str,
|
||||
content_types: list[str] | None = fastapi.Query(
|
||||
default=None,
|
||||
description="Content types to search: STORE_AGENT, BLOCK, DOCUMENTATION. If not specified, searches all.",
|
||||
),
|
||||
page: int = 1,
|
||||
page_size: int = 20,
|
||||
user_id: str | None = fastapi.Security(
|
||||
autogpt_libs.auth.get_optional_user_id, use_cache=False
|
||||
),
|
||||
):
|
||||
"""
|
||||
Search across all content types (store agents, blocks, documentation) using hybrid search.
|
||||
|
||||
Combines semantic (embedding-based) and lexical (text-based) search for best results.
|
||||
|
||||
Args:
|
||||
query: The search query string
|
||||
content_types: Optional list of content types to filter by (STORE_AGENT, BLOCK, DOCUMENTATION)
|
||||
page: Page number for pagination (default 1)
|
||||
page_size: Number of results per page (default 20)
|
||||
user_id: Optional authenticated user ID (for user-scoped content in future)
|
||||
|
||||
Returns:
|
||||
UnifiedSearchResponse: Paginated list of search results with relevance scores
|
||||
"""
|
||||
if page < 1:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422, detail="Page must be greater than 0"
|
||||
)
|
||||
|
||||
if page_size < 1:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422, detail="Page size must be greater than 0"
|
||||
)
|
||||
|
||||
# Convert string content types to enum
|
||||
content_type_enums: list[prisma.enums.ContentType] | None = None
|
||||
if content_types:
|
||||
try:
|
||||
content_type_enums = [prisma.enums.ContentType(ct) for ct in content_types]
|
||||
except ValueError as e:
|
||||
raise fastapi.HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Invalid content type. Valid values: STORE_AGENT, BLOCK, DOCUMENTATION. Error: {e}",
|
||||
)
|
||||
|
||||
# Perform unified hybrid search
|
||||
results, total = await store_hybrid_search.unified_hybrid_search(
|
||||
query=query,
|
||||
content_types=content_type_enums,
|
||||
user_id=user_id,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
# Convert results to response model
|
||||
search_results = [
|
||||
store_model.UnifiedSearchResult(
|
||||
content_type=r["content_type"],
|
||||
content_id=r["content_id"],
|
||||
searchable_text=r.get("searchable_text", ""),
|
||||
metadata=r.get("metadata"),
|
||||
updated_at=r.get("updated_at"),
|
||||
combined_score=r.get("combined_score"),
|
||||
semantic_score=r.get("semantic_score"),
|
||||
lexical_score=r.get("lexical_score"),
|
||||
)
|
||||
for r in results
|
||||
]
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size if total > 0 else 0
|
||||
|
||||
return store_model.UnifiedSearchResponse(
|
||||
results=search_results,
|
||||
pagination=Pagination(
|
||||
total_items=total,
|
||||
total_pages=total_pages,
|
||||
current_page=page,
|
||||
page_size=page_size,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/agents/{username}/{agent_name}",
|
||||
summary="Get specific agent",
|
||||
|
||||
@@ -521,7 +521,6 @@ def test_get_submissions_success(
|
||||
mocked_value = store_model.StoreSubmissionsResponse(
|
||||
submissions=[
|
||||
store_model.StoreSubmission(
|
||||
listing_id="test-listing-id",
|
||||
name="Test Agent",
|
||||
description="Test agent description",
|
||||
image_urls=["test.jpg"],
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
"""Tests for the semantic_search function."""
|
||||
|
||||
import pytest
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store.embeddings import EMBEDDING_DIM, semantic_search
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_blocks_only(mocker):
|
||||
"""Test searching only BLOCK content type."""
|
||||
# Mock embed_query to return a test embedding
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Mock query_raw_with_schema to return test results
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block - Performs arithmetic operations",
|
||||
"metadata": {"name": "Calculator", "categories": ["Math"]},
|
||||
"similarity": 0.85,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculate numbers",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["content_type"] == "BLOCK"
|
||||
assert results[0]["content_id"] == "block-123"
|
||||
assert results[0]["similarity"] == 0.85
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_multiple_content_types(mocker):
|
||||
"""Test searching multiple content types simultaneously."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block",
|
||||
"metadata": {},
|
||||
"similarity": 0.85,
|
||||
},
|
||||
{
|
||||
"content_id": "doc-456",
|
||||
"content_type": "DOCUMENTATION",
|
||||
"searchable_text": "How to use Calculator",
|
||||
"metadata": {},
|
||||
"similarity": 0.75,
|
||||
},
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculator",
|
||||
content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION],
|
||||
)
|
||||
|
||||
assert len(results) == 2
|
||||
assert results[0]["content_type"] == "BLOCK"
|
||||
assert results[1]["content_type"] == "DOCUMENTATION"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_with_min_similarity_threshold(mocker):
|
||||
"""Test that results below min_similarity are filtered out."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Only return results above 0.7 similarity
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block",
|
||||
"metadata": {},
|
||||
"similarity": 0.85,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculate",
|
||||
content_types=[ContentType.BLOCK],
|
||||
min_similarity=0.7,
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["similarity"] >= 0.7
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_fallback_to_lexical(mocker):
|
||||
"""Test fallback to lexical search when embeddings fail."""
|
||||
# Mock embed_query to return None (embeddings unavailable)
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=None,
|
||||
)
|
||||
|
||||
mock_lexical_results = [
|
||||
{
|
||||
"content_id": "block-123",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": "Calculator Block performs calculations",
|
||||
"metadata": {},
|
||||
"similarity": 0.0,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_lexical_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="calculator",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["similarity"] == 0.0 # Lexical search returns 0 similarity
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_empty_query():
|
||||
"""Test that empty query returns no results."""
|
||||
results = await semantic_search(query="")
|
||||
assert results == []
|
||||
|
||||
results = await semantic_search(query=" ")
|
||||
assert results == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_with_user_id_filter(mocker):
|
||||
"""Test searching with user_id filter for private content."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": "agent-789",
|
||||
"content_type": "LIBRARY_AGENT",
|
||||
"searchable_text": "My Custom Agent",
|
||||
"metadata": {},
|
||||
"similarity": 0.9,
|
||||
}
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="custom agent",
|
||||
content_types=[ContentType.LIBRARY_AGENT],
|
||||
user_id="user-123",
|
||||
)
|
||||
|
||||
assert len(results) == 1
|
||||
assert results[0]["content_type"] == "LIBRARY_AGENT"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_limit_parameter(mocker):
|
||||
"""Test that limit parameter correctly limits results."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Return 5 results
|
||||
mock_results = [
|
||||
{
|
||||
"content_id": f"block-{i}",
|
||||
"content_type": "BLOCK",
|
||||
"searchable_text": f"Block {i}",
|
||||
"metadata": {},
|
||||
"similarity": 0.8,
|
||||
}
|
||||
for i in range(5)
|
||||
]
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=mock_results,
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="block",
|
||||
content_types=[ContentType.BLOCK],
|
||||
limit=5,
|
||||
)
|
||||
|
||||
assert len(results) == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_default_content_types(mocker):
|
||||
"""Test that default content_types includes BLOCK, STORE_AGENT, and DOCUMENTATION."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
mock_query_raw = mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
return_value=[],
|
||||
)
|
||||
|
||||
await semantic_search(query="test")
|
||||
|
||||
# Check that the SQL query includes all three default content types
|
||||
call_args = mock_query_raw.call_args
|
||||
assert "BLOCK" in str(call_args)
|
||||
assert "STORE_AGENT" in str(call_args)
|
||||
assert "DOCUMENTATION" in str(call_args)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_handles_database_error(mocker):
|
||||
"""Test that database errors are handled gracefully."""
|
||||
mock_embedding = [0.1] * EMBEDDING_DIM
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.embed_query",
|
||||
return_value=mock_embedding,
|
||||
)
|
||||
|
||||
# Simulate database error
|
||||
mocker.patch(
|
||||
"backend.api.features.store.embeddings.query_raw_with_schema",
|
||||
side_effect=Exception("Database connection failed"),
|
||||
)
|
||||
|
||||
results = await semantic_search(
|
||||
query="test",
|
||||
content_types=[ContentType.BLOCK],
|
||||
)
|
||||
|
||||
# Should return empty list on error
|
||||
assert results == []
|
||||
@@ -64,6 +64,7 @@ from backend.data.onboarding import (
|
||||
complete_re_run_agent,
|
||||
get_recommended_agents,
|
||||
get_user_onboarding,
|
||||
increment_runs,
|
||||
onboarding_enabled,
|
||||
reset_user_onboarding,
|
||||
update_user_onboarding,
|
||||
@@ -693,13 +694,13 @@ class DeleteGraphResponse(TypedDict):
|
||||
async def list_graphs(
|
||||
user_id: Annotated[str, Security(get_user_id)],
|
||||
) -> Sequence[graph_db.GraphMeta]:
|
||||
graphs, _ = await graph_db.list_graphs_paginated(
|
||||
paginated_result = await graph_db.list_graphs_paginated(
|
||||
user_id=user_id,
|
||||
page=1,
|
||||
page_size=250,
|
||||
filter_by="active",
|
||||
)
|
||||
return graphs
|
||||
return paginated_result.graphs
|
||||
|
||||
|
||||
@v1_router.get(
|
||||
@@ -974,6 +975,7 @@ async def execute_graph(
|
||||
# Record successful graph execution
|
||||
record_graph_execution(graph_id=graph_id, status="success", user_id=user_id)
|
||||
record_graph_operation(operation="execute", status="success")
|
||||
await increment_runs(user_id)
|
||||
await complete_re_run_agent(user_id, graph_id)
|
||||
if source == "library":
|
||||
await complete_onboarding_step(
|
||||
|
||||
@@ -39,7 +39,7 @@ import backend.data.user
|
||||
import backend.integrations.webhooks.utils
|
||||
import backend.util.service
|
||||
import backend.util.settings
|
||||
from backend.blocks.llm import DEFAULT_LLM_MODEL
|
||||
from backend.blocks.llm import LlmModel
|
||||
from backend.data.model import Credentials
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.monitoring.instrumentation import instrument_fastapi
|
||||
@@ -113,7 +113,7 @@ async def lifespan_context(app: fastapi.FastAPI):
|
||||
|
||||
await backend.data.user.migrate_and_encrypt_user_integrations()
|
||||
await backend.data.graph.fix_llm_provider_credentials()
|
||||
await backend.data.graph.migrate_llm_models(DEFAULT_LLM_MODEL)
|
||||
await backend.data.graph.migrate_llm_models(LlmModel.GPT4O)
|
||||
await backend.integrations.webhooks.utils.migrate_legacy_triggered_graphs()
|
||||
|
||||
with launch_darkly_context():
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from typing import Any
|
||||
|
||||
from backend.blocks.llm import (
|
||||
DEFAULT_LLM_MODEL,
|
||||
TEST_CREDENTIALS,
|
||||
TEST_CREDENTIALS_INPUT,
|
||||
AIBlockBase,
|
||||
@@ -50,7 +49,7 @@ class AIConditionBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for evaluating the condition.",
|
||||
advanced=False,
|
||||
)
|
||||
@@ -82,7 +81,7 @@ class AIConditionBlock(AIBlockBase):
|
||||
"condition": "the input is an email address",
|
||||
"yes_value": "Valid email",
|
||||
"no_value": "Not an email",
|
||||
"model": DEFAULT_LLM_MODEL,
|
||||
"model": LlmModel.GPT4O,
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
|
||||
@@ -6,9 +6,6 @@ import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import cast
|
||||
|
||||
from prisma.types import Serializable
|
||||
|
||||
from backend.sdk import (
|
||||
BaseWebhooksManager,
|
||||
@@ -87,9 +84,7 @@ class AirtableWebhookManager(BaseWebhooksManager):
|
||||
# update webhook config
|
||||
await update_webhook(
|
||||
webhook.id,
|
||||
config=cast(
|
||||
dict[str, Serializable], {"base_id": base_id, "cursor": response.cursor}
|
||||
),
|
||||
config={"base_id": base_id, "cursor": response.cursor},
|
||||
)
|
||||
|
||||
event_type = "notification"
|
||||
|
||||
@@ -182,10 +182,13 @@ class DataForSeoRelatedKeywordsBlock(Block):
|
||||
if results and len(results) > 0:
|
||||
# results is a list, get the first element
|
||||
first_result = results[0] if isinstance(results, list) else results
|
||||
# Handle missing key, null value, or valid list value
|
||||
if isinstance(first_result, dict):
|
||||
items = first_result.get("items") or []
|
||||
else:
|
||||
items = (
|
||||
first_result.get("items", [])
|
||||
if isinstance(first_result, dict)
|
||||
else []
|
||||
)
|
||||
# Ensure items is never None
|
||||
if items is None:
|
||||
items = []
|
||||
for item in items:
|
||||
# Extract keyword_data from the item
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,184 +0,0 @@
|
||||
"""
|
||||
Shared helpers for Human-In-The-Loop (HITL) review functionality.
|
||||
Used by both the dedicated HumanInTheLoopBlock and blocks that require human review.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
from prisma.enums import ReviewStatus
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.data.execution import ExecutionContext, ExecutionStatus
|
||||
from backend.data.human_review import ReviewResult
|
||||
from backend.executor.manager import async_update_node_execution_status
|
||||
from backend.util.clients import get_database_manager_async_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReviewDecision(BaseModel):
|
||||
"""Result of a review decision."""
|
||||
|
||||
should_proceed: bool
|
||||
message: str
|
||||
review_result: ReviewResult
|
||||
|
||||
|
||||
class HITLReviewHelper:
|
||||
"""Helper class for Human-In-The-Loop review operations."""
|
||||
|
||||
@staticmethod
|
||||
async def get_or_create_human_review(**kwargs) -> Optional[ReviewResult]:
|
||||
"""Create or retrieve a human review from the database."""
|
||||
return await get_database_manager_async_client().get_or_create_human_review(
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_node_execution_status(**kwargs) -> None:
|
||||
"""Update the execution status of a node."""
|
||||
await async_update_node_execution_status(
|
||||
db_client=get_database_manager_async_client(), **kwargs
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def update_review_processed_status(
|
||||
node_exec_id: str, processed: bool
|
||||
) -> None:
|
||||
"""Update the processed status of a review."""
|
||||
return await get_database_manager_async_client().update_review_processed_status(
|
||||
node_exec_id, processed
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
async def _handle_review_request(
|
||||
input_data: Any,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
block_name: str = "Block",
|
||||
editable: bool = False,
|
||||
) -> Optional[ReviewResult]:
|
||||
"""
|
||||
Handle a review request for a block that requires human review.
|
||||
|
||||
Args:
|
||||
input_data: The input data to be reviewed
|
||||
user_id: ID of the user requesting the review
|
||||
node_exec_id: ID of the node execution
|
||||
graph_exec_id: ID of the graph execution
|
||||
graph_id: ID of the graph
|
||||
graph_version: Version of the graph
|
||||
execution_context: Current execution context
|
||||
block_name: Name of the block requesting review
|
||||
editable: Whether the reviewer can edit the data
|
||||
|
||||
Returns:
|
||||
ReviewResult if review is complete, None if waiting for human input
|
||||
|
||||
Raises:
|
||||
Exception: If review creation or status update fails
|
||||
"""
|
||||
# Skip review if safe mode is disabled - return auto-approved result
|
||||
if not execution_context.safe_mode:
|
||||
logger.info(
|
||||
f"Block {block_name} skipping review for node {node_exec_id} - safe mode disabled"
|
||||
)
|
||||
return ReviewResult(
|
||||
data=input_data,
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="Auto-approved (safe mode disabled)",
|
||||
processed=True,
|
||||
node_exec_id=node_exec_id,
|
||||
)
|
||||
|
||||
result = await HITLReviewHelper.get_or_create_human_review(
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
input_data=input_data,
|
||||
message=f"Review required for {block_name} execution",
|
||||
editable=editable,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
logger.info(
|
||||
f"Block {block_name} pausing execution for node {node_exec_id} - awaiting human review"
|
||||
)
|
||||
await HITLReviewHelper.update_node_execution_status(
|
||||
exec_id=node_exec_id,
|
||||
status=ExecutionStatus.REVIEW,
|
||||
)
|
||||
return None # Signal that execution should pause
|
||||
|
||||
# Mark review as processed if not already done
|
||||
if not result.processed:
|
||||
await HITLReviewHelper.update_review_processed_status(
|
||||
node_exec_id=node_exec_id, processed=True
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
async def handle_review_decision(
|
||||
input_data: Any,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
block_name: str = "Block",
|
||||
editable: bool = False,
|
||||
) -> Optional[ReviewDecision]:
|
||||
"""
|
||||
Handle a review request and return the decision in a single call.
|
||||
|
||||
Args:
|
||||
input_data: The input data to be reviewed
|
||||
user_id: ID of the user requesting the review
|
||||
node_exec_id: ID of the node execution
|
||||
graph_exec_id: ID of the graph execution
|
||||
graph_id: ID of the graph
|
||||
graph_version: Version of the graph
|
||||
execution_context: Current execution context
|
||||
block_name: Name of the block requesting review
|
||||
editable: Whether the reviewer can edit the data
|
||||
|
||||
Returns:
|
||||
ReviewDecision if review is complete (approved/rejected),
|
||||
None if execution should pause (awaiting review)
|
||||
"""
|
||||
review_result = await HITLReviewHelper._handle_review_request(
|
||||
input_data=input_data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=block_name,
|
||||
editable=editable,
|
||||
)
|
||||
|
||||
if review_result is None:
|
||||
# Still awaiting review - return None to pause execution
|
||||
return None
|
||||
|
||||
# Review is complete, determine outcome
|
||||
should_proceed = review_result.status == ReviewStatus.APPROVED
|
||||
message = review_result.message or (
|
||||
"Execution approved by reviewer"
|
||||
if should_proceed
|
||||
else "Execution rejected by reviewer"
|
||||
)
|
||||
|
||||
return ReviewDecision(
|
||||
should_proceed=should_proceed, message=message, review_result=review_result
|
||||
)
|
||||
@@ -3,7 +3,6 @@ from typing import Any
|
||||
|
||||
from prisma.enums import ReviewStatus
|
||||
|
||||
from backend.blocks.helpers.review import HITLReviewHelper
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
@@ -12,9 +11,11 @@ from backend.data.block import (
|
||||
BlockSchemaOutput,
|
||||
BlockType,
|
||||
)
|
||||
from backend.data.execution import ExecutionContext
|
||||
from backend.data.execution import ExecutionContext, ExecutionStatus
|
||||
from backend.data.human_review import ReviewResult
|
||||
from backend.data.model import SchemaField
|
||||
from backend.executor.manager import async_update_node_execution_status
|
||||
from backend.util.clients import get_database_manager_async_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -71,26 +72,32 @@ class HumanInTheLoopBlock(Block):
|
||||
("approved_data", {"name": "John Doe", "age": 30}),
|
||||
],
|
||||
test_mock={
|
||||
"handle_review_decision": lambda **kwargs: type(
|
||||
"ReviewDecision",
|
||||
(),
|
||||
{
|
||||
"should_proceed": True,
|
||||
"message": "Test approval message",
|
||||
"review_result": ReviewResult(
|
||||
data={"name": "John Doe", "age": 30},
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="",
|
||||
processed=False,
|
||||
node_exec_id="test-node-exec-id",
|
||||
),
|
||||
},
|
||||
)(),
|
||||
"get_or_create_human_review": lambda *_args, **_kwargs: ReviewResult(
|
||||
data={"name": "John Doe", "age": 30},
|
||||
status=ReviewStatus.APPROVED,
|
||||
message="",
|
||||
processed=False,
|
||||
node_exec_id="test-node-exec-id",
|
||||
),
|
||||
"update_node_execution_status": lambda *_args, **_kwargs: None,
|
||||
"update_review_processed_status": lambda *_args, **_kwargs: None,
|
||||
},
|
||||
)
|
||||
|
||||
async def handle_review_decision(self, **kwargs):
|
||||
return await HITLReviewHelper.handle_review_decision(**kwargs)
|
||||
async def get_or_create_human_review(self, **kwargs):
|
||||
return await get_database_manager_async_client().get_or_create_human_review(
|
||||
**kwargs
|
||||
)
|
||||
|
||||
async def update_node_execution_status(self, **kwargs):
|
||||
return await async_update_node_execution_status(
|
||||
db_client=get_database_manager_async_client(), **kwargs
|
||||
)
|
||||
|
||||
async def update_review_processed_status(self, node_exec_id: str, processed: bool):
|
||||
return await get_database_manager_async_client().update_review_processed_status(
|
||||
node_exec_id, processed
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
@@ -102,7 +109,7 @@ class HumanInTheLoopBlock(Block):
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
**_kwargs,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
if not execution_context.safe_mode:
|
||||
logger.info(
|
||||
@@ -112,28 +119,48 @@ class HumanInTheLoopBlock(Block):
|
||||
yield "review_message", "Auto-approved (safe mode disabled)"
|
||||
return
|
||||
|
||||
decision = await self.handle_review_decision(
|
||||
input_data=input_data.data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=self.name,
|
||||
editable=input_data.editable,
|
||||
)
|
||||
try:
|
||||
result = await self.get_or_create_human_review(
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
input_data=input_data.data,
|
||||
message=input_data.name,
|
||||
editable=input_data.editable,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in HITL block for node {node_exec_id}: {str(e)}")
|
||||
raise
|
||||
|
||||
if decision is None:
|
||||
return
|
||||
if result is None:
|
||||
logger.info(
|
||||
f"HITL block pausing execution for node {node_exec_id} - awaiting human review"
|
||||
)
|
||||
try:
|
||||
await self.update_node_execution_status(
|
||||
exec_id=node_exec_id,
|
||||
status=ExecutionStatus.REVIEW,
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to update node status for HITL block {node_exec_id}: {str(e)}"
|
||||
)
|
||||
raise
|
||||
|
||||
status = decision.review_result.status
|
||||
if status == ReviewStatus.APPROVED:
|
||||
yield "approved_data", decision.review_result.data
|
||||
elif status == ReviewStatus.REJECTED:
|
||||
yield "rejected_data", decision.review_result.data
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected review status: {status}")
|
||||
if not result.processed:
|
||||
await self.update_review_processed_status(
|
||||
node_exec_id=node_exec_id, processed=True
|
||||
)
|
||||
|
||||
if decision.message:
|
||||
yield "review_message", decision.message
|
||||
if result.status == ReviewStatus.APPROVED:
|
||||
yield "approved_data", result.data
|
||||
if result.message:
|
||||
yield "review_message", result.message
|
||||
|
||||
elif result.status == ReviewStatus.REJECTED:
|
||||
yield "rejected_data", result.data
|
||||
if result.message:
|
||||
yield "review_message", result.message
|
||||
|
||||
@@ -92,9 +92,8 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
|
||||
O1 = "o1"
|
||||
O1_MINI = "o1-mini"
|
||||
# GPT-5 models
|
||||
GPT5_2 = "gpt-5.2-2025-12-11"
|
||||
GPT5_1 = "gpt-5.1-2025-11-13"
|
||||
GPT5 = "gpt-5-2025-08-07"
|
||||
GPT5_1 = "gpt-5.1-2025-11-13"
|
||||
GPT5_MINI = "gpt-5-mini-2025-08-07"
|
||||
GPT5_NANO = "gpt-5-nano-2025-08-07"
|
||||
GPT5_CHAT = "gpt-5-chat-latest"
|
||||
@@ -195,9 +194,8 @@ MODEL_METADATA = {
|
||||
LlmModel.O1: ModelMetadata("openai", 200000, 100000), # o1-2024-12-17
|
||||
LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536), # o1-mini-2024-09-12
|
||||
# GPT-5 models
|
||||
LlmModel.GPT5_2: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_MINI: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_NANO: ModelMetadata("openai", 400000, 128000),
|
||||
LlmModel.GPT5_CHAT: ModelMetadata("openai", 400000, 16384),
|
||||
@@ -305,8 +303,6 @@ MODEL_METADATA = {
|
||||
LlmModel.V0_1_0_MD: ModelMetadata("v0", 128000, 64000),
|
||||
}
|
||||
|
||||
DEFAULT_LLM_MODEL = LlmModel.GPT5_2
|
||||
|
||||
for model in LlmModel:
|
||||
if model not in MODEL_METADATA:
|
||||
raise ValueError(f"Missing MODEL_METADATA metadata for model: {model}")
|
||||
@@ -794,7 +790,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
)
|
||||
@@ -859,7 +855,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
|
||||
input_schema=AIStructuredResponseGeneratorBlock.Input,
|
||||
output_schema=AIStructuredResponseGeneratorBlock.Output,
|
||||
test_input={
|
||||
"model": DEFAULT_LLM_MODEL,
|
||||
"model": LlmModel.GPT4O,
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
"expected_format": {
|
||||
"key1": "value1",
|
||||
@@ -1225,7 +1221,7 @@ class AITextGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
)
|
||||
@@ -1321,7 +1317,7 @@ class AITextSummarizerBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for summarizing the text.",
|
||||
)
|
||||
focus: str = SchemaField(
|
||||
@@ -1538,7 +1534,7 @@ class AIConversationBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for the conversation.",
|
||||
)
|
||||
credentials: AICredentials = AICredentialsField()
|
||||
@@ -1576,7 +1572,7 @@ class AIConversationBlock(AIBlockBase):
|
||||
},
|
||||
{"role": "user", "content": "Where was it played?"},
|
||||
],
|
||||
"model": DEFAULT_LLM_MODEL,
|
||||
"model": LlmModel.GPT4O,
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
@@ -1639,7 +1635,7 @@ class AIListGeneratorBlock(AIBlockBase):
|
||||
)
|
||||
model: LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=DEFAULT_LLM_MODEL,
|
||||
default=LlmModel.GPT4O,
|
||||
description="The language model to use for generating the list.",
|
||||
advanced=True,
|
||||
)
|
||||
@@ -1696,7 +1692,7 @@ class AIListGeneratorBlock(AIBlockBase):
|
||||
"drawing explorers to uncover its mysteries. Each planet showcases the limitless possibilities of "
|
||||
"fictional worlds."
|
||||
),
|
||||
"model": DEFAULT_LLM_MODEL,
|
||||
"model": LlmModel.GPT4O,
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
"max_retries": 3,
|
||||
"force_json_output": False,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,6 @@ from backend.data.model import (
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.request import DEFAULT_USER_AGENT
|
||||
|
||||
|
||||
class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
@@ -40,27 +39,17 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
output_schema=GetWikipediaSummaryBlock.Output,
|
||||
test_input={"topic": "Artificial Intelligence"},
|
||||
test_output=("summary", "summary content"),
|
||||
test_mock={
|
||||
"get_request": lambda url, headers, json: {"extract": "summary content"}
|
||||
},
|
||||
test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
|
||||
)
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
topic = input_data.topic
|
||||
# URL-encode the topic to handle spaces and special characters
|
||||
encoded_topic = quote(topic, safe="")
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"
|
||||
|
||||
# Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
|
||||
# - User-Agent: Required, must identify the bot
|
||||
# - Accept-Encoding: gzip recommended to reduce bandwidth
|
||||
headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
}
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
|
||||
|
||||
# Note: User-Agent is now automatically set by the request library
|
||||
# to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
|
||||
try:
|
||||
response = await self.get_request(url, headers=headers, json=True)
|
||||
response = await self.get_request(url, json=True)
|
||||
if "extract" not in response:
|
||||
raise ValueError(f"Unable to parse Wikipedia response: {response}")
|
||||
yield "summary", response["extract"]
|
||||
|
||||
@@ -226,7 +226,7 @@ class SmartDecisionMakerBlock(Block):
|
||||
)
|
||||
model: llm.LlmModel = SchemaField(
|
||||
title="LLM Model",
|
||||
default=llm.DEFAULT_LLM_MODEL,
|
||||
default=llm.LlmModel.GPT4O,
|
||||
description="The language model to use for answering the prompt.",
|
||||
advanced=False,
|
||||
)
|
||||
@@ -391,12 +391,8 @@ class SmartDecisionMakerBlock(Block):
|
||||
"""
|
||||
block = sink_node.block
|
||||
|
||||
# Use custom name from node metadata if set, otherwise fall back to block.name
|
||||
custom_name = sink_node.metadata.get("customized_name")
|
||||
tool_name = custom_name if custom_name else block.name
|
||||
|
||||
tool_function: dict[str, Any] = {
|
||||
"name": SmartDecisionMakerBlock.cleanup(tool_name),
|
||||
"name": SmartDecisionMakerBlock.cleanup(block.name),
|
||||
"description": block.description,
|
||||
}
|
||||
sink_block_input_schema = block.input_schema
|
||||
@@ -493,24 +489,14 @@ class SmartDecisionMakerBlock(Block):
|
||||
f"Sink graph metadata not found: {graph_id} {graph_version}"
|
||||
)
|
||||
|
||||
# Use custom name from node metadata if set, otherwise fall back to graph name
|
||||
custom_name = sink_node.metadata.get("customized_name")
|
||||
tool_name = custom_name if custom_name else sink_graph_meta.name
|
||||
|
||||
tool_function: dict[str, Any] = {
|
||||
"name": SmartDecisionMakerBlock.cleanup(tool_name),
|
||||
"name": SmartDecisionMakerBlock.cleanup(sink_graph_meta.name),
|
||||
"description": sink_graph_meta.description,
|
||||
}
|
||||
|
||||
properties = {}
|
||||
field_mapping = {}
|
||||
|
||||
for link in links:
|
||||
field_name = link.sink_name
|
||||
|
||||
clean_field_name = SmartDecisionMakerBlock.cleanup(field_name)
|
||||
field_mapping[clean_field_name] = field_name
|
||||
|
||||
sink_block_input_schema = sink_node.input_default["input_schema"]
|
||||
sink_block_properties = sink_block_input_schema.get("properties", {}).get(
|
||||
link.sink_name, {}
|
||||
@@ -520,7 +506,7 @@ class SmartDecisionMakerBlock(Block):
|
||||
if "description" in sink_block_properties
|
||||
else f"The {link.sink_name} of the tool"
|
||||
)
|
||||
properties[clean_field_name] = {
|
||||
properties[link.sink_name] = {
|
||||
"type": "string",
|
||||
"description": description,
|
||||
"default": json.dumps(sink_block_properties.get("default", None)),
|
||||
@@ -533,7 +519,7 @@ class SmartDecisionMakerBlock(Block):
|
||||
"strict": True,
|
||||
}
|
||||
|
||||
tool_function["_field_mapping"] = field_mapping
|
||||
# Store node info for later use in output processing
|
||||
tool_function["_sink_node_id"] = sink_node.id
|
||||
|
||||
return {"type": "function", "function": tool_function}
|
||||
@@ -989,28 +975,10 @@ class SmartDecisionMakerBlock(Block):
|
||||
graph_version: int,
|
||||
execution_context: ExecutionContext,
|
||||
execution_processor: "ExecutionProcessor",
|
||||
nodes_to_skip: set[str] | None = None,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
|
||||
tool_functions = await self._create_tool_node_signatures(node_id)
|
||||
original_tool_count = len(tool_functions)
|
||||
|
||||
# Filter out tools for nodes that should be skipped (e.g., missing optional credentials)
|
||||
if nodes_to_skip:
|
||||
tool_functions = [
|
||||
tf
|
||||
for tf in tool_functions
|
||||
if tf.get("function", {}).get("_sink_node_id") not in nodes_to_skip
|
||||
]
|
||||
|
||||
# Only raise error if we had tools but they were all filtered out
|
||||
if original_tool_count > 0 and not tool_functions:
|
||||
raise ValueError(
|
||||
"No available tools to execute - all downstream nodes are unavailable "
|
||||
"(possibly due to missing optional credentials)"
|
||||
)
|
||||
|
||||
yield "tool_functions", json.dumps(tool_functions)
|
||||
|
||||
conversation_history = input_data.conversation_history or []
|
||||
@@ -1161,9 +1129,8 @@ class SmartDecisionMakerBlock(Block):
|
||||
original_field_name = field_mapping.get(clean_arg_name, clean_arg_name)
|
||||
arg_value = tool_args.get(clean_arg_name)
|
||||
|
||||
# Use original_field_name directly (not sanitized) to match link sink_name
|
||||
# The field_mapping already translates from LLM's cleaned names to original names
|
||||
emit_key = f"tools_^_{sink_node_id}_~_{original_field_name}"
|
||||
sanitized_arg_name = self.cleanup(original_field_name)
|
||||
emit_key = f"tools_^_{sink_node_id}_~_{sanitized_arg_name}"
|
||||
|
||||
logger.debug(
|
||||
"[SmartDecisionMakerBlock|geid:%s|neid:%s] emit %s",
|
||||
|
||||
@@ -196,15 +196,6 @@ class TestXMLParserBlockSecurity:
|
||||
async for _ in block.run(XMLParserBlock.Input(input_xml=large_xml)):
|
||||
pass
|
||||
|
||||
async def test_rejects_text_outside_root(self):
|
||||
"""Ensure parser surfaces readable errors for invalid root text."""
|
||||
block = XMLParserBlock()
|
||||
invalid_xml = "<root><child>value</child></root> trailing"
|
||||
|
||||
with pytest.raises(ValueError, match="text outside the root element"):
|
||||
async for _ in block.run(XMLParserBlock.Input(input_xml=invalid_xml)):
|
||||
pass
|
||||
|
||||
|
||||
class TestStoreMediaFileSecurity:
|
||||
"""Test file storage security limits."""
|
||||
|
||||
@@ -28,7 +28,7 @@ class TestLLMStatsTracking:
|
||||
|
||||
response = await llm.llm_call(
|
||||
credentials=llm.TEST_CREDENTIALS,
|
||||
llm_model=llm.DEFAULT_LLM_MODEL,
|
||||
llm_model=llm.LlmModel.GPT4O,
|
||||
prompt=[{"role": "user", "content": "Hello"}],
|
||||
max_tokens=100,
|
||||
)
|
||||
@@ -65,7 +65,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test prompt",
|
||||
expected_format={"key1": "desc1", "key2": "desc2"},
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore # type: ignore
|
||||
)
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestLLMStatsTracking:
|
||||
# Run the block
|
||||
input_data = llm.AITextGeneratorBlock.Input(
|
||||
prompt="Generate text",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -170,7 +170,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test prompt",
|
||||
expected_format={"key1": "desc1", "key2": "desc2"},
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2,
|
||||
)
|
||||
@@ -228,7 +228,7 @@ class TestLLMStatsTracking:
|
||||
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text=long_text,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=100, # Small chunks
|
||||
chunk_overlap=10,
|
||||
@@ -299,7 +299,7 @@ class TestLLMStatsTracking:
|
||||
# Test with very short text (should only need 1 chunk + 1 final summary)
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="This is a short text.",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=1000, # Large enough to avoid chunking
|
||||
)
|
||||
@@ -346,7 +346,7 @@ class TestLLMStatsTracking:
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
{"role": "user", "content": "How are you?"},
|
||||
],
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -387,7 +387,7 @@ class TestLLMStatsTracking:
|
||||
# Run the block
|
||||
input_data = llm.AIListGeneratorBlock.Input(
|
||||
focus="test items",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_retries=3,
|
||||
)
|
||||
@@ -469,7 +469,7 @@ class TestLLMStatsTracking:
|
||||
input_data = llm.AIStructuredResponseGeneratorBlock.Input(
|
||||
prompt="Test",
|
||||
expected_format={"result": "desc"},
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -513,7 +513,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
style=llm.SummaryStyle.BULLET_POINTS,
|
||||
)
|
||||
@@ -558,7 +558,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
style=llm.SummaryStyle.BULLET_POINTS,
|
||||
max_tokens=1000,
|
||||
@@ -593,7 +593,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
@@ -623,7 +623,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
max_tokens=1000,
|
||||
)
|
||||
@@ -654,7 +654,7 @@ class TestAITextSummarizerValidation:
|
||||
# Create input data
|
||||
input_data = llm.AITextSummarizerBlock.Input(
|
||||
text="Some text to summarize",
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
)
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ async def test_smart_decision_maker_tracks_llm_stats():
|
||||
# Create test input
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Should I continue with this task?",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -335,7 +335,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2, # Set retry to 2 for testing
|
||||
agent_mode_max_iterations=0,
|
||||
@@ -402,7 +402,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -462,7 +462,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -526,7 +526,7 @@ async def test_smart_decision_maker_parameter_validation():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Search for keywords",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -648,7 +648,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test prompt",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
retry=2,
|
||||
agent_mode_max_iterations=0,
|
||||
@@ -722,7 +722,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
):
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Simple prompt",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -778,7 +778,7 @@ async def test_smart_decision_maker_raw_response_conversion():
|
||||
):
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Another test",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
@@ -931,7 +931,7 @@ async def test_smart_decision_maker_agent_mode():
|
||||
# Test agent mode with max_iterations = 3
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Complete this task using tools",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=3, # Enable agent mode with 3 max iterations
|
||||
)
|
||||
@@ -1020,7 +1020,7 @@ async def test_smart_decision_maker_traditional_mode_default():
|
||||
# Test default behavior (traditional mode)
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test prompt",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
model=llm_module.LlmModel.GPT4O,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
|
||||
agent_mode_max_iterations=0, # Traditional mode
|
||||
)
|
||||
@@ -1057,153 +1057,3 @@ async def test_smart_decision_maker_traditional_mode_default():
|
||||
) # Should yield individual tool parameters
|
||||
assert "tools_^_test-sink-node-id_~_max_keyword_difficulty" in outputs
|
||||
assert "conversations" in outputs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_uses_customized_name_for_blocks():
|
||||
"""Test that SmartDecisionMakerBlock uses customized_name from node metadata for tool names."""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from backend.blocks.basic import StoreValueBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node with customized_name in metadata
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-node-id"
|
||||
mock_node.block_id = StoreValueBlock().id
|
||||
mock_node.metadata = {"customized_name": "My Custom Tool Name"}
|
||||
mock_node.block = StoreValueBlock()
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "input"
|
||||
|
||||
# Call the function directly
|
||||
result = await SmartDecisionMakerBlock._create_block_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the customized name (cleaned up)
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "my_custom_tool_name" # Cleaned version
|
||||
assert result["function"]["_sink_node_id"] == "test-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_falls_back_to_block_name():
|
||||
"""Test that SmartDecisionMakerBlock falls back to block.name when no customized_name."""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from backend.blocks.basic import StoreValueBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node without customized_name
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-node-id"
|
||||
mock_node.block_id = StoreValueBlock().id
|
||||
mock_node.metadata = {} # No customized_name
|
||||
mock_node.block = StoreValueBlock()
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "input"
|
||||
|
||||
# Call the function directly
|
||||
result = await SmartDecisionMakerBlock._create_block_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the block's default name
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "storevalueblock" # Default block name cleaned
|
||||
assert result["function"]["_sink_node_id"] == "test-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_uses_customized_name_for_agents():
|
||||
"""Test that SmartDecisionMakerBlock uses customized_name from metadata for agent nodes."""
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node with customized_name in metadata
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-agent-node-id"
|
||||
mock_node.metadata = {"customized_name": "My Custom Agent"}
|
||||
mock_node.input_default = {
|
||||
"graph_id": "test-graph-id",
|
||||
"graph_version": 1,
|
||||
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
|
||||
}
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "test_input"
|
||||
|
||||
# Mock the database client
|
||||
mock_graph_meta = MagicMock()
|
||||
mock_graph_meta.name = "Original Agent Name"
|
||||
mock_graph_meta.description = "Agent description"
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
|
||||
|
||||
with patch(
|
||||
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
|
||||
return_value=mock_db_client,
|
||||
):
|
||||
result = await SmartDecisionMakerBlock._create_agent_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the customized name (cleaned up)
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "my_custom_agent" # Cleaned version
|
||||
assert result["function"]["_sink_node_id"] == "test-agent-node-id"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_smart_decision_maker_agent_falls_back_to_graph_name():
|
||||
"""Test that agent node falls back to graph name when no customized_name."""
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.data.graph import Link, Node
|
||||
|
||||
# Create a mock node without customized_name
|
||||
mock_node = MagicMock(spec=Node)
|
||||
mock_node.id = "test-agent-node-id"
|
||||
mock_node.metadata = {} # No customized_name
|
||||
mock_node.input_default = {
|
||||
"graph_id": "test-graph-id",
|
||||
"graph_version": 1,
|
||||
"input_schema": {"properties": {"test_input": {"description": "Test input"}}},
|
||||
}
|
||||
|
||||
# Create a mock link
|
||||
mock_link = MagicMock(spec=Link)
|
||||
mock_link.sink_name = "test_input"
|
||||
|
||||
# Mock the database client
|
||||
mock_graph_meta = MagicMock()
|
||||
mock_graph_meta.name = "Original Agent Name"
|
||||
mock_graph_meta.description = "Agent description"
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_graph_metadata.return_value = mock_graph_meta
|
||||
|
||||
with patch(
|
||||
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
|
||||
return_value=mock_db_client,
|
||||
):
|
||||
result = await SmartDecisionMakerBlock._create_agent_function_signature(
|
||||
mock_node, [mock_link]
|
||||
)
|
||||
|
||||
# Verify the tool name uses the graph's default name
|
||||
assert result["type"] == "function"
|
||||
assert result["function"]["name"] == "original_agent_name" # Graph name cleaned
|
||||
assert result["function"]["_sink_node_id"] == "test-agent-node-id"
|
||||
|
||||
@@ -15,7 +15,6 @@ async def test_smart_decision_maker_handles_dynamic_dict_fields():
|
||||
mock_node.block = CreateDictionaryBlock()
|
||||
mock_node.block_id = CreateDictionaryBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic dictionary fields
|
||||
mock_links = [
|
||||
@@ -78,7 +77,6 @@ async def test_smart_decision_maker_handles_dynamic_list_fields():
|
||||
mock_node.block = AddToListBlock()
|
||||
mock_node.block_id = AddToListBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic list fields
|
||||
mock_links = [
|
||||
|
||||
@@ -44,7 +44,6 @@ async def test_create_block_function_signature_with_dict_fields():
|
||||
mock_node.block = CreateDictionaryBlock()
|
||||
mock_node.block_id = CreateDictionaryBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic dictionary fields (source sanitized, sink original)
|
||||
mock_links = [
|
||||
@@ -107,7 +106,6 @@ async def test_create_block_function_signature_with_list_fields():
|
||||
mock_node.block = AddToListBlock()
|
||||
mock_node.block_id = AddToListBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic list fields
|
||||
mock_links = [
|
||||
@@ -161,7 +159,6 @@ async def test_create_block_function_signature_with_object_fields():
|
||||
mock_node.block = MatchTextPatternBlock()
|
||||
mock_node.block_id = MatchTextPatternBlock().id
|
||||
mock_node.input_default = {}
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Create mock links with dynamic object fields
|
||||
mock_links = [
|
||||
@@ -211,13 +208,11 @@ async def test_create_tool_node_signatures():
|
||||
mock_dict_node.block = CreateDictionaryBlock()
|
||||
mock_dict_node.block_id = CreateDictionaryBlock().id
|
||||
mock_dict_node.input_default = {}
|
||||
mock_dict_node.metadata = {}
|
||||
|
||||
mock_list_node = Mock()
|
||||
mock_list_node.block = AddToListBlock()
|
||||
mock_list_node.block_id = AddToListBlock().id
|
||||
mock_list_node.input_default = {}
|
||||
mock_list_node.metadata = {}
|
||||
|
||||
# Mock links with dynamic fields
|
||||
dict_link1 = Mock(
|
||||
@@ -378,7 +373,7 @@ async def test_output_yielding_with_dynamic_fields():
|
||||
input_data = block.input_schema(
|
||||
prompt="Create a user dictionary",
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
agent_mode_max_iterations=0, # Use traditional mode to test output yielding
|
||||
)
|
||||
|
||||
@@ -428,7 +423,6 @@ async def test_mixed_regular_and_dynamic_fields():
|
||||
mock_node.block.name = "TestBlock"
|
||||
mock_node.block.description = "A test block"
|
||||
mock_node.block.input_schema = Mock()
|
||||
mock_node.metadata = {}
|
||||
|
||||
# Mock the get_field_schema to return a proper schema for regular fields
|
||||
def get_field_schema(field_name):
|
||||
@@ -600,7 +594,7 @@ async def test_validation_errors_dont_pollute_conversation():
|
||||
input_data = block.input_schema(
|
||||
prompt="Test prompt",
|
||||
credentials=llm.TEST_CREDENTIALS_INPUT,
|
||||
model=llm.DEFAULT_LLM_MODEL,
|
||||
model=llm.LlmModel.GPT4O,
|
||||
retry=3, # Allow retries
|
||||
agent_mode_max_iterations=1,
|
||||
)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .blog import WordPressCreatePostBlock, WordPressGetAllPostsBlock
|
||||
from .blog import WordPressCreatePostBlock
|
||||
|
||||
__all__ = ["WordPressCreatePostBlock", "WordPressGetAllPostsBlock"]
|
||||
__all__ = ["WordPressCreatePostBlock"]
|
||||
|
||||
@@ -161,7 +161,7 @@ async def oauth_exchange_code_for_tokens(
|
||||
grant_type="authorization_code",
|
||||
).model_dump(exclude_none=True)
|
||||
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
response = await Requests().post(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -205,7 +205,7 @@ async def oauth_refresh_tokens(
|
||||
grant_type="refresh_token",
|
||||
).model_dump(exclude_none=True)
|
||||
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
response = await Requests().post(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -252,7 +252,7 @@ async def validate_token(
|
||||
"token": token,
|
||||
}
|
||||
|
||||
response = await Requests(raise_for_status=False).get(
|
||||
response = await Requests().get(
|
||||
f"{WORDPRESS_BASE_URL}oauth2/token-info",
|
||||
params=params,
|
||||
)
|
||||
@@ -296,7 +296,7 @@ async def make_api_request(
|
||||
|
||||
url = f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}"
|
||||
|
||||
request_method = getattr(Requests(raise_for_status=False), method.lower())
|
||||
request_method = getattr(Requests(), method.lower())
|
||||
response = await request_method(
|
||||
url,
|
||||
headers=headers,
|
||||
@@ -476,7 +476,6 @@ async def create_post(
|
||||
data["tags"] = ",".join(str(t) for t in data["tags"])
|
||||
|
||||
# Make the API request
|
||||
site = normalize_site(site)
|
||||
endpoint = f"/rest/v1.1/sites/{site}/posts/new"
|
||||
|
||||
headers = {
|
||||
@@ -484,7 +483,7 @@ async def create_post(
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
}
|
||||
|
||||
response = await Requests(raise_for_status=False).post(
|
||||
response = await Requests().post(
|
||||
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
|
||||
headers=headers,
|
||||
data=data,
|
||||
@@ -500,132 +499,3 @@ async def create_post(
|
||||
)
|
||||
error_message = error_data.get("message", response.text)
|
||||
raise ValueError(f"Failed to create post: {response.status} - {error_message}")
|
||||
|
||||
|
||||
class Post(BaseModel):
|
||||
"""Response model for individual posts in a posts list response.
|
||||
|
||||
This is a simplified version compared to PostResponse, as the list endpoint
|
||||
returns less detailed information than the create/get single post endpoints.
|
||||
"""
|
||||
|
||||
ID: int
|
||||
site_ID: int
|
||||
author: PostAuthor
|
||||
date: datetime
|
||||
modified: datetime
|
||||
title: str
|
||||
URL: str
|
||||
short_URL: str
|
||||
content: str | None = None
|
||||
excerpt: str | None = None
|
||||
slug: str
|
||||
guid: str
|
||||
status: str
|
||||
sticky: bool
|
||||
password: str | None = ""
|
||||
parent: Union[Dict[str, Any], bool, None] = None
|
||||
type: str
|
||||
discussion: Dict[str, Union[str, bool, int]] | None = None
|
||||
likes_enabled: bool | None = None
|
||||
sharing_enabled: bool | None = None
|
||||
like_count: int | None = None
|
||||
i_like: bool | None = None
|
||||
is_reblogged: bool | None = None
|
||||
is_following: bool | None = None
|
||||
global_ID: str | None = None
|
||||
featured_image: str | None = None
|
||||
post_thumbnail: Dict[str, Any] | None = None
|
||||
format: str | None = None
|
||||
geo: Union[Dict[str, Any], bool, None] = None
|
||||
menu_order: int | None = None
|
||||
page_template: str | None = None
|
||||
publicize_URLs: List[str] | None = None
|
||||
terms: Dict[str, Dict[str, Any]] | None = None
|
||||
tags: Dict[str, Dict[str, Any]] | None = None
|
||||
categories: Dict[str, Dict[str, Any]] | None = None
|
||||
attachments: Dict[str, Dict[str, Any]] | None = None
|
||||
attachment_count: int | None = None
|
||||
metadata: List[Dict[str, Any]] | None = None
|
||||
meta: Dict[str, Any] | None = None
|
||||
capabilities: Dict[str, bool] | None = None
|
||||
revisions: List[int] | None = None
|
||||
other_URLs: Dict[str, Any] | None = None
|
||||
|
||||
|
||||
class PostsResponse(BaseModel):
|
||||
"""Response model for WordPress posts list."""
|
||||
|
||||
found: int
|
||||
posts: List[Post]
|
||||
meta: Dict[str, Any]
|
||||
|
||||
|
||||
def normalize_site(site: str) -> str:
|
||||
"""
|
||||
Normalize a site identifier by stripping protocol and trailing slashes.
|
||||
|
||||
Args:
|
||||
site: Site URL, domain, or ID (e.g., "https://myblog.wordpress.com/", "myblog.wordpress.com", "123456789")
|
||||
|
||||
Returns:
|
||||
Normalized site identifier (domain or ID only)
|
||||
"""
|
||||
site = site.strip()
|
||||
if site.startswith("https://"):
|
||||
site = site[8:]
|
||||
elif site.startswith("http://"):
|
||||
site = site[7:]
|
||||
return site.rstrip("/")
|
||||
|
||||
|
||||
async def get_posts(
|
||||
credentials: Credentials,
|
||||
site: str,
|
||||
status: PostStatus | None = None,
|
||||
number: int = 100,
|
||||
offset: int = 0,
|
||||
) -> PostsResponse:
|
||||
"""
|
||||
Get posts from a WordPress site.
|
||||
|
||||
Args:
|
||||
credentials: OAuth credentials
|
||||
site: Site ID or domain (e.g., "myblog.wordpress.com" or "123456789")
|
||||
status: Filter by post status using PostStatus enum, or None for all
|
||||
number: Number of posts to retrieve (max 100)
|
||||
offset: Number of posts to skip (for pagination)
|
||||
|
||||
Returns:
|
||||
PostsResponse with the list of posts
|
||||
"""
|
||||
site = normalize_site(site)
|
||||
endpoint = f"/rest/v1.1/sites/{site}/posts"
|
||||
|
||||
headers = {
|
||||
"Authorization": credentials.auth_header(),
|
||||
}
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"number": max(1, min(number, 100)), # 1–100 posts per request
|
||||
"offset": offset,
|
||||
}
|
||||
|
||||
if status:
|
||||
params["status"] = status.value
|
||||
response = await Requests(raise_for_status=False).get(
|
||||
f"{WORDPRESS_BASE_URL.rstrip('/')}{endpoint}",
|
||||
headers=headers,
|
||||
params=params,
|
||||
)
|
||||
|
||||
if response.ok:
|
||||
return PostsResponse.model_validate(response.json())
|
||||
|
||||
error_data = (
|
||||
response.json()
|
||||
if response.headers.get("content-type", "").startswith("application/json")
|
||||
else {}
|
||||
)
|
||||
error_message = error_data.get("message", response.text)
|
||||
raise ValueError(f"Failed to get posts: {response.status} - {error_message}")
|
||||
|
||||
@@ -9,15 +9,7 @@ from backend.sdk import (
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._api import (
|
||||
CreatePostRequest,
|
||||
Post,
|
||||
PostResponse,
|
||||
PostsResponse,
|
||||
PostStatus,
|
||||
create_post,
|
||||
get_posts,
|
||||
)
|
||||
from ._api import CreatePostRequest, PostResponse, PostStatus, create_post
|
||||
from ._config import wordpress
|
||||
|
||||
|
||||
@@ -57,15 +49,8 @@ class WordPressCreatePostBlock(Block):
|
||||
media_urls: list[str] = SchemaField(
|
||||
description="URLs of images to sideload and attach to the post", default=[]
|
||||
)
|
||||
publish_as_draft: bool = SchemaField(
|
||||
description="If True, publishes the post as a draft. If False, publishes it publicly.",
|
||||
default=False,
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
site: str = SchemaField(
|
||||
description="The site ID or domain (pass-through for chaining with other blocks)"
|
||||
)
|
||||
post_id: int = SchemaField(description="The ID of the created post")
|
||||
post_url: str = SchemaField(description="The full URL of the created post")
|
||||
short_url: str = SchemaField(description="The shortened wp.me URL")
|
||||
@@ -93,9 +78,7 @@ class WordPressCreatePostBlock(Block):
|
||||
tags=input_data.tags,
|
||||
featured_image=input_data.featured_image,
|
||||
media_urls=input_data.media_urls,
|
||||
status=(
|
||||
PostStatus.DRAFT if input_data.publish_as_draft else PostStatus.PUBLISH
|
||||
),
|
||||
status=PostStatus.PUBLISH,
|
||||
)
|
||||
|
||||
post_response: PostResponse = await create_post(
|
||||
@@ -104,69 +87,7 @@ class WordPressCreatePostBlock(Block):
|
||||
post_data=post_request,
|
||||
)
|
||||
|
||||
yield "site", input_data.site
|
||||
yield "post_id", post_response.ID
|
||||
yield "post_url", post_response.URL
|
||||
yield "short_url", post_response.short_URL
|
||||
yield "post_data", post_response.model_dump()
|
||||
|
||||
|
||||
class WordPressGetAllPostsBlock(Block):
|
||||
"""
|
||||
Fetches all posts from a WordPress.com site or Jetpack-enabled site.
|
||||
Supports filtering by status and pagination.
|
||||
"""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
credentials: CredentialsMetaInput = wordpress.credentials_field()
|
||||
site: str = SchemaField(
|
||||
description="Site ID or domain (e.g., 'myblog.wordpress.com' or '123456789')"
|
||||
)
|
||||
status: PostStatus | None = SchemaField(
|
||||
description="Filter by post status, or None for all",
|
||||
default=None,
|
||||
)
|
||||
number: int = SchemaField(
|
||||
description="Number of posts to retrieve (max 100 per request)", default=20
|
||||
)
|
||||
offset: int = SchemaField(
|
||||
description="Number of posts to skip (for pagination)", default=0
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
site: str = SchemaField(
|
||||
description="The site ID or domain (pass-through for chaining with other blocks)"
|
||||
)
|
||||
found: int = SchemaField(description="Total number of posts found")
|
||||
posts: list[Post] = SchemaField(
|
||||
description="List of post objects with their details"
|
||||
)
|
||||
post: Post = SchemaField(
|
||||
description="Individual post object (yielded for each post)"
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="97728fa7-7f6f-4789-ba0c-f2c114119536",
|
||||
description="Fetch all posts from WordPress.com or Jetpack sites",
|
||||
categories={BlockCategory.SOCIAL},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self, input_data: Input, *, credentials: Credentials, **kwargs
|
||||
) -> BlockOutput:
|
||||
posts_response: PostsResponse = await get_posts(
|
||||
credentials=credentials,
|
||||
site=input_data.site,
|
||||
status=input_data.status,
|
||||
number=input_data.number,
|
||||
offset=input_data.offset,
|
||||
)
|
||||
|
||||
yield "site", input_data.site
|
||||
yield "found", posts_response.found
|
||||
yield "posts", posts_response.posts
|
||||
for post in posts_response.posts:
|
||||
yield "post", post
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from gravitasml.parser import Parser
|
||||
from gravitasml.token import Token, tokenize
|
||||
from gravitasml.token import tokenize
|
||||
|
||||
from backend.data.block import Block, BlockOutput, BlockSchemaInput, BlockSchemaOutput
|
||||
from backend.data.model import SchemaField
|
||||
@@ -25,38 +25,6 @@ class XMLParserBlock(Block):
|
||||
],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _validate_tokens(tokens: list[Token]) -> None:
|
||||
"""Ensure the XML has a single root element and no stray text."""
|
||||
if not tokens:
|
||||
raise ValueError("XML input is empty.")
|
||||
|
||||
depth = 0
|
||||
root_seen = False
|
||||
|
||||
for token in tokens:
|
||||
if token.type == "TAG_OPEN":
|
||||
if depth == 0 and root_seen:
|
||||
raise ValueError("XML must have a single root element.")
|
||||
depth += 1
|
||||
if depth == 1:
|
||||
root_seen = True
|
||||
elif token.type == "TAG_CLOSE":
|
||||
depth -= 1
|
||||
if depth < 0:
|
||||
raise SyntaxError("Unexpected closing tag in XML input.")
|
||||
elif token.type in {"TEXT", "ESCAPE"}:
|
||||
if depth == 0 and token.value:
|
||||
raise ValueError(
|
||||
"XML contains text outside the root element; "
|
||||
"wrap content in a single root tag."
|
||||
)
|
||||
|
||||
if depth != 0:
|
||||
raise SyntaxError("Unclosed tag detected in XML input.")
|
||||
if not root_seen:
|
||||
raise ValueError("XML must include a root element.")
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
# Security fix: Add size limits to prevent XML bomb attacks
|
||||
MAX_XML_SIZE = 10 * 1024 * 1024 # 10MB limit for XML input
|
||||
@@ -67,9 +35,7 @@ class XMLParserBlock(Block):
|
||||
)
|
||||
|
||||
try:
|
||||
tokens = list(tokenize(input_data.input_xml))
|
||||
self._validate_tokens(tokens)
|
||||
|
||||
tokens = tokenize(input_data.input_xml)
|
||||
parser = Parser(tokens)
|
||||
parsed_result = parser.parse()
|
||||
yield "parsed_xml", parsed_result
|
||||
|
||||
@@ -111,8 +111,6 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
return parsed_url.path.split("/")[2]
|
||||
if parsed_url.path[:3] == "/v/":
|
||||
return parsed_url.path.split("/")[2]
|
||||
if parsed_url.path.startswith("/shorts/"):
|
||||
return parsed_url.path.split("/")[2]
|
||||
raise ValueError(f"Invalid YouTube URL: {url}")
|
||||
|
||||
def get_transcript(
|
||||
|
||||
@@ -50,8 +50,6 @@ from .model import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
from .graph import Link
|
||||
|
||||
app_config = Config()
|
||||
@@ -474,7 +472,6 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
self.block_type = block_type
|
||||
self.webhook_config = webhook_config
|
||||
self.execution_stats: NodeExecutionStats = NodeExecutionStats()
|
||||
self.requires_human_review: bool = False
|
||||
|
||||
if self.webhook_config:
|
||||
if isinstance(self.webhook_config, BlockWebhookConfig):
|
||||
@@ -617,77 +614,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
block_id=self.id,
|
||||
) from ex
|
||||
|
||||
async def is_block_exec_need_review(
|
||||
self,
|
||||
input_data: BlockInput,
|
||||
*,
|
||||
user_id: str,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
graph_version: int,
|
||||
execution_context: "ExecutionContext",
|
||||
**kwargs,
|
||||
) -> tuple[bool, BlockInput]:
|
||||
"""
|
||||
Check if this block execution needs human review and handle the review process.
|
||||
|
||||
Returns:
|
||||
Tuple of (should_pause, input_data_to_use)
|
||||
- should_pause: True if execution should be paused for review
|
||||
- input_data_to_use: The input data to use (may be modified by reviewer)
|
||||
"""
|
||||
# Skip review if not required or safe mode is disabled
|
||||
if not self.requires_human_review or not execution_context.safe_mode:
|
||||
return False, input_data
|
||||
|
||||
from backend.blocks.helpers.review import HITLReviewHelper
|
||||
|
||||
# Handle the review request and get decision
|
||||
decision = await HITLReviewHelper.handle_review_decision(
|
||||
input_data=input_data,
|
||||
user_id=user_id,
|
||||
node_exec_id=node_exec_id,
|
||||
graph_exec_id=graph_exec_id,
|
||||
graph_id=graph_id,
|
||||
graph_version=graph_version,
|
||||
execution_context=execution_context,
|
||||
block_name=self.name,
|
||||
editable=True,
|
||||
)
|
||||
|
||||
if decision is None:
|
||||
# We're awaiting review - pause execution
|
||||
return True, input_data
|
||||
|
||||
if not decision.should_proceed:
|
||||
# Review was rejected, raise an error to stop execution
|
||||
raise BlockExecutionError(
|
||||
message=f"Block execution rejected by reviewer: {decision.message}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
|
||||
# Review was approved - use the potentially modified data
|
||||
# ReviewResult.data must be a dict for block inputs
|
||||
reviewed_data = decision.review_result.data
|
||||
if not isinstance(reviewed_data, dict):
|
||||
raise BlockExecutionError(
|
||||
message=f"Review data must be a dict for block input, got {type(reviewed_data).__name__}",
|
||||
block_name=self.name,
|
||||
block_id=self.id,
|
||||
)
|
||||
return False, reviewed_data
|
||||
|
||||
async def _execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
|
||||
# Check for review requirement and get potentially modified input data
|
||||
should_pause, input_data = await self.is_block_exec_need_review(
|
||||
input_data, **kwargs
|
||||
)
|
||||
if should_pause:
|
||||
return
|
||||
|
||||
# Validate the input data (original or reviewer-modified) once
|
||||
if error := self.input_schema.validate_data(input_data):
|
||||
raise BlockInputError(
|
||||
message=f"Unable to execute block with invalid input data: {error}",
|
||||
@@ -695,7 +622,6 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
|
||||
block_id=self.id,
|
||||
)
|
||||
|
||||
# Use the validated input data
|
||||
async for output_name, output_data in self.run(
|
||||
self.input_schema(**{k: v for k, v in input_data.items() if v is not None}),
|
||||
**kwargs,
|
||||
|
||||
@@ -59,13 +59,12 @@ from backend.integrations.credentials_store import (
|
||||
|
||||
MODEL_COST: dict[LlmModel, int] = {
|
||||
LlmModel.O3: 4,
|
||||
LlmModel.O3_MINI: 2,
|
||||
LlmModel.O1: 16,
|
||||
LlmModel.O3_MINI: 2, # $1.10 / $4.40
|
||||
LlmModel.O1: 16, # $15 / $60
|
||||
LlmModel.O1_MINI: 4,
|
||||
# GPT-5 models
|
||||
LlmModel.GPT5_2: 6,
|
||||
LlmModel.GPT5_1: 5,
|
||||
LlmModel.GPT5: 2,
|
||||
LlmModel.GPT5_1: 5,
|
||||
LlmModel.GPT5_MINI: 1,
|
||||
LlmModel.GPT5_NANO: 1,
|
||||
LlmModel.GPT5_CHAT: 5,
|
||||
@@ -88,7 +87,7 @@ MODEL_COST: dict[LlmModel, int] = {
|
||||
LlmModel.AIML_API_LLAMA3_3_70B: 1,
|
||||
LlmModel.AIML_API_META_LLAMA_3_1_70B: 1,
|
||||
LlmModel.AIML_API_LLAMA_3_2_3B: 1,
|
||||
LlmModel.LLAMA3_3_70B: 1,
|
||||
LlmModel.LLAMA3_3_70B: 1, # $0.59 / $0.79
|
||||
LlmModel.LLAMA3_1_8B: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_3: 1,
|
||||
LlmModel.OLLAMA_LLAMA3_2: 1,
|
||||
|
||||
@@ -341,19 +341,6 @@ class UserCreditBase(ABC):
|
||||
|
||||
if result:
|
||||
# UserBalance is already updated by the CTE
|
||||
|
||||
# Clear insufficient funds notification flags when credits are added
|
||||
# so user can receive alerts again if they run out in the future.
|
||||
if transaction.amount > 0 and transaction.type in [
|
||||
CreditTransactionType.GRANT,
|
||||
CreditTransactionType.TOP_UP,
|
||||
]:
|
||||
from backend.executor.manager import (
|
||||
clear_insufficient_funds_notifications,
|
||||
)
|
||||
|
||||
await clear_insufficient_funds_notifications(user_id)
|
||||
|
||||
return result[0]["balance"]
|
||||
|
||||
async def _add_transaction(
|
||||
@@ -543,22 +530,6 @@ class UserCreditBase(ABC):
|
||||
if result:
|
||||
new_balance, tx_key = result[0]["balance"], result[0]["transactionKey"]
|
||||
# UserBalance is already updated by the CTE
|
||||
|
||||
# Clear insufficient funds notification flags when credits are added
|
||||
# so user can receive alerts again if they run out in the future.
|
||||
if (
|
||||
amount > 0
|
||||
and is_active
|
||||
and transaction_type
|
||||
in [CreditTransactionType.GRANT, CreditTransactionType.TOP_UP]
|
||||
):
|
||||
# Lazy import to avoid circular dependency with executor.manager
|
||||
from backend.executor.manager import (
|
||||
clear_insufficient_funds_notifications,
|
||||
)
|
||||
|
||||
await clear_insufficient_funds_notifications(user_id)
|
||||
|
||||
return new_balance, tx_key
|
||||
|
||||
# If no result, either user doesn't exist or insufficient balance
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user