mirror of https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-10 23:05:17 -05:00

Compare commits
24 Commits
pwuts/open...native-aut
| Author | SHA1 | Date |
|---|---|---|
| | 87e3d7eaad | |
| | 974c14a7b9 | |
| | af014ea19d | |
| | 9ecf8bcb08 | |
| | a7a521cedd | |
| | 84244c0b56 | |
| | 9e83985b5b | |
| | 4ef3eab89d | |
| | c68b53b6c1 | |
| | 23fb3ad8a4 | |
| | 175ba13ebe | |
| | a415f471c6 | |
| | 3dd6e5cb04 | |
| | 3f1e66b317 | |
| | 8f722bd9cd | |
| | 65026fc9d3 | |
| | af98bc1081 | |
| | e92459fc5f | |
| | 1775286f59 | |
| | f6af700f1a | |
| | a80b06d459 | |
| | 17c9e7c8b4 | |
| | f83c9391c8 | |
| | 7a0a90e421 | |
@@ -1,36 +0,0 @@
{
  "worktreeCopyPatterns": [
    ".env*",
    ".vscode/**",
    ".auth/**",
    ".claude/**",
    "autogpt_platform/.env*",
    "autogpt_platform/backend/.env*",
    "autogpt_platform/frontend/.env*",
    "autogpt_platform/frontend/.auth/**",
    "autogpt_platform/db/docker/.env*"
  ],
  "worktreeCopyIgnores": [
    "**/node_modules/**",
    "**/dist/**",
    "**/.git/**",
    "**/Thumbs.db",
    "**/.DS_Store",
    "**/.next/**",
    "**/__pycache__/**",
    "**/.ruff_cache/**",
    "**/.pytest_cache/**",
    "**/*.pyc",
    "**/playwright-report/**",
    "**/logs/**",
    "**/site/**"
  ],
  "worktreePathTemplate": "$BASE_PATH.worktree",
  "postCreateCmd": [
    "cd autogpt_platform/autogpt_libs && poetry install",
    "cd autogpt_platform/backend && poetry install && poetry run prisma generate",
    "cd autogpt_platform/frontend && pnpm install"
  ],
  "terminalCommand": "code .",
  "deleteBranchWithWorktree": false
}
File diff suppressed because it is too large
@@ -1,125 +0,0 @@
---
name: vercel-react-best-practices
description: React and Next.js performance optimization guidelines from Vercel Engineering. This skill should be used when writing, reviewing, or refactoring React/Next.js code to ensure optimal performance patterns. Triggers on tasks involving React components, Next.js pages, data fetching, bundle optimization, or performance improvements.
license: MIT
metadata:
  author: vercel
  version: "1.0.0"
---

# Vercel React Best Practices

Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories, prioritized by impact to guide automated refactoring and code generation.

## When to Apply

Reference these guidelines when:

- Writing new React components or Next.js pages
- Implementing data fetching (client- or server-side)
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
- Optimizing bundle size or load times

## Rule Categories by Priority

| Priority | Category | Impact | Prefix |
|----------|----------|--------|--------|
| 1 | Eliminating Waterfalls | CRITICAL | `async-` |
| 2 | Bundle Size Optimization | CRITICAL | `bundle-` |
| 3 | Server-Side Performance | HIGH | `server-` |
| 4 | Client-Side Data Fetching | MEDIUM-HIGH | `client-` |
| 5 | Re-render Optimization | MEDIUM | `rerender-` |
| 6 | Rendering Performance | MEDIUM | `rendering-` |
| 7 | JavaScript Performance | LOW-MEDIUM | `js-` |
| 8 | Advanced Patterns | LOW | `advanced-` |

## Quick Reference

### 1. Eliminating Waterfalls (CRITICAL)

- `async-defer-await` - Move await into branches where actually used
- `async-parallel` - Use Promise.all() for independent operations
- `async-dependencies` - Use better-all for partial dependencies
- `async-api-routes` - Start promises early, await late in API routes
- `async-suspense-boundaries` - Use Suspense to stream content

### 2. Bundle Size Optimization (CRITICAL)

- `bundle-barrel-imports` - Import directly, avoid barrel files
- `bundle-dynamic-imports` - Use next/dynamic for heavy components
- `bundle-defer-third-party` - Load analytics/logging after hydration
- `bundle-conditional` - Load modules only when feature is activated
- `bundle-preload` - Preload on hover/focus for perceived speed

### 3. Server-Side Performance (HIGH)

- `server-cache-react` - Use React.cache() for per-request deduplication (see the sketch after this list)
- `server-cache-lru` - Use LRU cache for cross-request caching
- `server-serialization` - Minimize data passed to client components
- `server-parallel-fetching` - Restructure components to parallelize fetches
- `server-after-nonblocking` - Use after() for non-blocking operations
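
As a minimal sketch of `server-cache-react` (the endpoint URL is a placeholder, not part of the original skill):

```tsx
import { cache } from 'react'

// Per-request deduplication: server components calling getUser(id)
// during the same render share a single fetch instead of repeating it.
const getUser = cache(async (id: string) => {
  const res = await fetch(`https://api.example.com/users/${id}`) // placeholder URL
  return res.json()
})
```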

### 4. Client-Side Data Fetching (MEDIUM-HIGH)

- `client-swr-dedup` - Use SWR for automatic request deduplication
- `client-event-listeners` - Deduplicate global event listeners

### 5. Re-render Optimization (MEDIUM)

- `rerender-defer-reads` - Don't subscribe to state only used in callbacks
- `rerender-memo` - Extract expensive work into memoized components
- `rerender-dependencies` - Use primitive dependencies in effects
- `rerender-derived-state` - Subscribe to derived booleans, not raw values
- `rerender-functional-setstate` - Use functional setState for stable callbacks (see the sketch after this list)
- `rerender-lazy-state-init` - Pass function to useState for expensive values
- `rerender-transitions` - Use startTransition for non-urgent updates
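
And a minimal sketch of `rerender-functional-setstate` (illustrative component, not from the original skill), where a callback stays stable by reading previous state through the updater instead of closing over it:

```tsx
import { useCallback, useState } from 'react'

function Counter() {
  const [count, setCount] = useState(0)

  // Stable identity across renders: the callback never depends on `count`,
  // so memoized children receiving it don't re-render needlessly.
  const increment = useCallback(() => setCount(prev => prev + 1), [])

  return <button onClick={increment}>{count}</button>
}
```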

### 6. Rendering Performance (MEDIUM)

- `rendering-animate-svg-wrapper` - Animate div wrapper, not SVG element
- `rendering-content-visibility` - Use content-visibility for long lists
- `rendering-hoist-jsx` - Extract static JSX outside components
- `rendering-svg-precision` - Reduce SVG coordinate precision
- `rendering-hydration-no-flicker` - Use inline script for client-only data
- `rendering-activity` - Use Activity component for show/hide
- `rendering-conditional-render` - Use ternary, not && for conditionals

### 7. JavaScript Performance (LOW-MEDIUM)

- `js-batch-dom-css` - Group CSS changes via classes or cssText
- `js-index-maps` - Build Map for repeated lookups
- `js-cache-property-access` - Cache object properties in loops
- `js-cache-function-results` - Cache function results in module-level Map
- `js-cache-storage` - Cache localStorage/sessionStorage reads
- `js-combine-iterations` - Combine multiple filter/map into one loop
- `js-length-check-first` - Check array length before expensive comparison
- `js-early-exit` - Return early from functions
- `js-hoist-regexp` - Hoist RegExp creation outside loops
- `js-min-max-loop` - Use loop for min/max instead of sort
- `js-set-map-lookups` - Use Set/Map for O(1) lookups
- `js-tosorted-immutable` - Use toSorted() for immutability

### 8. Advanced Patterns (LOW)

- `advanced-event-handler-refs` - Store event handlers in refs
- `advanced-use-latest` - useLatest for stable callback refs

## How to Use

Read individual rule files for detailed explanations and code examples:

```
rules/async-parallel.md
rules/bundle-barrel-imports.md
rules/_sections.md
```

Each rule file contains:

- Brief explanation of why it matters
- Incorrect code example with explanation
- Correct code example with explanation
- Additional context and references

## Full Compiled Document

For the complete guide with all rules expanded: `AGENTS.md`
@@ -1,55 +0,0 @@
---
title: Store Event Handlers in Refs
impact: LOW
impactDescription: stable subscriptions
tags: advanced, hooks, refs, event-handlers, optimization
---

## Store Event Handlers in Refs

Store callbacks in refs when used in effects that shouldn't re-subscribe on callback changes.

**Incorrect (re-subscribes on every render):**

```tsx
function useWindowEvent(event: string, handler: () => void) {
  useEffect(() => {
    window.addEventListener(event, handler)
    return () => window.removeEventListener(event, handler)
  }, [event, handler])
}
```

**Correct (stable subscription):**

```tsx
function useWindowEvent(event: string, handler: () => void) {
  const handlerRef = useRef(handler)
  useEffect(() => {
    handlerRef.current = handler
  }, [handler])

  useEffect(() => {
    const listener = () => handlerRef.current()
    window.addEventListener(event, listener)
    return () => window.removeEventListener(event, listener)
  }, [event])
}
```

**Alternative: use `useEffectEvent` if you're on the latest React:**

```tsx
import { useEffectEvent } from 'react'

function useWindowEvent(event: string, handler: () => void) {
  const onEvent = useEffectEvent(handler)

  useEffect(() => {
    window.addEventListener(event, onEvent)
    return () => window.removeEventListener(event, onEvent)
  }, [event])
}
```

`useEffectEvent` provides a cleaner API for the same pattern: it creates a stable function reference that always calls the latest version of the handler.
@@ -1,49 +0,0 @@
---
title: useLatest for Stable Callback Refs
impact: LOW
impactDescription: prevents effect re-runs
tags: advanced, hooks, useLatest, refs, optimization
---

## useLatest for Stable Callback Refs

Access latest values in callbacks without adding them to dependency arrays. Prevents effect re-runs while avoiding stale closures.

**Implementation:**

```typescript
function useLatest<T>(value: T) {
  const ref = useRef(value)
  useEffect(() => {
    ref.current = value
  }, [value])
  return ref
}
```

**Incorrect (effect re-runs on every callback change):**

```tsx
function SearchInput({ onSearch }: { onSearch: (q: string) => void }) {
  const [query, setQuery] = useState('')

  useEffect(() => {
    const timeout = setTimeout(() => onSearch(query), 300)
    return () => clearTimeout(timeout)
  }, [query, onSearch])
}
```

**Correct (stable effect, fresh callback):**

```tsx
function SearchInput({ onSearch }: { onSearch: (q: string) => void }) {
  const [query, setQuery] = useState('')
  const onSearchRef = useLatest(onSearch)

  useEffect(() => {
    const timeout = setTimeout(() => onSearchRef.current(query), 300)
    return () => clearTimeout(timeout)
  }, [query])
}
```
@@ -1,38 +0,0 @@
---
title: Prevent Waterfall Chains in API Routes
impact: CRITICAL
impactDescription: 2-10× improvement
tags: api-routes, server-actions, waterfalls, parallelization
---

## Prevent Waterfall Chains in API Routes

In API routes and Server Actions, start independent operations immediately, even if you don't await them yet.

**Incorrect (config waits for auth, data waits for both):**

```typescript
export async function GET(request: Request) {
  const session = await auth()
  const config = await fetchConfig()
  const data = await fetchData(session.user.id)
  return Response.json({ data, config })
}
```

**Correct (auth and config start immediately):**

```typescript
export async function GET(request: Request) {
  const sessionPromise = auth()
  const configPromise = fetchConfig()
  const session = await sessionPromise
  const [config, data] = await Promise.all([
    configPromise,
    fetchData(session.user.id)
  ])
  return Response.json({ data, config })
}
```

For operations with more complex dependency chains, use `better-all` to automatically maximize parallelism (see Dependency-Based Parallelization).
@@ -1,80 +0,0 @@
---
title: Defer Await Until Needed
impact: HIGH
impactDescription: avoids blocking unused code paths
tags: async, await, conditional, optimization
---

## Defer Await Until Needed

Move `await` operations into the branches where they're actually used to avoid blocking code paths that don't need them.

**Incorrect (blocks both branches):**

```typescript
async function handleRequest(userId: string, skipProcessing: boolean) {
  const userData = await fetchUserData(userId)

  if (skipProcessing) {
    // Returns immediately but still waited for userData
    return { skipped: true }
  }

  // Only this branch uses userData
  return processUserData(userData)
}
```

**Correct (only blocks when needed):**

```typescript
async function handleRequest(userId: string, skipProcessing: boolean) {
  if (skipProcessing) {
    // Returns immediately without waiting
    return { skipped: true }
  }

  // Fetch only when needed
  const userData = await fetchUserData(userId)
  return processUserData(userData)
}
```

**Another example (early return optimization):**

```typescript
// Incorrect: always fetches permissions
async function updateResource(resourceId: string, userId: string) {
  const permissions = await fetchPermissions(userId)
  const resource = await getResource(resourceId)

  if (!resource) {
    return { error: 'Not found' }
  }

  if (!permissions.canEdit) {
    return { error: 'Forbidden' }
  }

  return await updateResourceData(resource, permissions)
}

// Correct: fetches only when needed
async function updateResource(resourceId: string, userId: string) {
  const resource = await getResource(resourceId)

  if (!resource) {
    return { error: 'Not found' }
  }

  const permissions = await fetchPermissions(userId)

  if (!permissions.canEdit) {
    return { error: 'Forbidden' }
  }

  return await updateResourceData(resource, permissions)
}
```

This optimization is especially valuable when the skipped branch is frequently taken, or when the deferred operation is expensive.
@@ -1,36 +0,0 @@
---
title: Dependency-Based Parallelization
impact: CRITICAL
impactDescription: 2-10× improvement
tags: async, parallelization, dependencies, better-all
---

## Dependency-Based Parallelization

For operations with partial dependencies, use `better-all` to maximize parallelism. It automatically starts each task at the earliest possible moment.

**Incorrect (profile waits for config unnecessarily):**

```typescript
const [user, config] = await Promise.all([
  fetchUser(),
  fetchConfig()
])
const profile = await fetchProfile(user.id)
```

**Correct (config and profile run in parallel):**

```typescript
import { all } from 'better-all'

const { user, config, profile } = await all({
  async user() { return fetchUser() },
  async config() { return fetchConfig() },
  async profile() {
    return fetchProfile((await this.$.user).id)
  }
})
```

Reference: [https://github.com/shuding/better-all](https://github.com/shuding/better-all)
@@ -1,28 +0,0 @@
---
title: Promise.all() for Independent Operations
impact: CRITICAL
impactDescription: 2-10× improvement
tags: async, parallelization, promises, waterfalls
---

## Promise.all() for Independent Operations

When async operations have no interdependencies, execute them concurrently using `Promise.all()`.

**Incorrect (sequential execution, 3 round trips):**

```typescript
const user = await fetchUser()
const posts = await fetchPosts()
const comments = await fetchComments()
```

**Correct (parallel execution, 1 round trip):**

```typescript
const [user, posts, comments] = await Promise.all([
  fetchUser(),
  fetchPosts(),
  fetchComments()
])
```
@@ -1,99 +0,0 @@
---
title: Strategic Suspense Boundaries
impact: HIGH
impactDescription: faster initial paint
tags: async, suspense, streaming, layout-shift
---

## Strategic Suspense Boundaries

Instead of awaiting data in async components before returning JSX, use Suspense boundaries to show the wrapper UI faster while data loads.

**Incorrect (wrapper blocked by data fetching):**

```tsx
async function Page() {
  const data = await fetchData() // Blocks entire page

  return (
    <div>
      <div>Sidebar</div>
      <div>Header</div>
      <div>
        <DataDisplay data={data} />
      </div>
      <div>Footer</div>
    </div>
  )
}
```

The entire layout waits for data even though only the middle section needs it.

**Correct (wrapper shows immediately, data streams in):**

```tsx
function Page() {
  return (
    <div>
      <div>Sidebar</div>
      <div>Header</div>
      <div>
        <Suspense fallback={<Skeleton />}>
          <DataDisplay />
        </Suspense>
      </div>
      <div>Footer</div>
    </div>
  )
}

async function DataDisplay() {
  const data = await fetchData() // Only blocks this component
  return <div>{data.content}</div>
}
```

Sidebar, Header, and Footer render immediately. Only DataDisplay waits for data.

**Alternative (share promise across components):**

```tsx
function Page() {
  // Start fetch immediately, but don't await
  const dataPromise = fetchData()

  return (
    <div>
      <div>Sidebar</div>
      <div>Header</div>
      <Suspense fallback={<Skeleton />}>
        <DataDisplay dataPromise={dataPromise} />
        <DataSummary dataPromise={dataPromise} />
      </Suspense>
      <div>Footer</div>
    </div>
  )
}

function DataDisplay({ dataPromise }: { dataPromise: Promise<Data> }) {
  const data = use(dataPromise) // Unwraps the promise
  return <div>{data.content}</div>
}

function DataSummary({ dataPromise }: { dataPromise: Promise<Data> }) {
  const data = use(dataPromise) // Reuses the same promise
  return <div>{data.summary}</div>
}
```

Both components share the same promise, so only one fetch occurs. Layout renders immediately while both components wait together.

**When NOT to use this pattern:**

- Critical data needed for layout decisions (affects positioning)
- SEO-critical content above the fold
- Small, fast queries where suspense overhead isn't worth it
- When you want to avoid layout shift (loading → content jump)

**Trade-off:** Faster initial paint vs potential layout shift. Choose based on your UX priorities.
@@ -1,59 +0,0 @@
---
title: Avoid Barrel File Imports
impact: CRITICAL
impactDescription: 200-800ms import cost, slow builds
tags: bundle, imports, tree-shaking, barrel-files, performance
---

## Avoid Barrel File Imports

Import directly from source files instead of barrel files to avoid loading thousands of unused modules. **Barrel files** are entry points that re-export multiple modules (e.g., `index.js` that does `export * from './module'`).

Popular icon and component libraries can have **up to 10,000 re-exports** in their entry file. For many React packages, **it takes 200-800ms just to import them**, affecting both development speed and production cold starts.

**Why tree-shaking doesn't help:** When a library is marked as external (not bundled), the bundler can't optimize it. If you bundle it to enable tree-shaking, builds become substantially slower analyzing the entire module graph.

**Incorrect (imports entire library):**

```tsx
import { Check, X, Menu } from 'lucide-react'
// Loads 1,583 modules, takes ~2.8s extra in dev
// Runtime cost: 200-800ms on every cold start

import { Button, TextField } from '@mui/material'
// Loads 2,225 modules, takes ~4.2s extra in dev
```

**Correct (imports only what you need):**

```tsx
import Check from 'lucide-react/dist/esm/icons/check'
import X from 'lucide-react/dist/esm/icons/x'
import Menu from 'lucide-react/dist/esm/icons/menu'
// Loads only 3 modules (~2KB vs ~1MB)

import Button from '@mui/material/Button'
import TextField from '@mui/material/TextField'
// Loads only what you use
```

**Alternative (Next.js 13.5+):**

```js
// next.config.js - use optimizePackageImports
module.exports = {
  experimental: {
    optimizePackageImports: ['lucide-react', '@mui/material']
  }
}

// Then you can keep the ergonomic barrel imports:
import { Check, X, Menu } from 'lucide-react'
// Automatically transformed to direct imports at build time
```

Direct imports provide 15-70% faster dev boot, 28% faster builds, 40% faster cold starts, and significantly faster HMR.

Libraries commonly affected: `lucide-react`, `@mui/material`, `@mui/icons-material`, `@tabler/icons-react`, `react-icons`, `@headlessui/react`, `@radix-ui/react-*`, `lodash`, `ramda`, `date-fns`, `rxjs`, `react-use`.

Reference: [How we optimized package imports in Next.js](https://vercel.com/blog/how-we-optimized-package-imports-in-next-js)
@@ -1,31 +0,0 @@
---
title: Conditional Module Loading
impact: HIGH
impactDescription: loads large data only when needed
tags: bundle, conditional-loading, lazy-loading
---

## Conditional Module Loading

Load large data or modules only when a feature is activated.

**Example (lazy-load animation frames):**

```tsx
function AnimationPlayer({ enabled }: { enabled: boolean }) {
  const [frames, setFrames] = useState<Frame[] | null>(null)

  useEffect(() => {
    if (enabled && !frames && typeof window !== 'undefined') {
      import('./animation-frames.js')
        .then(mod => setFrames(mod.frames))
        // `enabled` is a prop, so it can't be toggled off here; just surface the failure
        .catch(err => console.error('Failed to load animation frames', err))
    }
  }, [enabled, frames])

  if (!frames) return <Skeleton />
  return <Canvas frames={frames} />
}
```

The `typeof window !== 'undefined'` check prevents bundling this module for SSR, optimizing server bundle size and build speed.
@@ -1,49 +0,0 @@
---
title: Defer Non-Critical Third-Party Libraries
impact: MEDIUM
impactDescription: loads after hydration
tags: bundle, third-party, analytics, defer
---

## Defer Non-Critical Third-Party Libraries

Analytics, logging, and error tracking don't block user interaction. Load them after hydration.

**Incorrect (blocks initial bundle):**

```tsx
import { Analytics } from '@vercel/analytics/react'

export default function RootLayout({ children }) {
  return (
    <html>
      <body>
        {children}
        <Analytics />
      </body>
    </html>
  )
}
```

**Correct (loads after hydration):**

```tsx
import dynamic from 'next/dynamic'

const Analytics = dynamic(
  () => import('@vercel/analytics/react').then(m => m.Analytics),
  { ssr: false }
)

export default function RootLayout({ children }) {
  return (
    <html>
      <body>
        {children}
        <Analytics />
      </body>
    </html>
  )
}
```
@@ -1,35 +0,0 @@
---
title: Dynamic Imports for Heavy Components
impact: CRITICAL
impactDescription: directly affects TTI and LCP
tags: bundle, dynamic-import, code-splitting, next-dynamic
---

## Dynamic Imports for Heavy Components

Use `next/dynamic` to lazy-load large components not needed on initial render.

**Incorrect (Monaco bundles with main chunk ~300KB):**

```tsx
import { MonacoEditor } from './monaco-editor'

function CodePanel({ code }: { code: string }) {
  return <MonacoEditor value={code} />
}
```

**Correct (Monaco loads on demand):**

```tsx
import dynamic from 'next/dynamic'

const MonacoEditor = dynamic(
  () => import('./monaco-editor').then(m => m.MonacoEditor),
  { ssr: false }
)

function CodePanel({ code }: { code: string }) {
  return <MonacoEditor value={code} />
}
```
@@ -1,50 +0,0 @@
---
title: Preload Based on User Intent
impact: MEDIUM
impactDescription: reduces perceived latency
tags: bundle, preload, user-intent, hover
---

## Preload Based on User Intent

Preload heavy bundles before they're needed to reduce perceived latency.

**Example (preload on hover/focus):**

```tsx
function EditorButton({ onClick }: { onClick: () => void }) {
  const preload = () => {
    if (typeof window !== 'undefined') {
      void import('./monaco-editor')
    }
  }

  return (
    <button
      onMouseEnter={preload}
      onFocus={preload}
      onClick={onClick}
    >
      Open Editor
    </button>
  )
}
```

**Example (preload when feature flag is enabled):**

```tsx
function FlagsProvider({ children, flags }: Props) {
  useEffect(() => {
    if (flags.editorEnabled && typeof window !== 'undefined') {
      void import('./monaco-editor').then(mod => mod.init())
    }
  }, [flags.editorEnabled])

  return (
    <FlagsContext.Provider value={flags}>
      {children}
    </FlagsContext.Provider>
  )
}
```

The `typeof window !== 'undefined'` check prevents bundling preloaded modules for SSR, optimizing server bundle size and build speed.
@@ -1,74 +0,0 @@
---
title: Deduplicate Global Event Listeners
impact: LOW
impactDescription: single listener for N components
tags: client, swr, event-listeners, subscription
---

## Deduplicate Global Event Listeners

Use `useSWRSubscription()` to share global event listeners across component instances.

**Incorrect (N instances = N listeners):**

```tsx
function useKeyboardShortcut(key: string, callback: () => void) {
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.metaKey && e.key === key) {
        callback()
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [key, callback])
}
```

When using the `useKeyboardShortcut` hook multiple times, each instance will register a new listener.

**Correct (N instances = 1 listener):**

```tsx
import useSWRSubscription from 'swr/subscription'

// Module-level Map to track callbacks per key
const keyCallbacks = new Map<string, Set<() => void>>()

function useKeyboardShortcut(key: string, callback: () => void) {
  // Register this callback in the Map
  useEffect(() => {
    if (!keyCallbacks.has(key)) {
      keyCallbacks.set(key, new Set())
    }
    keyCallbacks.get(key)!.add(callback)

    return () => {
      const set = keyCallbacks.get(key)
      if (set) {
        set.delete(callback)
        if (set.size === 0) {
          keyCallbacks.delete(key)
        }
      }
    }
  }, [key, callback])

  useSWRSubscription('global-keydown', () => {
    const handler = (e: KeyboardEvent) => {
      if (e.metaKey && keyCallbacks.has(e.key)) {
        keyCallbacks.get(e.key)!.forEach(cb => cb())
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  })
}

function Profile() {
  // Multiple shortcuts will share the same listener
  useKeyboardShortcut('p', () => { /* ... */ })
  useKeyboardShortcut('k', () => { /* ... */ })
  // ...
}
```
@@ -1,56 +0,0 @@
---
title: Use SWR for Automatic Deduplication
impact: MEDIUM-HIGH
impactDescription: automatic deduplication
tags: client, swr, deduplication, data-fetching
---

## Use SWR for Automatic Deduplication

SWR enables request deduplication, caching, and revalidation across component instances.

**Incorrect (no deduplication, each instance fetches):**

```tsx
function UserList() {
  const [users, setUsers] = useState([])
  useEffect(() => {
    fetch('/api/users')
      .then(r => r.json())
      .then(setUsers)
  }, [])
}
```

**Correct (multiple instances share one request):**

```tsx
import useSWR from 'swr'

const fetcher = (url: string) => fetch(url).then(r => r.json())

function UserList() {
  const { data: users } = useSWR('/api/users', fetcher)
}
```

**For immutable data:**

```tsx
import useSWRImmutable from 'swr/immutable'

function StaticContent() {
  const { data } = useSWRImmutable('/api/config', fetcher)
}
```

**For mutations:**

```tsx
import useSWRMutation from 'swr/mutation'

function UpdateButton() {
  const { trigger } = useSWRMutation('/api/user', updateUser)
  return <button onClick={() => trigger()}>Update</button>
}
```

Reference: [https://swr.vercel.app](https://swr.vercel.app)
@@ -1,82 +0,0 @@
---
title: Batch DOM CSS Changes
impact: MEDIUM
impactDescription: reduces reflows/repaints
tags: javascript, dom, css, performance, reflow
---

## Batch DOM CSS Changes

Avoid changing styles one property at a time. Group multiple CSS changes together via classes or `cssText` to minimize browser reflows.

**Incorrect (multiple reflows):**

```typescript
function updateElementStyles(element: HTMLElement) {
  // Each line triggers a reflow
  element.style.width = '100px'
  element.style.height = '200px'
  element.style.backgroundColor = 'blue'
  element.style.border = '1px solid black'
}
```

**Correct (add class - single reflow):**

```css
.highlighted-box {
  width: 100px;
  height: 200px;
  background-color: blue;
  border: 1px solid black;
}
```

```typescript
function updateElementStyles(element: HTMLElement) {
  element.classList.add('highlighted-box')
}
```

**Correct (change cssText - single reflow):**

```typescript
function updateElementStyles(element: HTMLElement) {
  element.style.cssText = `
    width: 100px;
    height: 200px;
    background-color: blue;
    border: 1px solid black;
  `
}
```

**React example:**

```tsx
// Incorrect: changing styles one by one
function Box({ isHighlighted }: { isHighlighted: boolean }) {
  const ref = useRef<HTMLDivElement>(null)

  useEffect(() => {
    if (ref.current && isHighlighted) {
      ref.current.style.width = '100px'
      ref.current.style.height = '200px'
      ref.current.style.backgroundColor = 'blue'
    }
  }, [isHighlighted])

  return <div ref={ref}>Content</div>
}

// Correct: toggle class
function Box({ isHighlighted }: { isHighlighted: boolean }) {
  return (
    <div className={isHighlighted ? 'highlighted-box' : ''}>
      Content
    </div>
  )
}
```

Prefer CSS classes over inline styles when possible. Classes are cached by the browser and provide better separation of concerns.
@@ -1,80 +0,0 @@
---
title: Cache Repeated Function Calls
impact: MEDIUM
impactDescription: avoid redundant computation
tags: javascript, cache, memoization, performance
---

## Cache Repeated Function Calls

Use a module-level Map to cache function results when the same function is called repeatedly with the same inputs during render.

**Incorrect (redundant computation):**

```tsx
function ProjectList({ projects }: { projects: Project[] }) {
  return (
    <div>
      {projects.map(project => {
        // slugify() called 100+ times for same project names
        const slug = slugify(project.name)

        return <ProjectCard key={project.id} slug={slug} />
      })}
    </div>
  )
}
```

**Correct (cached results):**

```tsx
// Module-level cache
const slugifyCache = new Map<string, string>()

function cachedSlugify(text: string): string {
  if (slugifyCache.has(text)) {
    return slugifyCache.get(text)!
  }
  const result = slugify(text)
  slugifyCache.set(text, result)
  return result
}

function ProjectList({ projects }: { projects: Project[] }) {
  return (
    <div>
      {projects.map(project => {
        // Computed only once per unique project name
        const slug = cachedSlugify(project.name)

        return <ProjectCard key={project.id} slug={slug} />
      })}
    </div>
  )
}
```

**Simpler pattern for single-value functions:**

```typescript
let isLoggedInCache: boolean | null = null

function isLoggedIn(): boolean {
  if (isLoggedInCache !== null) {
    return isLoggedInCache
  }

  isLoggedInCache = document.cookie.includes('auth=')
  return isLoggedInCache
}

// Clear cache when auth changes
function onAuthChange() {
  isLoggedInCache = null
}
```

Use a Map (not a hook) so it works everywhere: utilities, event handlers, not just React components.
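
Because the cache lives in a module-level Map rather than a hook, `cachedSlugify` also works outside of React, e.g. in an event handler (a small illustrative sketch):

```typescript
// Works anywhere, not just during render
function handleExport(projects: Project[]) {
  const slugs = projects.map(p => cachedSlugify(p.name))
  navigator.clipboard.writeText(slugs.join('\n'))
}
```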

Reference: [How we made the Vercel Dashboard twice as fast](https://vercel.com/blog/how-we-made-the-vercel-dashboard-twice-as-fast)
@@ -1,28 +0,0 @@
---
title: Cache Property Access in Loops
impact: LOW-MEDIUM
impactDescription: reduces lookups
tags: javascript, loops, optimization, caching
---

## Cache Property Access in Loops

Cache object property lookups in hot paths.

**Incorrect (3 lookups × N iterations):**

```typescript
for (let i = 0; i < arr.length; i++) {
  process(obj.config.settings.value)
}
```

**Correct (1 lookup total):**

```typescript
const value = obj.config.settings.value
const len = arr.length
for (let i = 0; i < len; i++) {
  process(value)
}
```
@@ -1,70 +0,0 @@
---
title: Cache Storage API Calls
impact: LOW-MEDIUM
impactDescription: reduces expensive I/O
tags: javascript, localStorage, storage, caching, performance
---

## Cache Storage API Calls

`localStorage`, `sessionStorage`, and `document.cookie` are synchronous and expensive. Cache reads in memory.

**Incorrect (reads storage on every call):**

```typescript
function getTheme() {
  return localStorage.getItem('theme') ?? 'light'
}
// Called 10 times = 10 storage reads
```

**Correct (Map cache):**

```typescript
const storageCache = new Map<string, string | null>()

function getLocalStorage(key: string) {
  if (!storageCache.has(key)) {
    storageCache.set(key, localStorage.getItem(key))
  }
  return storageCache.get(key)
}

function setLocalStorage(key: string, value: string) {
  localStorage.setItem(key, value)
  storageCache.set(key, value) // keep cache in sync
}
```

Use a Map (not a hook) so it works everywhere: utilities, event handlers, not just React components.

**Cookie caching:**

```typescript
let cookieCache: Record<string, string> | null = null

function getCookie(name: string) {
  if (!cookieCache) {
    cookieCache = Object.fromEntries(
      document.cookie.split('; ').map(c => c.split('='))
    )
  }
  return cookieCache[name]
}
```

**Important (invalidate on external changes):**

If storage can change externally (another tab, server-set cookies), invalidate the cache:

```typescript
window.addEventListener('storage', (e) => {
  if (e.key) storageCache.delete(e.key)
})

document.addEventListener('visibilitychange', () => {
  if (document.visibilityState === 'visible') {
    storageCache.clear()
  }
})
```
@@ -1,32 +0,0 @@
---
title: Combine Multiple Array Iterations
impact: LOW-MEDIUM
impactDescription: reduces iterations
tags: javascript, arrays, loops, performance
---

## Combine Multiple Array Iterations

Multiple `.filter()` or `.map()` calls iterate the array multiple times. Combine into one loop.

**Incorrect (3 iterations):**

```typescript
const admins = users.filter(u => u.isAdmin)
const testers = users.filter(u => u.isTester)
const inactive = users.filter(u => !u.isActive)
```

**Correct (1 iteration):**

```typescript
const admins: User[] = []
const testers: User[] = []
const inactive: User[] = []

for (const user of users) {
  if (user.isAdmin) admins.push(user)
  if (user.isTester) testers.push(user)
  if (!user.isActive) inactive.push(user)
}
```
@@ -1,50 +0,0 @@
---
title: Early Return from Functions
impact: LOW-MEDIUM
impactDescription: avoids unnecessary computation
tags: javascript, functions, optimization, early-return
---

## Early Return from Functions

Return early when the result is determined to skip unnecessary processing.

**Incorrect (processes all items even after finding answer):**

```typescript
function validateUsers(users: User[]) {
  let hasError = false
  let errorMessage = ''

  for (const user of users) {
    if (!user.email) {
      hasError = true
      errorMessage = 'Email required'
    }
    if (!user.name) {
      hasError = true
      errorMessage = 'Name required'
    }
    // Continues checking all users even after error found
  }

  return hasError ? { valid: false, error: errorMessage } : { valid: true }
}
```

**Correct (returns immediately on first error):**

```typescript
function validateUsers(users: User[]) {
  for (const user of users) {
    if (!user.email) {
      return { valid: false, error: 'Email required' }
    }
    if (!user.name) {
      return { valid: false, error: 'Name required' }
    }
  }

  return { valid: true }
}
```
@@ -1,45 +0,0 @@
---
title: Hoist RegExp Creation
impact: LOW-MEDIUM
impactDescription: avoids recreation
tags: javascript, regexp, optimization, memoization
---

## Hoist RegExp Creation

Don't create a RegExp inside render. Hoist it to module scope or memoize it with `useMemo()`.

**Incorrect (new RegExp every render):**

```tsx
function Highlighter({ text, query }: Props) {
  const regex = new RegExp(`(${query})`, 'gi')
  const parts = text.split(regex)
  return <>{parts.map((part, i) => ...)}</>
}
```

**Correct (memoize or hoist):**

```tsx
// Hoisted to module scope: compiled once, reused by every render
const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/

function Highlighter({ text, query }: Props) {
  // escapeRegex is a helper that escapes regex metacharacters in the query
  const regex = useMemo(
    () => new RegExp(`(${escapeRegex(query)})`, 'gi'),
    [query]
  )
  const parts = text.split(regex)
  return <>{parts.map((part, i) => ...)}</>
}
```

**Warning (global regex has mutable state):**

Global regex (`/g`) has mutable `lastIndex` state:

```typescript
const regex = /foo/g
regex.test('foo') // true, lastIndex = 3
regex.test('foo') // false, lastIndex = 0
```
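
If a hoisted global regex is reused for `.test()` or `.exec()`, one way to avoid this footgun (a sketch, not part of the original rule) is to reset the cursor before each use, or drop the `g` flag when only testing:

```typescript
const FOO_REGEX = /foo/g

function containsFoo(text: string): boolean {
  FOO_REGEX.lastIndex = 0 // reset the stateful cursor before reuse
  return FOO_REGEX.test(text)
}
```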
@@ -1,37 +0,0 @@
---
title: Build Index Maps for Repeated Lookups
impact: LOW-MEDIUM
impactDescription: 1M ops to 2K ops
tags: javascript, map, indexing, optimization, performance
---

## Build Index Maps for Repeated Lookups

Multiple `.find()` calls by the same key should use a Map.

**Incorrect (O(n) per lookup):**

```typescript
function processOrders(orders: Order[], users: User[]) {
  return orders.map(order => ({
    ...order,
    user: users.find(u => u.id === order.userId)
  }))
}
```

**Correct (O(1) per lookup):**

```typescript
function processOrders(orders: Order[], users: User[]) {
  const userById = new Map(users.map(u => [u.id, u]))

  return orders.map(order => ({
    ...order,
    user: userById.get(order.userId)
  }))
}
```

Build the map once (O(n)), then all lookups are O(1). For 1000 orders × 1000 users: 1M ops → 2K ops.
@@ -1,49 +0,0 @@
---
title: Early Length Check for Array Comparisons
impact: MEDIUM-HIGH
impactDescription: avoids expensive operations when lengths differ
tags: javascript, arrays, performance, optimization, comparison
---

## Early Length Check for Array Comparisons

When comparing arrays with expensive operations (sorting, deep equality, serialization), check lengths first. If lengths differ, the arrays cannot be equal.

In real-world applications, this optimization is especially valuable when the comparison runs in hot paths (event handlers, render loops).

**Incorrect (always runs expensive comparison):**

```typescript
function hasChanges(current: string[], original: string[]) {
  // Always sorts and joins, even when lengths differ
  return current.sort().join() !== original.sort().join()
}
```

Two O(n log n) sorts run even when `current.length` is 5 and `original.length` is 100. There is also the overhead of joining the arrays and comparing the strings.

**Correct (O(1) length check first):**

```typescript
function hasChanges(current: string[], original: string[]) {
  // Early return if lengths differ
  if (current.length !== original.length) {
    return true
  }
  // Only sort/compare when lengths match
  const currentSorted = current.toSorted()
  const originalSorted = original.toSorted()
  for (let i = 0; i < currentSorted.length; i++) {
    if (currentSorted[i] !== originalSorted[i]) {
      return true
    }
  }
  return false
}
```

This approach is more efficient because:

- It avoids the overhead of sorting and joining the arrays when lengths differ
- It avoids consuming memory for the joined strings (especially important for large arrays)
- It avoids mutating the original arrays
- It returns early when a difference is found
@@ -1,82 +0,0 @@
---
title: Use Loop for Min/Max Instead of Sort
impact: LOW
impactDescription: O(n) instead of O(n log n)
tags: javascript, arrays, performance, sorting, algorithms
---

## Use Loop for Min/Max Instead of Sort

Finding the smallest or largest element only requires a single pass through the array. Sorting is wasteful and slower.

**Incorrect (O(n log n) - sort to find latest):**

```typescript
interface Project {
  id: string
  name: string
  updatedAt: number
}

function getLatestProject(projects: Project[]) {
  const sorted = [...projects].sort((a, b) => b.updatedAt - a.updatedAt)
  return sorted[0]
}
```

Sorts the entire array just to find the maximum value.

**Incorrect (O(n log n) - sort for oldest and newest):**

```typescript
function getOldestAndNewest(projects: Project[]) {
  const sorted = [...projects].sort((a, b) => a.updatedAt - b.updatedAt)
  return { oldest: sorted[0], newest: sorted[sorted.length - 1] }
}
```

Still sorts unnecessarily when only min/max are needed.

**Correct (O(n) - single loop):**

```typescript
function getLatestProject(projects: Project[]) {
  if (projects.length === 0) return null

  let latest = projects[0]

  for (let i = 1; i < projects.length; i++) {
    if (projects[i].updatedAt > latest.updatedAt) {
      latest = projects[i]
    }
  }

  return latest
}

function getOldestAndNewest(projects: Project[]) {
  if (projects.length === 0) return { oldest: null, newest: null }

  let oldest = projects[0]
  let newest = projects[0]

  for (let i = 1; i < projects.length; i++) {
    if (projects[i].updatedAt < oldest.updatedAt) oldest = projects[i]
    if (projects[i].updatedAt > newest.updatedAt) newest = projects[i]
  }

  return { oldest, newest }
}
```

Single pass through the array, no copying, no sorting.

**Alternative (Math.min/Math.max for small arrays):**

```typescript
const numbers = [5, 2, 8, 1, 9]
const min = Math.min(...numbers)
const max = Math.max(...numbers)
```

This works for small arrays but can be slower for very large arrays due to spread operator limitations. Use the loop approach for reliability.
@@ -1,24 +0,0 @@
---
title: Use Set/Map for O(1) Lookups
impact: LOW-MEDIUM
impactDescription: O(n) to O(1)
tags: javascript, set, map, data-structures, performance
---

## Use Set/Map for O(1) Lookups

Convert arrays to Set/Map for repeated membership checks.

**Incorrect (O(n) per check):**

```typescript
const allowedIds = ['a', 'b', 'c', ...]
items.filter(item => allowedIds.includes(item.id))
```

**Correct (O(1) per check):**

```typescript
const allowedIds = new Set(['a', 'b', 'c', ...])
items.filter(item => allowedIds.has(item.id))
```
@@ -1,57 +0,0 @@
---
title: Use toSorted() Instead of sort() for Immutability
impact: MEDIUM-HIGH
impactDescription: prevents mutation bugs in React state
tags: javascript, arrays, immutability, react, state, mutation
---

## Use toSorted() Instead of sort() for Immutability

`.sort()` mutates the array in place, which can cause bugs with React state and props. Use `.toSorted()` to create a new sorted array without mutation.

**Incorrect (mutates original array):**

```tsx
function UserList({ users }: { users: User[] }) {
  // Mutates the users prop array!
  const sorted = useMemo(
    () => users.sort((a, b) => a.name.localeCompare(b.name)),
    [users]
  )
  return <div>{sorted.map(renderUser)}</div>
}
```

**Correct (creates new array):**

```tsx
function UserList({ users }: { users: User[] }) {
  // Creates new sorted array, original unchanged
  const sorted = useMemo(
    () => users.toSorted((a, b) => a.name.localeCompare(b.name)),
    [users]
  )
  return <div>{sorted.map(renderUser)}</div>
}
```

**Why this matters in React:**

1. Props/state mutations break React's immutability model - React expects props and state to be treated as read-only
2. Causes stale closure bugs - Mutating arrays inside closures (callbacks, effects) can lead to unexpected behavior

**Browser support (fallback for older browsers):**

`.toSorted()` is available in all modern browsers (Chrome 110+, Safari 16+, Firefox 115+, Node.js 20+). For older environments, use the spread operator:

```typescript
// Fallback for older browsers
const sorted = [...items].sort((a, b) => a.value - b.value)
```

**Other immutable array methods:**

- `.toSorted()` - immutable sort
- `.toReversed()` - immutable reverse
- `.toSpliced()` - immutable splice
- `.with()` - immutable element replacement
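
A quick sketch of the remaining methods (illustrative values only):

```typescript
const items = [3, 1, 2]

const reversed = items.toReversed()   // [2, 1, 3] - items unchanged
const spliced = items.toSpliced(1, 1) // [3, 2]    - removes one element at index 1
const replaced = items.with(0, 9)     // [9, 1, 2] - replaces the element at index 0
```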
@@ -1,26 +0,0 @@
---
title: Use Activity Component for Show/Hide
impact: MEDIUM
impactDescription: preserves state/DOM
tags: rendering, activity, visibility, state-preservation
---

## Use Activity Component for Show/Hide

Use React's `<Activity>` to preserve state/DOM for expensive components that frequently toggle visibility.

**Usage:**

```tsx
import { Activity } from 'react'

function Dropdown({ isOpen }: Props) {
  return (
    <Activity mode={isOpen ? 'visible' : 'hidden'}>
      <ExpensiveMenu />
    </Activity>
  )
}
```

Avoids expensive re-renders and state loss.
@@ -1,47 +0,0 @@
---
title: Animate SVG Wrapper Instead of SVG Element
impact: LOW
impactDescription: enables hardware acceleration
tags: rendering, svg, css, animation, performance
---

## Animate SVG Wrapper Instead of SVG Element

Many browsers don't have hardware acceleration for CSS3 animations on SVG elements. Wrap the SVG in a `<div>` and animate the wrapper instead.

**Incorrect (animating SVG directly - no hardware acceleration):**

```tsx
function LoadingSpinner() {
  return (
    <svg
      className="animate-spin"
      width="24"
      height="24"
      viewBox="0 0 24 24"
    >
      <circle cx="12" cy="12" r="10" stroke="currentColor" />
    </svg>
  )
}
```

**Correct (animating wrapper div - hardware accelerated):**

```tsx
function LoadingSpinner() {
  return (
    <div className="animate-spin">
      <svg
        width="24"
        height="24"
        viewBox="0 0 24 24"
      >
        <circle cx="12" cy="12" r="10" stroke="currentColor" />
      </svg>
    </div>
  )
}
```

This applies to all CSS transforms and transitions (`transform`, `opacity`, `translate`, `scale`, `rotate`). The wrapper div allows browsers to use GPU acceleration for smoother animations.
@@ -1,40 +0,0 @@
---
title: Use Explicit Conditional Rendering
impact: LOW
impactDescription: prevents rendering 0 or NaN
tags: rendering, conditional, jsx, falsy-values
---

## Use Explicit Conditional Rendering

Use explicit ternary operators (`? :`) instead of `&&` for conditional rendering when the condition can be `0`, `NaN`, or other falsy values that render.

**Incorrect (renders "0" when count is 0):**

```tsx
function Badge({ count }: { count: number }) {
  return (
    <div>
      {count && <span className="badge">{count}</span>}
    </div>
  )
}

// When count = 0, renders: <div>0</div>
// When count = 5, renders: <div><span class="badge">5</span></div>
```

**Correct (renders nothing when count is 0):**

```tsx
function Badge({ count }: { count: number }) {
  return (
    <div>
      {count > 0 ? <span className="badge">{count}</span> : null}
    </div>
  )
}

// When count = 0, renders: <div></div>
// When count = 5, renders: <div><span class="badge">5</span></div>
```
@@ -1,38 +0,0 @@
---
title: CSS content-visibility for Long Lists
impact: HIGH
impactDescription: faster initial render
tags: rendering, css, content-visibility, long-lists
---

## CSS content-visibility for Long Lists

Apply `content-visibility: auto` to defer off-screen rendering.

**CSS:**

```css
.message-item {
  content-visibility: auto;
  contain-intrinsic-size: 0 80px;
}
```

**Example:**

```tsx
function MessageList({ messages }: { messages: Message[] }) {
  return (
    <div className="overflow-y-auto h-screen">
      {messages.map(msg => (
        <div key={msg.id} className="message-item">
          <Avatar user={msg.author} />
          <div>{msg.content}</div>
        </div>
      ))}
    </div>
  )
}
```

For 1000 messages, the browser skips layout/paint for ~990 off-screen items (10× faster initial render).
@@ -1,46 +0,0 @@
---
title: Hoist Static JSX Elements
impact: LOW
impactDescription: avoids re-creation
tags: rendering, jsx, static, optimization
---

## Hoist Static JSX Elements

Extract static JSX outside components to avoid re-creation.

**Incorrect (recreates element every render):**

```tsx
function LoadingSkeleton() {
  return <div className="animate-pulse h-20 bg-gray-200" />
}

function Container() {
  return (
    <div>
      {loading && <LoadingSkeleton />}
    </div>
  )
}
```

**Correct (reuses same element):**

```tsx
const loadingSkeleton = (
  <div className="animate-pulse h-20 bg-gray-200" />
)

function Container() {
  return (
    <div>
      {loading && loadingSkeleton}
    </div>
  )
}
```

This is especially helpful for large and static SVG nodes, which can be expensive to recreate on every render.

**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, the compiler automatically hoists static JSX elements and optimizes component re-renders, making manual hoisting unnecessary.
@@ -1,82 +0,0 @@
---
title: Prevent Hydration Mismatch Without Flickering
impact: MEDIUM
impactDescription: avoids visual flicker and hydration errors
tags: rendering, ssr, hydration, localStorage, flicker
---

## Prevent Hydration Mismatch Without Flickering

When rendering content that depends on client-side storage (localStorage, cookies), avoid both SSR breakage and post-hydration flickering by injecting a synchronous script that updates the DOM before React hydrates.

**Incorrect (breaks SSR):**

```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
  // localStorage is not available on the server - throws an error
  const theme = localStorage.getItem('theme') || 'light'

  return (
    <div className={theme}>
      {children}
    </div>
  )
}
```

Server-side rendering fails because `localStorage` is undefined on the server.

**Incorrect (visual flickering):**

```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
  const [theme, setTheme] = useState('light')

  useEffect(() => {
    // Runs after hydration - causes a visible flash
    const stored = localStorage.getItem('theme')
    if (stored) {
      setTheme(stored)
    }
  }, [])

  return (
    <div className={theme}>
      {children}
    </div>
  )
}
```

The component first renders with the default value (`light`), then updates after hydration, causing a visible flash of incorrect content.

**Correct (no flicker, no hydration mismatch):**

```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
  return (
    <>
      <div id="theme-wrapper">
        {children}
      </div>
      <script
        dangerouslySetInnerHTML={{
          __html: `
            (function() {
              try {
                var theme = localStorage.getItem('theme') || 'light';
                var el = document.getElementById('theme-wrapper');
                if (el) el.className = theme;
              } catch (e) {}
            })();
          `,
        }}
      />
    </>
  )
}
```

The inline script executes synchronously before the element is shown, so the DOM already has the correct value when React hydrates. No flickering, no hydration mismatch.

This pattern is especially useful for theme toggles, user preferences, authentication states, and any client-only data that should render immediately without flashing default values.
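For completeness, a minimal sketch of a matching client-side toggle (hypothetical, not part of the original rule) that keeps localStorage and the wrapper's class in sync after hydration:

```tsx
'use client'

// Hypothetical toggle: writes the preference and updates the wrapper class
// directly, mirroring what the inline script does on first paint.
function ThemeToggle() {
  const toggle = () => {
    const next = localStorage.getItem('theme') === 'dark' ? 'light' : 'dark'
    localStorage.setItem('theme', next)
    const el = document.getElementById('theme-wrapper')
    if (el) el.className = next
  }

  return <button onClick={toggle}>Toggle theme</button>
}
```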
@@ -1,28 +0,0 @@
---
title: Optimize SVG Precision
impact: LOW
impactDescription: reduces file size
tags: rendering, svg, optimization, svgo
---

## Optimize SVG Precision

Reduce SVG coordinate precision to shrink file size. The optimal precision depends on the viewBox size, but one decimal place is usually enough for icon-sized graphics.

**Incorrect (excessive precision):**

```svg
<path d="M 10.293847 20.847362 L 30.938472 40.192837" />
```

**Correct (1 decimal place):**

```svg
<path d="M 10.3 20.8 L 30.9 40.2" />
```

**Automate with SVGO:**

```bash
npx svgo --precision=1 --multipass icon.svg
```
@@ -1,39 +0,0 @@
---
title: Defer State Reads to Usage Point
impact: MEDIUM
impactDescription: avoids unnecessary subscriptions
tags: rerender, searchParams, localStorage, optimization
---

## Defer State Reads to Usage Point

Don't subscribe to dynamic state (searchParams, localStorage) if you only read it inside callbacks.

**Incorrect (subscribes to all searchParams changes):**

```tsx
import { useSearchParams } from 'next/navigation'

function ShareButton({ chatId }: { chatId: string }) {
  const searchParams = useSearchParams()

  const handleShare = () => {
    const ref = searchParams.get('ref')
    shareChat(chatId, { ref })
  }

  return <button onClick={handleShare}>Share</button>
}
```

**Correct (reads on demand, no subscription):**

```tsx
function ShareButton({ chatId }: { chatId: string }) {
  const handleShare = () => {
    const params = new URLSearchParams(window.location.search)
    const ref = params.get('ref')
    shareChat(chatId, { ref })
  }

  return <button onClick={handleShare}>Share</button>
}
```
@@ -1,45 +0,0 @@
---
title: Narrow Effect Dependencies
impact: LOW
impactDescription: minimizes effect re-runs
tags: rerender, useEffect, dependencies, optimization
---

## Narrow Effect Dependencies

Specify primitive dependencies instead of objects to minimize effect re-runs.

**Incorrect (re-runs on any user field change):**

```tsx
useEffect(() => {
  console.log(user.id)
}, [user])
```

**Correct (re-runs only when id changes):**

```tsx
useEffect(() => {
  console.log(user.id)
}, [user.id])
```

**For derived state, compute outside the effect:**

```tsx
// Incorrect: runs on width=767, 766, 765...
useEffect(() => {
  if (width < 768) {
    enableMobileMode()
  }
}, [width])

// Correct: runs only on the boolean transition
const isMobile = width < 768
useEffect(() => {
  if (isMobile) {
    enableMobileMode()
  }
}, [isMobile])
```
@@ -1,29 +0,0 @@
---
title: Subscribe to Derived State
impact: MEDIUM
impactDescription: reduces re-render frequency
tags: rerender, derived-state, media-query, optimization
---

## Subscribe to Derived State

Subscribe to derived boolean state instead of continuous values to reduce re-render frequency.

**Incorrect (re-renders on every pixel change):**

```tsx
function Sidebar() {
  const width = useWindowWidth() // updates continuously
  const isMobile = width < 768
  return <nav className={isMobile ? 'mobile' : 'desktop'} />
}
```

**Correct (re-renders only when the boolean changes):**

```tsx
function Sidebar() {
  const isMobile = useMediaQuery('(max-width: 767px)')
  return <nav className={isMobile ? 'mobile' : 'desktop'} />
}
```
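The `useMediaQuery` hook is assumed above rather than shown; a minimal sketch using `matchMedia` with `useSyncExternalStore` could look like this:

```tsx
import { useSyncExternalStore } from 'react'

// Sketch of a useMediaQuery hook: subscribes to matchMedia 'change' events,
// so the component re-renders only when the boolean match state flips.
function useMediaQuery(query: string): boolean {
  return useSyncExternalStore(
    (onChange) => {
      const mql = window.matchMedia(query)
      mql.addEventListener('change', onChange)
      return () => mql.removeEventListener('change', onChange)
    },
    () => window.matchMedia(query).matches,
    () => false // server snapshot: assume non-matching during SSR
  )
}
```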
@@ -1,74 +0,0 @@
---
title: Use Functional setState Updates
impact: MEDIUM
impactDescription: prevents stale closures and unnecessary callback recreations
tags: react, hooks, useState, useCallback, callbacks, closures
---

## Use Functional setState Updates

When updating state based on the current state value, use the functional update form of setState instead of referencing the state variable directly. This prevents stale closures, eliminates unnecessary dependencies, and yields stable callback references.

**Incorrect (requires state as a dependency):**

```tsx
function TodoList() {
  const [items, setItems] = useState(initialItems)

  // Callback must depend on items, so it is recreated on every items change
  const addItems = useCallback((newItems: Item[]) => {
    setItems([...items, ...newItems])
  }, [items]) // ❌ items dependency causes recreations

  // Risk of a stale closure if the dependency is forgotten
  const removeItem = useCallback((id: string) => {
    setItems(items.filter(item => item.id !== id))
  }, []) // ❌ Missing items dependency - will use stale items!

  return <ItemsEditor items={items} onAdd={addItems} onRemove={removeItem} />
}
```

The first callback is recreated every time `items` changes, which can cause child components to re-render unnecessarily. The second callback has a stale closure bug: it will always reference the initial `items` value.

**Correct (stable callbacks, no stale closures):**

```tsx
function TodoList() {
  const [items, setItems] = useState(initialItems)

  // Stable callback, never recreated
  const addItems = useCallback((newItems: Item[]) => {
    setItems(curr => [...curr, ...newItems])
  }, []) // ✅ No dependencies needed

  // Always uses the latest state, no stale closure risk
  const removeItem = useCallback((id: string) => {
    setItems(curr => curr.filter(item => item.id !== id))
  }, []) // ✅ Safe and stable

  return <ItemsEditor items={items} onAdd={addItems} onRemove={removeItem} />
}
```

**Benefits:**

1. **Stable callback references** - Callbacks don't need to be recreated when state changes
2. **No stale closures** - Always operates on the latest state value
3. **Fewer dependencies** - Simplifies dependency arrays and avoids retaining stale values in closures
4. **Prevents bugs** - Eliminates one of the most common sources of React closure bugs

**When to use functional updates:**

- Any setState that depends on the current state value
- Inside useCallback/useMemo when state is needed
- Event handlers that reference state
- Async operations that update state

**When direct updates are fine:**

- Setting state to a static value: `setCount(0)`
- Setting state from props/arguments only: `setName(newName)`
- State doesn't depend on the previous value

**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, the compiler can automatically optimize some of these cases, but functional updates are still recommended for correctness and to prevent stale closure bugs.
@@ -1,58 +0,0 @@
---
title: Use Lazy State Initialization
impact: MEDIUM
impactDescription: avoids wasted computation on every render
tags: react, hooks, useState, performance, initialization
---

## Use Lazy State Initialization

Pass a function to `useState` for expensive initial values. Without the function form, the initializer expression runs on every render even though its value is only used once.

**Incorrect (runs on every render):**

```tsx
function FilteredList({ items }: { items: Item[] }) {
  // buildSearchIndex() runs on EVERY render, even after initialization
  const [searchIndex, setSearchIndex] = useState(buildSearchIndex(items))
  const [query, setQuery] = useState('')

  // When query changes, buildSearchIndex runs again unnecessarily
  return <SearchResults index={searchIndex} query={query} />
}

function UserProfile() {
  // JSON.parse runs on every render
  const [settings, setSettings] = useState(
    JSON.parse(localStorage.getItem('settings') || '{}')
  )

  return <SettingsForm settings={settings} onChange={setSettings} />
}
```

**Correct (runs only once):**

```tsx
function FilteredList({ items }: { items: Item[] }) {
  // buildSearchIndex() runs ONLY on the initial render
  const [searchIndex, setSearchIndex] = useState(() => buildSearchIndex(items))
  const [query, setQuery] = useState('')

  return <SearchResults index={searchIndex} query={query} />
}

function UserProfile() {
  // JSON.parse runs only on the initial render
  const [settings, setSettings] = useState(() => {
    const stored = localStorage.getItem('settings')
    return stored ? JSON.parse(stored) : {}
  })

  return <SettingsForm settings={settings} onChange={setSettings} />
}
```

Use lazy initialization when computing initial values from localStorage/sessionStorage, building data structures (indexes, maps), reading from the DOM, or performing heavy transformations.

For simple primitives (`useState(0)`), direct references (`useState(props.value)`), or cheap literals (`useState({})`), the function form is unnecessary.
@@ -1,44 +0,0 @@
---
title: Extract to Memoized Components
impact: MEDIUM
impactDescription: enables early returns
tags: rerender, memo, useMemo, optimization
---

## Extract to Memoized Components

Extract expensive work into memoized components so the parent can return early before the computation runs.

**Incorrect (computes the avatar even when loading):**

```tsx
function Profile({ user, loading }: Props) {
  const avatar = useMemo(() => {
    const id = computeAvatarId(user)
    return <Avatar id={id} />
  }, [user])

  if (loading) return <Skeleton />
  return <div>{avatar}</div>
}
```

**Correct (skips the computation when loading):**

```tsx
const UserAvatar = memo(function UserAvatar({ user }: { user: User }) {
  const id = useMemo(() => computeAvatarId(user), [user])
  return <Avatar id={id} />
})

function Profile({ user, loading }: Props) {
  if (loading) return <Skeleton />
  return (
    <div>
      <UserAvatar user={user} />
    </div>
  )
}
```

**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, manual memoization with `memo()` and `useMemo()` is not necessary. The compiler automatically optimizes re-renders.
@@ -1,40 +0,0 @@
---
title: Use Transitions for Non-Urgent Updates
impact: MEDIUM
impactDescription: maintains UI responsiveness
tags: rerender, transitions, startTransition, performance
---

## Use Transitions for Non-Urgent Updates

Mark frequent, non-urgent state updates as transitions to keep the UI responsive.

**Incorrect (blocks UI on every scroll):**

```tsx
function ScrollTracker() {
  const [scrollY, setScrollY] = useState(0)
  useEffect(() => {
    const handler = () => setScrollY(window.scrollY)
    window.addEventListener('scroll', handler, { passive: true })
    return () => window.removeEventListener('scroll', handler)
  }, [])
}
```

**Correct (non-blocking updates):**

```tsx
import { startTransition } from 'react'

function ScrollTracker() {
  const [scrollY, setScrollY] = useState(0)
  useEffect(() => {
    const handler = () => {
      startTransition(() => setScrollY(window.scrollY))
    }
    window.addEventListener('scroll', handler, { passive: true })
    return () => window.removeEventListener('scroll', handler)
  }, [])
}
```
@@ -1,73 +0,0 @@
---
title: Use after() for Non-Blocking Operations
impact: MEDIUM
impactDescription: faster response times
tags: server, async, logging, analytics, side-effects
---

## Use after() for Non-Blocking Operations

Use Next.js's `after()` to schedule work that should execute after the response is sent. This prevents logging, analytics, and other side effects from blocking the response.

**Incorrect (blocks the response):**

```tsx
import { logUserAction } from '@/app/utils'

export async function POST(request: Request) {
  // Perform mutation
  await updateDatabase(request)

  // Logging blocks the response
  const userAgent = request.headers.get('user-agent') || 'unknown'
  await logUserAction({ userAgent })

  return new Response(JSON.stringify({ status: 'success' }), {
    status: 200,
    headers: { 'Content-Type': 'application/json' }
  })
}
```

**Correct (non-blocking):**

```tsx
import { after } from 'next/server'
import { headers, cookies } from 'next/headers'
import { logUserAction } from '@/app/utils'

export async function POST(request: Request) {
  // Perform mutation
  await updateDatabase(request)

  // Log after the response is sent
  after(async () => {
    const userAgent = (await headers()).get('user-agent') || 'unknown'
    const sessionCookie = (await cookies()).get('session-id')?.value || 'anonymous'

    logUserAction({ sessionCookie, userAgent })
  })

  return new Response(JSON.stringify({ status: 'success' }), {
    status: 200,
    headers: { 'Content-Type': 'application/json' }
  })
}
```

The response is sent immediately while logging happens in the background.

**Common use cases:**

- Analytics tracking
- Audit logging
- Sending notifications
- Cache invalidation
- Cleanup tasks

**Important notes:**

- `after()` runs even if the response fails or redirects
- Works in Server Actions, Route Handlers, and Server Components (a minimal Server Action sketch follows below)
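As a sketch of the Server Action case (the `persistSettings` and `logUserAction` helpers are hypothetical):

```tsx
'use server'

import { after } from 'next/server'

export async function saveSettings(formData: FormData) {
  await persistSettings(formData) // hypothetical helper

  // Scheduled work runs once the action's response has been sent
  after(() => {
    logUserAction({ action: 'save-settings' }) // hypothetical helper
  })
}
```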
Reference: [https://nextjs.org/docs/app/api-reference/functions/after](https://nextjs.org/docs/app/api-reference/functions/after)

@@ -1,41 +0,0 @@
---
title: Cross-Request LRU Caching
impact: HIGH
impactDescription: caches across requests
tags: server, cache, lru, cross-request
---

## Cross-Request LRU Caching

`React.cache()` only deduplicates within one request. For data shared across sequential requests (the user clicks button A, then button B), use an LRU cache.

**Implementation:**

```typescript
import { LRUCache } from 'lru-cache'

const cache = new LRUCache<string, any>({
  max: 1000,
  ttl: 5 * 60 * 1000 // 5 minutes
})

export async function getUser(id: string) {
  const cached = cache.get(id)
  if (cached) return cached

  const user = await db.user.findUnique({ where: { id } })
  cache.set(id, user)
  return user
}

// Request 1: DB query, result cached
// Request 2: cache hit, no DB query
```

Use this when sequential user actions hit multiple endpoints that need the same data within seconds.

**With Vercel's [Fluid Compute](https://vercel.com/docs/fluid-compute):** LRU caching is especially effective because multiple concurrent requests can share the same function instance and cache. The cache persists across requests without needing external storage like Redis.

**In traditional serverless:** Each invocation runs in isolation, so consider Redis for cross-process caching.

Reference: [https://github.com/isaacs/node-lru-cache](https://github.com/isaacs/node-lru-cache)
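One wrinkle worth noting (an assumption beyond the rule above): invalidate entries on writes so sequential requests don't read stale data. A sketch against the same cache:

```typescript
// Sketch: drop the cached entry when the user record changes,
// so the next request re-reads from the database.
export async function updateUser(id: string, data: Partial<User>) {
  const user = await db.user.update({ where: { id }, data })
  cache.delete(id)
  return user
}
```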
@@ -1,26 +0,0 @@
---
title: Per-Request Deduplication with React.cache()
impact: MEDIUM
impactDescription: deduplicates within request
tags: server, cache, react-cache, deduplication
---

## Per-Request Deduplication with React.cache()

Use `React.cache()` for server-side request deduplication. Authentication and database queries benefit most.

**Usage:**

```typescript
import { cache } from 'react'

export const getCurrentUser = cache(async () => {
  const session = await auth()
  if (!session?.user?.id) return null
  return await db.user.findUnique({
    where: { id: session.user.id }
  })
})
```

Within a single request, multiple calls to `getCurrentUser()` execute the query only once.
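For illustration (component names hypothetical), two sibling Server Components can both call the cached function without duplicating the work:

```tsx
// Both render in the same request, so auth() and the DB query run once.
async function Greeting() {
  const user = await getCurrentUser()
  return <p>Hello, {user?.name}</p>
}

async function AccountMenu() {
  const user = await getCurrentUser()
  return <nav>{user ? <a href="/account">Account</a> : <a href="/login">Log in</a>}</nav>
}
```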
@@ -1,79 +0,0 @@
---
title: Parallel Data Fetching with Component Composition
impact: CRITICAL
impactDescription: eliminates server-side waterfalls
tags: server, rsc, parallel-fetching, composition
---

## Parallel Data Fetching with Component Composition

React Server Components execute sequentially within a tree. Restructure with composition so sibling components can fetch in parallel.

**Incorrect (Sidebar waits for Page's fetch to complete):**

```tsx
export default async function Page() {
  const header = await fetchHeader()
  return (
    <div>
      <div>{header}</div>
      <Sidebar />
    </div>
  )
}

async function Sidebar() {
  const items = await fetchSidebarItems()
  return <nav>{items.map(renderItem)}</nav>
}
```

**Correct (both fetch simultaneously):**

```tsx
async function Header() {
  const data = await fetchHeader()
  return <div>{data}</div>
}

async function Sidebar() {
  const items = await fetchSidebarItems()
  return <nav>{items.map(renderItem)}</nav>
}

export default function Page() {
  return (
    <div>
      <Header />
      <Sidebar />
    </div>
  )
}
```

**Alternative with the children prop:**

```tsx
async function Layout({ children }: { children: ReactNode }) {
  const header = await fetchHeader()
  return (
    <div>
      <div>{header}</div>
      {children}
    </div>
  )
}

async function Sidebar() {
  const items = await fetchSidebarItems()
  return <nav>{items.map(renderItem)}</nav>
}

export default function Page() {
  return (
    <Layout>
      <Sidebar />
    </Layout>
  )
}
```
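A related sketch (not part of the original rule): when both results are needed in the same component, `Promise.all` gives the same parallelism without restructuring:

```tsx
export default async function Page() {
  // Both fetches start immediately and resolve in parallel
  const [header, items] = await Promise.all([fetchHeader(), fetchSidebarItems()])
  return (
    <div>
      <div>{header}</div>
      <nav>{items.map(renderItem)}</nav>
    </div>
  )
}
```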
@@ -1,38 +0,0 @@
---
title: Minimize Serialization at RSC Boundaries
impact: HIGH
impactDescription: reduces data transfer size
tags: server, rsc, serialization, props
---

## Minimize Serialization at RSC Boundaries

The React Server/Client boundary serializes all object properties into strings and embeds them in the HTML response and subsequent RSC requests. This serialized data directly impacts page weight and load time, so **size matters a lot**. Only pass fields that the client actually uses.

**Incorrect (serializes all 50 fields):**

```tsx
// page.tsx (Server Component)
async function Page() {
  const user = await fetchUser() // 50 fields
  return <Profile user={user} />
}

// profile.tsx (Client Component, separate file)
'use client'
function Profile({ user }: { user: User }) {
  return <div>{user.name}</div> // uses 1 field
}
```

**Correct (serializes only 1 field):**

```tsx
// page.tsx (Server Component)
async function Page() {
  const user = await fetchUser()
  return <Profile name={user.name} />
}

// profile.tsx (Client Component, separate file)
'use client'
function Profile({ name }: { name: string }) {
  return <div>{name}</div>
}
```
@@ -1,9 +1,6 @@
# Ignore everything by default, selectively add things to context
*

-# Documentation (for embeddings/search)
-!docs/
-
# Platform - Libs
!autogpt_platform/autogpt_libs/autogpt_libs/
!autogpt_platform/autogpt_libs/pyproject.toml
@@ -19,7 +16,6 @@
!autogpt_platform/backend/poetry.lock
!autogpt_platform/backend/README.md
!autogpt_platform/backend/.env
-!autogpt_platform/backend/gen_prisma_types_stub.py

# Platform - Market
!autogpt_platform/market/market/
.github/copilot-instructions.md (14 changes)
@@ -142,7 +142,7 @@ pnpm storybook # Start component development server
### Security & Middleware

**Cache Protection**: Backend includes middleware preventing sensitive data caching in browsers/proxies
-**Authentication**: JWT-based with Supabase integration
+**Authentication**: JWT-based with native authentication
**User ID Validation**: All data access requires user ID checks - verify this for any `data/*.py` changes

### Development Workflow
@@ -160,7 +160,7 @@ pnpm storybook # Start component development server

**Backend Entry Points:**

-- `backend/backend/api/rest_api.py` - FastAPI application setup
+- `backend/backend/server/server.py` - FastAPI application setup
- `backend/backend/data/` - Database models and user management
- `backend/blocks/` - Agent execution blocks and logic

@@ -168,9 +168,9 @@ pnpm storybook # Start component development server

- `frontend/src/app/layout.tsx` - Root application layout
- `frontend/src/app/page.tsx` - Home page
-- `frontend/src/lib/supabase/` - Authentication and database client
+- `frontend/src/lib/auth/` - Authentication client

-**Protected Routes**: Update `frontend/lib/supabase/middleware.ts` when adding protected routes
+**Protected Routes**: Update `frontend/middleware.ts` when adding protected routes

### Agent Block System

@@ -194,7 +194,7 @@ Agents are built using a visual block-based system where each block performs a s

1. **Backend**: `/backend/.env.default` → `/backend/.env` (user overrides)
2. **Frontend**: `/frontend/.env.default` → `/frontend/.env` (user overrides)
-3. **Platform**: `/.env.default` (Supabase/shared) → `/.env` (user overrides)
+3. **Platform**: `/.env.default` (shared) → `/.env` (user overrides)
4. Docker Compose `environment:` sections override file-based config
5. Shell environment variables have highest precedence

@@ -219,7 +219,7 @@ Agents are built using a visual block-based system where each block performs a s

### API Development

-1. Update routes in `/backend/backend/api/features/`
+1. Update routes in `/backend/backend/server/routers/`
2. Add/update Pydantic models in same directory
3. Write tests alongside route files
4. For `data/*.py` changes, validate user ID checks
@@ -285,7 +285,7 @@ Agents are built using a visual block-based system where each block performs a s

### Security Guidelines

-**Cache Protection Middleware** (`/backend/backend/api/middleware/security.py`):
+**Cache Protection Middleware** (`/backend/backend/server/middleware/security.py`):

- Default: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private`
- Uses allow list approach for cacheable paths (static assets, health checks, public pages)
.github/workflows/classic-frontend-ci.yml (2 changes)
@@ -49,7 +49,7 @@ jobs:

    - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
      if: github.event_name == 'push'
-     uses: peter-evans/create-pull-request@v8
+     uses: peter-evans/create-pull-request@v7
      with:
        add-paths: classic/frontend/build/web
        base: ${{ github.ref_name }}

@@ -42,7 +42,7 @@

    - name: Get CI failure details
      id: failure_details
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          const run = await github.rest.actions.getWorkflowRun({
@@ -93,5 +93,5 @@

        Error logs:
        ${{ toJSON(fromJSON(steps.failure_details.outputs.result).errorLogs) }}
-     claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+     anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
      claude_args: "--allowedTools 'Edit,MultiEdit,Write,Read,Glob,Grep,LS,Bash(git:*),Bash(bun:*),Bash(npm:*),Bash(npx:*),Bash(gh:*)'"
.github/workflows/claude-dependabot.yml (21 changes)
@@ -7,7 +7,7 @@
# - Provide actionable recommendations for the development team
#
# Triggered on: Dependabot PRs (opened, synchronize)
-# Requirements: CLAUDE_CODE_OAUTH_TOKEN secret must be configured
+# Requirements: ANTHROPIC_API_KEY secret must be configured

name: Claude Dependabot PR Review

@@ -41,7 +41,7 @@ jobs:
        python-version: "3.11" # Use standard version matching CI

    - name: Set up Python dependency cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -74,11 +74,11 @@ jobs:

    - name: Generate Prisma Client
      working-directory: autogpt_platform/backend
-     run: poetry run prisma generate && poetry run gen-prisma-stub
+     run: poetry run prisma generate

    # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
    - name: Set up Node.js
-     uses: actions/setup-node@v6
+     uses: actions/setup-node@v4
      with:
        node-version: "22"

@@ -91,7 +91,7 @@ jobs:
        echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

    - name: Cache frontend dependencies
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.pnpm-store
        key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -124,7 +124,7 @@
    # Phase 1: Cache and load Docker images for faster setup
    - name: Set up Docker image cache
      id: docker-cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/docker-cache
        # Use a versioned key for cache invalidation when image list changes
@@ -144,11 +144,7 @@
        "rabbitmq:management"
        "clamav/clamav-debian:latest"
        "busybox:latest"
-       "kong:2.8.1"
-       "supabase/gotrue:v2.170.0"
-       "supabase/postgres:15.8.1.049"
-       "supabase/postgres-meta:v0.86.1"
-       "supabase/studio:20250224-d10db0f"
+       "pgvector/pgvector:pg18"
      )

      # Check if any cached tar files exist (more reliable than cache-hit)
@@ -308,8 +304,7 @@
      id: claude_review
      uses: anthropics/claude-code-action@v1
      with:
-       claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-       allowed_bots: "dependabot[bot]"
+       anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
      claude_args: |
        --allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
      prompt: |
.github/workflows/claude.yml (18 changes)
@@ -57,7 +57,7 @@ jobs:
        python-version: "3.11" # Use standard version matching CI

    - name: Set up Python dependency cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -90,11 +90,11 @@ jobs:

    - name: Generate Prisma Client
      working-directory: autogpt_platform/backend
-     run: poetry run prisma generate && poetry run gen-prisma-stub
+     run: poetry run prisma generate

    # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
    - name: Set up Node.js
-     uses: actions/setup-node@v6
+     uses: actions/setup-node@v4
      with:
        node-version: "22"

@@ -107,7 +107,7 @@ jobs:
        echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

    - name: Cache frontend dependencies
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.pnpm-store
        key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -140,7 +140,7 @@ jobs:
    # Phase 1: Cache and load Docker images for faster setup
    - name: Set up Docker image cache
      id: docker-cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/docker-cache
        # Use a versioned key for cache invalidation when image list changes
@@ -160,11 +160,7 @@ jobs:
        "rabbitmq:management"
        "clamav/clamav-debian:latest"
        "busybox:latest"
-       "kong:2.8.1"
-       "supabase/gotrue:v2.170.0"
-       "supabase/postgres:15.8.1.049"
-       "supabase/postgres-meta:v0.86.1"
-       "supabase/studio:20250224-d10db0f"
+       "pgvector/pgvector:pg18"
      )

      # Check if any cached tar files exist (more reliable than cache-hit)
@@ -323,7 +319,7 @@ jobs:
      id: claude
      uses: anthropics/claude-code-action@v1
      with:
-       claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+       anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
      claude_args: |
        --allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*), Bash(gh pr edit:*)"
        --model opus
.github/workflows/copilot-setup-steps.yml (26 changes)
@@ -39,7 +39,7 @@ jobs:
        python-version: "3.11" # Use standard version matching CI

    - name: Set up Python dependency cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -72,11 +72,11 @@ jobs:

    - name: Generate Prisma Client
      working-directory: autogpt_platform/backend
-     run: poetry run prisma generate && poetry run gen-prisma-stub
+     run: poetry run prisma generate

    # Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
    - name: Set up Node.js
-     uses: actions/setup-node@v6
+     uses: actions/setup-node@v4
      with:
        node-version: "22"

@@ -89,7 +89,7 @@ jobs:
        echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV

    - name: Cache frontend dependencies
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.pnpm-store
        key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -108,16 +108,6 @@ jobs:
      # run: pnpm playwright install --with-deps chromium

    # Docker setup for development environment
-   - name: Free up disk space
-     run: |
-       # Remove large unused tools to free disk space for Docker builds
-       sudo rm -rf /usr/share/dotnet
-       sudo rm -rf /usr/local/lib/android
-       sudo rm -rf /opt/ghc
-       sudo rm -rf /opt/hostedtoolcache/CodeQL
-       sudo docker system prune -af
-       df -h

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

@@ -132,7 +122,7 @@ jobs:
    # Phase 1: Cache and load Docker images for faster setup
    - name: Set up Docker image cache
      id: docker-cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/docker-cache
        # Use a versioned key for cache invalidation when image list changes
@@ -152,11 +142,7 @@ jobs:
        "rabbitmq:management"
        "clamav/clamav-debian:latest"
        "busybox:latest"
-       "kong:2.8.1"
-       "supabase/gotrue:v2.170.0"
-       "supabase/postgres:15.8.1.049"
-       "supabase/postgres-meta:v0.86.1"
-       "supabase/studio:20250224-d10db0f"
+       "pgvector/pgvector:pg18"
      )

      # Check if any cached tar files exist (more reliable than cache-hit)
.github/workflows/docs-block-sync.yml (78 changes)
@@ -1,78 +0,0 @@
name: Block Documentation Sync Check

on:
  push:
    branches: [master, dev]
    paths:
      - "autogpt_platform/backend/backend/blocks/**"
      - "docs/integrations/**"
      - "autogpt_platform/backend/scripts/generate_block_docs.py"
      - ".github/workflows/docs-block-sync.yml"
  pull_request:
    branches: [master, dev]
    paths:
      - "autogpt_platform/backend/backend/blocks/**"
      - "docs/integrations/**"
      - "autogpt_platform/backend/scripts/generate_block_docs.py"
      - ".github/workflows/docs-block-sync.yml"

jobs:
  check-docs-sync:
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Set up Python dependency cache
        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
          restore-keys: |
            poetry-${{ runner.os }}-

      - name: Install Poetry
        run: |
          cd autogpt_platform/backend
          HEAD_POETRY_VERSION=$(python3 ../../.github/workflows/scripts/get_package_version_from_lockfile.py poetry)
          echo "Found Poetry version ${HEAD_POETRY_VERSION} in backend/poetry.lock"
          curl -sSL https://install.python-poetry.org | POETRY_VERSION=$HEAD_POETRY_VERSION python3 -
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Install dependencies
        working-directory: autogpt_platform/backend
        run: |
          poetry install --only main
          poetry run prisma generate

      - name: Check block documentation is in sync
        working-directory: autogpt_platform/backend
        run: |
          echo "Checking if block documentation is in sync with code..."
          poetry run python scripts/generate_block_docs.py --check

      - name: Show diff if out of sync
        if: failure()
        working-directory: autogpt_platform/backend
        run: |
          echo "::error::Block documentation is out of sync with code!"
          echo ""
          echo "To fix this, run the following command locally:"
          echo "  cd autogpt_platform/backend && poetry run python scripts/generate_block_docs.py"
          echo ""
          echo "Then commit the updated documentation files."
          echo ""
          echo "Regenerating docs to show diff..."
          poetry run python scripts/generate_block_docs.py
          echo ""
          echo "Changes detected:"
          git diff ../../docs/integrations/ || true
.github/workflows/docs-claude-review.yml (95 changes)
@@ -1,95 +0,0 @@
name: Claude Block Docs Review

on:
  pull_request:
    types: [opened, synchronize]
    paths:
      - "docs/integrations/**"
      - "autogpt_platform/backend/backend/blocks/**"

jobs:
  claude-review:
    # Only run for PRs from members/collaborators
    if: |
      github.event.pull_request.author_association == 'OWNER' ||
      github.event.pull_request.author_association == 'MEMBER' ||
      github.event.pull_request.author_association == 'COLLABORATOR'
    runs-on: ubuntu-latest
    timeout-minutes: 15
    permissions:
      contents: read
      pull-requests: write
      id-token: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Set up Python dependency cache
        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
          restore-keys: |
            poetry-${{ runner.os }}-

      - name: Install Poetry
        run: |
          cd autogpt_platform/backend
          HEAD_POETRY_VERSION=$(python3 ../../.github/workflows/scripts/get_package_version_from_lockfile.py poetry)
          curl -sSL https://install.python-poetry.org | POETRY_VERSION=$HEAD_POETRY_VERSION python3 -
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Install dependencies
        working-directory: autogpt_platform/backend
        run: |
          poetry install --only main
          poetry run prisma generate

      - name: Run Claude Code Review
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          claude_args: |
            --allowedTools "Read,Glob,Grep,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)"
          prompt: |
            You are reviewing a PR that modifies block documentation or block code for AutoGPT.

            ## Your Task
            Review the changes in this PR and provide constructive feedback. Focus on:

            1. **Documentation Accuracy**: For any block code changes, verify that:
               - Input/output tables in docs match the actual block schemas
               - Description text accurately reflects what the block does
               - Any new blocks have corresponding documentation

            2. **Manual Content Quality**: Check manual sections (marked with `<!-- MANUAL: -->` markers):
               - "How it works" sections should have clear technical explanations
               - "Possible use case" sections should have practical, real-world examples
               - Content should be helpful for users trying to understand the blocks

            3. **Template Compliance**: Ensure docs follow the standard template:
               - What it is (brief intro)
               - What it does (description)
               - How it works (technical explanation)
               - Inputs table
               - Outputs table
               - Possible use case

            4. **Cross-references**: Check that links and anchors are correct

            ## Review Process
            1. First, get the PR diff to see what changed: `gh pr diff ${{ github.event.pull_request.number }}`
            2. Read any modified block files to understand the implementation
            3. Read corresponding documentation files to verify accuracy
            4. Provide your feedback as a PR comment

            Be constructive and specific. If everything looks good, say so!
            If there are issues, explain what's wrong and suggest how to fix it.
.github/workflows/docs-enhance.yml (194 changes)
@@ -1,194 +0,0 @@
name: Enhance Block Documentation

on:
  workflow_dispatch:
    inputs:
      block_pattern:
        description: 'Block file pattern to enhance (e.g., "google/*.md" or "*" for all blocks)'
        required: true
        default: '*'
        type: string
      dry_run:
        description: 'Dry run mode - show proposed changes without committing'
        type: boolean
        default: true
      max_blocks:
        description: 'Maximum number of blocks to process (0 for unlimited)'
        type: number
        default: 10

jobs:
  enhance-docs:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    permissions:
      contents: write
      pull-requests: write
      id-token: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Set up Python dependency cache
        uses: actions/cache@v5
        with:
          path: ~/.cache/pypoetry
          key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
          restore-keys: |
            poetry-${{ runner.os }}-

      - name: Install Poetry
        run: |
          cd autogpt_platform/backend
          HEAD_POETRY_VERSION=$(python3 ../../.github/workflows/scripts/get_package_version_from_lockfile.py poetry)
          curl -sSL https://install.python-poetry.org | POETRY_VERSION=$HEAD_POETRY_VERSION python3 -
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Install dependencies
        working-directory: autogpt_platform/backend
        run: |
          poetry install --only main
          poetry run prisma generate

      - name: Run Claude Enhancement
        uses: anthropics/claude-code-action@v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          claude_args: |
            --allowedTools "Read,Edit,Glob,Grep,Write,Bash(git:*),Bash(gh:*),Bash(find:*),Bash(ls:*)"
          prompt: |
            You are enhancing block documentation for AutoGPT. Your task is to improve the MANUAL sections
            of block documentation files by reading the actual block implementations and writing helpful content.

            ## Configuration
            - Block pattern: ${{ inputs.block_pattern }}
            - Dry run: ${{ inputs.dry_run }}
            - Max blocks to process: ${{ inputs.max_blocks }}

            ## Your Task

            1. **Find Documentation Files**
               Find block documentation files matching the pattern in `docs/integrations/`
               Pattern: ${{ inputs.block_pattern }}

               Use: `find docs/integrations -name "*.md" -type f`

            2. **For Each Documentation File** (up to ${{ inputs.max_blocks }} files):

               a. Read the documentation file

               b. Identify which block(s) it documents (look for the block class name)

               c. Find and read the corresponding block implementation in `autogpt_platform/backend/backend/blocks/`

               d. Improve the MANUAL sections:

               **"How it works" section** (within `<!-- MANUAL: how_it_works -->` markers):
               - Explain the technical flow of the block
               - Describe what APIs or services it connects to
               - Note any important configuration or prerequisites
               - Keep it concise but informative (2-4 paragraphs)

               **"Possible use case" section** (within `<!-- MANUAL: use_case -->` markers):
               - Provide 2-3 practical, real-world examples
               - Make them specific and actionable
               - Show how this block could be used in an automation workflow

            3. **Important Rules**
               - ONLY modify content within `<!-- MANUAL: -->` and `<!-- END MANUAL -->` markers
               - Do NOT modify auto-generated sections (inputs/outputs tables, descriptions)
               - Keep content accurate based on the actual block implementation
               - Write for users who may not be technical experts

            4. **Output**
               ${{ inputs.dry_run == true && 'DRY RUN MODE: Show proposed changes for each file but do NOT actually edit the files. Describe what you would change.' || 'LIVE MODE: Actually edit the files to improve the documentation.' }}

            ## Example Improvements

            **Before (How it works):**
            ```
            _Add technical explanation here._
            ```

            **After (How it works):**
            ```
            This block connects to the GitHub API to retrieve issue information. When executed,
            it authenticates using your GitHub credentials and fetches issue details including
            title, body, labels, and assignees.

            The block requires a valid GitHub OAuth connection with repository access permissions.
            It supports both public and private repositories you have access to.
            ```

            **Before (Possible use case):**
            ```
            _Add practical use case examples here._
            ```

            **After (Possible use case):**
            ```
            **Customer Support Automation**: Monitor a GitHub repository for new issues with
            the "bug" label, then automatically create a ticket in your support system and
            notify the on-call engineer via Slack.

            **Release Notes Generation**: When a new release is published, gather all closed
            issues since the last release and generate a summary for your changelog.
            ```

            Begin by finding and listing the documentation files to process.

      - name: Create PR with enhanced documentation
        if: ${{ inputs.dry_run == false }}
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Check if there are changes
          if git diff --quiet docs/integrations/; then
            echo "No changes to commit"
            exit 0
          fi

          # Configure git
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Create branch and commit
          BRANCH_NAME="docs/enhance-blocks-$(date +%Y%m%d-%H%M%S)"
          git checkout -b "$BRANCH_NAME"
          git add docs/integrations/
          git commit -m "docs: enhance block documentation with LLM-generated content

          Pattern: ${{ inputs.block_pattern }}
          Max blocks: ${{ inputs.max_blocks }}

          🤖 Generated with [Claude Code](https://claude.com/claude-code)

          Co-Authored-By: Claude <noreply@anthropic.com>"

          # Push and create PR
          git push -u origin "$BRANCH_NAME"
          gh pr create \
            --title "docs: LLM-enhanced block documentation" \
            --body "## Summary
          This PR contains LLM-enhanced documentation for block files matching pattern: \`${{ inputs.block_pattern }}\`

          The following manual sections were improved:
          - **How it works**: Technical explanations based on block implementations
          - **Possible use case**: Practical, real-world examples

          ## Review Checklist
          - [ ] Content is accurate based on block implementations
          - [ ] Examples are practical and helpful
          - [ ] No auto-generated sections were modified

          ---
          🤖 Generated with [Claude Code](https://claude.com/claude-code)" \
            --base dev
.github/workflows/platform-backend-ci.yml (50 changes)
@@ -2,13 +2,13 @@ name: AutoGPT Platform - Backend CI

on:
  push:
-   branches: [master, dev, ci-test*]
+   branches: [master, dev, ci-test*, native-auth]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
      - "autogpt_platform/backend/**"
      - "autogpt_platform/autogpt_libs/**"
  pull_request:
-   branches: [master, dev, release-*]
+   branches: [master, dev, release-*, native-auth]
    paths:
      - ".github/workflows/platform-backend-ci.yml"
      - "autogpt_platform/backend/**"
@@ -36,6 +36,19 @@ jobs:
    runs-on: ubuntu-latest

    services:
+     postgres:
+       image: pgvector/pgvector:pg18
+       ports:
+         - 5432:5432
+       env:
+         POSTGRES_USER: postgres
+         POSTGRES_PASSWORD: your-super-secret-and-long-postgres-password
+         POSTGRES_DB: postgres
+       options: >-
+         --health-cmd "pg_isready -U postgres"
+         --health-interval 5s
+         --health-timeout 5s
+         --health-retries 10
      redis:
        image: redis:latest
        ports:
@@ -78,17 +91,12 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}

-   - name: Setup Supabase
-     uses: supabase/setup-cli@v1
-     with:
-       version: 1.178.1

    - id: get_date
      name: Get date
      run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

    - name: Set up Python dependency cache
-     uses: actions/cache@v5
+     uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -134,17 +142,7 @@ jobs:
      run: poetry install

    - name: Generate Prisma Client
-     run: poetry run prisma generate && poetry run gen-prisma-stub
-
-   - id: supabase
-     name: Start Supabase
-     working-directory: .
-     run: |
-       supabase init
-       supabase start --exclude postgres-meta,realtime,storage-api,imgproxy,inbucket,studio,edge-runtime,logflare,vector,supavisor
-       supabase status -o env | sed 's/="/=/; s/"$//' >> $GITHUB_OUTPUT
-       # outputs:
-       # DB_URL, API_URL, GRAPHQL_URL, ANON_KEY, SERVICE_ROLE_KEY, JWT_SECRET
+     run: poetry run prisma generate

    - name: Wait for ClamAV to be ready
      run: |
@@ -176,10 +174,10 @@ jobs:
      }

    - name: Run Database Migrations
-     run: poetry run prisma migrate deploy
+     run: poetry run prisma migrate dev --name updates
      env:
-       DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
-       DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
+       DATABASE_URL: postgresql://postgres:your-super-secret-and-long-postgres-password@localhost:5432/postgres
+       DIRECT_URL: postgresql://postgres:your-super-secret-and-long-postgres-password@localhost:5432/postgres

    - id: lint
      name: Run Linter
@@ -195,11 +193,9 @@ jobs:
      if: success() || (failure() && steps.lint.outcome == 'failure')
      env:
        LOG_LEVEL: ${{ runner.debug && 'DEBUG' || 'INFO' }}
-       DATABASE_URL: ${{ steps.supabase.outputs.DB_URL }}
-       DIRECT_URL: ${{ steps.supabase.outputs.DB_URL }}
-       SUPABASE_URL: ${{ steps.supabase.outputs.API_URL }}
-       SUPABASE_SERVICE_ROLE_KEY: ${{ steps.supabase.outputs.SERVICE_ROLE_KEY }}
-       JWT_VERIFY_KEY: ${{ steps.supabase.outputs.JWT_SECRET }}
+       DATABASE_URL: postgresql://postgres:your-super-secret-and-long-postgres-password@localhost:5432/postgres
+       DIRECT_URL: postgresql://postgres:your-super-secret-and-long-postgres-password@localhost:5432/postgres
+       JWT_SECRET: your-super-secret-jwt-token-with-at-least-32-characters-long
        REDIS_HOST: "localhost"
        REDIS_PORT: "6379"
        ENCRYPTION_KEY: "dvziYgz0KSK8FENhju0ZYi8-fRTfAdlz6YLhdB_jhNw=" # DO NOT USE IN PRODUCTION!!

@@ -17,7 +17,7 @@ jobs:
    - name: Check comment permissions and deployment status
      id: check_status
      if: github.event_name == 'issue_comment' && github.event.issue.pull_request
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          const commentBody = context.payload.comment.body.trim();
@@ -55,7 +55,7 @@ jobs:

    - name: Post permission denied comment
      if: steps.check_status.outputs.permission_denied == 'true'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          await github.rest.issues.createComment({
@@ -68,7 +68,7 @@ jobs:
    - name: Get PR details for deployment
      id: pr_details
      if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          const pr = await github.rest.pulls.get({
@@ -98,7 +98,7 @@ jobs:

    - name: Post deploy success comment
      if: steps.check_status.outputs.should_deploy == 'true'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          await github.rest.issues.createComment({
@@ -126,7 +126,7 @@ jobs:

    - name: Post undeploy success comment
      if: steps.check_status.outputs.should_undeploy == 'true'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          await github.rest.issues.createComment({
@@ -139,7 +139,7 @@ jobs:
    - name: Check deployment status on PR close
      id: check_pr_close
      if: github.event_name == 'pull_request' && github.event.action == 'closed'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          const comments = await github.rest.issues.listComments({
@@ -187,7 +187,7 @@ jobs:
      github.event_name == 'pull_request' &&
      github.event.action == 'closed' &&
      steps.check_pr_close.outputs.should_undeploy == 'true'
-     uses: actions/github-script@v8
+     uses: actions/github-script@v7
      with:
        script: |
          await github.rest.issues.createComment({
102
.github/workflows/platform-frontend-ci.yml
vendored
102
.github/workflows/platform-frontend-ci.yml
vendored
@@ -2,16 +2,16 @@ name: AutoGPT Platform - Frontend CI

on:
push:
branches: [master, dev]
branches: [master, dev, native-auth]
paths:
- ".github/workflows/platform-frontend-ci.yml"
- "autogpt_platform/frontend/**"
pull_request:
branches: [master, dev, native-auth]
paths:
- ".github/workflows/platform-frontend-ci.yml"
- "autogpt_platform/frontend/**"
merge_group:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'merge_group' && format('merge-queue-{0}', github.ref) || format('{0}-{1}', github.ref, github.event.pull_request.number || github.sha) }}

@@ -27,22 +27,13 @@ jobs:
runs-on: ubuntu-latest
outputs:
cache-key: ${{ steps.cache-key.outputs.key }}
components-changed: ${{ steps.filter.outputs.components }}

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Check for component changes
uses: dorny/paths-filter@v3
id: filter
with:
filters: |
components:
- 'autogpt_platform/frontend/src/components/**'

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

@@ -54,7 +45,7 @@ jobs:
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ steps.cache-key.outputs.key }}

@@ -74,7 +65,7 @@ jobs:
uses: actions/checkout@v4

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

@@ -82,7 +73,7 @@ jobs:
run: corepack enable

- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

@@ -99,11 +90,8 @@ jobs:
chromatic:
runs-on: ubuntu-latest
needs: setup
# Disabled: to re-enable, remove 'false &&' from the condition below
if: >-
false
&& (github.ref == 'refs/heads/dev' || github.base_ref == 'dev')
&& needs.setup.outputs.components-changed == 'true'
# Only run on dev branch pushes or PRs targeting dev
if: github.ref == 'refs/heads/dev' || github.base_ref == 'dev'

steps:
- name: Checkout repository

@@ -112,7 +100,7 @@ jobs:
fetch-depth: 0

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

@@ -120,7 +108,7 @@ jobs:
run: corepack enable

- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

@@ -140,7 +128,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}
exitOnceUploaded: true

e2e_test:
test:
runs-on: big-boi
needs: setup
strategy:

@@ -153,30 +141,22 @@ jobs:
submodules: recursive

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

- name: Enable corepack
run: corepack enable

- name: Copy default supabase .env
- name: Copy default platform .env
run: |
cp ../.env.default ../.env

- name: Copy backend .env and set OpenAI API key
run: |
cp ../backend/.env.default ../backend/.env
echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
env:
# Used by E2E test data script to generate embeddings for approved store agents
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Cache Docker layers
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}

@@ -231,7 +211,7 @@ jobs:
fi

- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

@@ -247,62 +227,14 @@ jobs:

- name: Run Playwright tests
run: pnpm test:no-build
continue-on-error: false

- name: Upload Playwright report
if: always()
- name: Upload Playwright artifacts
if: failure()
uses: actions/upload-artifact@v4
with:
name: playwright-report
path: playwright-report
if-no-files-found: ignore
retention-days: 3

- name: Upload Playwright test results
if: always()
uses: actions/upload-artifact@v4
with:
name: playwright-test-results
path: test-results
if-no-files-found: ignore
retention-days: 3

- name: Print Final Docker Compose logs
if: always()
run: docker compose -f ../docker-compose.yml logs

integration_test:
runs-on: ubuntu-latest
needs: setup

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive

- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: "22.18.0"

- name: Enable corepack
run: corepack enable

- name: Restore dependencies cache
uses: actions/cache@v5
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}
restore-keys: |
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-

- name: Install dependencies
run: pnpm install --frozen-lockfile

- name: Generate API client
run: pnpm generate:api

- name: Run Integration Tests
run: pnpm test:unit
.github/workflows/platform-fullstack-ci.yml (66 changed lines, vendored)
@@ -1,12 +1,13 @@

name: AutoGPT Platform - Frontend CI
name: AutoGPT Platform - Fullstack CI

on:
push:
branches: [master, dev]
branches: [master, dev, native-auth]
paths:
- ".github/workflows/platform-fullstack-ci.yml"
- "autogpt_platform/**"
pull_request:
branches: [master, dev, native-auth]
paths:
- ".github/workflows/platform-fullstack-ci.yml"
- "autogpt_platform/**"

@@ -32,7 +33,7 @@ jobs:
uses: actions/checkout@v4

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

@@ -44,7 +45,7 @@ jobs:
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT

- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ steps.cache-key.outputs.key }}

@@ -56,39 +57,24 @@ jobs:
run: pnpm install --frozen-lockfile

types:
runs-on: big-boi
runs-on: ubuntu-latest
needs: setup
strategy:
fail-fast: false
timeout-minutes: 10

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive

- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"

- name: Enable corepack
run: corepack enable

- name: Copy default supabase .env
run: |
cp ../.env.default ../.env

- name: Copy backend .env
run: |
cp ../backend/.env.default ../backend/.env

- name: Run docker compose
run: |
docker compose -f ../docker-compose.yml --profile local up -d deps_backend

- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

@@ -101,36 +87,12 @@ jobs:
- name: Setup .env
run: cp .env.default .env

- name: Wait for services to be ready
run: |
echo "Waiting for rest_server to be ready..."
timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
echo "Waiting for database to be ready..."
timeout 60 sh -c 'until docker compose -f ../docker-compose.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done' || echo "Database ready check timeout, continuing..."

- name: Generate API queries
run: pnpm generate:api:force

- name: Check for API schema changes
run: |
if ! git diff --exit-code src/app/api/openapi.json; then
echo "❌ API schema changes detected in src/app/api/openapi.json"
echo ""
echo "The openapi.json file has been modified after running 'pnpm generate:api-all'."
echo "This usually means changes have been made in the BE endpoints without updating the Frontend."
echo "The API schema is now out of sync with the Front-end queries."
echo ""
echo "To fix this:"
echo "1. Pull the backend 'docker compose pull && docker compose up -d --build --force-recreate'"
echo "2. Run 'pnpm generate:api' locally"
echo "3. Run 'pnpm types' locally"
echo "4. Fix any TypeScript errors that may have been introduced"
echo "5. Commit and push your changes"
echo ""
exit 1
else
echo "✅ No API schema changes detected"
fi
run: pnpm generate:api

- name: Run Typescript checks
run: pnpm types

env:
CI: true
PLAIN_OUTPUT: True
.gitignore (2 changed lines, vendored)
@@ -178,6 +178,4 @@ autogpt_platform/backend/settings.py
*.ign.*
.test-contents
.claude/settings.local.json
CLAUDE.local.md
/autogpt_platform/backend/logs
.next
AGENTS.md (48 changed lines)
@@ -16,34 +16,6 @@ See `docs/content/platform/getting-started.md` for setup instructions.
- Format Python code with `poetry run format`.
- Format frontend code using `pnpm format`.

## Frontend guidelines:

See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:

1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx`
   - Add `usePageName.ts` hook for logic
   - Put sub-components in local `components/` folder
2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts`
   - Use design system components from `src/components/` (atoms, molecules, organisms)
   - Never use `src/components/__legacy__/*`
3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/`
   - Regenerate with `pnpm generate:api`
   - Pattern: `use{Method}{Version}{OperationName}`
4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
5. **Testing**: Add Storybook stories for new components, Playwright for E2E
6. **Code conventions**: Function declarations (not arrow functions) for components/handlers

- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
- Colocate state when possible and avoid creating large components, use sub-components (local `/components` folder next to the parent component) when sensible
- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
- Use function declarations for components, arrow functions only for callbacks
- No barrel files or `index.ts` re-exports
- Avoid comments at all times unless the code is very complex
- Do not use `useCallback` or `useMemo` unless asked to optimise a given function
- Do not type hook returns, let Typescript infer as much as possible
- Never type with `any`, if not types available use `unknown`

## Testing

- Backend: `poetry run test` (runs pytest with a docker based postgres + prisma).

@@ -51,8 +23,22 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:

Always run the relevant linters and tests before committing.
Use conventional commit messages for all commits (e.g. `feat(backend): add API`).
Types: - feat - fix - refactor - ci - dx (developer experience)
Scopes: - platform - platform/library - platform/marketplace - backend - backend/executor - frontend - frontend/library - frontend/marketplace - blocks
Types:
- feat
- fix
- refactor
- ci
- dx (developer experience)
Scopes:
- platform
- platform/library
- platform/marketplace
- backend
- backend/executor
- frontend
- frontend/library
- frontend/marketplace
- blocks

## Pull requests

@@ -63,5 +49,5 @@ Scopes: - platform - platform/library - platform/marketplace - backend - backend
- Keep out-of-scope changes under 20% of the PR.
- Ensure PR descriptions are complete.
- For changes touching `data/*.py`, validate user ID checks or explain why not needed.
- If adding protected frontend routes, update `frontend/lib/supabase/middleware.ts`.
- If adding protected frontend routes, update `frontend/lib/auth/helpers.ts`.
- Use the linear ticket branch structure if given codex/open-1668-resume-dropped-runs

@@ -54,7 +54,7 @@ Before proceeding with the installation, ensure your system meets the following

### Updated Setup Instructions:
We've moved to a fully maintained and regularly updated documentation site.

👉 [Follow the official self-hosting guide here](https://agpt.co/docs/platform/getting-started/getting-started)
👉 [Follow the official self-hosting guide here](https://docs.agpt.co/platform/getting-started/)


This tutorial assumes you have Docker, VSCode, git and npm installed.
@@ -5,12 +5,6 @@

POSTGRES_PASSWORD=your-super-secret-and-long-postgres-password
JWT_SECRET=your-super-secret-jwt-token-with-at-least-32-characters-long
ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE
SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q
DASHBOARD_USERNAME=supabase
DASHBOARD_PASSWORD=this_password_is_insecure_and_should_be_updated
SECRET_KEY_BASE=UpNVntn3cDxHJpq99YMc1T1AQgQpc8kfYTuRgBiYa15BLrx8etQoXz3gZv1/u2oq
VAULT_ENC_KEY=your-encryption-key-32-chars-min


############

@@ -24,100 +18,31 @@ POSTGRES_PORT=5432


############
# Supavisor -- Database pooler
############
POOLER_PROXY_PORT_TRANSACTION=6543
POOLER_DEFAULT_POOL_SIZE=20
POOLER_MAX_CLIENT_CONN=100
POOLER_TENANT_ID=your-tenant-id


############
# API Proxy - Configuration for the Kong Reverse proxy.
# Auth - Native authentication configuration
############

KONG_HTTP_PORT=8000
KONG_HTTPS_PORT=8443


############
# API - Configuration for PostgREST.
############

PGRST_DB_SCHEMAS=public,storage,graphql_public


############
# Auth - Configuration for the GoTrue authentication server.
############

## General
SITE_URL=http://localhost:3000
ADDITIONAL_REDIRECT_URLS=
JWT_EXPIRY=3600
DISABLE_SIGNUP=false
API_EXTERNAL_URL=http://localhost:8000

## Mailer Config
MAILER_URLPATHS_CONFIRMATION="/auth/v1/verify"
MAILER_URLPATHS_INVITE="/auth/v1/verify"
MAILER_URLPATHS_RECOVERY="/auth/v1/verify"
MAILER_URLPATHS_EMAIL_CHANGE="/auth/v1/verify"
# JWT token configuration
ACCESS_TOKEN_EXPIRE_MINUTES=15
REFRESH_TOKEN_EXPIRE_DAYS=7
JWT_ISSUER=autogpt-platform

## Email auth
ENABLE_EMAIL_SIGNUP=true
ENABLE_EMAIL_AUTOCONFIRM=false
SMTP_ADMIN_EMAIL=admin@example.com
SMTP_HOST=supabase-mail
SMTP_PORT=2500
SMTP_USER=fake_mail_user
SMTP_PASS=fake_mail_password
SMTP_SENDER_NAME=fake_sender
ENABLE_ANONYMOUS_USERS=false

## Phone auth
ENABLE_PHONE_SIGNUP=true
ENABLE_PHONE_AUTOCONFIRM=true
# Google OAuth (optional)
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=


############
# Studio - Configuration for the Dashboard
# Email configuration (optional)
############

STUDIO_DEFAULT_ORGANIZATION=Default Organization
STUDIO_DEFAULT_PROJECT=Default Project
SMTP_HOST=
SMTP_PORT=587
SMTP_USER=
SMTP_PASS=
SMTP_FROM_EMAIL=noreply@example.com

STUDIO_PORT=3000
# replace if you intend to use Studio outside of localhost
SUPABASE_PUBLIC_URL=http://localhost:8000

# Enable webp support
IMGPROXY_ENABLE_WEBP_DETECTION=true

# Add your OpenAI API key to enable SQL Editor Assistant
OPENAI_API_KEY=


############
# Functions - Configuration for Functions
############
# NOTE: VERIFY_JWT applies to all functions. Per-function VERIFY_JWT is not supported yet.
FUNCTIONS_VERIFY_JWT=false


############
# Logs - Configuration for Logflare
# Please refer to https://supabase.com/docs/reference/self-hosting-analytics/introduction
############

LOGFLARE_LOGGER_BACKEND_API_KEY=your-super-secret-and-long-logflare-key

# Change vector.toml sinks to reflect this change
LOGFLARE_API_KEY=your-super-secret-and-long-logflare-key

# Docker socket location - this value will differ depending on your OS
DOCKER_SOCKET_LOCATION=/var/run/docker.sock

# Google Cloud Project details
GOOGLE_PROJECT_ID=GOOGLE_PROJECT_ID
GOOGLE_PROJECT_NUMBER=GOOGLE_PROJECT_NUMBER
@@ -6,30 +6,152 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

AutoGPT Platform is a monorepo containing:

- **Backend** (`backend`): Python FastAPI server with async support
- **Frontend** (`frontend`): Next.js React application
- **Shared Libraries** (`autogpt_libs`): Common Python utilities
- **Backend** (`/backend`): Python FastAPI server with async support
- **Frontend** (`/frontend`): Next.js React application
- **Shared Libraries** (`/autogpt_libs`): Common Python utilities

## Component Documentation
## Essential Commands

- **Backend**: See @backend/CLAUDE.md for backend-specific commands, architecture, and development tasks
- **Frontend**: See @frontend/CLAUDE.md for frontend-specific commands, architecture, and development patterns
### Backend Development

## Key Concepts
```bash
# Install dependencies
cd backend && poetry install

# Run database migrations
poetry run prisma migrate dev

# Start all services (database, redis, rabbitmq, clamav)
docker compose up -d

# Run the backend server
poetry run serve

# Run tests
poetry run test

# Run specific test
poetry run pytest path/to/test_file.py::test_function_name

# Run block tests (tests that validate all blocks work correctly)
poetry run pytest backend/blocks/test/test_block.py -xvs

# Run tests for a specific block (e.g., GetCurrentTimeBlock)
poetry run pytest 'backend/blocks/test/test_block.py::test_available_blocks[GetCurrentTimeBlock]' -xvs

# Lint and format
# prefer format if you want to just "fix" it and only get the errors that can't be autofixed
poetry run format # Black + isort
poetry run lint # ruff
```

More details can be found in TESTING.md

#### Creating/Updating Snapshots

When you first write a test or when the expected output changes:

```bash
poetry run pytest path/to/test.py --snapshot-update
```

⚠️ **Important**: Always review snapshot changes before committing! Use `git diff` to verify the changes are expected.

### Frontend Development

```bash
# Install dependencies
cd frontend && pnpm i

# Generate API client from OpenAPI spec
pnpm generate:api

# Start development server
pnpm dev

# Run E2E tests
pnpm test

# Run Storybook for component development
pnpm storybook

# Build production
pnpm build

# Format and lint
pnpm format

# Type checking
pnpm types
```

**📖 Complete Guide**: See `/frontend/CONTRIBUTING.md` and `/frontend/.cursorrules` for comprehensive frontend patterns.

**Key Frontend Conventions:**

- Separate render logic from data/behavior in components
- Use generated API hooks from `@/app/api/__generated__/endpoints/`
- Use function declarations (not arrow functions) for components/handlers
- Use design system components from `src/components/` (atoms, molecules, organisms)
- Only use Phosphor Icons
- Never use `src/components/__legacy__/*` or deprecated `BackendAPI`

## Architecture Overview

### Backend Architecture

- **API Layer**: FastAPI with REST and WebSocket endpoints
- **Database**: PostgreSQL with Prisma ORM, includes pgvector for embeddings
- **Queue System**: RabbitMQ for async task processing
- **Execution Engine**: Separate executor service processes agent workflows
- **Authentication**: JWT-based with Supabase integration
- **Security**: Cache protection middleware prevents sensitive data caching in browsers/proxies

### Frontend Architecture

- **Framework**: Next.js 15 App Router (client-first approach)
- **Data Fetching**: Type-safe generated API hooks via Orval + React Query
- **State Management**: React Query for server state, co-located UI state in components/hooks
- **Component Structure**: Separate render logic (`.tsx`) from business logic (`use*.ts` hooks)
- **Workflow Builder**: Visual graph editor using @xyflow/react
- **UI Components**: shadcn/ui (Radix UI primitives) with Tailwind CSS styling
- **Icons**: Phosphor Icons only
- **Feature Flags**: LaunchDarkly integration
- **Error Handling**: ErrorCard for render errors, toast for mutations, Sentry for exceptions
- **Testing**: Playwright for E2E, Storybook for component development

### Key Concepts

1. **Agent Graphs**: Workflow definitions stored as JSON, executed by the backend
2. **Blocks**: Reusable components in `backend/backend/blocks/` that perform specific tasks
2. **Blocks**: Reusable components in `/backend/blocks/` that perform specific tasks
3. **Integrations**: OAuth and API connections stored per user
4. **Store**: Marketplace for sharing agent templates
5. **Virus Scanning**: ClamAV integration for file upload security

### Testing Approach

- Backend uses pytest with snapshot testing for API responses
- Test files are colocated with source files (`*_test.py`)
- Frontend uses Playwright for E2E tests
- Component testing via Storybook

### Database Schema

Key models (defined in `/backend/schema.prisma`):

- `User`: Authentication and profile data
- `AgentGraph`: Workflow definitions with version control
- `AgentGraphExecution`: Execution history and results
- `AgentNode`: Individual nodes in a workflow
- `StoreListing`: Marketplace listings for sharing agents

### Environment Configuration

#### Configuration Files

- **Backend**: `backend/.env.default` (defaults) → `backend/.env` (user overrides)
- **Frontend**: `frontend/.env.default` (defaults) → `frontend/.env` (user overrides)
- **Platform**: `.env.default` (Supabase/shared defaults) → `.env` (user overrides)
- **Backend**: `/backend/.env.default` (defaults) → `/backend/.env` (user overrides)
- **Frontend**: `/frontend/.env.default` (defaults) → `/frontend/.env` (user overrides)
- **Platform**: `/.env.default` (Supabase/shared defaults) → `/.env` (user overrides)

#### Docker Environment Loading Order

@@ -45,12 +167,75 @@ AutoGPT Platform is a monorepo containing:
- Backend/Frontend services use YAML anchors for consistent configuration
- Supabase services (`db/docker/docker-compose.yml`) follow the same pattern

### Common Development Tasks

**Adding a new block:**

Follow the comprehensive [Block SDK Guide](../../../docs/content/platform/block-sdk-guide.md) which covers:

- Provider configuration with `ProviderBuilder`
- Block schema definition
- Authentication (API keys, OAuth, webhooks)
- Testing and validation
- File organization

Quick steps:

1. Create new file in `/backend/backend/blocks/`
2. Configure provider using `ProviderBuilder` in `_config.py`
3. Inherit from `Block` base class
4. Define input/output schemas using `BlockSchema`
5. Implement async `run` method
6. Generate unique block ID using `uuid.uuid4()`
7. Test with `poetry run pytest backend/blocks/test/test_block.py`

Note: when making many new blocks analyze the interfaces for each of these blocks and picture if they would go well together in a graph based editor or would they struggle to connect productively?
ex: do the inputs and outputs tie well together?

If you get any pushback or hit complex block conditions check the new_blocks guide in the docs.

**Modifying the API:**

1. Update route in `/backend/backend/server/routers/`
2. Add/update Pydantic models in same directory
3. Write tests alongside the route file
4. Run `poetry run test` to verify

**Frontend feature development:**

See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:

1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx`
   - Add `usePageName.ts` hook for logic
   - Put sub-components in local `components/` folder
2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts`
   - Use design system components from `src/components/` (atoms, molecules, organisms)
   - Never use `src/components/__legacy__/*`
3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/`
   - Regenerate with `pnpm generate:api`
   - Pattern: `use{Method}{Version}{OperationName}`
4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
5. **Testing**: Add Storybook stories for new components, Playwright for E2E
6. **Code conventions**: Function declarations (not arrow functions) for components/handlers

### Security Implementation

**Cache Protection Middleware:**

- Located in `/backend/backend/server/middleware/security.py`
- Default behavior: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private`
- Uses an allow list approach - only explicitly permitted paths can be cached
- Cacheable paths include: static assets (`/static/*`, `/_next/static/*`), health checks, public store pages, documentation
- Prevents sensitive data (auth tokens, API keys, user data) from being cached by browsers/proxies
- To allow caching for a new endpoint, add it to `CACHEABLE_PATHS` in the middleware
- Applied to both main API server and external API applications

### Creating Pull Requests

- Create the PR against the `dev` branch of the repository.
- Ensure the branch name is descriptive (e.g., `feature/add-new-block`)
- Use conventional commit messages (see below)
- Fill out the .github/PULL_REQUEST_TEMPLATE.md template as the PR description
- Create the PR aginst the `dev` branch of the repository.
- Ensure the branch name is descriptive (e.g., `feature/add-new-block`)/
- Use conventional commit messages (see below)/
- Fill out the .github/PULL_REQUEST_TEMPLATE.md template as the PR description/
- Run the github pre-commit hooks to ensure code quality.

### Reviewing/Revising Pull Requests
@@ -1,19 +1,17 @@
.PHONY: start-core stop-core logs-core format lint migrate run-backend run-frontend load-store-agents

# Run just Supabase + Redis + RabbitMQ
# Run just PostgreSQL + Redis + RabbitMQ + ClamAV
start-core:
docker compose up -d deps

# Stop core services
stop-core:
docker compose stop
docker compose stop deps

reset-db:
docker compose stop db
rm -rf db/docker/volumes/db/data
cd backend && poetry run prisma migrate deploy
cd backend && poetry run prisma generate
cd backend && poetry run gen-prisma-stub

# View logs for core services
logs-core:

@@ -35,7 +33,6 @@ init-env:
migrate:
cd backend && poetry run prisma migrate deploy
cd backend && poetry run prisma generate
cd backend && poetry run gen-prisma-stub

run-backend:
cd backend && poetry run app

@@ -52,7 +49,7 @@ load-store-agents:
help:
@echo "Usage: make <target>"
@echo "Targets:"
@echo " start-core - Start just the core services (Supabase, Redis, RabbitMQ) in background"
@echo " start-core - Start just the core services (PostgreSQL, Redis, RabbitMQ, ClamAV) in background"
@echo " stop-core - Stop the core services"
@echo " reset-db - Reset the database by deleting the volume"
@echo " logs-core - Tail the logs for core services"

@@ -61,4 +58,4 @@ help:
@echo " run-backend - Run the backend FastAPI server"
@echo " run-frontend - Run the frontend Next.js development server"
@echo " test-data - Run the test data creator"
@echo " load-store-agents - Load store agents from agents/ folder into test database"
@echo " load-store-agents - Load store agents from agents/ folder into test database"
@@ -16,17 +16,37 @@ ALGO_RECOMMENDATION = (
    "We highly recommend using an asymmetric algorithm such as ES256, "
    "because when leaked, a shared secret would allow anyone to "
    "forge valid tokens and impersonate users. "
    "More info: https://supabase.com/docs/guides/auth/signing-keys#choosing-the-right-signing-algorithm"  # noqa
    "More info: https://pyjwt.readthedocs.io/en/stable/algorithms.html"
)


class Settings:
    def __init__(self):
        # JWT verification key (public key for asymmetric, shared secret for symmetric)
        self.JWT_VERIFY_KEY: str = os.getenv(
            "JWT_VERIFY_KEY", os.getenv("SUPABASE_JWT_SECRET", "")
        ).strip()

        # JWT signing key (private key for asymmetric, shared secret for symmetric)
        # Falls back to JWT_VERIFY_KEY for symmetric algorithms like HS256
        self.JWT_SIGN_KEY: str = os.getenv("JWT_SIGN_KEY", self.JWT_VERIFY_KEY).strip()

        self.JWT_ALGORITHM: str = os.getenv("JWT_SIGN_ALGORITHM", "HS256").strip()

        # Token expiration settings
        self.ACCESS_TOKEN_EXPIRE_MINUTES: int = int(
            os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "15")
        )
        self.REFRESH_TOKEN_EXPIRE_DAYS: int = int(
            os.getenv("REFRESH_TOKEN_EXPIRE_DAYS", "7")
        )

        # JWT issuer claim
        self.JWT_ISSUER: str = os.getenv("JWT_ISSUER", "autogpt-platform").strip()

        # JWT audience claim
        self.JWT_AUDIENCE: str = os.getenv("JWT_AUDIENCE", "authenticated").strip()

        self.validate()

    def validate(self):
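A minimal sketch of how this configuration resolves in the symmetric (HS256) case, using only the environment variables read above; the values are placeholders:

```python
import os

# With HS256, one shared secret both signs and verifies tokens, so
# JWT_SIGN_KEY can be omitted and falls back to JWT_VERIFY_KEY.
os.environ["JWT_VERIFY_KEY"] = "your-super-secret-jwt-token-with-at-least-32-characters-long"
os.environ["JWT_SIGN_ALGORITHM"] = "HS256"

settings = Settings()
assert settings.JWT_SIGN_KEY == settings.JWT_VERIFY_KEY
assert settings.JWT_ISSUER == "autogpt-platform"  # default issuer claim
```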
@@ -1,25 +1,29 @@
from fastapi import FastAPI
from fastapi.openapi.utils import get_openapi

from .jwt_utils import bearer_jwt_auth


def add_auth_responses_to_openapi(app: FastAPI) -> None:
    """
    Patch a FastAPI instance's `openapi()` method to add 401 responses
    Set up custom OpenAPI schema generation that adds 401 responses
    to all authenticated endpoints.

    This is needed when using HTTPBearer with auto_error=False to get proper
    401 responses instead of 403, but FastAPI only automatically adds security
    responses when auto_error=True.
    """
    # Wrap current method to allow stacking OpenAPI schema modifiers like this
    wrapped_openapi = app.openapi

    def custom_openapi():
        if app.openapi_schema:
            return app.openapi_schema

        openapi_schema = wrapped_openapi()
        openapi_schema = get_openapi(
            title=app.title,
            version=app.version,
            description=app.description,
            routes=app.routes,
        )

        # Add 401 response to all endpoints that have security requirements
        for path, methods in openapi_schema["paths"].items():
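A usage sketch for the helper above; the import path is assumed from the module layout shown in this diff and may differ:

```python
from fastapi import FastAPI

# hypothetical import path for the helper defined above
from autogpt_libs.auth.openapi import add_auth_responses_to_openapi

app = FastAPI(title="Example API")
add_auth_responses_to_openapi(app)  # subsequent app.openapi() calls include 401 responses
```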
@@ -1,4 +1,8 @@
import hashlib
import logging
import secrets
import uuid
from datetime import datetime, timedelta, timezone
from typing import Any

import jwt

@@ -16,6 +20,57 @@ bearer_jwt_auth = HTTPBearer(
)


def create_access_token(
    user_id: str,
    email: str,
    role: str = "authenticated",
    email_verified: bool = False,
) -> str:
    """
    Generate a new JWT access token.

    :param user_id: The user's unique identifier
    :param email: The user's email address
    :param role: The user's role (default: "authenticated")
    :param email_verified: Whether the user's email is verified
    :return: Encoded JWT token
    """
    settings = get_settings()
    now = datetime.now(timezone.utc)

    payload = {
        "sub": user_id,
        "email": email,
        "role": role,
        "email_verified": email_verified,
        "aud": settings.JWT_AUDIENCE,
        "iss": settings.JWT_ISSUER,
        "iat": now,
        "exp": now + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES),
        "jti": str(uuid.uuid4()),  # Unique token ID
    }

    return jwt.encode(payload, settings.JWT_SIGN_KEY, algorithm=settings.JWT_ALGORITHM)


def create_refresh_token() -> tuple[str, str]:
    """
    Generate a new refresh token.

    Returns a tuple of (raw_token, hashed_token).
    The raw token should be sent to the client.
    The hashed token should be stored in the database.
    """
    raw_token = secrets.token_urlsafe(64)
    hashed_token = hashlib.sha256(raw_token.encode()).hexdigest()
    return raw_token, hashed_token


def hash_token(token: str) -> str:
    """Hash a token using SHA-256."""
    return hashlib.sha256(token.encode()).hexdigest()
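These two helpers are designed to compose: the raw token goes to the client while only its SHA-256 hash is persisted, so a database leak does not expose usable refresh tokens. A sketch, with the storage calls left hypothetical:

```python
raw_token, hashed_token = create_refresh_token()

# store_hash() and lookup_hash() stand in for whatever persistence layer is used
store_hash(user_id="user-123", token_hash=hashed_token)
send_to_client(raw_token)

# Later, when the client presents its token:
if lookup_hash(hash_token(token_from_client)) is not None:
    ...  # valid refresh token; issue a new access token
```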
async def get_jwt_payload(
    credentials: HTTPAuthorizationCredentials | None = Security(bearer_jwt_auth),
) -> dict[str, Any]:

@@ -52,11 +107,19 @@ def parse_jwt_token(token: str) -> dict[str, Any]:
    """
    settings = get_settings()
    try:
        # Build decode options
        options = {
            "verify_aud": True,
            "verify_iss": bool(settings.JWT_ISSUER),
        }

        payload = jwt.decode(
            token,
            settings.JWT_VERIFY_KEY,
            algorithms=[settings.JWT_ALGORITHM],
            audience="authenticated",
            audience=settings.JWT_AUDIENCE,
            issuer=settings.JWT_ISSUER if settings.JWT_ISSUER else None,
            options=options,
        )
        return payload
    except jwt.ExpiredSignatureError:
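Taken together with create_access_token above, the decode call round-trips cleanly when the sign and verify keys match and the issuer/audience settings agree, for example:

```python
token = create_access_token(user_id="user-123", email="user@example.com")
payload = parse_jwt_token(token)  # verifies signature, expiry, audience, and issuer
assert payload["sub"] == "user-123"
assert payload["aud"] == "authenticated"  # default JWT_AUDIENCE
```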
@@ -11,6 +11,7 @@ class User:
    email: str
    phone_number: str
    role: str
    email_verified: bool = False

    @classmethod
    def from_payload(cls, payload):

@@ -18,5 +19,6 @@ class User:
            user_id=payload["sub"],
            email=payload.get("email", ""),
            phone_number=payload.get("phone", ""),
            role=payload["role"],
            role=payload.get("role", "authenticated"),
            email_verified=payload.get("email_verified", False),
        )
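The switch to `payload.get("role", "authenticated")` makes the role claim optional, so a payload produced by parse_jwt_token maps straight onto a User even when the token carries no explicit role, e.g.:

```python
user = User.from_payload(parse_jwt_token(token))
assert user.role == "authenticated"  # default when the token has no role claim
```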
autogpt_platform/autogpt_libs/poetry.lock (2168 changed lines, generated): file diff suppressed because it is too large
@@ -9,25 +9,26 @@ packages = [{ include = "autogpt_libs" }]
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<4.0"
|
||||
colorama = "^0.4.6"
|
||||
cryptography = "^46.0"
|
||||
cryptography = "^45.0"
|
||||
expiringdict = "^1.2.2"
|
||||
fastapi = "^0.128.0"
|
||||
google-cloud-logging = "^3.13.0"
|
||||
launchdarkly-server-sdk = "^9.14.1"
|
||||
pydantic = "^2.12.5"
|
||||
pydantic-settings = "^2.12.0"
|
||||
pyjwt = { version = "^2.11.0", extras = ["crypto"] }
|
||||
fastapi = "^0.116.1"
|
||||
google-cloud-logging = "^3.12.1"
|
||||
launchdarkly-server-sdk = "^9.12.0"
|
||||
pydantic = "^2.11.7"
|
||||
pydantic-settings = "^2.10.1"
|
||||
pyjwt = { version = "^2.10.1", extras = ["crypto"] }
|
||||
redis = "^6.2.0"
|
||||
supabase = "^2.27.2"
|
||||
uvicorn = "^0.40.0"
|
||||
bcrypt = "^4.1.0"
|
||||
authlib = "^1.3.0"
|
||||
uvicorn = "^0.35.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pyright = "^1.1.408"
|
||||
pyright = "^1.1.404"
|
||||
pytest = "^8.4.1"
|
||||
pytest-asyncio = "^1.3.0"
|
||||
pytest-mock = "^3.15.1"
|
||||
pytest-cov = "^7.0.0"
|
||||
ruff = "^0.15.0"
|
||||
pytest-asyncio = "^1.1.0"
|
||||
pytest-mock = "^3.14.1"
|
||||
pytest-cov = "^6.2.1"
|
||||
ruff = "^0.12.11"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
||||
@@ -27,10 +27,15 @@ REDIS_PORT=6379
RABBITMQ_DEFAULT_USER=rabbitmq_user_default
RABBITMQ_DEFAULT_PASS=k0VMxyIJF9S35f3x2uaw5IWAl6Y536O7

# Supabase Authentication
SUPABASE_URL=http://localhost:8000
SUPABASE_SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q
# JWT Authentication
# Generate a secure random key: python -c "import secrets; print(secrets.token_urlsafe(32))"
JWT_SIGN_KEY=your-super-secret-jwt-token-with-at-least-32-characters-long
JWT_VERIFY_KEY=your-super-secret-jwt-token-with-at-least-32-characters-long
JWT_SIGN_ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=15
REFRESH_TOKEN_EXPIRE_DAYS=7
JWT_ISSUER=autogpt-platform
JWT_AUDIENCE=authenticated

## ===== REQUIRED SECURITY KEYS ===== ##
# Generate using: from cryptography.fernet import Fernet;Fernet.generate_key().decode()

@@ -58,13 +63,6 @@ V0_API_KEY=
OPEN_ROUTER_API_KEY=
NVIDIA_API_KEY=

# Langfuse Prompt Management
# Used for managing the CoPilot system prompt externally
# Get credentials from https://cloud.langfuse.com or your self-hosted instance
LANGFUSE_PUBLIC_KEY=
LANGFUSE_SECRET_KEY=
LANGFUSE_HOST=https://cloud.langfuse.com

# OAuth Credentials
# For the OAuth callback URL, use <your_frontend_url>/auth/integrations/oauth_callback,
# e.g. http://localhost:3000/auth/integrations/oauth_callback

@@ -152,7 +150,6 @@ REPLICATE_API_KEY=
REVID_API_KEY=
SCREENSHOTONE_API_KEY=
UNREAL_SPEECH_API_KEY=
ELEVENLABS_API_KEY=

# Data & Search Services
E2B_API_KEY=

@@ -179,10 +176,5 @@ AYRSHARE_JWT_KEY=
SMARTLEAD_API_KEY=
ZEROBOUNCE_API_KEY=

# PostHog Analytics
# Get API key from https://posthog.com - Project Settings > Project API Key
POSTHOG_API_KEY=
POSTHOG_HOST=https://eu.i.posthog.com

# Other Services
AUTOMOD_API_KEY=
autogpt_platform/backend/.gitignore (5 changed lines, vendored)
@@ -18,7 +18,6 @@ load-tests/results/
load-tests/*.json
load-tests/*.log
load-tests/node_modules/*
migrations/*/rollback*.sql

# Workspace files
workspaces/
# Migration backups (contain user data)
migration_backups/
@@ -1,170 +0,0 @@
# CLAUDE.md - Backend

This file provides guidance to Claude Code when working with the backend.

## Essential Commands

To run something with Python package dependencies you MUST use `poetry run ...`.

```bash
# Install dependencies
poetry install

# Run database migrations
poetry run prisma migrate dev

# Start all services (database, redis, rabbitmq, clamav)
docker compose up -d

# Run the backend as a whole
poetry run app

# Run tests
poetry run test

# Run specific test
poetry run pytest path/to/test_file.py::test_function_name

# Run block tests (tests that validate all blocks work correctly)
poetry run pytest backend/blocks/test/test_block.py -xvs

# Run tests for a specific block (e.g., GetCurrentTimeBlock)
poetry run pytest 'backend/blocks/test/test_block.py::test_available_blocks[GetCurrentTimeBlock]' -xvs

# Lint and format
# prefer format if you want to just "fix" it and only get the errors that can't be autofixed
poetry run format # Black + isort
poetry run lint # ruff
```

More details can be found in @TESTING.md

### Creating/Updating Snapshots

When you first write a test or when the expected output changes:

```bash
poetry run pytest path/to/test.py --snapshot-update
```

⚠️ **Important**: Always review snapshot changes before committing! Use `git diff` to verify the changes are expected.

## Architecture

- **API Layer**: FastAPI with REST and WebSocket endpoints
- **Database**: PostgreSQL with Prisma ORM, includes pgvector for embeddings
- **Queue System**: RabbitMQ for async task processing
- **Execution Engine**: Separate executor service processes agent workflows
- **Authentication**: JWT-based with Supabase integration
- **Security**: Cache protection middleware prevents sensitive data caching in browsers/proxies

## Testing Approach

- Uses pytest with snapshot testing for API responses
- Test files are colocated with source files (`*_test.py`)

## Database Schema

Key models (defined in `schema.prisma`):

- `User`: Authentication and profile data
- `AgentGraph`: Workflow definitions with version control
- `AgentGraphExecution`: Execution history and results
- `AgentNode`: Individual nodes in a workflow
- `StoreListing`: Marketplace listings for sharing agents

## Environment Configuration

- **Backend**: `.env.default` (defaults) → `.env` (user overrides)

## Common Development Tasks

### Adding a new block

Follow the comprehensive [Block SDK Guide](@../../docs/content/platform/block-sdk-guide.md) which covers:

- Provider configuration with `ProviderBuilder`
- Block schema definition
- Authentication (API keys, OAuth, webhooks)
- Testing and validation
- File organization

Quick steps:

1. Create new file in `backend/blocks/`
2. Configure provider using `ProviderBuilder` in `_config.py`
3. Inherit from `Block` base class
4. Define input/output schemas using `BlockSchema`
5. Implement async `run` method
6. Generate unique block ID using `uuid.uuid4()`
7. Test with `poetry run pytest backend/blocks/test/test_block.py`

Note: when making many new blocks analyze the interfaces for each of these blocks and picture if they would go well together in a graph-based editor or would they struggle to connect productively?
ex: do the inputs and outputs tie well together?

If you get any pushback or hit complex block conditions check the new_blocks guide in the docs.

#### Handling files in blocks with `store_media_file()`

When blocks need to work with files (images, videos, documents), use `store_media_file()` from `backend.util.file`. The `return_format` parameter determines what you get back:

| Format | Use When | Returns |
|--------|----------|---------|
| `"for_local_processing"` | Processing with local tools (ffmpeg, MoviePy, PIL) | Local file path (e.g., `"image.png"`) |
| `"for_external_api"` | Sending content to external APIs (Replicate, OpenAI) | Data URI (e.g., `"data:image/png;base64,..."`) |
| `"for_block_output"` | Returning output from your block | Smart: `workspace://` in CoPilot, data URI in graphs |

**Examples:**

```python
# INPUT: Need to process file locally with ffmpeg
local_path = await store_media_file(
    file=input_data.video,
    execution_context=execution_context,
    return_format="for_local_processing",
)
# local_path = "video.mp4" - use with Path/ffmpeg/etc

# INPUT: Need to send to external API like Replicate
image_b64 = await store_media_file(
    file=input_data.image,
    execution_context=execution_context,
    return_format="for_external_api",
)
# image_b64 = "data:image/png;base64,iVBORw0..." - send to API

# OUTPUT: Returning result from block
result_url = await store_media_file(
    file=generated_image_url,
    execution_context=execution_context,
    return_format="for_block_output",
)
yield "image_url", result_url
# In CoPilot: result_url = "workspace://abc123"
# In graphs: result_url = "data:image/png;base64,..."
```

**Key points:**

- `for_block_output` is the ONLY format that auto-adapts to execution context
- Always use `for_block_output` for block outputs unless you have a specific reason not to
- Never hardcode workspace checks - let `for_block_output` handle it

### Modifying the API

1. Update route in `backend/api/features/`
2. Add/update Pydantic models in same directory
3. Write tests alongside the route file
4. Run `poetry run test` to verify

## Security Implementation

### Cache Protection Middleware

- Located in `backend/api/middleware/security.py`
- Default behavior: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private`
- Uses an allow list approach - only explicitly permitted paths can be cached
- Cacheable paths include: static assets (`static/*`, `_next/static/*`), health checks, public store pages, documentation
- Prevents sensitive data (auth tokens, API keys, user data) from being cached by browsers/proxies
- To allow caching for a new endpoint, add it to `CACHEABLE_PATHS` in the middleware
- Applied to both main API server and external API applications

@@ -48,8 +48,7 @@ RUN poetry install --no-ansi --no-root
# Generate Prisma client
COPY autogpt_platform/backend/schema.prisma ./
COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/partial_types.py
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
RUN poetry run prisma generate && poetry run gen-prisma-stub
RUN poetry run prisma generate

FROM debian:13-slim AS server_dependencies

@@ -62,12 +61,10 @@ ENV POETRY_HOME=/opt/poetry \
DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/poetry/bin:$PATH

# Install Python, FFmpeg, and ImageMagick (required for video processing blocks)
# Install Python without upgrading system-managed packages
RUN apt-get update && apt-get install -y \
python3.13 \
python3-pip \
ffmpeg \
imagemagick \
&& rm -rf /var/lib/apt/lists/*

# Copy only necessary files from builder

@@ -102,7 +99,6 @@ COPY autogpt_platform/backend/migrations /app/autogpt_platform/backend/migration
FROM server_dependencies AS server

COPY autogpt_platform/backend /app/autogpt_platform/backend
COPY docs /app/docs
RUN poetry install --no-ansi --only-root

ENV PORT=8000
@@ -108,7 +108,7 @@ import fastapi.testclient
import pytest
from pytest_snapshot.plugin import Snapshot

from backend.api.features.myroute import router
from backend.server.v2.myroute import router

app = fastapi.FastAPI()
app.include_router(router)

@@ -138,7 +138,7 @@ If the test doesn't need the `user_id` specifically, mocking is not necessary as

#### Using Global Auth Fixtures

Two global auth fixtures are provided by `backend/api/conftest.py`:
Two global auth fixtures are provided by `backend/server/conftest.py`:

- `mock_jwt_user` - Regular user with `test_user_id` ("test-user-id")
- `mock_jwt_admin` - Admin user with `admin_user_id` ("admin-user-id")

@@ -149,7 +149,7 @@ These provide the easiest way to set up authentication mocking in test modules:
import fastapi
import fastapi.testclient
import pytest
from backend.api.features.myroute import router
from backend.server.v2.myroute import router

app = fastapi.FastAPI()
app.include_router(router)

@@ -1,25 +0,0 @@
from fastapi import FastAPI

from backend.api.middleware.security import SecurityHeadersMiddleware
from backend.monitoring.instrumentation import instrument_fastapi

from .v1.routes import v1_router

external_api = FastAPI(
    title="AutoGPT External API",
    description="External API for AutoGPT integrations",
    docs_url="/docs",
    version="1.0",
)

external_api.add_middleware(SecurityHeadersMiddleware)
external_api.include_router(v1_router, prefix="/v1")

# Add Prometheus instrumentation
instrument_fastapi(
    external_api,
    service_name="external-api",
    expose_endpoint=True,
    endpoint="/metrics",
    include_in_schema=True,
)
@@ -1,340 +0,0 @@
"""Tests for analytics API endpoints."""

import json
from unittest.mock import AsyncMock, Mock

import fastapi
import fastapi.testclient
import pytest
import pytest_mock
from pytest_snapshot.plugin import Snapshot

from .analytics import router as analytics_router

app = fastapi.FastAPI()
app.include_router(analytics_router)

client = fastapi.testclient.TestClient(app)


@pytest.fixture(autouse=True)
def setup_app_auth(mock_jwt_user):
    """Set up auth overrides for all tests in this module."""
    from autogpt_libs.auth.jwt_utils import get_jwt_payload

    app.dependency_overrides[get_jwt_payload] = mock_jwt_user["get_jwt_payload"]
    yield
    app.dependency_overrides.clear()


# =============================================================================
# /log_raw_metric endpoint tests
# =============================================================================


def test_log_raw_metric_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
    test_user_id: str,
) -> None:
    """Test successful raw metric logging."""
    mock_result = Mock(id="metric-123-uuid")
    mock_log_metric = mocker.patch(
        "backend.data.analytics.log_raw_metric",
        new_callable=AsyncMock,
        return_value=mock_result,
    )

    request_data = {
        "metric_name": "page_load_time",
        "metric_value": 2.5,
        "data_string": "/dashboard",
    }

    response = client.post("/log_raw_metric", json=request_data)

    assert response.status_code == 200, f"Unexpected response: {response.text}"
    assert response.json() == "metric-123-uuid"

    mock_log_metric.assert_called_once_with(
        user_id=test_user_id,
        metric_name="page_load_time",
        metric_value=2.5,
        data_string="/dashboard",
    )

    configured_snapshot.assert_match(
        json.dumps({"metric_id": response.json()}, indent=2, sort_keys=True),
        "analytics_log_metric_success",
    )


@pytest.mark.parametrize(
    "metric_value,metric_name,data_string,test_id",
    [
        (100, "api_calls_count", "external_api", "integer_value"),
        (0, "error_count", "no_errors", "zero_value"),
        (-5.2, "temperature_delta", "cooling", "negative_value"),
        (1.23456789, "precision_test", "float_precision", "float_precision"),
        (999999999, "large_number", "max_value", "large_number"),
        (0.0000001, "tiny_number", "min_value", "tiny_number"),
    ],
)
def test_log_raw_metric_various_values(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
    metric_value: float,
    metric_name: str,
    data_string: str,
    test_id: str,
) -> None:
    """Test raw metric logging with various metric values."""
    mock_result = Mock(id=f"metric-{test_id}-uuid")
    mocker.patch(
        "backend.data.analytics.log_raw_metric",
        new_callable=AsyncMock,
        return_value=mock_result,
    )

    request_data = {
        "metric_name": metric_name,
        "metric_value": metric_value,
        "data_string": data_string,
    }

    response = client.post("/log_raw_metric", json=request_data)

    assert response.status_code == 200, f"Failed for {test_id}: {response.text}"

    configured_snapshot.assert_match(
        json.dumps(
            {"metric_id": response.json(), "test_case": test_id},
            indent=2,
            sort_keys=True,
        ),
        f"analytics_metric_{test_id}",
    )


@pytest.mark.parametrize(
    "invalid_data,expected_error",
    [
        ({}, "Field required"),
        ({"metric_name": "test"}, "Field required"),
        (
            {"metric_name": "test", "metric_value": "not_a_number", "data_string": "x"},
            "Input should be a valid number",
        ),
        (
            {"metric_name": "", "metric_value": 1.0, "data_string": "test"},
            "String should have at least 1 character",
        ),
        (
            {"metric_name": "test", "metric_value": 1.0, "data_string": ""},
            "String should have at least 1 character",
        ),
    ],
    ids=[
        "empty_request",
        "missing_metric_value_and_data_string",
        "invalid_metric_value_type",
        "empty_metric_name",
        "empty_data_string",
    ],
)
def test_log_raw_metric_validation_errors(
    invalid_data: dict,
    expected_error: str,
) -> None:
    """Test validation errors for invalid metric requests."""
    response = client.post("/log_raw_metric", json=invalid_data)

    assert response.status_code == 422
    error_detail = response.json()
    assert "detail" in error_detail, f"Missing 'detail' in error: {error_detail}"

    error_text = json.dumps(error_detail)
    assert (
        expected_error in error_text
    ), f"Expected '{expected_error}' in error response: {error_text}"


def test_log_raw_metric_service_error(
    mocker: pytest_mock.MockFixture,
    test_user_id: str,
) -> None:
    """Test error handling when analytics service fails."""
    mocker.patch(
        "backend.data.analytics.log_raw_metric",
        new_callable=AsyncMock,
        side_effect=Exception("Database connection failed"),
    )

    request_data = {
        "metric_name": "test_metric",
        "metric_value": 1.0,
        "data_string": "test",
    }

    response = client.post("/log_raw_metric", json=request_data)

    assert response.status_code == 500
    error_detail = response.json()["detail"]
    assert "Database connection failed" in error_detail["message"]
    assert "hint" in error_detail


# =============================================================================
# /log_raw_analytics endpoint tests
# =============================================================================


def test_log_raw_analytics_success(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
    test_user_id: str,
) -> None:
    """Test successful raw analytics logging."""
    mock_result = Mock(id="analytics-789-uuid")
    mock_log_analytics = mocker.patch(
        "backend.data.analytics.log_raw_analytics",
        new_callable=AsyncMock,
        return_value=mock_result,
    )

    request_data = {
        "type": "user_action",
        "data": {
            "action": "button_click",
            "button_id": "submit_form",
            "timestamp": "2023-01-01T00:00:00Z",
            "metadata": {"form_type": "registration", "fields_filled": 5},
        },
        "data_index": "button_click_submit_form",
    }

    response = client.post("/log_raw_analytics", json=request_data)

    assert response.status_code == 200, f"Unexpected response: {response.text}"
    assert response.json() == "analytics-789-uuid"

    mock_log_analytics.assert_called_once_with(
        test_user_id,
        "user_action",
        request_data["data"],
        "button_click_submit_form",
    )

    configured_snapshot.assert_match(
        json.dumps({"analytics_id": response.json()}, indent=2, sort_keys=True),
        "analytics_log_analytics_success",
    )


def test_log_raw_analytics_complex_data(
    mocker: pytest_mock.MockFixture,
    configured_snapshot: Snapshot,
) -> None:
    """Test raw analytics logging with complex nested data structures."""
    mock_result = Mock(id="analytics-complex-uuid")
    mocker.patch(
        "backend.data.analytics.log_raw_analytics",
        new_callable=AsyncMock,
        return_value=mock_result,
    )

    request_data = {
        "type": "agent_execution",
        "data": {
            "agent_id": "agent_123",
            "execution_id": "exec_456",
            "status": "completed",
            "duration_ms": 3500,
            "nodes_executed": 15,
            "blocks_used": [
                {"block_id": "llm_block", "count": 3},
                {"block_id": "http_block", "count": 5},
                {"block_id": "code_block", "count": 2},
            ],
            "errors": [],
            "metadata": {
                "trigger": "manual",
                "user_tier": "premium",
                "environment": "production",
            },
        },
        "data_index": "agent_123_exec_456",
    }

    response = client.post("/log_raw_analytics", json=request_data)

    assert response.status_code == 200

    configured_snapshot.assert_match(
        json.dumps(
            {"analytics_id": response.json(), "logged_data": request_data["data"]},
            indent=2,
            sort_keys=True,
        ),
        "analytics_log_analytics_complex_data",
    )


@pytest.mark.parametrize(
    "invalid_data,expected_error",
    [
        ({}, "Field required"),
        ({"type": "test"}, "Field required"),
        (
            {"type": "test", "data": "not_a_dict", "data_index": "test"},
            "Input should be a valid dictionary",
        ),
        ({"type": "test", "data": {"key": "value"}}, "Field required"),
    ],
    ids=[
        "empty_request",
        "missing_data_and_data_index",
        "invalid_data_type",
        "missing_data_index",
    ],
)
def test_log_raw_analytics_validation_errors(
    invalid_data: dict,
    expected_error: str,
) -> None:
    """Test validation errors for invalid analytics requests."""
    response = client.post("/log_raw_analytics", json=invalid_data)

    assert response.status_code == 422
    error_detail = response.json()
    assert "detail" in error_detail, f"Missing 'detail' in error: {error_detail}"

    error_text = json.dumps(error_detail)
    assert (
        expected_error in error_text
    ), f"Expected '{expected_error}' in error response: {error_text}"


def test_log_raw_analytics_service_error(
    mocker: pytest_mock.MockFixture,
    test_user_id: str,
) -> None:
    """Test error handling when analytics service fails."""
    mocker.patch(
        "backend.data.analytics.log_raw_analytics",
        new_callable=AsyncMock,
        side_effect=Exception("Analytics DB unreachable"),
    )

    request_data = {
        "type": "test_event",
        "data": {"key": "value"},
        "data_index": "test_index",
    }

    response = client.post("/log_raw_analytics", json=request_data)

    assert response.status_code == 500
    error_detail = response.json()["detail"]
    assert "Analytics DB unreachable" in error_detail["message"]
    assert "hint" in error_detail
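The two service-error tests above assert that a failing data call surfaces as a 500 whose detail carries both a "message" and a "hint" key. The route module itself is not shown in this diff, so as a hedged sketch only, a handler shaped roughly like the following would produce that detail; every name here is illustrative, not the project's actual code:

# Illustrative only: one way to produce the {"message": ..., "hint": ...}
# detail shape asserted by the 500-error tests above.
import fastapi


async def log_raw_metric_route(request_data, user_id: str):
    import backend.data.analytics as analytics

    try:
        result = await analytics.log_raw_metric(
            user_id=user_id,
            metric_name=request_data.metric_name,
            metric_value=request_data.metric_value,
            data_string=request_data.data_string,
        )
    except Exception as e:
        # Wrap the underlying error so clients get a message plus a hint.
        raise fastapi.HTTPException(
            status_code=500,
            detail={
                "message": str(e),
                "hint": "Check analytics service connectivity.",
            },
        )
    return result.id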
@@ -1,939 +0,0 @@
"""Chat API routes for chat session management and streaming via SSE."""

import logging
import uuid as uuid_module
from collections.abc import AsyncGenerator
from typing import Annotated

from autogpt_libs import auth
from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, Security
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from backend.copilot import service as chat_service
from backend.copilot import stream_registry
from backend.copilot.completion_handler import (
    process_operation_failure,
    process_operation_success,
)
from backend.copilot.config import ChatConfig
from backend.copilot.executor.utils import enqueue_copilot_task
from backend.copilot.model import (
    ChatSession,
    create_chat_session,
    get_chat_session,
    get_user_sessions,
)
from backend.copilot.response_model import StreamFinish, StreamHeartbeat
from backend.copilot.tools.models import (
    AgentDetailsResponse,
    AgentOutputResponse,
    AgentPreviewResponse,
    AgentSavedResponse,
    AgentsFoundResponse,
    BlockListResponse,
    BlockOutputResponse,
    ClarificationNeededResponse,
    DocPageResponse,
    DocSearchResultsResponse,
    ErrorResponse,
    ExecutionStartedResponse,
    InputValidationErrorResponse,
    NeedLoginResponse,
    NoResultsResponse,
    OperationInProgressResponse,
    OperationPendingResponse,
    OperationStartedResponse,
    SetupRequirementsResponse,
    UnderstandingUpdatedResponse,
)
from backend.util.exceptions import NotFoundError

config = ChatConfig()


logger = logging.getLogger(__name__)


async def _validate_and_get_session(
    session_id: str,
    user_id: str | None,
) -> ChatSession:
    """Validate that the session exists and belongs to the user."""
    session = await get_chat_session(session_id, user_id)
    if not session:
        raise NotFoundError(f"Session {session_id} not found.")
    return session


router = APIRouter(
    tags=["chat"],
)

# ========== Request/Response Models ==========


class StreamChatRequest(BaseModel):
    """Request model for streaming chat with optional context."""

    message: str
    is_user_message: bool = True
    context: dict[str, str] | None = None  # {url: str, content: str}


class CreateSessionResponse(BaseModel):
    """Response model containing information on a newly created chat session."""

    id: str
    created_at: str
    user_id: str | None


class ActiveStreamInfo(BaseModel):
    """Information about an active stream for reconnection."""

    task_id: str
    last_message_id: str  # Redis Stream message ID for resumption
    operation_id: str  # Operation ID for completion tracking
    tool_name: str  # Name of the tool being executed


class SessionDetailResponse(BaseModel):
    """Response model providing complete details for a chat session, including messages."""

    id: str
    created_at: str
    updated_at: str
    user_id: str | None
    messages: list[dict]
    active_stream: ActiveStreamInfo | None = None  # Present if stream is still active


class SessionSummaryResponse(BaseModel):
    """Response model for a session summary (without messages)."""

    id: str
    created_at: str
    updated_at: str
    title: str | None = None


class ListSessionsResponse(BaseModel):
    """Response model for listing chat sessions."""

    sessions: list[SessionSummaryResponse]
    total: int


class OperationCompleteRequest(BaseModel):
    """Request model for external completion webhook."""

    success: bool
    result: dict | str | None = None
    error: str | None = None


# ========== Routes ==========


@router.get(
    "/sessions",
    dependencies=[Security(auth.requires_user)],
)
async def list_sessions(
    user_id: Annotated[str, Security(auth.get_user_id)],
    limit: int = Query(default=50, ge=1, le=100),
    offset: int = Query(default=0, ge=0),
) -> ListSessionsResponse:
    """
    List chat sessions for the authenticated user.

    Returns a paginated list of chat sessions belonging to the current user,
    ordered by most recently updated.

    Args:
        user_id: The authenticated user's ID.
        limit: Maximum number of sessions to return (1-100).
        offset: Number of sessions to skip for pagination.

    Returns:
        ListSessionsResponse: List of session summaries and total count.
    """
    sessions, total_count = await get_user_sessions(user_id, limit, offset)

    return ListSessionsResponse(
        sessions=[
            SessionSummaryResponse(
                id=session.session_id,
                created_at=session.started_at.isoformat(),
                updated_at=session.updated_at.isoformat(),
                title=session.title,
            )
            for session in sessions
        ],
        total=total_count,
    )


@router.post(
    "/sessions",
)
async def create_session(
    user_id: Annotated[str, Depends(auth.get_user_id)],
) -> CreateSessionResponse:
    """
    Create a new chat session.

    Initiates a new chat session for the authenticated user.

    Args:
        user_id: The authenticated user ID parsed from the JWT (required).

    Returns:
        CreateSessionResponse: Details of the created session.

    """
    logger.info(
        f"Creating session with user_id: "
        f"...{user_id[-8:] if len(user_id) > 8 else '<redacted>'}"
    )

    session = await create_chat_session(user_id)

    return CreateSessionResponse(
        id=session.session_id,
        created_at=session.started_at.isoformat(),
        user_id=session.user_id,
    )


@router.get(
    "/sessions/{session_id}",
)
async def get_session(
    session_id: str,
    user_id: Annotated[str | None, Depends(auth.get_user_id)],
) -> SessionDetailResponse:
    """
    Retrieve the details of a specific chat session.

    Looks up a chat session by ID for the given user (if authenticated) and returns all session data including messages.
    If there's an active stream for this session, returns the task_id for reconnection.

    Args:
        session_id: The unique identifier for the desired chat session.
        user_id: The optional authenticated user ID, or None for anonymous access.

    Returns:
        SessionDetailResponse: Details for the requested session, including active_stream info if applicable.

    """
    session = await get_chat_session(session_id, user_id)
    if not session:
        raise NotFoundError(f"Session {session_id} not found.")

    messages = [message.model_dump() for message in session.messages]

    # Check if there's an active stream for this session
    active_stream_info = None
    active_task, last_message_id = await stream_registry.get_active_task_for_session(
        session_id, user_id
    )
    if active_task:
        # Filter out the in-progress assistant message from the session response.
        # The client will receive the complete assistant response through the SSE
        # stream replay instead, preventing duplicate content.
        if messages and messages[-1].get("role") == "assistant":
            messages = messages[:-1]

        # Use "0-0" as last_message_id to replay the stream from the beginning.
        # Since we filtered out the cached assistant message, the client needs
        # the full stream to reconstruct the response.
        active_stream_info = ActiveStreamInfo(
            task_id=active_task.task_id,
            last_message_id="0-0",
            operation_id=active_task.operation_id,
            tool_name=active_task.tool_name,
        )

    return SessionDetailResponse(
        id=session.session_id,
        created_at=session.started_at.isoformat(),
        updated_at=session.updated_at.isoformat(),
        user_id=session.user_id or None,
        messages=messages,
        active_stream=active_stream_info,
    )


@router.post(
    "/sessions/{session_id}/stream",
)
async def stream_chat_post(
    session_id: str,
    request: StreamChatRequest,
    user_id: str | None = Depends(auth.get_user_id),
):
    """
    Stream chat responses for a session (POST with context support).

    Streams the AI/completion responses in real time over Server-Sent Events (SSE), including:
    - Text fragments as they are generated
    - Tool call UI elements (if invoked)
    - Tool execution results

    The AI generation runs in a background task that continues even if the client disconnects.
    All chunks are written to Redis for reconnection support. If the client disconnects,
    they can reconnect using GET /tasks/{task_id}/stream to resume from where they left off.

    Args:
        session_id: The chat session identifier to associate with the streamed messages.
        request: Request body containing message, is_user_message, and optional context.
        user_id: Optional authenticated user ID.

    Returns:
        StreamingResponse: SSE-formatted response chunks. First chunk is a "start" event
            containing the task_id for reconnection.

    """
    import asyncio
    import time

    stream_start_time = time.perf_counter()
    log_meta = {"component": "ChatStream", "session_id": session_id}
    if user_id:
        log_meta["user_id"] = user_id

    logger.info(
        f"[TIMING] stream_chat_post STARTED, session={session_id}, "
        f"user={user_id}, message_len={len(request.message)}",
        extra={"json_fields": log_meta},
    )

    _session = await _validate_and_get_session(session_id, user_id)  # noqa: F841
    logger.info(
        f"[TIMING] session validated in {(time.perf_counter() - stream_start_time)*1000:.1f}ms",
        extra={
            "json_fields": {
                **log_meta,
                "duration_ms": (time.perf_counter() - stream_start_time) * 1000,
            }
        },
    )

    # Create a task in the stream registry for reconnection support
    task_id = str(uuid_module.uuid4())
    operation_id = str(uuid_module.uuid4())
    log_meta["task_id"] = task_id

    task_create_start = time.perf_counter()
    await stream_registry.create_task(
        task_id=task_id,
        session_id=session_id,
        user_id=user_id,
        tool_call_id="chat_stream",  # Not a tool call, but needed for the model
        tool_name="chat",
        operation_id=operation_id,
    )
    logger.info(
        f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start)*1000:.1f}ms",
        extra={
            "json_fields": {
                **log_meta,
                "duration_ms": (time.perf_counter() - task_create_start) * 1000,
            }
        },
    )

    # Enqueue the task to RabbitMQ for processing by the CoPilot executor
    await enqueue_copilot_task(
        task_id=task_id,
        session_id=session_id,
        user_id=user_id,
        operation_id=operation_id,
        message=request.message,
        is_user_message=request.is_user_message,
        context=request.context,
    )

    setup_time = (time.perf_counter() - stream_start_time) * 1000
    logger.info(
        f"[TIMING] Task enqueued to RabbitMQ, setup={setup_time:.1f}ms",
        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
    )

    # SSE endpoint that subscribes to the task's stream
    async def event_generator() -> AsyncGenerator[str, None]:
        import time as time_module

        event_gen_start = time_module.perf_counter()
        logger.info(
            f"[TIMING] event_generator STARTED, task={task_id}, session={session_id}, "
            f"user={user_id}",
            extra={"json_fields": log_meta},
        )
        subscriber_queue = None
        first_chunk_yielded = False
        chunks_yielded = 0
        try:
            # Subscribe to the task stream (this replays existing messages + live updates)
            subscriber_queue = await stream_registry.subscribe_to_task(
                task_id=task_id,
                user_id=user_id,
                last_message_id="0-0",  # Get all messages from the beginning
            )

            if subscriber_queue is None:
                yield StreamFinish().to_sse()
                yield "data: [DONE]\n\n"
                return

            # Read from the subscriber queue and yield to SSE
            logger.info(
                "[TIMING] Starting to read from subscriber_queue",
                extra={"json_fields": log_meta},
            )
            while True:
                try:
                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
                    chunks_yielded += 1

                    if not first_chunk_yielded:
                        first_chunk_yielded = True
                        elapsed = time_module.perf_counter() - event_gen_start
                        logger.info(
                            f"[TIMING] FIRST CHUNK from queue at {elapsed:.2f}s, "
                            f"type={type(chunk).__name__}",
                            extra={
                                "json_fields": {
                                    **log_meta,
                                    "chunk_type": type(chunk).__name__,
                                    "elapsed_ms": elapsed * 1000,
                                }
                            },
                        )

                    yield chunk.to_sse()

                    # Check for finish signal
                    if isinstance(chunk, StreamFinish):
                        total_time = time_module.perf_counter() - event_gen_start
                        logger.info(
                            f"[TIMING] StreamFinish received in {total_time:.2f}s; "
                            f"n_chunks={chunks_yielded}",
                            extra={
                                "json_fields": {
                                    **log_meta,
                                    "chunks_yielded": chunks_yielded,
                                    "total_time_ms": total_time * 1000,
                                }
                            },
                        )
                        break
                except asyncio.TimeoutError:
                    yield StreamHeartbeat().to_sse()

        except GeneratorExit:
            logger.info(
                f"[TIMING] GeneratorExit (client disconnected), chunks={chunks_yielded}",
                extra={
                    "json_fields": {
                        **log_meta,
                        "chunks_yielded": chunks_yielded,
                        "reason": "client_disconnect",
                    }
                },
            )
            pass  # Client disconnected - background task continues
        except Exception as e:
            elapsed = (time_module.perf_counter() - event_gen_start) * 1000
            logger.error(
                f"[TIMING] event_generator ERROR after {elapsed:.1f}ms: {e}",
                extra={
                    "json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
                },
            )
        finally:
            # Unsubscribe when client disconnects or stream ends to prevent resource leak
            if subscriber_queue is not None:
                try:
                    await stream_registry.unsubscribe_from_task(
                        task_id, subscriber_queue
                    )
                except Exception as unsub_err:
                    logger.error(
                        f"Error unsubscribing from task {task_id}: {unsub_err}",
                        exc_info=True,
                    )
            # AI SDK protocol termination - always yield even if unsubscribe fails
            total_time = time_module.perf_counter() - event_gen_start
            logger.info(
                f"[TIMING] event_generator FINISHED in {total_time:.2f}s; "
                f"task={task_id}, session={session_id}, n_chunks={chunks_yielded}",
                extra={
                    "json_fields": {
                        **log_meta,
                        "total_time_ms": total_time * 1000,
                        "chunks_yielded": chunks_yielded,
                    }
                },
            )
            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
            "x-vercel-ai-ui-message-stream": "v1",  # AI SDK protocol header
        },
    )


@router.get(
    "/sessions/{session_id}/stream",
)
async def resume_session_stream(
    session_id: str,
    user_id: str | None = Depends(auth.get_user_id),
):
    """
    Resume an active stream for a session.

    Called by the AI SDK's ``useChat(resume: true)`` on page load.
    Checks for an active (in-progress) task on the session and either replays
    the full SSE stream or returns 204 No Content if nothing is running.

    Args:
        session_id: The chat session identifier.
        user_id: Optional authenticated user ID.

    Returns:
        StreamingResponse (SSE) when an active stream exists,
        or 204 No Content when there is nothing to resume.
    """
    import asyncio

    active_task, _last_id = await stream_registry.get_active_task_for_session(
        session_id, user_id
    )

    if not active_task:
        return Response(status_code=204)

    subscriber_queue = await stream_registry.subscribe_to_task(
        task_id=active_task.task_id,
        user_id=user_id,
        last_message_id="0-0",  # Full replay so useChat rebuilds the message
    )

    if subscriber_queue is None:
        return Response(status_code=204)

    async def event_generator() -> AsyncGenerator[str, None]:
        chunk_count = 0
        first_chunk_type: str | None = None
        try:
            while True:
                try:
                    chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
                    if chunk_count < 3:
                        logger.info(
                            "Resume stream chunk",
                            extra={
                                "session_id": session_id,
                                "chunk_type": str(chunk.type),
                            },
                        )
                    if not first_chunk_type:
                        first_chunk_type = str(chunk.type)
                    chunk_count += 1
                    yield chunk.to_sse()

                    if isinstance(chunk, StreamFinish):
                        break
                except asyncio.TimeoutError:
                    yield StreamHeartbeat().to_sse()
        except GeneratorExit:
            pass
        except Exception as e:
            logger.error(f"Error in resume stream for session {session_id}: {e}")
        finally:
            try:
                await stream_registry.unsubscribe_from_task(
                    active_task.task_id, subscriber_queue
                )
            except Exception as unsub_err:
                logger.error(
                    f"Error unsubscribing from task {active_task.task_id}: {unsub_err}",
                    exc_info=True,
                )
            logger.info(
                "Resume stream completed",
                extra={
                    "session_id": session_id,
                    "n_chunks": chunk_count,
                    "first_chunk_type": first_chunk_type,
                },
            )
            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
            "x-vercel-ai-ui-message-stream": "v1",
        },
    )


@router.patch(
    "/sessions/{session_id}/assign-user",
    dependencies=[Security(auth.requires_user)],
    status_code=200,
)
async def session_assign_user(
    session_id: str,
    user_id: Annotated[str, Security(auth.get_user_id)],
) -> dict:
    """
    Assign an authenticated user to a chat session.

    Used (typically post-login) to claim an existing anonymous session as the current authenticated user.

    Args:
        session_id: The identifier for the (previously anonymous) session.
        user_id: The authenticated user's ID to associate with the session.

    Returns:
        dict: Status of the assignment.

    """
    await chat_service.assign_user_to_session(session_id, user_id)
    return {"status": "ok"}


# ========== Task Streaming (SSE Reconnection) ==========


@router.get(
    "/tasks/{task_id}/stream",
)
async def stream_task(
    task_id: str,
    user_id: str | None = Depends(auth.get_user_id),
    last_message_id: str = Query(
        default="0-0",
        description="Last Redis Stream message ID received (e.g., '1706540123456-0'). Use '0-0' for full replay.",
    ),
):
    """
    Reconnect to a long-running task's SSE stream.

    When a long-running operation (like agent generation) starts, the client
    receives a task_id. If the connection drops, the client can reconnect
    using this endpoint to resume receiving updates.

    Args:
        task_id: The task ID from the operation_started response.
        user_id: Authenticated user ID for ownership validation.
        last_message_id: Last Redis Stream message ID received ("0-0" for full replay).

    Returns:
        StreamingResponse: SSE-formatted response chunks starting after last_message_id.

    Raises:
        HTTPException: 404 if task not found, 410 if task expired, 403 if access denied.
    """
    # Check task existence and expiry before subscribing
    task, error_code = await stream_registry.get_task_with_expiry_info(task_id)

    if error_code == "TASK_EXPIRED":
        raise HTTPException(
            status_code=410,
            detail={
                "code": "TASK_EXPIRED",
                "message": "This operation has expired. Please try again.",
            },
        )

    if error_code == "TASK_NOT_FOUND":
        raise HTTPException(
            status_code=404,
            detail={
                "code": "TASK_NOT_FOUND",
                "message": f"Task {task_id} not found.",
            },
        )

    # Validate ownership if task has an owner
    if task and task.user_id and user_id != task.user_id:
        raise HTTPException(
            status_code=403,
            detail={
                "code": "ACCESS_DENIED",
                "message": "You do not have access to this task.",
            },
        )

    # Get subscriber queue from stream registry
    subscriber_queue = await stream_registry.subscribe_to_task(
        task_id=task_id,
        user_id=user_id,
        last_message_id=last_message_id,
    )

    if subscriber_queue is None:
        raise HTTPException(
            status_code=404,
            detail={
                "code": "TASK_NOT_FOUND",
                "message": f"Task {task_id} not found or access denied.",
            },
        )

    async def event_generator() -> AsyncGenerator[str, None]:
        import asyncio

        heartbeat_interval = 15.0  # Send heartbeat every 15 seconds
        try:
            while True:
                try:
                    # Wait for next chunk with timeout for heartbeats
                    chunk = await asyncio.wait_for(
                        subscriber_queue.get(), timeout=heartbeat_interval
                    )
                    yield chunk.to_sse()

                    # Check for finish signal
                    if isinstance(chunk, StreamFinish):
                        break
                except asyncio.TimeoutError:
                    # Send heartbeat to keep connection alive
                    yield StreamHeartbeat().to_sse()
        except Exception as e:
            logger.error(f"Error in task stream {task_id}: {e}", exc_info=True)
        finally:
            # Unsubscribe when client disconnects or stream ends
            try:
                await stream_registry.unsubscribe_from_task(task_id, subscriber_queue)
            except Exception as unsub_err:
                logger.error(
                    f"Error unsubscribing from task {task_id}: {unsub_err}",
                    exc_info=True,
                )
            # AI SDK protocol termination - always yield even if unsubscribe fails
            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
            "x-vercel-ai-ui-message-stream": "v1",
        },
    )


@router.get(
    "/tasks/{task_id}",
)
async def get_task_status(
    task_id: str,
    user_id: str | None = Depends(auth.get_user_id),
) -> dict:
    """
    Get the status of a long-running task.

    Args:
        task_id: The task ID to check.
        user_id: Authenticated user ID for ownership validation.

    Returns:
        dict: Task status including task_id, status, tool_name, and operation_id.

    Raises:
        NotFoundError: If task_id is not found or user doesn't have access.
    """
    task = await stream_registry.get_task(task_id)

    if task is None:
        raise NotFoundError(f"Task {task_id} not found.")

    # Validate ownership - if task has an owner, requester must match
    if task.user_id and user_id != task.user_id:
        raise NotFoundError(f"Task {task_id} not found.")

    return {
        "task_id": task.task_id,
        "session_id": task.session_id,
        "status": task.status,
        "tool_name": task.tool_name,
        "operation_id": task.operation_id,
        "created_at": task.created_at.isoformat(),
    }


# ========== External Completion Webhook ==========


@router.post(
    "/operations/{operation_id}/complete",
    status_code=200,
)
async def complete_operation(
    operation_id: str,
    request: OperationCompleteRequest,
    x_api_key: str | None = Header(default=None),
) -> dict:
    """
    External completion webhook for long-running operations.

    Called by Agent Generator (or other services) when an operation completes.
    This triggers the stream registry to publish completion and continue LLM generation.

    Args:
        operation_id: The operation ID to complete.
        request: Completion payload with success status and result/error.
        x_api_key: Internal API key for authentication.

    Returns:
        dict: Status of the completion.

    Raises:
        HTTPException: If API key is invalid or operation not found.
    """
    # Validate internal API key - reject if not configured or invalid
    if not config.internal_api_key:
        logger.error(
            "Operation complete webhook rejected: CHAT_INTERNAL_API_KEY not configured"
        )
        raise HTTPException(
            status_code=503,
            detail="Webhook not available: internal API key not configured",
        )
    if x_api_key != config.internal_api_key:
        raise HTTPException(status_code=401, detail="Invalid API key")

    # Find task by operation_id
    task = await stream_registry.find_task_by_operation_id(operation_id)
    if task is None:
        raise HTTPException(
            status_code=404,
            detail=f"Operation {operation_id} not found",
        )

    logger.info(
        f"Received completion webhook for operation {operation_id} "
        f"(task_id={task.task_id}, success={request.success})"
    )

    if request.success:
        await process_operation_success(task, request.result)
    else:
        await process_operation_failure(task, request.error)

    return {"status": "ok", "task_id": task.task_id}


# ========== Configuration ==========


@router.get("/config/ttl", status_code=200)
async def get_ttl_config() -> dict:
    """
    Get the stream TTL configuration.

    Returns the Time-To-Live settings for chat streams, which determine
    how long clients can reconnect to an active stream.

    Returns:
        dict: TTL configuration with seconds and milliseconds values.
    """
    return {
        "stream_ttl_seconds": config.stream_ttl,
        "stream_ttl_ms": config.stream_ttl * 1000,
    }


# ========== Health Check ==========


@router.get("/health", status_code=200)
async def health_check() -> dict:
    """
    Health check endpoint for the chat service.

    Performs a full-cycle test of session creation and retrieval. Should always return healthy
    if the service and data layer are operational.

    Returns:
        dict: A status dictionary indicating health, service name, and API version.

    """
    from backend.data.user import get_or_create_user

    # Ensure health check user exists (required for FK constraint)
    health_check_user_id = "health-check-user"
    await get_or_create_user(
        {
            "sub": health_check_user_id,
            "email": "health-check@system.local",
            "user_metadata": {"name": "Health Check User"},
        }
    )

    # Create and retrieve session to verify full data layer
    session = await create_chat_session(health_check_user_id)
    await get_chat_session(session.session_id, health_check_user_id)

    return {
        "status": "healthy",
        "service": "chat",
        "version": "0.1.0",
    }


# ========== Schema Export (for OpenAPI / Orval codegen) ==========

ToolResponseUnion = (
    AgentsFoundResponse
    | NoResultsResponse
    | AgentDetailsResponse
    | SetupRequirementsResponse
    | ExecutionStartedResponse
    | NeedLoginResponse
    | ErrorResponse
    | InputValidationErrorResponse
    | AgentOutputResponse
    | UnderstandingUpdatedResponse
    | AgentPreviewResponse
    | AgentSavedResponse
    | ClarificationNeededResponse
    | BlockListResponse
    | BlockOutputResponse
    | DocSearchResultsResponse
    | DocPageResponse
    | OperationStartedResponse
    | OperationPendingResponse
    | OperationInProgressResponse
)


@router.get(
    "/schema/tool-responses",
    response_model=ToolResponseUnion,
    include_in_schema=True,
    summary="[Dummy] Tool response type export for codegen",
    description="This endpoint is not meant to be called. It exists solely to "
    "expose tool response models in the OpenAPI schema for frontend codegen.",
)
async def _tool_response_schema() -> ToolResponseUnion:  # type: ignore[return]
    """Never called at runtime. Exists only so Orval generates TS types."""
    raise HTTPException(status_code=501, detail="Schema-only endpoint")
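Taken together, the POST stream endpoint, the GET resume endpoint, and the /tasks/{task_id}/stream reconnect endpoint define a simple client contract: read SSE "data:" lines until "data: [DONE]", treat silence as covered by server heartbeats, and reconnect by task_id with the last Redis Stream message ID seen. A minimal client sketch using httpx; the paths and the [DONE] terminator come from the routes above, while the base URL, bearer-token auth, and httpx usage are assumptions:

# Hedged client sketch for the SSE endpoints above (paths and the
# "data: [DONE]" terminator are taken from the route code; base URL,
# auth scheme, and httpx are assumptions).
import httpx

BASE_URL = "http://localhost:8000"  # placeholder


async def stream_chat(session_id: str, message: str, token: str) -> None:
    headers = {"Authorization": f"Bearer {token}"}  # assumed auth scheme
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=None) as client:
        async with client.stream(
            "POST",
            f"/sessions/{session_id}/stream",
            json={"message": message, "is_user_message": True},
            headers=headers,
        ) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip blank keep-alive lines between events
                payload = line[len("data: "):]
                if payload == "[DONE]":
                    break  # AI SDK protocol terminator yielded by the server
                print(payload)  # one SSE-encoded stream chunk


async def resume_task(task_id: str, last_message_id: str, token: str) -> None:
    # Reconnect after a drop; "0-0" replays the stream from the beginning.
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=None) as client:
        async with client.stream(
            "GET",
            f"/tasks/{task_id}/stream",
            params={"last_message_id": last_message_id},
            headers=headers,
        ) as response:
            async for line in response.aiter_lines():
                if line == "data: [DONE]":
                    break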
File diff suppressed because it is too large
@@ -1,353 +0,0 @@
import asyncio
import logging
from typing import Any, List

import autogpt_libs.auth as autogpt_auth_lib
from fastapi import APIRouter, HTTPException, Query, Security, status
from prisma.enums import ReviewStatus

from backend.data.execution import (
    ExecutionContext,
    ExecutionStatus,
    get_graph_execution_meta,
)
from backend.data.graph import get_graph_settings
from backend.data.human_review import (
    create_auto_approval_record,
    get_pending_reviews_for_execution,
    get_pending_reviews_for_user,
    get_reviews_by_node_exec_ids,
    has_pending_reviews_for_graph_exec,
    process_all_reviews_for_execution,
)
from backend.data.model import USER_TIMEZONE_NOT_SET
from backend.data.user import get_user_by_id
from backend.executor.utils import add_graph_execution

from .model import PendingHumanReviewModel, ReviewRequest, ReviewResponse

logger = logging.getLogger(__name__)


router = APIRouter(
    tags=["v2", "executions", "review"],
    dependencies=[Security(autogpt_auth_lib.requires_user)],
)


@router.get(
    "/pending",
    summary="Get Pending Reviews",
    response_model=List[PendingHumanReviewModel],
    responses={
        200: {"description": "List of pending reviews"},
        500: {"description": "Server error", "content": {"application/json": {}}},
    },
)
async def list_pending_reviews(
    user_id: str = Security(autogpt_auth_lib.get_user_id),
    page: int = Query(1, ge=1, description="Page number (1-indexed)"),
    page_size: int = Query(25, ge=1, le=100, description="Number of reviews per page"),
) -> List[PendingHumanReviewModel]:
    """Get all pending reviews for the current user.

    Retrieves all reviews with status "WAITING" that belong to the authenticated user.
    Results are ordered by creation time (newest first).

    Args:
        user_id: Authenticated user ID from security dependency

    Returns:
        List of pending review objects with status converted to typed literals

    Raises:
        HTTPException: If authentication fails or database error occurs

    Note:
        Reviews with invalid status values are logged as warnings but excluded
        from results rather than failing the entire request.
    """

    return await get_pending_reviews_for_user(user_id, page, page_size)


@router.get(
    "/execution/{graph_exec_id}",
    summary="Get Pending Reviews for Execution",
    response_model=List[PendingHumanReviewModel],
    responses={
        200: {"description": "List of pending reviews for the execution"},
        404: {"description": "Graph execution not found"},
        500: {"description": "Server error", "content": {"application/json": {}}},
    },
)
async def list_pending_reviews_for_execution(
    graph_exec_id: str,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> List[PendingHumanReviewModel]:
    """Get all pending reviews for a specific graph execution.

    Retrieves all reviews with status "WAITING" for the specified graph execution
    that belong to the authenticated user. Results are ordered by creation time
    (oldest first) to preserve review order within the execution.

    Args:
        graph_exec_id: ID of the graph execution to get reviews for
        user_id: Authenticated user ID from security dependency

    Returns:
        List of pending review objects for the specified execution

    Raises:
        HTTPException:
            - 404: If the graph execution doesn't exist or isn't owned by this user
            - 500: If authentication fails or database error occurs

    Note:
        Only returns reviews owned by the authenticated user for security.
        Reviews with invalid status are excluded with warning logs.
    """

    # Verify user owns the graph execution before returning reviews
    graph_exec = await get_graph_execution_meta(
        user_id=user_id, execution_id=graph_exec_id
    )
    if not graph_exec:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Graph execution #{graph_exec_id} not found",
        )

    return await get_pending_reviews_for_execution(graph_exec_id, user_id)


@router.post("/action", response_model=ReviewResponse)
async def process_review_action(
    request: ReviewRequest,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> ReviewResponse:
    """Process reviews with approve or reject actions."""

    # Collect all node exec IDs from the request
    all_request_node_ids = {review.node_exec_id for review in request.reviews}

    if not all_request_node_ids:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="At least one review must be provided",
        )

    # Batch fetch all requested reviews (regardless of status for idempotent handling)
    reviews_map = await get_reviews_by_node_exec_ids(
        list(all_request_node_ids), user_id
    )

    # Validate all reviews were found (must exist, any status is OK for now)
    missing_ids = all_request_node_ids - set(reviews_map.keys())
    if missing_ids:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Review(s) not found: {', '.join(missing_ids)}",
        )

    # Validate all reviews belong to the same execution
    graph_exec_ids = {review.graph_exec_id for review in reviews_map.values()}
    if len(graph_exec_ids) > 1:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="All reviews in a single request must belong to the same execution.",
        )

    graph_exec_id = next(iter(graph_exec_ids))

    # Validate execution status before processing reviews
    graph_exec_meta = await get_graph_execution_meta(
        user_id=user_id, execution_id=graph_exec_id
    )

    if not graph_exec_meta:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Graph execution #{graph_exec_id} not found",
        )

    # Only allow processing reviews if execution is paused for review
    # or incomplete (partial execution with some reviews already processed)
    if graph_exec_meta.status not in (
        ExecutionStatus.REVIEW,
        ExecutionStatus.INCOMPLETE,
    ):
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Cannot process reviews while execution status is {graph_exec_meta.status}. "
            f"Reviews can only be processed when execution is paused (REVIEW status). "
            f"Current status: {graph_exec_meta.status}",
        )

    # Build review decisions map and track which reviews requested auto-approval
    # Auto-approved reviews use original data (no modifications allowed)
    review_decisions = {}
    auto_approve_requests = {}  # Map node_exec_id -> auto_approve_future flag

    for review in request.reviews:
        review_status = (
            ReviewStatus.APPROVED if review.approved else ReviewStatus.REJECTED
        )
        # If this review requested auto-approval, don't allow data modifications
        reviewed_data = None if review.auto_approve_future else review.reviewed_data
        review_decisions[review.node_exec_id] = (
            review_status,
            reviewed_data,
            review.message,
        )
        auto_approve_requests[review.node_exec_id] = review.auto_approve_future

    # Process all reviews
    updated_reviews = await process_all_reviews_for_execution(
        user_id=user_id,
        review_decisions=review_decisions,
    )

    # Create auto-approval records for approved reviews that requested it
    # Deduplicate by node_id to avoid race conditions when multiple reviews
    # for the same node are processed in parallel
    async def create_auto_approval_for_node(
        node_id: str, review_result
    ) -> tuple[str, bool]:
        """
        Create auto-approval record for a node.
        Returns (node_id, success) tuple for tracking failures.
        """
        try:
            await create_auto_approval_record(
                user_id=user_id,
                graph_exec_id=review_result.graph_exec_id,
                graph_id=review_result.graph_id,
                graph_version=review_result.graph_version,
                node_id=node_id,
                payload=review_result.payload,
            )
            return (node_id, True)
        except Exception as e:
            logger.error(
                f"Failed to create auto-approval record for node {node_id}",
                exc_info=e,
            )
            return (node_id, False)

    # Collect node_exec_ids that need auto-approval
    node_exec_ids_needing_auto_approval = [
        node_exec_id
        for node_exec_id, review_result in updated_reviews.items()
        if review_result.status == ReviewStatus.APPROVED
        and auto_approve_requests.get(node_exec_id, False)
    ]

    # Batch-fetch node executions to get node_ids
    nodes_needing_auto_approval: dict[str, Any] = {}
    if node_exec_ids_needing_auto_approval:
        from backend.data.execution import get_node_executions

        node_execs = await get_node_executions(
            graph_exec_id=graph_exec_id, include_exec_data=False
        )
        node_exec_map = {node_exec.node_exec_id: node_exec for node_exec in node_execs}

        for node_exec_id in node_exec_ids_needing_auto_approval:
            node_exec = node_exec_map.get(node_exec_id)
            if node_exec:
                review_result = updated_reviews[node_exec_id]
                # Use the first approved review for this node (deduplicate by node_id)
                if node_exec.node_id not in nodes_needing_auto_approval:
                    nodes_needing_auto_approval[node_exec.node_id] = review_result
            else:
                logger.error(
                    f"Failed to create auto-approval record for {node_exec_id}: "
                    f"Node execution not found. This may indicate a race condition "
                    f"or data inconsistency."
                )

    # Execute all auto-approval creations in parallel (deduplicated by node_id)
    auto_approval_results = await asyncio.gather(
        *[
            create_auto_approval_for_node(node_id, review_result)
            for node_id, review_result in nodes_needing_auto_approval.items()
        ],
        return_exceptions=True,
    )

    # Count auto-approval failures
    auto_approval_failed_count = 0
    for result in auto_approval_results:
        if isinstance(result, Exception):
            # Unexpected exception during auto-approval creation
            auto_approval_failed_count += 1
            logger.error(
                f"Unexpected exception during auto-approval creation: {result}"
            )
        elif isinstance(result, tuple) and len(result) == 2 and not result[1]:
            # Auto-approval creation failed (returned False)
            auto_approval_failed_count += 1

    # Count results
    approved_count = sum(
        1
        for review in updated_reviews.values()
        if review.status == ReviewStatus.APPROVED
    )
    rejected_count = sum(
        1
        for review in updated_reviews.values()
        if review.status == ReviewStatus.REJECTED
    )

    # Resume execution only if ALL pending reviews for this execution have been processed
    if updated_reviews:
        still_has_pending = await has_pending_reviews_for_graph_exec(graph_exec_id)

        if not still_has_pending:
            # Get the graph_id from any processed review
            first_review = next(iter(updated_reviews.values()))

            try:
                # Fetch user and settings to build complete execution context
                user = await get_user_by_id(user_id)
                settings = await get_graph_settings(
                    user_id=user_id, graph_id=first_review.graph_id
                )

                # Preserve user's timezone preference when resuming execution
                user_timezone = (
                    user.timezone if user.timezone != USER_TIMEZONE_NOT_SET else "UTC"
                )

                execution_context = ExecutionContext(
                    human_in_the_loop_safe_mode=settings.human_in_the_loop_safe_mode,
                    sensitive_action_safe_mode=settings.sensitive_action_safe_mode,
                    user_timezone=user_timezone,
                )

                await add_graph_execution(
                    graph_id=first_review.graph_id,
                    user_id=user_id,
                    graph_exec_id=graph_exec_id,
                    execution_context=execution_context,
                )
                logger.info(f"Resumed execution {graph_exec_id}")
            except Exception as e:
                logger.error(f"Failed to resume execution {graph_exec_id}: {str(e)}")

    # Build error message if auto-approvals failed
    error_message = None
    if auto_approval_failed_count > 0:
        error_message = (
            f"{auto_approval_failed_count} auto-approval setting(s) could not be saved. "
            f"You may need to manually approve these reviews in future executions."
        )

    return ReviewResponse(
        approved_count=approved_count,
        rejected_count=rejected_count,
        failed_count=auto_approval_failed_count,
        error=error_message,
    )
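The handler above reads five attributes from each entry in request.reviews: node_exec_id, approved, reviewed_data, message, and auto_approve_future, and it enforces that every review in one request targets the same execution, which must be paused in REVIEW (or INCOMPLETE) status. A hedged example call; the field names mirror the handler's attribute accesses, while the URL prefix and auth header are placeholders:

# Hedged example of calling the review action endpoint above with httpx.
import httpx

payload = {
    "reviews": [
        {
            "node_exec_id": "node-exec-123",
            "approved": True,
            "reviewed_data": None,  # ignored anyway when auto-approving
            "message": "Looks safe to run.",
            "auto_approve_future": True,
        },
        {
            "node_exec_id": "node-exec-456",
            "approved": False,
            "reviewed_data": None,
            "message": "Rejecting: output needs changes.",
            "auto_approve_future": False,
        },
    ]
}

# All reviews must belong to the same execution, and that execution must be
# paused in REVIEW (or INCOMPLETE) status, or the endpoint returns 409.
response = httpx.post(
    "http://localhost:8000/api/executions/review/action",  # placeholder URL
    json=payload,
    headers={"Authorization": "Bearer <token>"},  # assumed auth scheme
)
print(response.json())  # ReviewResponse: approved/rejected/failed counts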
@@ -1,199 +0,0 @@
|
||||
from typing import Literal, Optional
|
||||
|
||||
import autogpt_libs.auth as autogpt_auth_lib
|
||||
from fastapi import APIRouter, Body, HTTPException, Query, Security, status
|
||||
from fastapi.responses import Response
|
||||
from prisma.enums import OnboardingStep
|
||||
|
||||
from backend.data.onboarding import complete_onboarding_step
|
||||
|
||||
from .. import db as library_db
|
||||
from .. import model as library_model
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/agents",
|
||||
tags=["library", "private"],
|
||||
dependencies=[Security(autogpt_auth_lib.requires_user)],
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"",
|
||||
summary="List Library Agents",
|
||||
response_model=library_model.LibraryAgentResponse,
|
||||
)
|
||||
async def list_library_agents(
|
||||
user_id: str = Security(autogpt_auth_lib.get_user_id),
|
||||
search_term: Optional[str] = Query(
|
||||
None, description="Search term to filter agents"
|
||||
),
|
||||
sort_by: library_model.LibraryAgentSort = Query(
|
||||
        library_model.LibraryAgentSort.UPDATED_AT,
        description="Criteria to sort results by",
    ),
    page: int = Query(
        1,
        ge=1,
        description="Page number to retrieve (must be >= 1)",
    ),
    page_size: int = Query(
        15,
        ge=1,
        description="Number of agents per page (must be >= 1)",
    ),
) -> library_model.LibraryAgentResponse:
    """
    Get all agents in the user's library (both created and saved).
    """
    return await library_db.list_library_agents(
        user_id=user_id,
        search_term=search_term,
        sort_by=sort_by,
        page=page,
        page_size=page_size,
    )


@router.get(
    "/favorites",
    summary="List Favorite Library Agents",
)
async def list_favorite_library_agents(
    user_id: str = Security(autogpt_auth_lib.get_user_id),
    page: int = Query(
        1,
        ge=1,
        description="Page number to retrieve (must be >= 1)",
    ),
    page_size: int = Query(
        15,
        ge=1,
        description="Number of agents per page (must be >= 1)",
    ),
) -> library_model.LibraryAgentResponse:
    """
    Get all favorite agents in the user's library.
    """
    return await library_db.list_favorite_library_agents(
        user_id=user_id,
        page=page,
        page_size=page_size,
    )


@router.get("/{library_agent_id}", summary="Get Library Agent")
async def get_library_agent(
    library_agent_id: str,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent:
    return await library_db.get_library_agent(id=library_agent_id, user_id=user_id)


@router.get("/by-graph/{graph_id}")
async def get_library_agent_by_graph_id(
    graph_id: str,
    version: Optional[int] = Query(default=None),
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent:
    library_agent = await library_db.get_library_agent_by_graph_id(
        user_id, graph_id, version
    )
    if not library_agent:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Library agent for graph #{graph_id} and user #{user_id} not found",
        )
    return library_agent


@router.get(
    "/marketplace/{store_listing_version_id}",
    summary="Get Agent By Store ID",
    tags=["store", "library"],
    response_model=library_model.LibraryAgent | None,
)
async def get_library_agent_by_store_listing_version_id(
    store_listing_version_id: str,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent | None:
    """
    Get Library Agent from Store Listing Version ID.
    """
    return await library_db.get_library_agent_by_store_version_id(
        store_listing_version_id, user_id
    )


@router.post(
    "",
    summary="Add Marketplace Agent",
    status_code=status.HTTP_201_CREATED,
)
async def add_marketplace_agent_to_library(
    store_listing_version_id: str = Body(embed=True),
    source: Literal["onboarding", "marketplace"] = Body(
        default="marketplace", embed=True
    ),
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent:
    """
    Add an agent from the marketplace to the user's library.
    """
    agent = await library_db.add_store_agent_to_library(
        store_listing_version_id=store_listing_version_id,
        user_id=user_id,
    )
    if source != "onboarding":
        await complete_onboarding_step(user_id, OnboardingStep.MARKETPLACE_ADD_AGENT)
    return agent


@router.patch(
    "/{library_agent_id}",
    summary="Update Library Agent",
)
async def update_library_agent(
    library_agent_id: str,
    payload: library_model.LibraryAgentUpdateRequest,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent:
    """
    Update the library agent with the given fields.
    """
    return await library_db.update_library_agent(
        library_agent_id=library_agent_id,
        user_id=user_id,
        auto_update_version=payload.auto_update_version,
        graph_version=payload.graph_version,
        is_favorite=payload.is_favorite,
        is_archived=payload.is_archived,
        settings=payload.settings,
    )


@router.delete(
    "/{library_agent_id}",
    summary="Delete Library Agent",
)
async def delete_library_agent(
    library_agent_id: str,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> Response:
    """
    Soft-delete the specified library agent.
    """
    await library_db.delete_library_agent(
        library_agent_id=library_agent_id, user_id=user_id
    )
    return Response(status_code=status.HTTP_204_NO_CONTENT)


@router.post("/{library_agent_id}/fork", summary="Fork Library Agent")
async def fork_library_agent(
    library_agent_id: str,
    user_id: str = Security(autogpt_auth_lib.get_user_id),
) -> library_model.LibraryAgent:
    return await library_db.fork_library_agent(
        library_agent_id=library_agent_id,
        user_id=user_id,
    )
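
# Usage sketch (not part of the original file): calling the fork endpoint over
# HTTP. The base URL, mount path, and bearer-token auth are assumptions for
# illustration only; check the platform's API docs for the real values.
#
#   import httpx
#
#   async def fork_agent(agent_id: str, token: str) -> dict:
#       async with httpx.AsyncClient(base_url="http://localhost:8006") as client:
#           resp = await client.post(
#               f"/api/library/agents/{agent_id}/fork",
#               headers={"Authorization": f"Bearer {token}"},
#           )
#           resp.raise_for_status()
#           return resp.json()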
@@ -1,621 +0,0 @@
"""
Content Type Handlers for Unified Embeddings

Pluggable system for different content sources (store agents, blocks, docs).
Each handler knows how to fetch and process its content type for embedding.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from prisma.enums import ContentType

from backend.data.db import query_raw_with_schema

logger = logging.getLogger(__name__)


@dataclass
class ContentItem:
    """Represents a piece of content to be embedded."""

    content_id: str  # Unique identifier (DB ID or file path)
    content_type: ContentType
    searchable_text: str  # Combined text for embedding
    metadata: dict[str, Any]  # Content-specific metadata
    user_id: str | None = None  # For user-scoped content


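# Example (illustrative values only, not part of the original file): a
# ContentItem for a public documentation section. Public content leaves
# user_id as None.
#
#   item = ContentItem(
#       content_id="platform/getting-started.md::0",
#       content_type=ContentType.DOCUMENTATION,
#       searchable_text="Getting Started - Overview\n\n...",
#       metadata={"doc_title": "Getting Started", "section_index": 0},
#   )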
class ContentHandler(ABC):
    """Base handler for fetching and processing content for embeddings."""

    @property
    @abstractmethod
    def content_type(self) -> ContentType:
        """The ContentType this handler manages."""
        pass

    @abstractmethod
    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """
        Fetch items that don't have embeddings yet.

        Args:
            batch_size: Maximum number of items to return

        Returns:
            List of ContentItem objects ready for embedding
        """
        pass

    @abstractmethod
    async def get_stats(self) -> dict[str, int]:
        """
        Get statistics about embedding coverage.

        Returns:
            Dict with keys: total, with_embeddings, without_embeddings
        """
        pass


class StoreAgentHandler(ContentHandler):
    """Handler for marketplace store agent listings."""

    @property
    def content_type(self) -> ContentType:
        return ContentType.STORE_AGENT

    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """Fetch approved store listings without embeddings."""
        from backend.api.features.store.embeddings import build_searchable_text

        missing = await query_raw_with_schema(
            """
            SELECT
                slv.id,
                slv.name,
                slv.description,
                slv."subHeading",
                slv.categories
            FROM {schema_prefix}"StoreListingVersion" slv
            LEFT JOIN {schema_prefix}"UnifiedContentEmbedding" uce
                ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
            WHERE slv."submissionStatus" = 'APPROVED'
              AND slv."isDeleted" = false
              AND uce."contentId" IS NULL
            LIMIT $1
            """,
            batch_size,
        )

        return [
            ContentItem(
                content_id=row["id"],
                content_type=ContentType.STORE_AGENT,
                searchable_text=build_searchable_text(
                    name=row["name"],
                    description=row["description"],
                    sub_heading=row["subHeading"],
                    categories=row["categories"] or [],
                ),
                metadata={
                    "name": row["name"],
                    "categories": row["categories"] or [],
                },
                user_id=None,  # Store agents are public
            )
            for row in missing
        ]

    async def get_stats(self) -> dict[str, int]:
        """Get statistics about store agent embedding coverage."""
        # Count approved versions
        approved_result = await query_raw_with_schema(
            """
            SELECT COUNT(*) as count
            FROM {schema_prefix}"StoreListingVersion"
            WHERE "submissionStatus" = 'APPROVED'
              AND "isDeleted" = false
            """
        )
        total_approved = approved_result[0]["count"] if approved_result else 0

        # Count versions with embeddings
        embedded_result = await query_raw_with_schema(
            """
            SELECT COUNT(*) as count
            FROM {schema_prefix}"StoreListingVersion" slv
            JOIN {schema_prefix}"UnifiedContentEmbedding" uce ON slv.id = uce."contentId" AND uce."contentType" = 'STORE_AGENT'::{schema_prefix}"ContentType"
            WHERE slv."submissionStatus" = 'APPROVED'
              AND slv."isDeleted" = false
            """
        )
        with_embeddings = embedded_result[0]["count"] if embedded_result else 0

        return {
            "total": total_approved,
            "with_embeddings": with_embeddings,
            "without_embeddings": total_approved - with_embeddings,
        }


class BlockHandler(ContentHandler):
    """Handler for block definitions (Python classes)."""

    @property
    def content_type(self) -> ContentType:
        return ContentType.BLOCK

    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """Fetch blocks without embeddings."""
        from backend.data.block import get_blocks

        # Get all available blocks
        all_blocks = get_blocks()

        # Check which ones have embeddings
        if not all_blocks:
            return []

        block_ids = list(all_blocks.keys())

        # Query for existing embeddings
        placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])
        existing_result = await query_raw_with_schema(
            f"""
            SELECT "contentId"
            FROM {{schema_prefix}}"UnifiedContentEmbedding"
            WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
              AND "contentId" = ANY(ARRAY[{placeholders}])
            """,
            *block_ids,
        )

        existing_ids = {row["contentId"] for row in existing_result}
        missing_blocks = [
            (block_id, block_cls)
            for block_id, block_cls in all_blocks.items()
            if block_id not in existing_ids
        ]

        # Convert to ContentItem
        items = []
        for block_id, block_cls in missing_blocks[:batch_size]:
            try:
                block_instance = block_cls()

                # Skip disabled blocks - they shouldn't be indexed
                if block_instance.disabled:
                    continue

                # Build searchable text from block metadata
                parts = []
                if hasattr(block_instance, "name") and block_instance.name:
                    parts.append(block_instance.name)
                if (
                    hasattr(block_instance, "description")
                    and block_instance.description
                ):
                    parts.append(block_instance.description)
                if hasattr(block_instance, "categories") and block_instance.categories:
                    # Convert BlockCategory enum to strings
                    parts.append(
                        " ".join(str(cat.value) for cat in block_instance.categories)
                    )

                # Add input/output schema info
                if hasattr(block_instance, "input_schema"):
                    schema = block_instance.input_schema
                    if hasattr(schema, "model_json_schema"):
                        schema_dict = schema.model_json_schema()
                        if "properties" in schema_dict:
                            for prop_name, prop_info in schema_dict[
                                "properties"
                            ].items():
                                if "description" in prop_info:
                                    parts.append(
                                        f"{prop_name}: {prop_info['description']}"
                                    )

                searchable_text = " ".join(parts)

                # Convert categories set of enums to list of strings for JSON serialization
                categories = getattr(block_instance, "categories", set())
                categories_list = (
                    [cat.value for cat in categories] if categories else []
                )

                items.append(
                    ContentItem(
                        content_id=block_id,
                        content_type=ContentType.BLOCK,
                        searchable_text=searchable_text,
                        metadata={
                            "name": getattr(block_instance, "name", ""),
                            "categories": categories_list,
                        },
                        user_id=None,  # Blocks are public
                    )
                )
            except Exception as e:
                logger.warning(f"Failed to process block {block_id}: {e}")
                continue

        return items

    async def get_stats(self) -> dict[str, int]:
        """Get statistics about block embedding coverage."""
        from backend.data.block import get_blocks

        all_blocks = get_blocks()

        # Filter out disabled blocks - they're not indexed
        enabled_block_ids = [
            block_id
            for block_id, block_cls in all_blocks.items()
            if not block_cls().disabled
        ]
        total_blocks = len(enabled_block_ids)

        if total_blocks == 0:
            return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}

        block_ids = enabled_block_ids
        placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))])

        embedded_result = await query_raw_with_schema(
            f"""
            SELECT COUNT(*) as count
            FROM {{schema_prefix}}"UnifiedContentEmbedding"
            WHERE "contentType" = 'BLOCK'::{{schema_prefix}}"ContentType"
              AND "contentId" = ANY(ARRAY[{placeholders}])
            """,
            *block_ids,
        )

        with_embeddings = embedded_result[0]["count"] if embedded_result else 0

        return {
            "total": total_blocks,
            "with_embeddings": with_embeddings,
            "without_embeddings": total_blocks - with_embeddings,
        }


@dataclass
class MarkdownSection:
    """Represents a section of a markdown document."""

    title: str  # Section heading text
    content: str  # Section content (including the heading line)
    level: int  # Heading level (1 for #, 2 for ##, etc.)
    index: int  # Section index within the document


class DocumentationHandler(ContentHandler):
    """Handler for documentation files (.md/.mdx).

    Chunks documents by markdown headings to create multiple embeddings per file.
    Each section (## heading) becomes a separate embedding for better retrieval.
    """

    @property
    def content_type(self) -> ContentType:
        return ContentType.DOCUMENTATION

    def _get_docs_root(self) -> Path:
        """Get the documentation root directory."""
        # content_handlers.py is at: backend/backend/api/features/store/content_handlers.py
        # Need to go up to project root then into docs/
        # In container: /app/autogpt_platform/backend/backend/api/features/store -> /app/docs
        # In development: /repo/autogpt_platform/backend/backend/api/features/store -> /repo/docs
        this_file = Path(
            __file__
        )  # .../backend/backend/api/features/store/content_handlers.py
        project_root = (
            this_file.parent.parent.parent.parent.parent.parent.parent
        )  # -> /app or /repo
        docs_root = project_root / "docs"
        return docs_root

    def _extract_doc_title(self, file_path: Path) -> str:
        """Extract the document title from a markdown file."""
        try:
            content = file_path.read_text(encoding="utf-8")
            lines = content.split("\n")

            # Try to extract title from first # heading
            for line in lines:
                if line.startswith("# "):
                    return line[2:].strip()

            # If no title found, use filename
            return file_path.stem.replace("-", " ").replace("_", " ").title()
        except Exception as e:
            logger.warning(f"Failed to read title from {file_path}: {e}")
            return file_path.stem.replace("-", " ").replace("_", " ").title()

    def _chunk_markdown_by_headings(
        self, file_path: Path, min_heading_level: int = 2
    ) -> list[MarkdownSection]:
        """
        Split a markdown file into sections based on headings.

        Args:
            file_path: Path to the markdown file
            min_heading_level: Minimum heading level to split on (default: 2 for ##)

        Returns:
            List of MarkdownSection objects, one per section.
            If no headings found, returns a single section with all content.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except Exception as e:
            logger.warning(f"Failed to read {file_path}: {e}")
            return []

        lines = content.split("\n")
        sections: list[MarkdownSection] = []
        current_section_lines: list[str] = []
        current_title = ""
        current_level = 0
        section_index = 0
        doc_title = ""

        for line in lines:
            # Check if line is a heading
            if line.startswith("#"):
                # Count heading level
                level = 0
                for char in line:
                    if char == "#":
                        level += 1
                    else:
                        break

                heading_text = line[level:].strip()

                # Track document title (level 1 heading)
                if level == 1 and not doc_title:
                    doc_title = heading_text
                    # Don't create a section for just the title - add it to first section
                    current_section_lines.append(line)
                    continue

                # Check if this heading should start a new section
                if level >= min_heading_level:
                    # Save previous section if it has content
                    if current_section_lines:
                        section_content = "\n".join(current_section_lines).strip()
                        if section_content:
                            # Use doc title for first section if no specific title
                            title = current_title if current_title else doc_title
                            if not title:
                                title = file_path.stem.replace("-", " ").replace(
                                    "_", " "
                                )
                            sections.append(
                                MarkdownSection(
                                    title=title,
                                    content=section_content,
                                    level=current_level if current_level else 1,
                                    index=section_index,
                                )
                            )
                            section_index += 1

                    # Start new section
                    current_section_lines = [line]
                    current_title = heading_text
                    current_level = level
                else:
                    # Lower level heading (e.g., # when splitting on ##)
                    current_section_lines.append(line)
            else:
                current_section_lines.append(line)

        # Don't forget the last section
        if current_section_lines:
            section_content = "\n".join(current_section_lines).strip()
            if section_content:
                title = current_title if current_title else doc_title
                if not title:
                    title = file_path.stem.replace("-", " ").replace("_", " ")
                sections.append(
                    MarkdownSection(
                        title=title,
                        content=section_content,
                        level=current_level if current_level else 1,
                        index=section_index,
                    )
                )

        # If no sections were created (no headings found), create one section with all content
        if not sections and content.strip():
            title = (
                doc_title
                if doc_title
                else file_path.stem.replace("-", " ").replace("_", " ")
            )
            sections.append(
                MarkdownSection(
                    title=title,
                    content=content.strip(),
                    level=1,
                    index=0,
                )
            )

        return sections

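    # Illustrative chunking example (not part of the original file). For a file
    # docs/example.md containing:
    #
    #   # Guide
    #   Intro text.
    #   ## Setup
    #   Install things.
    #
    # _chunk_markdown_by_headings() returns two sections:
    #   MarkdownSection(title="Guide", level=1, index=0, content="# Guide\nIntro text.")
    #   MarkdownSection(title="Setup", level=2, index=1, content="## Setup\nInstall things.")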
    def _make_section_content_id(self, doc_path: str, section_index: int) -> str:
        """Create a unique content ID for a document section.

        Format: doc_path::section_index
        Example: 'platform/getting-started.md::0'
        """
        return f"{doc_path}::{section_index}"

    def _parse_section_content_id(self, content_id: str) -> tuple[str, int]:
        """Parse a section content ID back into doc_path and section_index.

        Returns: (doc_path, section_index)
        """
        if "::" in content_id:
            parts = content_id.rsplit("::", 1)
            return parts[0], int(parts[1])
        # Legacy format (whole document)
        return content_id, 0

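    # Round-trip sketch: section content IDs are plain strings that parse back
    # to their components.
    #
    #   _make_section_content_id("platform/getting-started.md", 3)
    #       -> "platform/getting-started.md::3"
    #   _parse_section_content_id("platform/getting-started.md::3")
    #       -> ("platform/getting-started.md", 3)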
    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """Fetch documentation sections without embeddings.

        Chunks each document by markdown headings and creates embeddings for each section.
        Content IDs use the format: 'path/to/doc.md::section_index'
        """
        docs_root = self._get_docs_root()

        if not docs_root.exists():
            logger.warning(f"Documentation root not found: {docs_root}")
            return []

        # Find all .md and .mdx files
        all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))

        if not all_docs:
            return []

        # Build list of all sections from all documents
        all_sections: list[tuple[str, Path, MarkdownSection]] = []
        for doc_file in all_docs:
            doc_path = str(doc_file.relative_to(docs_root))
            sections = self._chunk_markdown_by_headings(doc_file)
            for section in sections:
                all_sections.append((doc_path, doc_file, section))

        if not all_sections:
            return []

        # Generate content IDs for all sections
        section_content_ids = [
            self._make_section_content_id(doc_path, section.index)
            for doc_path, _, section in all_sections
        ]

        # Check which ones have embeddings
        placeholders = ",".join([f"${i+1}" for i in range(len(section_content_ids))])
        existing_result = await query_raw_with_schema(
            f"""
            SELECT "contentId"
            FROM {{schema_prefix}}"UnifiedContentEmbedding"
            WHERE "contentType" = 'DOCUMENTATION'::{{schema_prefix}}"ContentType"
              AND "contentId" = ANY(ARRAY[{placeholders}])
            """,
            *section_content_ids,
        )

        existing_ids = {row["contentId"] for row in existing_result}

        # Filter to missing sections
        missing_sections = [
            (doc_path, doc_file, section, content_id)
            for (doc_path, doc_file, section), content_id in zip(
                all_sections, section_content_ids
            )
            if content_id not in existing_ids
        ]

        # Convert to ContentItem (up to batch_size)
        items = []
        for doc_path, doc_file, section, content_id in missing_sections[:batch_size]:
            try:
                # Get document title for context
                doc_title = self._extract_doc_title(doc_file)

                # Build searchable text with context
                # Include doc title and section title for better search relevance
                searchable_text = f"{doc_title} - {section.title}\n\n{section.content}"

                items.append(
                    ContentItem(
                        content_id=content_id,
                        content_type=ContentType.DOCUMENTATION,
                        searchable_text=searchable_text,
                        metadata={
                            "doc_title": doc_title,
                            "section_title": section.title,
                            "section_index": section.index,
                            "heading_level": section.level,
                            "path": doc_path,
                        },
                        user_id=None,  # Documentation is public
                    )
                )
            except Exception as e:
                logger.warning(f"Failed to process section {content_id}: {e}")
                continue

        return items

    def _get_all_section_content_ids(self, docs_root: Path) -> set[str]:
        """Get all current section content IDs from the docs directory.

        Used for stats and cleanup to know what sections should exist.
        """
        all_docs = list(docs_root.rglob("*.md")) + list(docs_root.rglob("*.mdx"))
        content_ids = set()

        for doc_file in all_docs:
            doc_path = str(doc_file.relative_to(docs_root))
            sections = self._chunk_markdown_by_headings(doc_file)
            for section in sections:
                content_ids.add(self._make_section_content_id(doc_path, section.index))

        return content_ids

    async def get_stats(self) -> dict[str, int]:
        """Get statistics about documentation embedding coverage.

        Counts sections (not documents) since each section gets its own embedding.
        """
        docs_root = self._get_docs_root()

        if not docs_root.exists():
            return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}

        # Get all section content IDs
        all_section_ids = self._get_all_section_content_ids(docs_root)
        total_sections = len(all_section_ids)

        if total_sections == 0:
            return {"total": 0, "with_embeddings": 0, "without_embeddings": 0}

        # Count embeddings in database for DOCUMENTATION type
        embedded_result = await query_raw_with_schema(
            """
            SELECT COUNT(*) as count
            FROM {schema_prefix}"UnifiedContentEmbedding"
            WHERE "contentType" = 'DOCUMENTATION'::{schema_prefix}"ContentType"
            """
        )

        with_embeddings = embedded_result[0]["count"] if embedded_result else 0

        return {
            "total": total_sections,
            "with_embeddings": with_embeddings,
            "without_embeddings": total_sections - with_embeddings,
        }


# Content handler registry
CONTENT_HANDLERS: dict[ContentType, ContentHandler] = {
    ContentType.STORE_AGENT: StoreAgentHandler(),
    ContentType.BLOCK: BlockHandler(),
    ContentType.DOCUMENTATION: DocumentationHandler(),
}
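
# Usage sketch (assumes an async context with DB access; not part of the
# original file): iterate the registry to report embedding coverage per type.
#
#   async def report_coverage() -> None:
#       for content_type, handler in CONTENT_HANDLERS.items():
#           stats = await handler.get_stats()
#           print(f"{content_type.value}: {stats}")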
@@ -1,215 +0,0 @@
"""
Integration tests for content handlers using real DB.

Run with: poetry run pytest backend/api/features/store/content_handlers_integration_test.py -xvs

These tests use the real database but mock OpenAI calls.
"""

from unittest.mock import patch

import pytest

from backend.api.features.store.content_handlers import (
    CONTENT_HANDLERS,
    BlockHandler,
    DocumentationHandler,
    StoreAgentHandler,
)
from backend.api.features.store.embeddings import (
    EMBEDDING_DIM,
    backfill_all_content_types,
    ensure_content_embedding,
    get_embedding_stats,
)


@pytest.mark.asyncio(loop_scope="session")
async def test_store_agent_handler_real_db():
    """Test StoreAgentHandler with real database queries."""
    handler = StoreAgentHandler()

    # Get stats from real DB
    stats = await handler.get_stats()

    # Stats should have correct structure
    assert "total" in stats
    assert "with_embeddings" in stats
    assert "without_embeddings" in stats
    assert stats["total"] >= 0
    assert stats["with_embeddings"] >= 0
    assert stats["without_embeddings"] >= 0

    # Get missing items (max 1 to keep test fast)
    items = await handler.get_missing_items(batch_size=1)

    # Items should be list (may be empty if all have embeddings)
    assert isinstance(items, list)

    if items:
        item = items[0]
        assert item.content_id is not None
        assert item.content_type.value == "STORE_AGENT"
        assert item.searchable_text != ""
        assert item.user_id is None


@pytest.mark.asyncio(loop_scope="session")
async def test_block_handler_real_db():
    """Test BlockHandler with real database queries."""
    handler = BlockHandler()

    # Get stats from real DB
    stats = await handler.get_stats()

    # Stats should have correct structure
    assert "total" in stats
    assert "with_embeddings" in stats
    assert "without_embeddings" in stats
    assert stats["total"] >= 0  # non-negative; typically positive since blocks ship with the platform
    assert stats["with_embeddings"] >= 0
    assert stats["without_embeddings"] >= 0

    # Get missing items (max 1 to keep test fast)
    items = await handler.get_missing_items(batch_size=1)

    # Items should be list
    assert isinstance(items, list)

    if items:
        item = items[0]
        assert item.content_id is not None  # Should be block UUID
        assert item.content_type.value == "BLOCK"
        assert item.searchable_text != ""
        assert item.user_id is None


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_real_fs():
    """Test DocumentationHandler with real filesystem."""
    handler = DocumentationHandler()

    # Get stats from real filesystem
    stats = await handler.get_stats()

    # Stats should have correct structure
    assert "total" in stats
    assert "with_embeddings" in stats
    assert "without_embeddings" in stats
    assert stats["total"] >= 0
    assert stats["with_embeddings"] >= 0
    assert stats["without_embeddings"] >= 0

    # Get missing items (max 1 to keep test fast)
    items = await handler.get_missing_items(batch_size=1)

    # Items should be list
    assert isinstance(items, list)

    if items:
        item = items[0]
        assert item.content_id is not None  # Should be relative path
        assert item.content_type.value == "DOCUMENTATION"
        assert item.searchable_text != ""
        assert item.user_id is None


@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_stats_all_types():
    """Test get_embedding_stats aggregates all content types."""
    stats = await get_embedding_stats()

    # Should have structure with by_type and totals
    assert "by_type" in stats
    assert "totals" in stats

    # Check each content type is present
    by_type = stats["by_type"]
    assert "STORE_AGENT" in by_type
    assert "BLOCK" in by_type
    assert "DOCUMENTATION" in by_type

    # Check totals are aggregated
    totals = stats["totals"]
    assert totals["total"] >= 0
    assert totals["with_embeddings"] >= 0
    assert totals["without_embeddings"] >= 0
    assert "coverage_percent" in totals


@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
async def test_ensure_content_embedding_blocks(mock_generate):
    """Test creating embeddings for blocks (mocked OpenAI)."""
    # Mock OpenAI to return fake embedding
    mock_generate.return_value = [0.1] * EMBEDDING_DIM

    # Get one block without embedding
    handler = BlockHandler()
    items = await handler.get_missing_items(batch_size=1)

    if not items:
        pytest.skip("No blocks without embeddings")

    item = items[0]

    # Try to create embedding (OpenAI mocked)
    result = await ensure_content_embedding(
        content_type=item.content_type,
        content_id=item.content_id,
        searchable_text=item.searchable_text,
        metadata=item.metadata,
        user_id=item.user_id,
    )

    # Should succeed with mocked OpenAI
    assert result is True
    mock_generate.assert_called_once()


@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
async def test_backfill_all_content_types_dry_run(mock_generate):
    """Test backfill_all_content_types processes all handlers in order."""
    # Mock OpenAI to return fake embedding
    mock_generate.return_value = [0.1] * EMBEDDING_DIM

    # Run backfill with batch_size=1 to process max 1 per type
    result = await backfill_all_content_types(batch_size=1)

    # Should have results for all content types
    assert "by_type" in result
    assert "totals" in result

    by_type = result["by_type"]
    assert "BLOCK" in by_type
    assert "STORE_AGENT" in by_type
    assert "DOCUMENTATION" in by_type

    # Each type should have correct structure
    for content_type, type_result in by_type.items():
        assert "processed" in type_result
        assert "success" in type_result
        assert "failed" in type_result

    # Totals should aggregate
    totals = result["totals"]
    assert totals["processed"] >= 0
    assert totals["success"] >= 0
    assert totals["failed"] >= 0


@pytest.mark.asyncio(loop_scope="session")
async def test_content_handler_registry():
    """Test all handlers are registered in correct order."""
    from prisma.enums import ContentType

    # All three types should be registered
    assert ContentType.STORE_AGENT in CONTENT_HANDLERS
    assert ContentType.BLOCK in CONTENT_HANDLERS
    assert ContentType.DOCUMENTATION in CONTENT_HANDLERS

    # Check handler types
    assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
    assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
    assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)
@@ -1,391 +0,0 @@
"""
E2E tests for content handlers (blocks, store agents, documentation).

Tests the full flow: discovering content → generating embeddings → storing.
"""

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from prisma.enums import ContentType

from backend.api.features.store.content_handlers import (
    CONTENT_HANDLERS,
    BlockHandler,
    DocumentationHandler,
    StoreAgentHandler,
)


@pytest.mark.asyncio(loop_scope="session")
async def test_store_agent_handler_get_missing_items(mocker):
    """Test StoreAgentHandler fetches approved agents without embeddings."""
    handler = StoreAgentHandler()

    # Mock database query
    mock_missing = [
        {
            "id": "agent-1",
            "name": "Test Agent",
            "description": "A test agent",
            "subHeading": "Test heading",
            "categories": ["AI", "Testing"],
        }
    ]

    with patch(
        "backend.api.features.store.content_handlers.query_raw_with_schema",
        return_value=mock_missing,
    ):
        items = await handler.get_missing_items(batch_size=10)

    assert len(items) == 1
    assert items[0].content_id == "agent-1"
    assert items[0].content_type == ContentType.STORE_AGENT
    assert "Test Agent" in items[0].searchable_text
    assert "A test agent" in items[0].searchable_text
    assert items[0].metadata["name"] == "Test Agent"
    assert items[0].user_id is None


@pytest.mark.asyncio(loop_scope="session")
async def test_store_agent_handler_get_stats(mocker):
    """Test StoreAgentHandler returns correct stats."""
    handler = StoreAgentHandler()

    # Mock approved count query
    mock_approved = [{"count": 50}]
    # Mock embedded count query
    mock_embedded = [{"count": 30}]

    with patch(
        "backend.api.features.store.content_handlers.query_raw_with_schema",
        side_effect=[mock_approved, mock_embedded],
    ):
        stats = await handler.get_stats()

    assert stats["total"] == 50
    assert stats["with_embeddings"] == 30
    assert stats["without_embeddings"] == 20


@pytest.mark.asyncio(loop_scope="session")
async def test_block_handler_get_missing_items(mocker):
    """Test BlockHandler discovers blocks without embeddings."""
    handler = BlockHandler()

    # Mock get_blocks to return test blocks
    mock_block_class = MagicMock()
    mock_block_instance = MagicMock()
    mock_block_instance.name = "Calculator Block"
    mock_block_instance.description = "Performs calculations"
    mock_block_instance.categories = [MagicMock(value="MATH")]
    mock_block_instance.disabled = False
    mock_block_instance.input_schema.model_json_schema.return_value = {
        "properties": {"expression": {"description": "Math expression to evaluate"}}
    }
    mock_block_class.return_value = mock_block_instance

    mock_blocks = {"block-uuid-1": mock_block_class}

    # Mock existing embeddings query (no embeddings exist)
    mock_existing = []

    with patch(
        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=mock_existing,
        ):
            items = await handler.get_missing_items(batch_size=10)

    assert len(items) == 1
    assert items[0].content_id == "block-uuid-1"
    assert items[0].content_type == ContentType.BLOCK
    assert "Calculator Block" in items[0].searchable_text
    assert "Performs calculations" in items[0].searchable_text
    assert "MATH" in items[0].searchable_text
    assert "expression: Math expression" in items[0].searchable_text
    assert items[0].user_id is None


@pytest.mark.asyncio(loop_scope="session")
async def test_block_handler_get_stats(mocker):
    """Test BlockHandler returns correct stats."""
    handler = BlockHandler()

    # Mock get_blocks - each block class returns an instance with disabled=False
    def make_mock_block_class():
        mock_class = MagicMock()
        mock_instance = MagicMock()
        mock_instance.disabled = False
        mock_class.return_value = mock_instance
        return mock_class

    mock_blocks = {
        "block-1": make_mock_block_class(),
        "block-2": make_mock_block_class(),
        "block-3": make_mock_block_class(),
    }

    # Mock embedded count query (2 blocks have embeddings)
    mock_embedded = [{"count": 2}]

    with patch(
        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=mock_embedded,
        ):
            stats = await handler.get_stats()

    assert stats["total"] == 3
    assert stats["with_embeddings"] == 2
    assert stats["without_embeddings"] == 1


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_get_missing_items(tmp_path, mocker):
    """Test DocumentationHandler discovers docs without embeddings."""
    handler = DocumentationHandler()

    # Create temporary docs directory with test files
    docs_root = tmp_path / "docs"
    docs_root.mkdir()

    (docs_root / "guide.md").write_text("# Getting Started\n\nThis is a guide.")
    (docs_root / "api.mdx").write_text("# API Reference\n\nAPI documentation.")

    # Mock _get_docs_root to return temp dir
    with patch.object(handler, "_get_docs_root", return_value=docs_root):
        # Mock existing embeddings query (no embeddings exist)
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=[],
        ):
            items = await handler.get_missing_items(batch_size=10)

    assert len(items) == 2

    # Check guide.md (content_id format: doc_path::section_index)
    guide_item = next(
        (item for item in items if item.content_id == "guide.md::0"), None
    )
    assert guide_item is not None
    assert guide_item.content_type == ContentType.DOCUMENTATION
    assert "Getting Started" in guide_item.searchable_text
    assert "This is a guide" in guide_item.searchable_text
    assert guide_item.metadata["doc_title"] == "Getting Started"
    assert guide_item.user_id is None

    # Check api.mdx (content_id format: doc_path::section_index)
    api_item = next(
        (item for item in items if item.content_id == "api.mdx::0"), None
    )
    assert api_item is not None
    assert "API Reference" in api_item.searchable_text


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_get_stats(tmp_path, mocker):
    """Test DocumentationHandler returns correct stats."""
    handler = DocumentationHandler()

    # Create temporary docs directory
    docs_root = tmp_path / "docs"
    docs_root.mkdir()
    (docs_root / "doc1.md").write_text("# Doc 1")
    (docs_root / "doc2.md").write_text("# Doc 2")
    (docs_root / "doc3.mdx").write_text("# Doc 3")

    # Mock embedded count query (1 doc has embedding)
    mock_embedded = [{"count": 1}]

    with patch.object(handler, "_get_docs_root", return_value=docs_root):
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=mock_embedded,
        ):
            stats = await handler.get_stats()

    assert stats["total"] == 3
    assert stats["with_embeddings"] == 1
    assert stats["without_embeddings"] == 2


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_title_extraction(tmp_path):
    """Test DocumentationHandler extracts title from markdown heading."""
    handler = DocumentationHandler()

    # Test with heading
    doc_with_heading = tmp_path / "with_heading.md"
    doc_with_heading.write_text("# My Title\n\nContent here")
    title = handler._extract_doc_title(doc_with_heading)
    assert title == "My Title"

    # Test without heading
    doc_without_heading = tmp_path / "no-heading.md"
    doc_without_heading.write_text("Just content, no heading")
    title = handler._extract_doc_title(doc_without_heading)
    assert title == "No Heading"  # Uses filename


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_markdown_chunking(tmp_path):
    """Test DocumentationHandler chunks markdown by headings."""
    handler = DocumentationHandler()

    # Test document with multiple sections
    doc_with_sections = tmp_path / "sections.md"
    doc_with_sections.write_text(
        "# Document Title\n\n"
        "Intro paragraph.\n\n"
        "## Section One\n\n"
        "Content for section one.\n\n"
        "## Section Two\n\n"
        "Content for section two.\n"
    )
    sections = handler._chunk_markdown_by_headings(doc_with_sections)

    # Should have 3 sections: intro (with doc title), section one, section two
    assert len(sections) == 3
    assert sections[0].title == "Document Title"
    assert sections[0].index == 0
    assert "Intro paragraph" in sections[0].content

    assert sections[1].title == "Section One"
    assert sections[1].index == 1
    assert "Content for section one" in sections[1].content

    assert sections[2].title == "Section Two"
    assert sections[2].index == 2
    assert "Content for section two" in sections[2].content

    # Test document without headings
    doc_no_sections = tmp_path / "no-sections.md"
    doc_no_sections.write_text("Just plain content without any headings.")
    sections = handler._chunk_markdown_by_headings(doc_no_sections)
    assert len(sections) == 1
    assert sections[0].index == 0
    assert "Just plain content" in sections[0].content


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_section_content_ids():
    """Test DocumentationHandler creates and parses section content IDs."""
    handler = DocumentationHandler()

    # Test making content ID
    content_id = handler._make_section_content_id("docs/guide.md", 2)
    assert content_id == "docs/guide.md::2"

    # Test parsing content ID
    doc_path, section_index = handler._parse_section_content_id("docs/guide.md::2")
    assert doc_path == "docs/guide.md"
    assert section_index == 2

    # Test parsing legacy format (no section index)
    doc_path, section_index = handler._parse_section_content_id("docs/old-format.md")
    assert doc_path == "docs/old-format.md"
    assert section_index == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_content_handlers_registry():
    """Test all content types are registered."""
    assert ContentType.STORE_AGENT in CONTENT_HANDLERS
    assert ContentType.BLOCK in CONTENT_HANDLERS
    assert ContentType.DOCUMENTATION in CONTENT_HANDLERS

    assert isinstance(CONTENT_HANDLERS[ContentType.STORE_AGENT], StoreAgentHandler)
    assert isinstance(CONTENT_HANDLERS[ContentType.BLOCK], BlockHandler)
    assert isinstance(CONTENT_HANDLERS[ContentType.DOCUMENTATION], DocumentationHandler)


@pytest.mark.asyncio(loop_scope="session")
async def test_block_handler_handles_missing_attributes():
    """Test BlockHandler gracefully handles blocks with missing attributes."""
    handler = BlockHandler()

    # Mock block with minimal attributes
    mock_block_class = MagicMock()
    mock_block_instance = MagicMock()
    mock_block_instance.name = "Minimal Block"
    mock_block_instance.disabled = False
    # No description, categories, or schema
    del mock_block_instance.description
    del mock_block_instance.categories
    del mock_block_instance.input_schema
    mock_block_class.return_value = mock_block_instance

    mock_blocks = {"block-minimal": mock_block_class}

    with patch(
        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=[],
        ):
            items = await handler.get_missing_items(batch_size=10)

    assert len(items) == 1
    assert items[0].searchable_text == "Minimal Block"


@pytest.mark.asyncio(loop_scope="session")
async def test_block_handler_skips_failed_blocks():
    """Test BlockHandler skips blocks that fail to instantiate."""
    handler = BlockHandler()

    # Mock one good block and one bad block
    good_block = MagicMock()
    good_instance = MagicMock()
    good_instance.name = "Good Block"
    good_instance.description = "Works fine"
    good_instance.categories = []
    good_instance.disabled = False
    good_block.return_value = good_instance

    bad_block = MagicMock()
    bad_block.side_effect = Exception("Instantiation failed")

    mock_blocks = {"good-block": good_block, "bad-block": bad_block}

    with patch(
        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
            "backend.api.features.store.content_handlers.query_raw_with_schema",
            return_value=[],
        ):
            items = await handler.get_missing_items(batch_size=10)

    # Should only get the good block
    assert len(items) == 1
    assert items[0].content_id == "good-block"


@pytest.mark.asyncio(loop_scope="session")
async def test_documentation_handler_missing_docs_directory():
    """Test DocumentationHandler handles missing docs directory gracefully."""
    handler = DocumentationHandler()

    # Mock _get_docs_root to return non-existent path
    fake_path = Path("/nonexistent/docs")
    with patch.object(handler, "_get_docs_root", return_value=fake_path):
        items = await handler.get_missing_items(batch_size=10)
        assert items == []

        stats = await handler.get_stats()
        assert stats["total"] == 0
        assert stats["with_embeddings"] == 0
        assert stats["without_embeddings"] == 0
@@ -1,965 +0,0 @@
"""
Unified Content Embeddings Service

Handles generation and storage of OpenAI embeddings for all content types
(store listings, blocks, documentation, library agents) to enable semantic/hybrid search.
"""

import asyncio
import logging
import time
from typing import Any

import prisma
from prisma.enums import ContentType
from tiktoken import encoding_for_model

from backend.api.features.store.content_handlers import CONTENT_HANDLERS
from backend.data.db import execute_raw_with_schema, query_raw_with_schema
from backend.util.clients import get_openai_client
from backend.util.json import dumps

logger = logging.getLogger(__name__)

# OpenAI embedding model configuration
EMBEDDING_MODEL = "text-embedding-3-small"
# Embedding dimension for the model above
# text-embedding-3-small: 1536, text-embedding-3-large: 3072
EMBEDDING_DIM = 1536
# OpenAI's embedding input limit is 8,192 tokens; 8,191 leaves a 1-token safety buffer
EMBEDDING_MAX_TOKENS = 8191


def build_searchable_text(
    name: str,
    description: str,
    sub_heading: str,
    categories: list[str],
) -> str:
    """
    Build searchable text from listing version fields.

    Combines relevant fields into a single string for embedding.
    """
    parts = []

    # Name is important - include it
    if name:
        parts.append(name)

    # Sub-heading provides context
    if sub_heading:
        parts.append(sub_heading)

    # Description is the main content
    if description:
        parts.append(description)

    # Categories help with semantic matching
    if categories:
        parts.append(" ".join(categories))

    return " ".join(parts)

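# Example (illustrative values): fields are concatenated in the order
# name, sub-heading, description, categories.
#
#   build_searchable_text(
#       name="Email Summarizer",
#       description="Summarizes long email threads.",
#       sub_heading="Inbox zero, faster",
#       categories=["PRODUCTIVITY", "AI"],
#   )
#   -> "Email Summarizer Inbox zero, faster Summarizes long email threads. PRODUCTIVITY AI"
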
async def generate_embedding(text: str) -> list[float]:
    """
    Generate embedding for text using OpenAI API.

    Raises exceptions on failure - caller should handle.
    """
    client = get_openai_client()
    if not client:
        raise RuntimeError("openai_internal_api_key not set, cannot generate embedding")

    # Truncate text to token limit using tiktoken
    # Character-based truncation is insufficient because token ratios vary by content type
    enc = encoding_for_model(EMBEDDING_MODEL)
    tokens = enc.encode(text)
    if len(tokens) > EMBEDDING_MAX_TOKENS:
        tokens = tokens[:EMBEDDING_MAX_TOKENS]
        truncated_text = enc.decode(tokens)
        logger.info(
            f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
        )
    else:
        truncated_text = text

    start_time = time.time()
    response = await client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=truncated_text,
    )
    latency_ms = (time.time() - start_time) * 1000

    embedding = response.data[0].embedding
    logger.info(
        f"Generated embedding: {len(embedding)} dims, "
        f"{len(tokens)} tokens, {latency_ms:.0f}ms"
    )
    return embedding


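# Why token-based truncation: character slicing can overshoot the model's token
# limit on token-dense text. A minimal standalone sketch (requires tiktoken):
#
#   from tiktoken import encoding_for_model
#
#   enc = encoding_for_model("text-embedding-3-small")
#   tokens = enc.encode("word " * 20000)
#   safe_text = enc.decode(tokens[:8191])  # cuts at a token boundary
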
async def store_embedding(
    version_id: str,
    embedding: list[float],
    tx: prisma.Prisma | None = None,
) -> bool:
    """
    Store embedding in the database.

    BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
    DEPRECATED: Use ensure_embedding() instead (includes searchable_text).
    """
    return await store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=version_id,
        embedding=embedding,
        searchable_text="",  # Empty for backward compat; ensure_embedding() populates this
        metadata=None,
        user_id=None,  # Store agents are public
        tx=tx,
    )


async def store_content_embedding(
    content_type: ContentType,
    content_id: str,
    embedding: list[float],
    searchable_text: str,
    metadata: dict | None = None,
    user_id: str | None = None,
    tx: prisma.Prisma | None = None,
) -> bool:
    """
    Store embedding in the unified content embeddings table.

    New function for unified content embedding storage.
    Uses raw SQL since Prisma doesn't natively support pgvector.

    Raises exceptions on failure - caller should handle.
    """
    client = tx if tx else prisma.get_client()

    # Convert embedding to PostgreSQL vector format
    embedding_str = embedding_to_vector_string(embedding)
    metadata_json = dumps(metadata or {})

    # Upsert the embedding
    # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
    # Use unqualified ::vector - pgvector is in search_path on all environments
    await execute_raw_with_schema(
        """
        INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
            "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
        )
        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
        ON CONFLICT ("contentType", "contentId", "userId")
        DO UPDATE SET
            "embedding" = $4::vector,
            "searchableText" = $5,
            "metadata" = $6::jsonb,
            "updatedAt" = NOW()
        WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
          AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
          AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
        """,
        content_type,
        content_id,
        user_id,
        embedding_str,
        searchable_text,
        metadata_json,
        client=client,
    )

    logger.info(f"Stored embedding for {content_type}:{content_id}")
    return True


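# `embedding_to_vector_string` is defined elsewhere in this module (not shown
# in this hunk); a minimal sketch of the expected behavior, assuming pgvector's
# bracketed-CSV literal format:
#
#   def embedding_to_vector_string(embedding: list[float]) -> str:
#       return "[" + ",".join(str(x) for x in embedding) + "]"
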
async def get_embedding(version_id: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Returns dict with storeListingVersionId, embedding, timestamps or None if not found.
|
||||
"""
|
||||
result = await get_content_embedding(
|
||||
ContentType.STORE_AGENT, version_id, user_id=None
|
||||
)
|
||||
if result:
|
||||
# Transform to old format for backward compatibility
|
||||
return {
|
||||
"storeListingVersionId": result["contentId"],
|
||||
"embedding": result["embedding"],
|
||||
"createdAt": result["createdAt"],
|
||||
"updatedAt": result["updatedAt"],
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
async def get_content_embedding(
|
||||
content_type: ContentType, content_id: str, user_id: str | None = None
|
||||
) -> dict[str, Any] | None:
|
||||
"""
|
||||
Retrieve embedding record for any content type.
|
||||
|
||||
New function for unified content embedding retrieval.
|
||||
Returns dict with contentType, contentId, embedding, timestamps or None if not found.
|
||||
|
||||
Raises exceptions on failure - caller should handle.
|
||||
"""
|
||||
result = await query_raw_with_schema(
|
||||
"""
|
||||
SELECT
|
||||
"contentType",
|
||||
"contentId",
|
||||
"userId",
|
||||
"embedding"::text as "embedding",
|
||||
"searchableText",
|
||||
"metadata",
|
||||
"createdAt",
|
||||
"updatedAt"
|
||||
FROM {schema_prefix}"UnifiedContentEmbedding"
|
||||
WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
|
||||
""",
|
||||
content_type,
|
||||
content_id,
|
||||
user_id,
|
||||
)
|
||||
|
||||
if result and len(result) > 0:
|
||||
return result[0]
|
||||
return None
|
||||
|
||||
|
||||
async def ensure_embedding(
|
||||
version_id: str,
|
||||
name: str,
|
||||
description: str,
|
||||
sub_heading: str,
|
||||
categories: list[str],
|
||||
force: bool = False,
|
||||
tx: prisma.Prisma | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Ensure an embedding exists for the listing version.
|
||||
|
||||
Creates embedding if missing. Use force=True to regenerate.
|
||||
Backward-compatible wrapper for store listings.
|
||||
|
||||
Args:
|
||||
version_id: The StoreListingVersion ID
|
||||
name: Agent name
|
||||
description: Agent description
|
||||
sub_heading: Agent sub-heading
|
||||
categories: Agent categories
|
||||
force: Force regeneration even if embedding exists
|
||||
tx: Optional transaction client
|
||||
|
||||
Returns:
|
||||
True if embedding exists/was created
|
||||
|
||||
Raises exceptions on failure - caller should handle.
|
||||
"""
|
||||
# Check if embedding already exists
|
||||
if not force:
|
||||
existing = await get_embedding(version_id)
|
||||
if existing and existing.get("embedding"):
|
||||
logger.debug(f"Embedding for version {version_id} already exists")
|
||||
return True
|
||||
|
||||
# Build searchable text for embedding
|
||||
searchable_text = build_searchable_text(name, description, sub_heading, categories)
|
||||
|
||||
# Generate new embedding
|
||||
embedding = await generate_embedding(searchable_text)
|
||||
|
||||
# Store the embedding with metadata using new function
|
||||
metadata = {
|
||||
"name": name,
|
||||
"subHeading": sub_heading,
|
||||
"categories": categories,
|
||||
}
|
||||
return await store_content_embedding(
|
||||
content_type=ContentType.STORE_AGENT,
|
||||
content_id=version_id,
|
||||
embedding=embedding,
|
||||
searchable_text=searchable_text,
|
||||
metadata=metadata,
|
||||
user_id=None, # Store agents are public
|
||||
tx=tx,
|
||||
)
|
||||
|
||||
|
||||
async def delete_embedding(version_id: str) -> bool:
|
||||
"""
|
||||
Delete embedding for a listing version.
|
||||
|
||||
BACKWARD COMPATIBILITY: Maintained for existing store listing usage.
|
||||
Note: This is usually handled automatically by CASCADE delete,
|
||||
but provided for manual cleanup if needed.
|
||||
"""
|
||||
return await delete_content_embedding(ContentType.STORE_AGENT, version_id)
|
||||
|
||||
|
||||

async def delete_content_embedding(
    content_type: ContentType, content_id: str, user_id: str | None = None
) -> bool:
    """
    Delete embedding for any content type.

    New function for unified content embedding deletion.
    Note: This is usually handled automatically by CASCADE delete,
    but provided for manual cleanup if needed.

    Args:
        content_type: The type of content (STORE_AGENT, LIBRARY_AGENT, etc.)
        content_id: The unique identifier for the content
        user_id: Optional user ID. For public content (STORE_AGENT, BLOCK), pass None.
            For user-scoped content (LIBRARY_AGENT), pass the user's ID to avoid
            deleting embeddings belonging to other users.

    Returns:
        True if deletion succeeded, False otherwise
    """
    try:
        client = prisma.get_client()

        await execute_raw_with_schema(
            """
            DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
            WHERE "contentType" = $1::{schema_prefix}"ContentType"
            AND "contentId" = $2
            AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
            """,
            content_type,
            content_id,
            user_id,
            client=client,
        )

        user_str = f" (user: {user_id})" if user_id else ""
        logger.info(f"Deleted embedding for {content_type}:{content_id}{user_str}")
        return True

    except Exception as e:
        logger.error(f"Failed to delete embedding for {content_type}:{content_id}: {e}")
        return False
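

# Usage sketch (illustrative, not part of the original module): deleting a
# public embedding vs. a user-scoped one. The IDs are hypothetical.
async def _example_delete_embeddings() -> None:
    # Public content: pass user_id=None so the NULL-userId row is matched
    await delete_content_embedding(ContentType.BLOCK, "example-block-id")
    # User-scoped content: pass the owner's ID so other users' rows are untouched
    await delete_content_embedding(
        ContentType.LIBRARY_AGENT, "example-agent-id", user_id="example-user-id"
    )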


async def get_embedding_stats() -> dict[str, Any]:
    """
    Get statistics about embedding coverage for all content types.

    Returns stats per content type and overall totals.
    """
    try:
        stats_by_type = {}
        total_items = 0
        total_with_embeddings = 0
        total_without_embeddings = 0

        # Aggregate stats from all handlers
        for content_type, handler in CONTENT_HANDLERS.items():
            try:
                stats = await handler.get_stats()
                stats_by_type[content_type.value] = {
                    "total": stats["total"],
                    "with_embeddings": stats["with_embeddings"],
                    "without_embeddings": stats["without_embeddings"],
                    "coverage_percent": (
                        round(stats["with_embeddings"] / stats["total"] * 100, 1)
                        if stats["total"] > 0
                        else 0
                    ),
                }

                total_items += stats["total"]
                total_with_embeddings += stats["with_embeddings"]
                total_without_embeddings += stats["without_embeddings"]

            except Exception as e:
                logger.error(f"Failed to get stats for {content_type.value}: {e}")
                stats_by_type[content_type.value] = {
                    "total": 0,
                    "with_embeddings": 0,
                    "without_embeddings": 0,
                    "coverage_percent": 0,
                    "error": str(e),
                }

        return {
            "by_type": stats_by_type,
            "totals": {
                "total": total_items,
                "with_embeddings": total_with_embeddings,
                "without_embeddings": total_without_embeddings,
                "coverage_percent": (
                    round(total_with_embeddings / total_items * 100, 1)
                    if total_items > 0
                    else 0
                ),
            },
        }

    except Exception as e:
        logger.error(f"Failed to get embedding stats: {e}")
        return {
            "by_type": {},
            "totals": {
                "total": 0,
                "with_embeddings": 0,
                "without_embeddings": 0,
                "coverage_percent": 0,
            },
            "error": str(e),
        }
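

# Usage sketch (illustrative, not part of the original module): reading the
# stats structure returned above, e.g. to log coverage per content type.
async def _example_log_embedding_coverage() -> None:
    stats = await get_embedding_stats()
    for type_name, type_stats in stats["by_type"].items():
        logger.info(
            f"{type_name}: {type_stats['with_embeddings']}/{type_stats['total']} "
            f"({type_stats['coverage_percent']}%) embedded"
        )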


async def backfill_missing_embeddings(batch_size: int = 10) -> dict[str, Any]:
    """
    Generate embeddings for approved listings that don't have them.

    BACKWARD COMPATIBILITY: Maintained for existing usage.
    This now delegates to backfill_all_content_types() to process all content types.

    Args:
        batch_size: Number of embeddings to generate per content type

    Returns:
        Dict with success/failure counts aggregated across all content types
    """
    # Delegate to the new generic backfill system
    result = await backfill_all_content_types(batch_size)

    # Return in the old format for backward compatibility
    return result["totals"]


async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
    """
    Generate embeddings for all content types using registered handlers.

    Processes content types in order: BLOCK → STORE_AGENT → DOCUMENTATION.
    This ensures foundational content (blocks) is searchable first.

    Args:
        batch_size: Number of embeddings to generate per content type

    Returns:
        Dict with stats per content type and overall totals
    """
    results_by_type = {}
    total_processed = 0
    total_success = 0
    total_failed = 0
    all_errors: dict[str, int] = {}  # Aggregate errors across all content types

    # Process content types in explicit order
    processing_order = [
        ContentType.BLOCK,
        ContentType.STORE_AGENT,
        ContentType.DOCUMENTATION,
    ]

    for content_type in processing_order:
        handler = CONTENT_HANDLERS.get(content_type)
        if not handler:
            logger.warning(f"No handler registered for {content_type.value}")
            continue
        try:
            logger.info(f"Processing {content_type.value} content type...")

            # Get missing items from the handler
            missing_items = await handler.get_missing_items(batch_size)

            if not missing_items:
                results_by_type[content_type.value] = {
                    "processed": 0,
                    "success": 0,
                    "failed": 0,
                    "message": "No missing embeddings",
                }
                continue

            # Process embeddings concurrently for better performance
            embedding_tasks = [
                ensure_content_embedding(
                    content_type=item.content_type,
                    content_id=item.content_id,
                    searchable_text=item.searchable_text,
                    metadata=item.metadata,
                    user_id=item.user_id,
                )
                for item in missing_items
            ]

            results = await asyncio.gather(*embedding_tasks, return_exceptions=True)

            success = sum(1 for result in results if result is True)
            failed = len(results) - success

            # Aggregate errors across all content types
            if failed > 0:
                for result in results:
                    if isinstance(result, Exception):
                        error_key = f"{type(result).__name__}: {str(result)}"
                        all_errors[error_key] = all_errors.get(error_key, 0) + 1

            results_by_type[content_type.value] = {
                "processed": len(missing_items),
                "success": success,
                "failed": failed,
                "message": f"Backfilled {success} embeddings, {failed} failed",
            }

            total_processed += len(missing_items)
            total_success += success
            total_failed += failed

            logger.info(
                f"{content_type.value}: processed {len(missing_items)}, "
                f"success {success}, failed {failed}"
            )

        except Exception as e:
            logger.error(f"Failed to process {content_type.value}: {e}")
            results_by_type[content_type.value] = {
                "processed": 0,
                "success": 0,
                "failed": 0,
                "error": str(e),
            }

    # Log aggregated errors once at the end
    if all_errors:
        error_details = ", ".join(
            f"{error} ({count}x)" for error, count in all_errors.items()
        )
        logger.error(f"Embedding backfill errors: {error_details}")

    return {
        "by_type": results_by_type,
        "totals": {
            "processed": total_processed,
            "success": total_success,
            "failed": total_failed,
            "message": f"Overall: {total_success} succeeded, {total_failed} failed",
        },
    }
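

# Usage sketch (illustrative, not part of the original module): running a
# backfill pass and inspecting the aggregated totals it returns.
async def _example_run_backfill() -> None:
    result = await backfill_all_content_types(batch_size=25)
    totals = result["totals"]
    logger.info(
        f"Backfill: {totals['success']} succeeded, {totals['failed']} failed "
        f"out of {totals['processed']} processed"
    )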


async def embed_query(query: str) -> list[float]:
    """
    Generate embedding for a search query.

    Same as generate_embedding but with clearer intent.
    Raises exceptions on failure; the caller should handle them.
    """
    return await generate_embedding(query)


def embedding_to_vector_string(embedding: list[float]) -> str:
    """Convert embedding list to PostgreSQL vector string format."""
    return "[" + ",".join(str(x) for x in embedding) + "]"


async def ensure_content_embedding(
    content_type: ContentType,
    content_id: str,
    searchable_text: str,
    metadata: dict | None = None,
    user_id: str | None = None,
    force: bool = False,
    tx: prisma.Prisma | None = None,
) -> bool:
    """
    Ensure an embedding exists for any content type.

    Generic function for creating embeddings for store agents, blocks, docs, etc.

    Args:
        content_type: ContentType enum value (STORE_AGENT, BLOCK, etc.)
        content_id: Unique identifier for the content
        searchable_text: Combined text for embedding generation
        metadata: Optional metadata to store with embedding
        user_id: Optional owner ID for user-scoped content; None for public content
        force: Force regeneration even if embedding exists
        tx: Optional transaction client

    Returns:
        True if embedding exists/was created

    Raises exceptions on failure; the caller should handle them.
    """
    # Check if the embedding already exists
    if not force:
        existing = await get_content_embedding(content_type, content_id, user_id)
        if existing and existing.get("embedding"):
            logger.debug(f"Embedding for {content_type}:{content_id} already exists")
            return True

    # Generate a new embedding
    embedding = await generate_embedding(searchable_text)

    # Store the embedding
    return await store_content_embedding(
        content_type=content_type,
        content_id=content_id,
        embedding=embedding,
        searchable_text=searchable_text,
        metadata=metadata or {},
        user_id=user_id,
        tx=tx,
    )
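

# Usage sketch (illustrative, not part of the original module): creating an
# embedding for a documentation section. The content ID and text here are
# hypothetical placeholders.
async def _example_ensure_doc_embedding() -> None:
    ok = await ensure_content_embedding(
        content_type=ContentType.DOCUMENTATION,
        content_id="docs/getting-started#setup",
        searchable_text="Getting Started Setup Install dependencies and run the server",
        metadata={"title": "Getting Started"},
        user_id=None,  # documentation is public
    )
    assert ok is True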


async def cleanup_orphaned_embeddings() -> dict[str, Any]:
    """
    Clean up embeddings for content that no longer exists or is no longer valid.

    Compares current content with embeddings in database and removes orphaned records:
    - STORE_AGENT: Removes embeddings for rejected/deleted store listings
    - BLOCK: Removes embeddings for blocks no longer registered
    - DOCUMENTATION: Removes embeddings for deleted doc files

    Returns:
        Dict with cleanup statistics per content type
    """
    results_by_type = {}
    total_deleted = 0

    # Cleanup orphaned embeddings for all content types
    cleanup_types = [
        ContentType.STORE_AGENT,
        ContentType.BLOCK,
        ContentType.DOCUMENTATION,
    ]

    for content_type in cleanup_types:
        try:
            handler = CONTENT_HANDLERS.get(content_type)
            if not handler:
                logger.warning(f"No handler registered for {content_type}")
                results_by_type[content_type.value] = {
                    "deleted": 0,
                    "error": "No handler registered",
                }
                continue

            # Get all current content IDs from the handler
            if content_type == ContentType.STORE_AGENT:
                # Get IDs of approved store listing versions from non-deleted listings
                valid_agents = await query_raw_with_schema(
                    """
                    SELECT slv.id
                    FROM {schema_prefix}"StoreListingVersion" slv
                    JOIN {schema_prefix}"StoreListing" sl ON slv."storeListingId" = sl.id
                    WHERE slv."submissionStatus" = 'APPROVED'
                    AND slv."isDeleted" = false
                    AND sl."isDeleted" = false
                    """,
                )
                current_ids = {row["id"] for row in valid_agents}
            elif content_type == ContentType.BLOCK:
                from backend.data.block import get_blocks

                current_ids = set(get_blocks().keys())
            elif content_type == ContentType.DOCUMENTATION:
                # Use DocumentationHandler to get section-based content IDs
                from backend.api.features.store.content_handlers import (
                    DocumentationHandler,
                )

                doc_handler = CONTENT_HANDLERS.get(ContentType.DOCUMENTATION)
                if isinstance(doc_handler, DocumentationHandler):
                    docs_root = doc_handler._get_docs_root()
                    if docs_root.exists():
                        current_ids = doc_handler._get_all_section_content_ids(
                            docs_root
                        )
                    else:
                        current_ids = set()
                else:
                    current_ids = set()
            else:
                # Skip unknown content types to avoid accidental deletion
                logger.warning(
                    f"Skipping cleanup for unknown content type: {content_type}"
                )
                results_by_type[content_type.value] = {
                    "deleted": 0,
                    "error": "Unknown content type - skipped for safety",
                }
                continue

            # Get all embedding IDs from the database
            db_embeddings = await query_raw_with_schema(
                """
                SELECT "contentId"
                FROM {schema_prefix}"UnifiedContentEmbedding"
                WHERE "contentType" = $1::{schema_prefix}"ContentType"
                """,
                content_type,
            )

            db_ids = {row["contentId"] for row in db_embeddings}

            # Find orphaned embeddings (in DB but not in current content)
            orphaned_ids = db_ids - current_ids

            if not orphaned_ids:
                logger.info(f"{content_type.value}: No orphaned embeddings found")
                results_by_type[content_type.value] = {
                    "deleted": 0,
                    "message": "No orphaned embeddings",
                }
                continue

            # Delete orphaned embeddings in batch for better performance
            orphaned_list = list(orphaned_ids)
            try:
                await execute_raw_with_schema(
                    """
                    DELETE FROM {schema_prefix}"UnifiedContentEmbedding"
                    WHERE "contentType" = $1::{schema_prefix}"ContentType"
                    AND "contentId" = ANY($2::text[])
                    """,
                    content_type,
                    orphaned_list,
                )
                deleted = len(orphaned_list)
            except Exception as e:
                logger.error(f"Failed to batch delete orphaned embeddings: {e}")
                deleted = 0

            logger.info(
                f"{content_type.value}: Deleted {deleted}/{len(orphaned_ids)} orphaned embeddings"
            )
            results_by_type[content_type.value] = {
                "deleted": deleted,
                "orphaned": len(orphaned_ids),
                "message": f"Deleted {deleted} orphaned embeddings",
            }

            total_deleted += deleted

        except Exception as e:
            logger.error(f"Failed to cleanup {content_type.value}: {e}")
            results_by_type[content_type.value] = {
                "deleted": 0,
                "error": str(e),
            }

    return {
        "by_type": results_by_type,
        "totals": {
            "deleted": total_deleted,
            "message": f"Deleted {total_deleted} orphaned embeddings",
        },
    }
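

# Usage sketch (illustrative, not part of the original module): a periodic
# maintenance pass that backfills missing embeddings and then prunes
# orphaned ones, logging the combined outcome.
async def _example_embedding_maintenance() -> None:
    backfill = await backfill_all_content_types(batch_size=50)
    cleanup = await cleanup_orphaned_embeddings()
    logger.info(
        f"Maintenance: {backfill['totals']['success']} backfilled, "
        f"{cleanup['totals']['deleted']} orphaned embeddings removed"
    )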


async def semantic_search(
    query: str,
    content_types: list[ContentType] | None = None,
    user_id: str | None = None,
    limit: int = 20,
    min_similarity: float = 0.5,
) -> list[dict[str, Any]]:
    """
    Semantic search across content types using embeddings.

    Performs vector similarity search on the UnifiedContentEmbedding table.
    Used directly for blocks/docs/library agents, or as the semantic component
    within hybrid_search for store agents.

    If embedding generation fails, falls back to lexical search on searchableText.

    Args:
        query: Search query string
        content_types: List of ContentType to search. Defaults to [BLOCK, STORE_AGENT, DOCUMENTATION]
        user_id: Optional user ID for searching private content (library agents)
        limit: Maximum number of results to return (default: 20)
        min_similarity: Minimum cosine similarity threshold (0-1, default: 0.5)

    Returns:
        List of search results with the following structure:
        [
            {
                "content_id": str,
                "content_type": str,  # "BLOCK", "STORE_AGENT", "DOCUMENTATION", or "LIBRARY_AGENT"
                "searchable_text": str,
                "metadata": dict,
                "similarity": float,  # Cosine similarity score (0-1)
            },
            ...
        ]

    Examples:
        # Search blocks only
        results = await semantic_search("calculate", content_types=[ContentType.BLOCK])

        # Search blocks and documentation
        results = await semantic_search(
            "how to use API",
            content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION]
        )

        # Search all public content (default)
        results = await semantic_search("AI agent")

        # Search user's library agents
        results = await semantic_search(
            "my custom agent",
            content_types=[ContentType.LIBRARY_AGENT],
            user_id="user123"
        )
    """
    # Default to searching all public content types
    if content_types is None:
        content_types = [
            ContentType.BLOCK,
            ContentType.STORE_AGENT,
            ContentType.DOCUMENTATION,
        ]

    # Validate inputs
    if not content_types:
        return []  # Empty content_types would cause invalid SQL (IN ())

    query = query.strip()
    if not query:
        return []

    if limit < 1:
        limit = 1
    if limit > 100:
        limit = 100

    # Generate query embedding
    try:
        query_embedding = await embed_query(query)
        # Semantic search with embeddings
        embedding_str = embedding_to_vector_string(query_embedding)

        # Build params in order: limit, then user_id (if provided), then content types
        params: list[Any] = [limit]
        user_filter = ""
        if user_id is not None:
            user_filter = 'AND "userId" = ${}'.format(len(params) + 1)
            params.append(user_id)

        # Add content type parameters and build placeholders dynamically
        content_type_start_idx = len(params) + 1
        content_type_placeholders = ", ".join(
            "$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
            for i in range(len(content_types))
        )
        params.extend([ct.value for ct in content_types])

        # Build min_similarity param index before appending
        min_similarity_idx = len(params) + 1
        params.append(min_similarity)

        # Use unqualified ::vector and <=> operator - pgvector is in search_path on all environments
        sql = (
            """
            SELECT
                "contentId" as content_id,
                "contentType" as content_type,
                "searchableText" as searchable_text,
                metadata,
                1 - (embedding <=> '"""
            + embedding_str
            + """'::vector) as similarity
            FROM {schema_prefix}"UnifiedContentEmbedding"
            WHERE "contentType" IN ("""
            + content_type_placeholders
            + """)
            """
            + user_filter
            + """
            AND 1 - (embedding <=> '"""
            + embedding_str
            + """'::vector) >= $"""
            + str(min_similarity_idx)
            + """
            ORDER BY similarity DESC
            LIMIT $1
            """
        )

        results = await query_raw_with_schema(sql, *params)
        return [
            {
                "content_id": row["content_id"],
                "content_type": row["content_type"],
                "searchable_text": row["searchable_text"],
                "metadata": row["metadata"],
                "similarity": float(row["similarity"]),
            }
            for row in results
        ]
    except Exception as e:
        logger.warning(f"Semantic search failed, falling back to lexical search: {e}")

        # Fallback to lexical search if embeddings unavailable
        params_lexical: list[Any] = [limit]
        user_filter = ""
        if user_id is not None:
            user_filter = 'AND "userId" = ${}'.format(len(params_lexical) + 1)
            params_lexical.append(user_id)

        # Add content type parameters and build placeholders dynamically
        content_type_start_idx = len(params_lexical) + 1
        content_type_placeholders_lexical = ", ".join(
            "$" + str(content_type_start_idx + i) + '::{schema_prefix}"ContentType"'
            for i in range(len(content_types))
        )
        params_lexical.extend([ct.value for ct in content_types])

        # Build query param index before appending
        query_param_idx = len(params_lexical) + 1
        params_lexical.append(f"%{query}%")

        # Use regular string (not f-string) for template to preserve {schema_prefix} placeholders
        sql_lexical = (
            """
            SELECT
                "contentId" as content_id,
                "contentType" as content_type,
                "searchableText" as searchable_text,
                metadata,
                0.0 as similarity
            FROM {schema_prefix}"UnifiedContentEmbedding"
            WHERE "contentType" IN ("""
            + content_type_placeholders_lexical
            + """)
            """
            + user_filter
            + """
            AND "searchableText" ILIKE $"""
            + str(query_param_idx)
            + """
            ORDER BY "updatedAt" DESC
            LIMIT $1
            """
        )

        try:
            results = await query_raw_with_schema(sql_lexical, *params_lexical)
            return [
                {
                    "content_id": row["content_id"],
                    "content_type": row["content_type"],
                    "searchable_text": row["searchable_text"],
                    "metadata": row["metadata"],
                    "similarity": 0.0,  # Lexical search doesn't provide similarity
                }
                for row in results
            ]
        except Exception as e:
            logger.error(f"Lexical search failed: {e}")
            return []
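

# Usage sketch (illustrative, not part of the original module): callers can
# detect the lexical fallback documented above, since fallback rows always
# carry similarity == 0.0 while true semantic hits are scored in (0, 1].
async def _example_search_with_fallback_check() -> None:
    results = await semantic_search("summarize documents", limit=5)
    used_fallback = bool(results) and all(r["similarity"] == 0.0 for r in results)
    if used_fallback:
        logger.warning("semantic_search returned lexical fallback results")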
@@ -1,669 +0,0 @@
"""
End-to-end database tests for embeddings and hybrid search.

These tests hit the actual database to verify SQL queries work correctly.
Tests cover:
1. Embedding storage (store_content_embedding)
2. Embedding retrieval (get_content_embedding)
3. Embedding deletion (delete_content_embedding)
4. Unified hybrid search across content types
5. Store agent hybrid search
"""

import uuid
from typing import AsyncGenerator

import pytest
from prisma.enums import ContentType

from backend.api.features.store import embeddings
from backend.api.features.store.embeddings import EMBEDDING_DIM
from backend.api.features.store.hybrid_search import (
    hybrid_search,
    unified_hybrid_search,
)

# ============================================================================
# Test Fixtures
# ============================================================================


@pytest.fixture
def test_content_id() -> str:
    """Generate unique content ID for test isolation."""
    return f"test-content-{uuid.uuid4()}"


@pytest.fixture
def test_user_id() -> str:
    """Generate unique user ID for test isolation."""
    return f"test-user-{uuid.uuid4()}"


@pytest.fixture
def mock_embedding() -> list[float]:
    """Generate a mock embedding vector."""
    # Create a normalized embedding vector
    import math

    raw = [float(i % 10) / 10.0 for i in range(EMBEDDING_DIM)]
    # Normalize to unit length (required for cosine similarity)
    magnitude = math.sqrt(sum(x * x for x in raw))
    return [x / magnitude for x in raw]
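

# Note (illustrative, not part of the original tests): the fixtures here
# normalize vectors to unit length, so cosine distance reduces to
# 1 - dot(a, b). A quick sanity check of that invariant:
def _example_unit_length_check(vec: list[float]) -> bool:
    import math

    return abs(math.sqrt(sum(x * x for x in vec)) - 1.0) < 1e-6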


@pytest.fixture
def similar_embedding() -> list[float]:
    """Generate an embedding similar to mock_embedding."""
    import math

    # Similar but slightly different values
    raw = [float(i % 10) / 10.0 + 0.01 for i in range(EMBEDDING_DIM)]
    magnitude = math.sqrt(sum(x * x for x in raw))
    return [x / magnitude for x in raw]


@pytest.fixture
def different_embedding() -> list[float]:
    """Generate an embedding very different from mock_embedding."""
    import math

    # Reversed pattern to be maximally different
    raw = [float((EMBEDDING_DIM - i) % 10) / 10.0 for i in range(EMBEDDING_DIM)]
    magnitude = math.sqrt(sum(x * x for x in raw))
    return [x / magnitude for x in raw]


@pytest.fixture
async def cleanup_embeddings(
    server,
) -> AsyncGenerator[list[tuple[ContentType, str, str | None]], None]:
    """
    Fixture that tracks created embeddings and cleans them up after tests.

    Yields a list to which tests can append (content_type, content_id, user_id) tuples.
    """
    created_embeddings: list[tuple[ContentType, str, str | None]] = []
    yield created_embeddings

    # Clean up all created embeddings
    for content_type, content_id, user_id in created_embeddings:
        try:
            await embeddings.delete_content_embedding(content_type, content_id, user_id)
        except Exception:
            pass  # Ignore cleanup errors


# ============================================================================
# store_content_embedding Tests
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_store_content_embedding_store_agent(
    server,
    test_content_id: str,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test storing embedding for STORE_AGENT content type."""
    # Track for cleanup
    cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))

    result = await embeddings.store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="AI assistant for productivity tasks",
        metadata={"name": "Test Agent", "categories": ["productivity"]},
        user_id=None,  # Store agents are public
    )

    assert result is True

    # Verify it was stored
    stored = await embeddings.get_content_embedding(
        ContentType.STORE_AGENT, test_content_id, user_id=None
    )
    assert stored is not None
    assert stored["contentId"] == test_content_id
    assert stored["contentType"] == "STORE_AGENT"
    assert stored["searchableText"] == "AI assistant for productivity tasks"


@pytest.mark.asyncio(loop_scope="session")
async def test_store_content_embedding_block(
    server,
    test_content_id: str,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test storing embedding for BLOCK content type."""
    cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))

    result = await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="HTTP request block for API calls",
        metadata={"name": "HTTP Request Block"},
        user_id=None,  # Blocks are public
    )

    assert result is True

    stored = await embeddings.get_content_embedding(
        ContentType.BLOCK, test_content_id, user_id=None
    )
    assert stored is not None
    assert stored["contentType"] == "BLOCK"


@pytest.mark.asyncio(loop_scope="session")
async def test_store_content_embedding_documentation(
    server,
    test_content_id: str,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test storing embedding for DOCUMENTATION content type."""
    cleanup_embeddings.append((ContentType.DOCUMENTATION, test_content_id, None))

    result = await embeddings.store_content_embedding(
        content_type=ContentType.DOCUMENTATION,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="Getting started guide for AutoGPT platform",
        metadata={"title": "Getting Started", "url": "/docs/getting-started"},
        user_id=None,  # Docs are public
    )

    assert result is True

    stored = await embeddings.get_content_embedding(
        ContentType.DOCUMENTATION, test_content_id, user_id=None
    )
    assert stored is not None
    assert stored["contentType"] == "DOCUMENTATION"


@pytest.mark.asyncio(loop_scope="session")
async def test_store_content_embedding_upsert(
    server,
    test_content_id: str,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test that storing embedding twice updates instead of duplicates."""
    cleanup_embeddings.append((ContentType.BLOCK, test_content_id, None))

    # Store first time
    result1 = await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="Original text",
        metadata={"version": 1},
        user_id=None,
    )
    assert result1 is True

    # Store again with different text (upsert)
    result2 = await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="Updated text",
        metadata={"version": 2},
        user_id=None,
    )
    assert result2 is True

    # Verify only one record with updated text
    stored = await embeddings.get_content_embedding(
        ContentType.BLOCK, test_content_id, user_id=None
    )
    assert stored is not None
    assert stored["searchableText"] == "Updated text"


# ============================================================================
# get_content_embedding Tests
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_get_content_embedding_not_found(server):
    """Test retrieving non-existent embedding returns None."""
    result = await embeddings.get_content_embedding(
        ContentType.STORE_AGENT, "non-existent-id", user_id=None
    )
    assert result is None


@pytest.mark.asyncio(loop_scope="session")
async def test_get_content_embedding_with_metadata(
    server,
    test_content_id: str,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test that metadata is correctly stored and retrieved."""
    cleanup_embeddings.append((ContentType.STORE_AGENT, test_content_id, None))

    metadata = {
        "name": "Test Agent",
        "subHeading": "A test agent",
        "categories": ["ai", "productivity"],
        "customField": 123,
    }

    await embeddings.store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="test",
        metadata=metadata,
        user_id=None,
    )

    stored = await embeddings.get_content_embedding(
        ContentType.STORE_AGENT, test_content_id, user_id=None
    )

    assert stored is not None
    assert stored["metadata"]["name"] == "Test Agent"
    assert stored["metadata"]["categories"] == ["ai", "productivity"]
    assert stored["metadata"]["customField"] == 123


# ============================================================================
# delete_content_embedding Tests
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_delete_content_embedding(
    server,
    test_content_id: str,
    mock_embedding: list[float],
):
    """Test deleting embedding removes it from database."""
    # Store embedding
    await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=test_content_id,
        embedding=mock_embedding,
        searchable_text="To be deleted",
        metadata=None,
        user_id=None,
    )

    # Verify it exists
    stored = await embeddings.get_content_embedding(
        ContentType.BLOCK, test_content_id, user_id=None
    )
    assert stored is not None

    # Delete it
    result = await embeddings.delete_content_embedding(
        ContentType.BLOCK, test_content_id, user_id=None
    )
    assert result is True

    # Verify it's gone
    stored = await embeddings.get_content_embedding(
        ContentType.BLOCK, test_content_id, user_id=None
    )
    assert stored is None


@pytest.mark.asyncio(loop_scope="session")
async def test_delete_content_embedding_not_found(server):
    """Test deleting non-existent embedding doesn't error."""
    result = await embeddings.delete_content_embedding(
        ContentType.BLOCK, "non-existent-id", user_id=None
    )
    # Should succeed even if nothing to delete
    assert result is True


# ============================================================================
# unified_hybrid_search Tests
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_unified_hybrid_search_finds_matching_content(
    server,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test unified search finds content matching the query."""
    # Create unique content IDs
    agent_id = f"test-agent-{uuid.uuid4()}"
    block_id = f"test-block-{uuid.uuid4()}"
    doc_id = f"test-doc-{uuid.uuid4()}"

    cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
    cleanup_embeddings.append((ContentType.BLOCK, block_id, None))
    cleanup_embeddings.append((ContentType.DOCUMENTATION, doc_id, None))

    # Store embeddings for different content types
    await embeddings.store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=agent_id,
        embedding=mock_embedding,
        searchable_text="AI writing assistant for blog posts",
        metadata={"name": "Writing Assistant"},
        user_id=None,
    )

    await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=block_id,
        embedding=mock_embedding,
        searchable_text="Text generation block for creative writing",
        metadata={"name": "Text Generator"},
        user_id=None,
    )

    await embeddings.store_content_embedding(
        content_type=ContentType.DOCUMENTATION,
        content_id=doc_id,
        embedding=mock_embedding,
        searchable_text="How to use writing blocks in AutoGPT",
        metadata={"title": "Writing Guide"},
        user_id=None,
    )

    # Search for "writing" - should find all three
    results, total = await unified_hybrid_search(
        query="writing",
        page=1,
        page_size=20,
    )

    # Should find at least our test content (may find others too)
    content_ids = [r["content_id"] for r in results]
    assert agent_id in content_ids or total >= 1  # Lexical search should find it


@pytest.mark.asyncio(loop_scope="session")
async def test_unified_hybrid_search_filter_by_content_type(
    server,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test unified search can filter by content type."""
    agent_id = f"test-agent-{uuid.uuid4()}"
    block_id = f"test-block-{uuid.uuid4()}"

    cleanup_embeddings.append((ContentType.STORE_AGENT, agent_id, None))
    cleanup_embeddings.append((ContentType.BLOCK, block_id, None))

    # Store both types with the same searchable text
    await embeddings.store_content_embedding(
        content_type=ContentType.STORE_AGENT,
        content_id=agent_id,
        embedding=mock_embedding,
        searchable_text="unique_search_term_xyz123",
        metadata={},
        user_id=None,
    )

    await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=block_id,
        embedding=mock_embedding,
        searchable_text="unique_search_term_xyz123",
        metadata={},
        user_id=None,
    )

    # Search only for BLOCK type
    results, total = await unified_hybrid_search(
        query="unique_search_term_xyz123",
        content_types=[ContentType.BLOCK],
        page=1,
        page_size=20,
    )

    # All results should be BLOCK type
    for r in results:
        assert r["content_type"] == "BLOCK"


@pytest.mark.asyncio(loop_scope="session")
async def test_unified_hybrid_search_empty_query(server):
    """Test unified search with empty query returns empty results."""
    results, total = await unified_hybrid_search(
        query="",
        page=1,
        page_size=20,
    )

    assert results == []
    assert total == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_unified_hybrid_search_pagination(
    server,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test unified search pagination works correctly."""
    # Use a unique search term to avoid matching other test data
    unique_term = f"xyzpagtest{uuid.uuid4().hex[:8]}"

    # Create multiple items
    content_ids = []
    for i in range(5):
        content_id = f"test-pagination-{uuid.uuid4()}"
        content_ids.append(content_id)
        cleanup_embeddings.append((ContentType.BLOCK, content_id, None))

        await embeddings.store_content_embedding(
            content_type=ContentType.BLOCK,
            content_id=content_id,
            embedding=mock_embedding,
            searchable_text=f"{unique_term} item number {i}",
            metadata={"index": i},
            user_id=None,
        )

    # Get first page
    page1_results, total1 = await unified_hybrid_search(
        query=unique_term,
        content_types=[ContentType.BLOCK],
        page=1,
        page_size=2,
    )

    # Get second page
    page2_results, total2 = await unified_hybrid_search(
        query=unique_term,
        content_types=[ContentType.BLOCK],
        page=2,
        page_size=2,
    )

    # Total should be consistent
    assert total1 == total2

    # Pages should have different content (if we have enough results)
    if len(page1_results) > 0 and len(page2_results) > 0:
        page1_ids = {r["content_id"] for r in page1_results}
        page2_ids = {r["content_id"] for r in page2_results}
        # No overlap between pages
        assert page1_ids.isdisjoint(page2_ids)


@pytest.mark.asyncio(loop_scope="session")
async def test_unified_hybrid_search_min_score_filtering(
    server,
    mock_embedding: list[float],
    cleanup_embeddings: list,
):
    """Test unified search respects min_score threshold."""
    content_id = f"test-minscore-{uuid.uuid4()}"
    cleanup_embeddings.append((ContentType.BLOCK, content_id, None))

    await embeddings.store_content_embedding(
        content_type=ContentType.BLOCK,
        content_id=content_id,
        embedding=mock_embedding,
        searchable_text="completely unrelated content about bananas",
        metadata={},
        user_id=None,
    )

    # Search with very high min_score - should filter out low relevance
    results_high, _ = await unified_hybrid_search(
        query="quantum computing algorithms",
        content_types=[ContentType.BLOCK],
        min_score=0.9,  # Very high threshold
        page=1,
        page_size=20,
    )

    # Search with low min_score
    results_low, _ = await unified_hybrid_search(
        query="quantum computing algorithms",
        content_types=[ContentType.BLOCK],
        min_score=0.01,  # Very low threshold
        page=1,
        page_size=20,
    )

    # High threshold should have fewer or equal results
    assert len(results_high) <= len(results_low)


# ============================================================================
# hybrid_search (Store Agents) Tests
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_hybrid_search_store_agents_sql_valid(server):
    """Test that hybrid_search SQL executes without errors."""
    # This test verifies the SQL is syntactically correct
    # even if no results are found
    results, total = await hybrid_search(
        query="test agent",
        page=1,
        page_size=20,
    )

    # Should not raise - verifies SQL is valid
    assert isinstance(results, list)
    assert isinstance(total, int)
    assert total >= 0


@pytest.mark.asyncio(loop_scope="session")
async def test_hybrid_search_with_filters(server):
    """Test hybrid_search with various filter options."""
    # Test with all filter types
    results, total = await hybrid_search(
        query="productivity",
        featured=True,
        creators=["test-creator"],
        category="productivity",
        page=1,
        page_size=10,
    )

    # Should not raise - verifies filter SQL is valid
    assert isinstance(results, list)
    assert isinstance(total, int)


@pytest.mark.asyncio(loop_scope="session")
async def test_hybrid_search_pagination(server):
    """Test hybrid_search pagination."""
    # Page 1
    results1, total1 = await hybrid_search(
        query="agent",
        page=1,
        page_size=5,
    )

    # Page 2
    results2, total2 = await hybrid_search(
        query="agent",
        page=2,
        page_size=5,
    )

    # Verify SQL executes without error
    assert isinstance(results1, list)
    assert isinstance(results2, list)
    assert isinstance(total1, int)
    assert isinstance(total2, int)

    # If page 1 has results, total should be > 0
    # Note: total from page 2 may be 0 if no results on that page (COUNT(*) OVER limitation)
    if results1:
        assert total1 > 0


# ============================================================================
# SQL Validity Tests (verify queries don't break)
# ============================================================================


@pytest.mark.asyncio(loop_scope="session")
async def test_all_content_types_searchable(server):
    """Test that all content types can be searched without SQL errors."""
    for content_type in [
        ContentType.STORE_AGENT,
        ContentType.BLOCK,
        ContentType.DOCUMENTATION,
    ]:
        results, total = await unified_hybrid_search(
            query="test",
            content_types=[content_type],
            page=1,
            page_size=10,
        )

        # Should not raise
        assert isinstance(results, list)
        assert isinstance(total, int)


@pytest.mark.asyncio(loop_scope="session")
async def test_multiple_content_types_searchable(server):
    """Test searching multiple content types at once."""
    results, total = await unified_hybrid_search(
        query="test",
        content_types=[ContentType.BLOCK, ContentType.DOCUMENTATION],
        page=1,
        page_size=20,
    )

    # Should not raise
    assert isinstance(results, list)
    assert isinstance(total, int)


@pytest.mark.asyncio(loop_scope="session")
async def test_search_all_content_types_default(server):
    """Test searching all content types (default behavior)."""
    results, total = await unified_hybrid_search(
        query="test",
        content_types=None,  # Should search all
        page=1,
        page_size=20,
    )

    # Should not raise
    assert isinstance(results, list)
    assert isinstance(total, int)


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])
@@ -1,314 +0,0 @@
"""
Integration tests for embeddings with schema handling.

These tests verify that embeddings operations work correctly across different database schemas.
"""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from prisma.enums import ContentType

from backend.api.features.store import embeddings
from backend.api.features.store.embeddings import EMBEDDING_DIM

# Schema prefix tests removed - functionality moved to db.raw_with_schema() helper


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_store_content_embedding_with_schema():
    """Test storing embeddings with proper schema handling."""
    with patch("backend.data.db.get_database_schema") as mock_schema:
        mock_schema.return_value = "platform"

        with patch("prisma.get_client") as mock_get_client:
            mock_client = AsyncMock()
            mock_get_client.return_value = mock_client

            result = await embeddings.store_content_embedding(
                content_type=ContentType.STORE_AGENT,
                content_id="test-id",
                embedding=[0.1] * EMBEDDING_DIM,
                searchable_text="test text",
                metadata={"test": "data"},
                user_id=None,
            )

            # Verify the query was called
            assert mock_client.execute_raw.called

            # Get the SQL query that was executed
            call_args = mock_client.execute_raw.call_args
            sql_query = call_args[0][0]

            # Verify schema prefix is in the query
            assert '"platform"."UnifiedContentEmbedding"' in sql_query

            # Verify result
            assert result is True


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_get_content_embedding_with_schema():
    """Test retrieving embeddings with proper schema handling."""
    with patch("backend.data.db.get_database_schema") as mock_schema:
        mock_schema.return_value = "platform"

        with patch("prisma.get_client") as mock_get_client:
            mock_client = AsyncMock()
            mock_client.query_raw.return_value = [
                {
                    "contentType": "STORE_AGENT",
                    "contentId": "test-id",
                    "userId": None,
                    "embedding": "[0.1, 0.2]",
                    "searchableText": "test",
                    "metadata": {},
                    "createdAt": "2024-01-01",
                    "updatedAt": "2024-01-01",
                }
            ]
            mock_get_client.return_value = mock_client

            result = await embeddings.get_content_embedding(
                ContentType.STORE_AGENT,
                "test-id",
                user_id=None,
            )

            # Verify the query was called
            assert mock_client.query_raw.called

            # Get the SQL query that was executed
            call_args = mock_client.query_raw.call_args
            sql_query = call_args[0][0]

            # Verify schema prefix is in the query
            assert '"platform"."UnifiedContentEmbedding"' in sql_query

            # Verify result
            assert result is not None
            assert result["contentId"] == "test-id"


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_delete_content_embedding_with_schema():
    """Test deleting embeddings with proper schema handling."""
    with patch("backend.data.db.get_database_schema") as mock_schema:
        mock_schema.return_value = "platform"

        with patch("prisma.get_client") as mock_get_client:
            mock_client = AsyncMock()
            mock_get_client.return_value = mock_client

            result = await embeddings.delete_content_embedding(
                ContentType.STORE_AGENT,
                "test-id",
            )

            # Verify the query was called
            assert mock_client.execute_raw.called

            # Get the SQL query that was executed
            call_args = mock_client.execute_raw.call_args
            sql_query = call_args[0][0]

            # Verify schema prefix is in the query
            assert '"platform"."UnifiedContentEmbedding"' in sql_query

            # Verify result
            assert result is True


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_get_embedding_stats_with_schema():
    """Test embedding statistics with proper schema handling via content handlers."""
    # Mock handler to return stats
    mock_handler = MagicMock()
    mock_handler.get_stats = AsyncMock(
        return_value={
            "total": 100,
            "with_embeddings": 80,
            "without_embeddings": 20,
        }
    )

    with patch(
        "backend.api.features.store.embeddings.CONTENT_HANDLERS",
        {ContentType.STORE_AGENT: mock_handler},
    ):
        result = await embeddings.get_embedding_stats()

    # Verify handler was called
    mock_handler.get_stats.assert_called_once()

    # Verify new result structure
    assert "by_type" in result
    assert "totals" in result
    assert result["totals"]["total"] == 100
    assert result["totals"]["with_embeddings"] == 80
    assert result["totals"]["without_embeddings"] == 20
    assert result["totals"]["coverage_percent"] == 80.0


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backfill_missing_embeddings_with_schema():
    """Test backfilling embeddings via content handlers."""
    from backend.api.features.store.content_handlers import ContentItem

    # Create mock content item
    mock_item = ContentItem(
        content_id="version-1",
        content_type=ContentType.STORE_AGENT,
        searchable_text="Test Agent Test description",
        metadata={"name": "Test Agent"},
    )

    # Mock handler
    mock_handler = MagicMock()
    mock_handler.get_missing_items = AsyncMock(return_value=[mock_item])

    with patch(
        "backend.api.features.store.embeddings.CONTENT_HANDLERS",
        {ContentType.STORE_AGENT: mock_handler},
    ):
        with patch(
            "backend.api.features.store.embeddings.generate_embedding",
            return_value=[0.1] * EMBEDDING_DIM,
        ):
            with patch(
                "backend.api.features.store.embeddings.store_content_embedding",
                return_value=True,
            ):
                result = await embeddings.backfill_missing_embeddings(batch_size=10)

    # Verify handler was called
    mock_handler.get_missing_items.assert_called_once_with(10)

    # Verify results
    assert result["processed"] == 1
    assert result["success"] == 1
    assert result["failed"] == 0


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_ensure_content_embedding_with_schema():
    """Test ensuring embeddings exist with proper schema handling."""
    with patch("backend.data.db.get_database_schema") as mock_schema:
        mock_schema.return_value = "platform"

        with patch(
            "backend.api.features.store.embeddings.get_content_embedding"
        ) as mock_get:
            # Simulate no existing embedding
            mock_get.return_value = None

            with patch(
                "backend.api.features.store.embeddings.generate_embedding"
            ) as mock_generate:
                mock_generate.return_value = [0.1] * EMBEDDING_DIM

                with patch(
                    "backend.api.features.store.embeddings.store_content_embedding"
                ) as mock_store:
                    mock_store.return_value = True

                    result = await embeddings.ensure_content_embedding(
                        content_type=ContentType.STORE_AGENT,
                        content_id="test-id",
                        searchable_text="test text",
                        metadata={"test": "data"},
                        user_id=None,
                        force=False,
                    )

                    # Verify the flow
                    assert mock_get.called
                    assert mock_generate.called
                    assert mock_store.called
                    assert result is True


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backward_compatibility_store_embedding():
    """Test backward compatibility wrapper for store_embedding."""
    with patch(
        "backend.api.features.store.embeddings.store_content_embedding"
    ) as mock_store:
        mock_store.return_value = True

        result = await embeddings.store_embedding(
            version_id="test-version-id",
            embedding=[0.1] * EMBEDDING_DIM,
            tx=None,
        )

        # Verify it calls the new function with correct parameters
        assert mock_store.called
        call_args = mock_store.call_args

        assert call_args[1]["content_type"] == ContentType.STORE_AGENT
        assert call_args[1]["content_id"] == "test-version-id"
        assert call_args[1]["user_id"] is None
        assert result is True


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_backward_compatibility_get_embedding():
    """Test backward compatibility wrapper for get_embedding."""
    with patch(
        "backend.api.features.store.embeddings.get_content_embedding"
    ) as mock_get:
        mock_get.return_value = {
            "contentType": "STORE_AGENT",
            "contentId": "test-version-id",
            "embedding": "[0.1, 0.2]",
            "createdAt": "2024-01-01",
            "updatedAt": "2024-01-01",
        }

        result = await embeddings.get_embedding("test-version-id")

        # Verify it calls the new function
        assert mock_get.called

        # Verify it transforms to the old format
        assert result is not None
        assert result["storeListingVersionId"] == "test-version-id"
        assert "embedding" in result


@pytest.mark.asyncio(loop_scope="session")
@pytest.mark.integration
async def test_schema_handling_error_cases():
    """Test error handling in schema-aware operations."""
    with patch("backend.data.db.get_database_schema") as mock_schema:
        mock_schema.return_value = "platform"

        with patch("prisma.get_client") as mock_get_client:
            mock_client = AsyncMock()
            mock_client.execute_raw.side_effect = Exception("Database error")
            mock_get_client.return_value = mock_client

            # Should raise exception on error
            with pytest.raises(Exception, match="Database error"):
                await embeddings.store_content_embedding(
                    content_type=ContentType.STORE_AGENT,
                    content_id="test-id",
                    embedding=[0.1] * EMBEDDING_DIM,
                    searchable_text="test",
                    metadata=None,
                    user_id=None,
                )


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])
@@ -1,399 +0,0 @@
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import prisma
|
||||
import pytest
|
||||
from prisma import Prisma
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.store import embeddings
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def setup_prisma():
|
||||
"""Setup Prisma client for tests."""
|
||||
try:
|
||||
Prisma()
|
||||
except prisma.errors.ClientAlreadyRegisteredError:
|
||||
pass
|
||||
yield
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text():
|
||||
"""Test searchable text building from listing fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="AI Assistant",
|
||||
description="A helpful AI assistant for productivity",
|
||||
sub_heading="Boost your productivity",
|
||||
categories=["AI", "Productivity"],
|
||||
)
|
||||
|
||||
expected = "AI Assistant Boost your productivity A helpful AI assistant for productivity AI Productivity"
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_build_searchable_text_empty_fields():
|
||||
"""Test searchable text building with empty fields."""
|
||||
result = embeddings.build_searchable_text(
|
||||
name="", description="Test description", sub_heading="", categories=[]
|
||||
)
|
||||
|
||||
assert result == "Test description"
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_generate_embedding_success():
|
||||
"""Test successful embedding generation."""
|
||||
# Mock OpenAI response
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.data = [MagicMock()]
|
||||
mock_response.data[0].embedding = [0.1, 0.2, 0.3] * 512 # 1536 dimensions
|
||||
|
||||
# Use AsyncMock for async embeddings.create method
|
||||
mock_client.embeddings.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch at the point of use in embeddings.py
|
||||
with patch(
|
||||
"backend.api.features.store.embeddings.get_openai_client"
|
||||
) as mock_get_client:
|
||||
mock_get_client.return_value = mock_client
|
||||
|
||||
result = await embeddings.generate_embedding("test text")
|
||||
|
||||
assert result is not None
|
||||
assert len(result) == embeddings.EMBEDDING_DIM
|
||||
assert result[0] == 0.1
|
||||
|
||||
mock_client.embeddings.create.assert_called_once_with(
|
||||
model="text-embedding-3-small", input="test text"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_no_api_key():
    """Test embedding generation without API key."""
    # Patch at the point of use in embeddings.py
    with patch(
        "backend.api.features.store.embeddings.get_openai_client"
    ) as mock_get_client:
        mock_get_client.return_value = None

        with pytest.raises(RuntimeError, match="openai_internal_api_key not set"):
            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_api_error():
    """Test embedding generation with API error."""
    mock_client = MagicMock()
    mock_client.embeddings.create = AsyncMock(side_effect=Exception("API Error"))

    # Patch at the point of use in embeddings.py
    with patch(
        "backend.api.features.store.embeddings.get_openai_client"
    ) as mock_get_client:
        mock_get_client.return_value = mock_client

        with pytest.raises(Exception, match="API Error"):
            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
async def test_generate_embedding_text_truncation():
    """Test that long text is properly truncated using tiktoken."""
    from tiktoken import encoding_for_model

    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.data = [MagicMock()]
    mock_response.data[0].embedding = [0.1] * embeddings.EMBEDDING_DIM

    # Use AsyncMock for async embeddings.create method
    mock_client.embeddings.create = AsyncMock(return_value=mock_response)

    # Patch at the point of use in embeddings.py
    with patch(
        "backend.api.features.store.embeddings.get_openai_client"
    ) as mock_get_client:
        mock_get_client.return_value = mock_client

        # Create text that will exceed the 8191-token input limit.
        # Distinct words keep the text token-heavy: each word contributes at
        # least one token, so 10000 words comfortably exceed the limit.
        words = [f"word{i}" for i in range(10000)]
        long_text = " ".join(words)

        await embeddings.generate_embedding(long_text)

        # Verify text was truncated to 8191 tokens
        call_args = mock_client.embeddings.create.call_args
        truncated_text = call_args.kwargs["input"]

        # Count actual tokens in truncated text
        enc = encoding_for_model("text-embedding-3-small")
        actual_tokens = len(enc.encode(truncated_text))

        # Should be at or just under 8191 tokens
        assert actual_tokens <= 8191
        # Should be close to the limit (not over-truncated)
        assert actual_tokens >= 8100


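# A sketch of the truncation presumably exercised above (an assumption based
# on what this test measures, not a quote of the implementation):
#
#     enc = encoding_for_model("text-embedding-3-small")
#     tokens = enc.encode(text)
#     if len(tokens) > 8191:
#         text = enc.decode(tokens[:8191])

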
@pytest.mark.asyncio(loop_scope="session")
async def test_store_embedding_success(mocker):
    """Test successful embedding storage."""
    mock_client = mocker.AsyncMock()
    mock_client.execute_raw = mocker.AsyncMock()

    embedding = [0.1, 0.2, 0.3]

    result = await embeddings.store_embedding(
        version_id="test-version-id", embedding=embedding, tx=mock_client
    )

    assert result is True
    # execute_raw is called once for INSERT (no separate SET search_path needed)
    assert mock_client.execute_raw.call_count == 1

    # Verify the INSERT query with the actual data
    call_args = mock_client.execute_raw.call_args_list[0][0]
    assert "test-version-id" in call_args
    assert "[0.1,0.2,0.3]" in call_args
    assert None in call_args  # userId should be None for store agents


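# call_args_list[0][0] above is the positional-argument tuple of the first
# execute_raw call, so the version id, the vector literal, and the None
# userId must all be passed positionally as query parameters.

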
@pytest.mark.asyncio(loop_scope="session")
async def test_store_embedding_database_error(mocker):
    """Test embedding storage with database error."""
    mock_client = mocker.AsyncMock()
    mock_client.execute_raw.side_effect = Exception("Database error")

    embedding = [0.1, 0.2, 0.3]

    with pytest.raises(Exception, match="Database error"):
        await embeddings.store_embedding(
            version_id="test-version-id", embedding=embedding, tx=mock_client
        )


@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_success():
    """Test successful embedding retrieval."""
    mock_result = [
        {
            "contentType": "STORE_AGENT",
            "contentId": "test-version-id",
            "userId": None,
            "embedding": "[0.1,0.2,0.3]",
            "searchableText": "Test text",
            "metadata": {},
            "createdAt": "2024-01-01T00:00:00Z",
            "updatedAt": "2024-01-01T00:00:00Z",
        }
    ]

    with patch(
        "backend.api.features.store.embeddings.query_raw_with_schema",
        return_value=mock_result,
    ):
        result = await embeddings.get_embedding("test-version-id")

        assert result is not None
        assert result["storeListingVersionId"] == "test-version-id"
        assert result["embedding"] == "[0.1,0.2,0.3]"


@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_not_found():
    """Test embedding retrieval when not found."""
    with patch(
        "backend.api.features.store.embeddings.query_raw_with_schema",
        return_value=[],
    ):
        result = await embeddings.get_embedding("test-version-id")

        assert result is None


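# Note the key rename above: the raw row carries "contentId", while
# get_embedding returns it as "storeListingVersionId"; presumably a
# backward-compatible alias from the earlier store-agent-only schema.

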
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.store_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_already_exists(mock_get, mock_store, mock_generate):
    """Test ensure_embedding when embedding already exists."""
    mock_get.return_value = {"embedding": "[0.1,0.2,0.3]"}

    result = await embeddings.ensure_embedding(
        version_id="test-id",
        name="Test",
        description="Test description",
        sub_heading="Test heading",
        categories=["test"],
    )

    assert result is True
    mock_generate.assert_not_called()
    mock_store.assert_not_called()


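# Reminder: stacked @patch decorators inject mocks bottom-up, so the
# parameters above arrive as (mock_get, mock_store, mock_generate);
# get_embedding is the bottom-most decorator and therefore comes first.

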
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.store_content_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
    """Test ensure_embedding creating new embedding."""
    mock_get.return_value = None
    mock_generate.return_value = [0.1, 0.2, 0.3]
    mock_store.return_value = True

    result = await embeddings.ensure_embedding(
        version_id="test-id",
        name="Test",
        description="Test description",
        sub_heading="Test heading",
        categories=["test"],
    )

    assert result is True
    mock_generate.assert_called_once_with("Test Test heading Test description test")
    mock_store.assert_called_once_with(
        content_type=ContentType.STORE_AGENT,
        content_id="test-id",
        embedding=[0.1, 0.2, 0.3],
        searchable_text="Test Test heading Test description test",
        metadata={"name": "Test", "subHeading": "Test heading", "categories": ["test"]},
        user_id=None,
        tx=None,
    )


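# The expected text "Test Test heading Test description test" mirrors the
# build_searchable_text order verified earlier: name, sub_heading,
# description, then categories.

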
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.generate_embedding")
@patch("backend.api.features.store.embeddings.get_embedding")
async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
    """Test ensure_embedding when generation fails."""
    mock_get.return_value = None
    mock_generate.side_effect = Exception("Generation failed")

    with pytest.raises(Exception, match="Generation failed"):
        await embeddings.ensure_embedding(
            version_id="test-id",
            name="Test",
            description="Test description",
            sub_heading="Test heading",
            categories=["test"],
        )


@pytest.mark.asyncio(loop_scope="session")
async def test_get_embedding_stats():
    """Test embedding statistics retrieval."""
    # Mock handler stats for each content type
    mock_handler = MagicMock()
    mock_handler.get_stats = AsyncMock(
        return_value={
            "total": 100,
            "with_embeddings": 75,
            "without_embeddings": 25,
        }
    )

    # Patch the CONTENT_HANDLERS where it's used (in embeddings module)
    with patch(
        "backend.api.features.store.embeddings.CONTENT_HANDLERS",
        {ContentType.STORE_AGENT: mock_handler},
    ):
        result = await embeddings.get_embedding_stats()

        assert "by_type" in result
        assert "totals" in result
        assert result["totals"]["total"] == 100
        assert result["totals"]["with_embeddings"] == 75
        assert result["totals"]["without_embeddings"] == 25
        assert result["totals"]["coverage_percent"] == 75.0


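# coverage_percent is evidently with_embeddings / total * 100:
# 75 / 100 * 100 == 75.0 for the single mocked handler.

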
@pytest.mark.asyncio(loop_scope="session")
@patch("backend.api.features.store.embeddings.store_content_embedding")
async def test_backfill_missing_embeddings_success(mock_store):
    """Test backfill with successful embedding generation."""
    # Mock ContentItem from handlers
    from backend.api.features.store.content_handlers import ContentItem

    mock_items = [
        ContentItem(
            content_id="version-1",
            content_type=ContentType.STORE_AGENT,
            searchable_text="Agent 1 Description 1",
            metadata={"name": "Agent 1"},
        ),
        ContentItem(
            content_id="version-2",
            content_type=ContentType.STORE_AGENT,
            searchable_text="Agent 2 Description 2",
            metadata={"name": "Agent 2"},
        ),
    ]

    # Mock handler to return missing items
    mock_handler = MagicMock()
    mock_handler.get_missing_items = AsyncMock(return_value=mock_items)

    # Mock store_content_embedding to succeed for first, fail for second
    mock_store.side_effect = [True, False]

    with patch(
        "backend.api.features.store.embeddings.CONTENT_HANDLERS",
        {ContentType.STORE_AGENT: mock_handler},
    ):
        with patch(
            "backend.api.features.store.embeddings.generate_embedding",
            return_value=[0.1] * embeddings.EMBEDDING_DIM,
        ):
            result = await embeddings.backfill_missing_embeddings(batch_size=5)

    assert result["processed"] == 2
    assert result["success"] == 1
    assert result["failed"] == 1
    assert mock_store.call_count == 2


@pytest.mark.asyncio(loop_scope="session")
async def test_backfill_missing_embeddings_no_missing():
    """Test backfill when no embeddings are missing."""
    # Mock handler to return no missing items
    mock_handler = MagicMock()
    mock_handler.get_missing_items = AsyncMock(return_value=[])

    with patch(
        "backend.api.features.store.embeddings.CONTENT_HANDLERS",
        {ContentType.STORE_AGENT: mock_handler},
    ):
        result = await embeddings.backfill_missing_embeddings(batch_size=5)

    assert result["processed"] == 0
    assert result["success"] == 0
    assert result["failed"] == 0


@pytest.mark.asyncio(loop_scope="session")
async def test_embedding_to_vector_string():
    """Test embedding to PostgreSQL vector string conversion."""
    embedding = [0.1, 0.2, 0.3, -0.4]
    result = embeddings.embedding_to_vector_string(embedding)
    assert result == "[0.1,0.2,0.3,-0.4]"


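# "[0.1,0.2,0.3,-0.4]" is the literal form pgvector parses for vector
# columns, so a minimal implementation (an assumption, not the verified
# source) would be: "[" + ",".join(str(v) for v in embedding) + "]".

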
@pytest.mark.asyncio(loop_scope="session")
async def test_embed_query():
    """Test embed_query function (alias for generate_embedding)."""
    with patch(
        "backend.api.features.store.embeddings.generate_embedding"
    ) as mock_generate:
        mock_generate.return_value = [0.1, 0.2, 0.3]

        result = await embeddings.embed_query("test query")

        assert result == [0.1, 0.2, 0.3]
        mock_generate.assert_called_once_with("test query")