Mirror of https://github.com/simstudioai/sim.git, synced 2026-01-09 23:17:59 -05:00

Compare commits (9 commits)
| SHA1 |
|---|
| 377b84e18c |
| 223ecda80e |
| 7dde01e74b |
| b768ca845e |
| 86ed32ea10 |
| 0e838940f1 |
| 7cc9a23f99 |
| c42d2a32f3 |
| 4da355d269 |
.github/workflows/docs-embeddings.yml (vendored): 2 changed lines
@@ -32,4 +32,4 @@ jobs:
        env:
          DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-       run: bun run scripts/process-docs-embeddings.ts --clear
+       run: bun run scripts/process-docs.ts --clear
@@ -233,7 +233,7 @@ describe('Copilot Chat API Route', () => {
          model: 'claude-4.5-sonnet',
          mode: 'agent',
          messageId: 'mock-uuid-1234-5678',
-         version: '1.0.0',
+         version: '1.0.1',
          chatId: 'chat-123',
        }),
      })

@@ -303,7 +303,7 @@ describe('Copilot Chat API Route', () => {
          model: 'claude-4.5-sonnet',
          mode: 'agent',
          messageId: 'mock-uuid-1234-5678',
-         version: '1.0.0',
+         version: '1.0.1',
          chatId: 'chat-123',
        }),
      })

@@ -361,7 +361,7 @@ describe('Copilot Chat API Route', () => {
          model: 'claude-4.5-sonnet',
          mode: 'agent',
          messageId: 'mock-uuid-1234-5678',
-         version: '1.0.0',
+         version: '1.0.1',
          chatId: 'chat-123',
        }),
      })

@@ -453,7 +453,7 @@ describe('Copilot Chat API Route', () => {
          model: 'claude-4.5-sonnet',
          mode: 'ask',
          messageId: 'mock-uuid-1234-5678',
-         version: '1.0.0',
+         version: '1.0.1',
          chatId: 'chat-123',
        }),
      })
apps/sim/app/api/copilot/user-models/route.ts (new file): 131 added lines

@@ -0,0 +1,131 @@
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { auth } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { db } from '@/../../packages/db'
import { settings } from '@/../../packages/db/schema'

const logger = createLogger('CopilotUserModelsAPI')

const DEFAULT_ENABLED_MODELS: Record<string, boolean> = {
  'gpt-4o': false,
  'gpt-4.1': false,
  'gpt-5-fast': false,
  'gpt-5': true,
  'gpt-5-medium': true,
  'gpt-5-high': false,
  o3: true,
  'claude-4-sonnet': true,
  'claude-4.5-sonnet': true,
  'claude-4.1-opus': true,
}

// GET - Fetch user's enabled models
export async function GET(request: NextRequest) {
  try {
    const session = await auth.api.getSession({ headers: request.headers })

    if (!session?.user?.id) {
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const userId = session.user.id

    // Try to fetch existing settings record
    const [userSettings] = await db
      .select()
      .from(settings)
      .where(eq(settings.userId, userId))
      .limit(1)

    if (userSettings) {
      const userModelsMap = (userSettings.copilotEnabledModels as Record<string, boolean>) || {}

      // Merge: start with defaults, then override with user's existing preferences
      const mergedModels = { ...DEFAULT_ENABLED_MODELS }
      for (const [modelId, enabled] of Object.entries(userModelsMap)) {
        mergedModels[modelId] = enabled
      }

      // If we added any new models, update the database
      const hasNewModels = Object.keys(DEFAULT_ENABLED_MODELS).some(
        (key) => !(key in userModelsMap)
      )

      if (hasNewModels) {
        await db
          .update(settings)
          .set({
            copilotEnabledModels: mergedModels,
            updatedAt: new Date(),
          })
          .where(eq(settings.userId, userId))
      }

      return NextResponse.json({
        enabledModels: mergedModels,
      })
    }

    // If no settings record exists, create one with empty object (client will use defaults)
    const [created] = await db
      .insert(settings)
      .values({
        id: userId,
        userId,
        copilotEnabledModels: {},
      })
      .returning()

    return NextResponse.json({
      enabledModels: DEFAULT_ENABLED_MODELS,
    })
  } catch (error) {
    logger.error('Failed to fetch user models', { error })
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
  }
}

// PUT - Update user's enabled models
export async function PUT(request: NextRequest) {
  try {
    const session = await auth.api.getSession({ headers: request.headers })

    if (!session?.user?.id) {
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const userId = session.user.id
    const body = await request.json()

    if (!body.enabledModels || typeof body.enabledModels !== 'object') {
      return NextResponse.json({ error: 'enabledModels must be an object' }, { status: 400 })
    }

    // Check if settings record exists
    const [existing] = await db.select().from(settings).where(eq(settings.userId, userId)).limit(1)

    if (existing) {
      // Update existing record
      await db
        .update(settings)
        .set({
          copilotEnabledModels: body.enabledModels,
          updatedAt: new Date(),
        })
        .where(eq(settings.userId, userId))
    } else {
      // Create new settings record
      await db.insert(settings).values({
        id: userId,
        userId,
        copilotEnabledModels: body.enabledModels,
      })
    }

    return NextResponse.json({ success: true })
  } catch (error) {
    logger.error('Failed to update user models', { error })
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
  }
}
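For reference, a minimal client-side sketch of how these two endpoints can be exercised. The route path, the `enabledModels` map, and the PUT body shape come from the file above; the helper names (`fetchEnabledModels`, `toggleModel`) are illustrative and not part of the commit.

```ts
// Minimal sketch; assumes a same-origin session cookie is already present.
async function fetchEnabledModels(): Promise<Record<string, boolean>> {
  const res = await fetch('/api/copilot/user-models')
  if (!res.ok) throw new Error(`GET /api/copilot/user-models failed: ${res.status}`)
  const data = await res.json()
  return data.enabledModels ?? {}
}

async function toggleModel(modelId: string, enabled: boolean): Promise<void> {
  // PUT replaces the whole map, so merge the current preferences first.
  const current = await fetchEnabledModels()
  const res = await fetch('/api/copilot/user-models', {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ enabledModels: { ...current, [modelId]: enabled } }),
  })
  if (!res.ok) throw new Error(`PUT /api/copilot/user-models failed: ${res.status}`)
}
```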
@@ -265,9 +265,8 @@ async function handleS3PresignedUrl(
    )
  }

-  // For chat images, knowledge base files, and profile pictures, use direct URLs since they need to be accessible by external services
  const finalPath =
-    uploadType === 'chat' || uploadType === 'knowledge-base' || uploadType === 'profile-pictures'
+    uploadType === 'chat' || uploadType === 'profile-pictures'
      ? `https://${config.bucket}.s3.${config.region}.amazonaws.com/${uniqueKey}`
      : `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`

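To make the routing change concrete, a small illustration of the two URL shapes `finalPath` can take; the bucket, region, and key values are hypothetical and only show the format.

```ts
// Illustrative values only; not taken from the repository configuration.
const config = { bucket: 'example-bucket', region: 'us-east-1' }
const uniqueKey = 'kb/1234/report.pdf'

// chat and profile-pictures uploads: direct, externally reachable S3 URL
const directUrl = `https://${config.bucket}.s3.${config.region}.amazonaws.com/${uniqueKey}`
// => https://example-bucket.s3.us-east-1.amazonaws.com/kb/1234/report.pdf

// every other upload type (knowledge-base now included): served through the app
const servedUrl = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`
// => /api/files/serve/s3/kb%2F1234%2Freport.pdf
```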
@@ -7,6 +7,7 @@ import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserEntityPermissions } from '@/lib/permissions/utils'
import { generateRequestId } from '@/lib/utils'
+import { extractAndPersistCustomTools } from '@/lib/workflows/custom-tools-persistence'
import { saveWorkflowToNormalizedTables } from '@/lib/workflows/db-helpers'
import { sanitizeAgentToolsInBlocks } from '@/lib/workflows/validation'

@@ -207,6 +208,21 @@ export async function PUT(request: NextRequest, { params }: { params: Promise<{
      )
    }

+    // Extract and persist custom tools to database
+    try {
+      const { saved, errors } = await extractAndPersistCustomTools(workflowState, userId)
+
+      if (saved > 0) {
+        logger.info(`[${requestId}] Persisted ${saved} custom tool(s) to database`, { workflowId })
+      }
+
+      if (errors.length > 0) {
+        logger.warn(`[${requestId}] Some custom tools failed to persist`, { errors, workflowId })
+      }
+    } catch (error) {
+      logger.error(`[${requestId}] Failed to persist custom tools`, { error, workflowId })
+    }
+
    // Update workflow's lastSynced timestamp
    await db
      .update(workflow)
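The handler only depends on `extractAndPersistCustomTools` returning a `saved` count and an `errors` list, and it deliberately swallows failures so tool persistence cannot break the workflow save. A type sketch consistent with that call site; the exact field types are assumptions, not taken from the helper's source.

```ts
// Shape implied by the call site above; the error element type is assumed.
interface PersistCustomToolsResult {
  saved: number    // custom tools written to the database
  errors: string[] // per-tool failures, logged but non-fatal for the save
}

declare function extractAndPersistCustomTools(
  workflowState: unknown,
  userId: string
): Promise<PersistCustomToolsResult>
```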
@@ -312,7 +312,7 @@ export function EditChunkModal({
            <Button
              onClick={handleSaveContent}
              disabled={!isFormValid || isSaving || !hasUnsavedChanges || isNavigating}
-             className='bg-[var(--brand-primary-hex)] font-[480] text-muted-foreground shadow-[0_0_0_0_var(--brand-primary-hex)] transition-all duration-200 hover:bg-[var(--brand-primary-hover-hex)] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
+             className='bg-[var(--brand-primary-hex)] font-[480] text-white shadow-[0_0_0_0_var(--brand-primary-hex)] transition-all duration-200 hover:bg-[var(--brand-primary-hover-hex)] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
            >
              {isSaving ? (
                <>
@@ -64,7 +64,7 @@ export function UploadModal({
      return `File "${file.name}" is too large. Maximum size is 100MB.`
    }
    if (!ACCEPTED_FILE_TYPES.includes(file.type)) {
-     return `File "${file.name}" has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, or HTML files.`
+     return `File "${file.name}" has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML, or YML files.`
    }
    return null
  }

@@ -193,8 +193,8 @@ export function UploadModal({
            {isDragging ? 'Drop files here!' : 'Drop files here or click to browse'}
          </p>
          <p className='text-muted-foreground text-xs'>
-           Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB
-           each)
+           Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML,
+           YML (max 100MB each)
          </p>
        </div>
      </div>
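The wording change implies the upload validators now also accept JSON and YAML documents, which means `ACCEPTED_FILE_TYPES` (defined elsewhere in the repo) must carry the matching MIME types. A hypothetical sketch of the additions, for orientation only:

```ts
// Hypothetical: the real ACCEPTED_FILE_TYPES list is not shown in this diff.
const NEW_ACCEPTED_MIME_TYPES = [
  'application/json',   // .json
  'application/x-yaml', // .yaml / .yml (browsers may also report text/yaml or text/x-yaml)
  'text/yaml',
]
```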
@@ -158,7 +158,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
        // Check file type
        if (!ACCEPTED_FILE_TYPES.includes(file.type)) {
          setFileError(
-           `File ${file.name} has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, or HTML.`
+           `File ${file.name} has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML, or YML.`
          )
          hasError = true
          continue

@@ -501,8 +501,8 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
              : 'Drop files here or click to browse'}
            </p>
            <p className='text-muted-foreground text-xs'>
-             Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max
-             100MB each)
+             Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML,
+             JSON, YAML, YML (max 100MB each)
            </p>
          </div>
        </div>
@@ -84,16 +84,200 @@ class ProcessingError extends KnowledgeUploadError {
}

const UPLOAD_CONFIG = {
-  BATCH_SIZE: 15, // Upload files in parallel - this is fast and not the bottleneck
-  MAX_RETRIES: 3, // Standard retry count
-  RETRY_DELAY: 2000, // Initial retry delay in ms (2 seconds)
-  RETRY_MULTIPLIER: 2, // Standard exponential backoff (2s, 4s, 8s)
-  CHUNK_SIZE: 5 * 1024 * 1024,
-  DIRECT_UPLOAD_THRESHOLD: 4 * 1024 * 1024, // Files > 4MB must use presigned URLs
-  LARGE_FILE_THRESHOLD: 50 * 1024 * 1024, // Files > 50MB need multipart upload
-  UPLOAD_TIMEOUT: 60000, // 60 second timeout per upload
+  MAX_PARALLEL_UPLOADS: 3, // Prevent client saturation – mirrors guidance on limiting simultaneous transfers (@Web)
+  MAX_RETRIES: 3,
+  RETRY_DELAY_MS: 2000,
+  RETRY_BACKOFF: 2,
+  CHUNK_SIZE: 8 * 1024 * 1024, // 8MB keeps us well above S3 minimum part size while reducing part count (@Web)
+  DIRECT_UPLOAD_THRESHOLD: 4 * 1024 * 1024,
+  LARGE_FILE_THRESHOLD: 50 * 1024 * 1024,
+  BASE_TIMEOUT_MS: 2 * 60 * 1000, // baseline per transfer window per large-file guidance (@Web)
+  TIMEOUT_PER_MB_MS: 1500,
+  MAX_TIMEOUT_MS: 10 * 60 * 1000,
+  MULTIPART_PART_CONCURRENCY: 3,
+  MULTIPART_MAX_RETRIES: 3,
+  BATCH_REQUEST_SIZE: 50,
} as const

const calculateUploadTimeoutMs = (fileSize: number) => {
  const sizeInMb = fileSize / (1024 * 1024)
  const dynamicBudget = UPLOAD_CONFIG.BASE_TIMEOUT_MS + sizeInMb * UPLOAD_CONFIG.TIMEOUT_PER_MB_MS
  return Math.min(dynamicBudget, UPLOAD_CONFIG.MAX_TIMEOUT_MS)
}

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))

const getHighResTime = () =>
  typeof performance !== 'undefined' && typeof performance.now === 'function'
    ? performance.now()
    : Date.now()

const formatMegabytes = (bytes: number) => Number((bytes / (1024 * 1024)).toFixed(2))

const calculateThroughputMbps = (bytes: number, durationMs: number) => {
  if (!bytes || !durationMs) return 0
  return Number((((bytes * 8) / durationMs) * 0.001).toFixed(2))
}

const formatDurationSeconds = (durationMs: number) => Number((durationMs / 1000).toFixed(2))

const runWithConcurrency = async <T, R>(
  items: T[],
  limit: number,
  worker: (item: T, index: number) => Promise<R>
): Promise<Array<PromiseSettledResult<R>>> => {
  const results: Array<PromiseSettledResult<R>> = Array(items.length)

  if (items.length === 0) {
    return results
  }

  const concurrency = Math.max(1, Math.min(limit, items.length))
  let nextIndex = 0

  const runners = Array.from({ length: concurrency }, async () => {
    while (true) {
      const currentIndex = nextIndex++
      if (currentIndex >= items.length) {
        break
      }

      try {
        const value = await worker(items[currentIndex], currentIndex)
        results[currentIndex] = { status: 'fulfilled', value }
      } catch (error) {
        results[currentIndex] = { status: 'rejected', reason: error }
      }
    }
  })

  await Promise.all(runners)
  return results
}

const getErrorName = (error: unknown) =>
  typeof error === 'object' && error !== null && 'name' in error ? String((error as any).name) : ''

const getErrorMessage = (error: unknown) =>
  error instanceof Error ? error.message : typeof error === 'string' ? error : 'Unknown error'

const isAbortError = (error: unknown) => getErrorName(error) === 'AbortError'

const isNetworkError = (error: unknown) => {
  if (!(error instanceof Error)) {
    return false
  }

  const message = error.message.toLowerCase()
  return (
    message.includes('network') ||
    message.includes('fetch') ||
    message.includes('connection') ||
    message.includes('timeout') ||
    message.includes('timed out') ||
    message.includes('ecconnreset')
  )
}

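A short usage sketch of the new helpers, assuming it runs in the same module where `UPLOAD_CONFIG`, `calculateUploadTimeoutMs`, and `runWithConcurrency` are defined; the file sizes and items are illustrative.

```ts
// Dynamic timeout budget: BASE_TIMEOUT_MS + sizeInMb * TIMEOUT_PER_MB_MS, capped at MAX_TIMEOUT_MS.
const smallBudget = calculateUploadTimeoutMs(10 * 1024 * 1024) // 120000 + 10 * 1500 = 135000 ms
const largeBudget = calculateUploadTimeoutMs(500 * 1024 * 1024) // 870000 ms, capped to 600000 ms

// runWithConcurrency behaves like a throttled Promise.allSettled: at most `limit`
// workers in flight, with results returned in input order.
async function demo(): Promise<void> {
  const settled = await runWithConcurrency(
    ['a.pdf', 'b.pdf', 'c.pdf', 'd.pdf'],  // placeholder items
    UPLOAD_CONFIG.MAX_PARALLEL_UPLOADS,    // 3 at a time
    async (name) => `uploaded:${name}`     // placeholder worker
  )
  // settled[i] is { status: 'fulfilled', value } or { status: 'rejected', reason }
  console.log(settled)
}
```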
interface PresignedFileInfo {
|
||||
path: string
|
||||
key: string
|
||||
name: string
|
||||
size: number
|
||||
type: string
|
||||
}
|
||||
|
||||
interface PresignedUploadInfo {
|
||||
fileName: string
|
||||
presignedUrl: string
|
||||
fileInfo: PresignedFileInfo
|
||||
uploadHeaders?: Record<string, string>
|
||||
directUploadSupported: boolean
|
||||
presignedUrls?: any
|
||||
}
|
||||
|
||||
const normalizePresignedData = (data: any, context: string): PresignedUploadInfo => {
|
||||
const presignedUrl = data?.presignedUrl || data?.uploadUrl
|
||||
const fileInfo = data?.fileInfo
|
||||
|
||||
if (!presignedUrl || !fileInfo?.path) {
|
||||
throw new PresignedUrlError(`Invalid presigned response for ${context}`, data)
|
||||
}
|
||||
|
||||
return {
|
||||
fileName: data.fileName || fileInfo.name || context,
|
||||
presignedUrl,
|
||||
fileInfo: {
|
||||
path: fileInfo.path,
|
||||
key: fileInfo.key,
|
||||
name: fileInfo.name || context,
|
||||
size: fileInfo.size || data.fileSize || 0,
|
||||
type: fileInfo.type || data.contentType || '',
|
||||
},
|
||||
uploadHeaders: data.uploadHeaders || undefined,
|
||||
directUploadSupported: data.directUploadSupported !== false,
|
||||
presignedUrls: data.presignedUrls,
|
||||
}
|
||||
}
|
||||
|
||||
const getPresignedData = async (
|
||||
file: File,
|
||||
timeoutMs: number,
|
||||
controller?: AbortController
|
||||
): Promise<PresignedUploadInfo> => {
|
||||
const localController = controller ?? new AbortController()
|
||||
const timeoutId = setTimeout(() => localController.abort(), timeoutMs)
|
||||
const startTime = getHighResTime()
|
||||
|
||||
try {
|
||||
const presignedResponse = await fetch('/api/files/presigned?type=knowledge-base', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
fileName: file.name,
|
||||
contentType: file.type,
|
||||
fileSize: file.size,
|
||||
}),
|
||||
signal: localController.signal,
|
||||
})
|
||||
|
||||
if (!presignedResponse.ok) {
|
||||
let errorDetails: any = null
|
||||
try {
|
||||
errorDetails = await presignedResponse.json()
|
||||
} catch {
|
||||
// Ignore JSON parsing errors (@Web)
|
||||
}
|
||||
|
||||
logger.error('Presigned URL request failed', {
|
||||
status: presignedResponse.status,
|
||||
fileSize: file.size,
|
||||
})
|
||||
|
||||
throw new PresignedUrlError(
|
||||
`Failed to get presigned URL for ${file.name}: ${presignedResponse.status} ${presignedResponse.statusText}`,
|
||||
errorDetails
|
||||
)
|
||||
}
|
||||
|
||||
const presignedData = await presignedResponse.json()
|
||||
const durationMs = getHighResTime() - startTime
|
||||
logger.info('Fetched presigned URL', {
|
||||
fileName: file.name,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
durationMs: formatDurationSeconds(durationMs),
|
||||
})
|
||||
return normalizePresignedData(presignedData, file.name)
|
||||
} finally {
|
||||
clearTimeout(timeoutId)
|
||||
if (!controller) {
|
||||
localController.abort()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
const [isUploading, setIsUploading] = useState(false)
|
||||
const [uploadProgress, setUploadProgress] = useState<UploadProgress>({
|
||||
@@ -153,85 +337,51 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
const uploadSingleFileWithRetry = async (
|
||||
file: File,
|
||||
retryCount = 0,
|
||||
fileIndex?: number
|
||||
fileIndex?: number,
|
||||
presignedOverride?: PresignedUploadInfo
|
||||
): Promise<UploadedFile> => {
|
||||
const timeoutMs = calculateUploadTimeoutMs(file.size)
|
||||
let presignedData: PresignedUploadInfo | undefined
|
||||
const attempt = retryCount + 1
|
||||
logger.info('Upload attempt started', {
|
||||
fileName: file.name,
|
||||
attempt,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
timeoutMs: formatDurationSeconds(timeoutMs),
|
||||
})
|
||||
|
||||
try {
|
||||
// Create abort controller for timeout
|
||||
const controller = new AbortController()
|
||||
const timeoutId = setTimeout(() => controller.abort(), UPLOAD_CONFIG.UPLOAD_TIMEOUT)
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs)
|
||||
|
||||
try {
|
||||
// Get presigned URL
|
||||
const presignedResponse = await fetch('/api/files/presigned?type=knowledge-base', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
fileName: file.name,
|
||||
contentType: file.type,
|
||||
fileSize: file.size,
|
||||
}),
|
||||
signal: controller.signal,
|
||||
})
|
||||
|
||||
clearTimeout(timeoutId)
|
||||
|
||||
if (!presignedResponse.ok) {
|
||||
let errorDetails: any = null
|
||||
try {
|
||||
errorDetails = await presignedResponse.json()
|
||||
} catch {
|
||||
// Ignore JSON parsing errors
|
||||
}
|
||||
|
||||
logger.error('Presigned URL request failed', {
|
||||
status: presignedResponse.status,
|
||||
fileSize: file.size,
|
||||
retryCount,
|
||||
})
|
||||
|
||||
throw new PresignedUrlError(
|
||||
`Failed to get presigned URL for ${file.name}: ${presignedResponse.status} ${presignedResponse.statusText}`,
|
||||
errorDetails
|
||||
)
|
||||
}
|
||||
|
||||
const presignedData = await presignedResponse.json()
|
||||
presignedData = presignedOverride ?? (await getPresignedData(file, timeoutMs, controller))
|
||||
|
||||
if (presignedData.directUploadSupported) {
|
||||
// Use presigned URLs for all uploads when cloud storage is available
|
||||
// Check if file needs multipart upload for large files
|
||||
if (file.size > UPLOAD_CONFIG.LARGE_FILE_THRESHOLD) {
|
||||
return await uploadFileInChunks(file, presignedData)
|
||||
return await uploadFileInChunks(file, presignedData, timeoutMs, fileIndex)
|
||||
}
|
||||
return await uploadFileDirectly(file, presignedData, fileIndex)
|
||||
return await uploadFileDirectly(file, presignedData, timeoutMs, controller, fileIndex)
|
||||
}
|
||||
// Fallback to traditional upload through API route
|
||||
// This is only used when cloud storage is not configured
|
||||
// Must check file size due to Vercel's 4.5MB limit
|
||||
|
||||
if (file.size > UPLOAD_CONFIG.DIRECT_UPLOAD_THRESHOLD) {
|
||||
throw new DirectUploadError(
|
||||
`File ${file.name} is too large (${(file.size / 1024 / 1024).toFixed(2)}MB) for upload. Cloud storage must be configured for files over 4MB.`,
|
||||
{ fileSize: file.size, limit: UPLOAD_CONFIG.DIRECT_UPLOAD_THRESHOLD }
|
||||
)
|
||||
}
|
||||
|
||||
logger.warn(`Using API upload fallback for ${file.name} - cloud storage not configured`)
|
||||
return await uploadFileThroughAPI(file)
|
||||
return await uploadFileThroughAPI(file, timeoutMs)
|
||||
} finally {
|
||||
clearTimeout(timeoutId)
|
||||
}
|
||||
} catch (error) {
|
||||
const isTimeout = error instanceof Error && error.name === 'AbortError'
|
||||
const isNetwork =
|
||||
error instanceof Error &&
|
||||
(error.message.includes('fetch') ||
|
||||
error.message.includes('network') ||
|
||||
error.message.includes('Failed to fetch'))
|
||||
const isTimeout = isAbortError(error)
|
||||
const isNetwork = isNetworkError(error)
|
||||
|
||||
// Retry logic
|
||||
if (retryCount < UPLOAD_CONFIG.MAX_RETRIES) {
|
||||
const delay = UPLOAD_CONFIG.RETRY_DELAY * UPLOAD_CONFIG.RETRY_MULTIPLIER ** retryCount // More aggressive exponential backoff
|
||||
const delay = UPLOAD_CONFIG.RETRY_DELAY_MS * UPLOAD_CONFIG.RETRY_BACKOFF ** retryCount // More aggressive exponential backoff (@Web)
|
||||
if (isTimeout || isNetwork) {
|
||||
logger.warn(
|
||||
`Upload failed (${isTimeout ? 'timeout' : 'network'}), retrying in ${delay / 1000}s...`,
|
||||
@@ -243,7 +393,6 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
)
|
||||
}
|
||||
|
||||
// Reset progress to 0 before retry to indicate restart
|
||||
if (fileIndex !== undefined) {
|
||||
setUploadProgress((prev) => ({
|
||||
...prev,
|
||||
@@ -253,8 +402,14 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
}))
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, delay))
|
||||
return uploadSingleFileWithRetry(file, retryCount + 1, fileIndex)
|
||||
await sleep(delay)
|
||||
const shouldReusePresigned = (isTimeout || isNetwork) && presignedData
|
||||
return uploadSingleFileWithRetry(
|
||||
file,
|
||||
retryCount + 1,
|
||||
fileIndex,
|
||||
shouldReusePresigned ? presignedData : undefined
|
||||
)
|
||||
}
|
||||
|
||||
logger.error('Upload failed after retries', {
|
||||
@@ -271,12 +426,15 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
*/
|
||||
const uploadFileDirectly = async (
|
||||
file: File,
|
||||
presignedData: any,
|
||||
presignedData: PresignedUploadInfo,
|
||||
timeoutMs: number,
|
||||
outerController: AbortController,
|
||||
fileIndex?: number
|
||||
): Promise<UploadedFile> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const xhr = new XMLHttpRequest()
|
||||
let isCompleted = false // Track if this upload has completed to prevent duplicate state updates
|
||||
let isCompleted = false
|
||||
const startTime = getHighResTime()
|
||||
|
||||
const timeoutId = setTimeout(() => {
|
||||
if (!isCompleted) {
|
||||
@@ -284,7 +442,18 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
xhr.abort()
|
||||
reject(new Error('Upload timeout'))
|
||||
}
|
||||
}, UPLOAD_CONFIG.UPLOAD_TIMEOUT)
|
||||
}, timeoutMs)
|
||||
|
||||
const abortHandler = () => {
|
||||
if (!isCompleted) {
|
||||
isCompleted = true
|
||||
clearTimeout(timeoutId)
|
||||
xhr.abort()
|
||||
reject(new DirectUploadError(`Upload aborted for ${file.name}`, {}))
|
||||
}
|
||||
}
|
||||
|
||||
outerController.signal.addEventListener('abort', abortHandler)
|
||||
|
||||
// Track upload progress
|
||||
xhr.upload.addEventListener('progress', (event) => {
|
||||
@@ -309,10 +478,19 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
if (!isCompleted) {
|
||||
isCompleted = true
|
||||
clearTimeout(timeoutId)
|
||||
outerController.signal.removeEventListener('abort', abortHandler)
|
||||
const durationMs = getHighResTime() - startTime
|
||||
if (xhr.status >= 200 && xhr.status < 300) {
|
||||
const fullFileUrl = presignedData.fileInfo.path.startsWith('http')
|
||||
? presignedData.fileInfo.path
|
||||
: `${window.location.origin}${presignedData.fileInfo.path}`
|
||||
logger.info('Direct upload completed', {
|
||||
fileName: file.name,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
durationMs: formatDurationSeconds(durationMs),
|
||||
throughputMbps: calculateThroughputMbps(file.size, durationMs),
|
||||
status: xhr.status,
|
||||
})
|
||||
resolve(createUploadedFile(file.name, fullFileUrl, file.size, file.type, file))
|
||||
} else {
|
||||
logger.error('S3 PUT request failed', {
|
||||
@@ -335,17 +513,18 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
if (!isCompleted) {
|
||||
isCompleted = true
|
||||
clearTimeout(timeoutId)
|
||||
outerController.signal.removeEventListener('abort', abortHandler)
|
||||
const durationMs = getHighResTime() - startTime
|
||||
logger.error('Direct upload network error', {
|
||||
fileName: file.name,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
durationMs: formatDurationSeconds(durationMs),
|
||||
})
|
||||
reject(new DirectUploadError(`Network error uploading ${file.name}`, {}))
|
||||
}
|
||||
})
|
||||
|
||||
xhr.addEventListener('abort', () => {
|
||||
if (!isCompleted) {
|
||||
isCompleted = true
|
||||
clearTimeout(timeoutId)
|
||||
reject(new DirectUploadError(`Upload aborted for ${file.name}`, {}))
|
||||
}
|
||||
})
|
||||
xhr.addEventListener('abort', abortHandler)
|
||||
|
||||
// Start the upload
|
||||
xhr.open('PUT', presignedData.presignedUrl)
|
||||
@@ -365,10 +544,16 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
/**
|
||||
* Upload large file in chunks (multipart upload)
|
||||
*/
|
||||
const uploadFileInChunks = async (file: File, presignedData: any): Promise<UploadedFile> => {
|
||||
const uploadFileInChunks = async (
|
||||
file: File,
|
||||
presignedData: PresignedUploadInfo,
|
||||
timeoutMs: number,
|
||||
fileIndex?: number
|
||||
): Promise<UploadedFile> => {
|
||||
logger.info(
|
||||
`Uploading large file ${file.name} (${(file.size / 1024 / 1024).toFixed(2)}MB) using multipart upload`
|
||||
)
|
||||
const startTime = getHighResTime()
|
||||
|
||||
try {
|
||||
// Step 1: Initiate multipart upload
|
||||
@@ -419,37 +604,76 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
|
||||
// Step 4: Upload parts in parallel (batch them to avoid overwhelming the browser)
|
||||
const uploadedParts: Array<{ ETag: string; PartNumber: number }> = []
|
||||
const PARALLEL_UPLOADS = 3 // Upload 3 parts at a time
|
||||
|
||||
for (let i = 0; i < presignedUrls.length; i += PARALLEL_UPLOADS) {
|
||||
const batch = presignedUrls.slice(i, i + PARALLEL_UPLOADS)
|
||||
const controller = new AbortController()
|
||||
const multipartTimeoutId = setTimeout(() => controller.abort(), timeoutMs)
|
||||
|
||||
const batchPromises = batch.map(async ({ partNumber, url }: any) => {
|
||||
try {
|
||||
const uploadPart = async ({ partNumber, url }: any) => {
|
||||
const start = (partNumber - 1) * chunkSize
|
||||
const end = Math.min(start + chunkSize, file.size)
|
||||
const chunk = file.slice(start, end)
|
||||
|
||||
const uploadResponse = await fetch(url, {
|
||||
method: 'PUT',
|
||||
body: chunk,
|
||||
headers: {
|
||||
'Content-Type': file.type,
|
||||
},
|
||||
})
|
||||
for (let attempt = 0; attempt <= UPLOAD_CONFIG.MULTIPART_MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
const partResponse = await fetch(url, {
|
||||
method: 'PUT',
|
||||
body: chunk,
|
||||
signal: controller.signal,
|
||||
headers: {
|
||||
'Content-Type': file.type,
|
||||
},
|
||||
})
|
||||
|
||||
if (!uploadResponse.ok) {
|
||||
throw new Error(`Failed to upload part ${partNumber}: ${uploadResponse.statusText}`)
|
||||
if (!partResponse.ok) {
|
||||
throw new Error(`Failed to upload part ${partNumber}: ${partResponse.statusText}`)
|
||||
}
|
||||
|
||||
const etag = partResponse.headers.get('ETag') || ''
|
||||
logger.info(`Uploaded part ${partNumber}/${numParts}`)
|
||||
|
||||
if (fileIndex !== undefined) {
|
||||
const partProgress = Math.min(100, Math.round((partNumber / numParts) * 100))
|
||||
setUploadProgress((prev) => ({
|
||||
...prev,
|
||||
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
|
||||
idx === fileIndex ? { ...fs, progress: partProgress } : fs
|
||||
),
|
||||
}))
|
||||
}
|
||||
|
||||
return { ETag: etag.replace(/"/g, ''), PartNumber: partNumber }
|
||||
} catch (partError) {
|
||||
if (attempt >= UPLOAD_CONFIG.MULTIPART_MAX_RETRIES) {
|
||||
throw partError
|
||||
}
|
||||
|
||||
const delay = UPLOAD_CONFIG.RETRY_DELAY_MS * UPLOAD_CONFIG.RETRY_BACKOFF ** attempt
|
||||
logger.warn(
|
||||
`Part ${partNumber} failed (attempt ${attempt + 1}), retrying in ${Math.round(delay / 1000)}s`
|
||||
)
|
||||
await sleep(delay)
|
||||
}
|
||||
}
|
||||
|
||||
// Get ETag from response headers
|
||||
const etag = uploadResponse.headers.get('ETag') || ''
|
||||
logger.info(`Uploaded part ${partNumber}/${numParts}`)
|
||||
throw new Error(`Retries exhausted for part ${partNumber}`)
|
||||
}
|
||||
|
||||
return { ETag: etag.replace(/"/g, ''), PartNumber: partNumber }
|
||||
const partResults = await runWithConcurrency(
|
||||
presignedUrls,
|
||||
UPLOAD_CONFIG.MULTIPART_PART_CONCURRENCY,
|
||||
uploadPart
|
||||
)
|
||||
|
||||
partResults.forEach((result) => {
|
||||
if (result?.status === 'fulfilled') {
|
||||
uploadedParts.push(result.value)
|
||||
} else if (result?.status === 'rejected') {
|
||||
throw result.reason
|
||||
}
|
||||
})
|
||||
|
||||
const batchResults = await Promise.all(batchPromises)
|
||||
uploadedParts.push(...batchResults)
|
||||
} finally {
|
||||
clearTimeout(multipartTimeoutId)
|
||||
}
|
||||
|
||||
// Step 5: Complete multipart upload
|
||||
@@ -470,23 +694,37 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
const { path } = await completeResponse.json()
|
||||
logger.info(`Completed multipart upload for ${file.name}`)
|
||||
|
||||
const durationMs = getHighResTime() - startTime
|
||||
logger.info('Multipart upload metrics', {
|
||||
fileName: file.name,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
parts: uploadedParts.length,
|
||||
durationMs: formatDurationSeconds(durationMs),
|
||||
throughputMbps: calculateThroughputMbps(file.size, durationMs),
|
||||
})
|
||||
|
||||
const fullFileUrl = path.startsWith('http') ? path : `${window.location.origin}${path}`
|
||||
|
||||
return createUploadedFile(file.name, fullFileUrl, file.size, file.type, file)
|
||||
} catch (error) {
|
||||
logger.error(`Multipart upload failed for ${file.name}:`, error)
|
||||
const durationMs = getHighResTime() - startTime
|
||||
logger.warn('Falling back to direct upload after multipart failure', {
|
||||
fileName: file.name,
|
||||
sizeMB: formatMegabytes(file.size),
|
||||
durationMs: formatDurationSeconds(durationMs),
|
||||
})
|
||||
// Fall back to direct upload if multipart fails
|
||||
logger.info('Falling back to direct upload')
|
||||
return uploadFileDirectly(file, presignedData)
|
||||
return uploadFileDirectly(file, presignedData, timeoutMs, new AbortController(), fileIndex)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fallback upload through API
|
||||
*/
|
||||
const uploadFileThroughAPI = async (file: File): Promise<UploadedFile> => {
|
||||
const uploadFileThroughAPI = async (file: File, timeoutMs: number): Promise<UploadedFile> => {
|
||||
const controller = new AbortController()
|
||||
const timeoutId = setTimeout(() => controller.abort(), UPLOAD_CONFIG.UPLOAD_TIMEOUT)
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs)
|
||||
|
||||
try {
|
||||
const formData = new FormData()
|
||||
@@ -559,19 +797,20 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
logger.info(`Starting batch upload of ${files.length} files`)
|
||||
|
||||
try {
|
||||
const BATCH_SIZE = 100 // Process 100 files at a time
|
||||
const batches = []
|
||||
|
||||
// Create all batches
|
||||
for (let batchStart = 0; batchStart < files.length; batchStart += BATCH_SIZE) {
|
||||
const batchFiles = files.slice(batchStart, batchStart + BATCH_SIZE)
|
||||
for (
|
||||
let batchStart = 0;
|
||||
batchStart < files.length;
|
||||
batchStart += UPLOAD_CONFIG.BATCH_REQUEST_SIZE
|
||||
) {
|
||||
const batchFiles = files.slice(batchStart, batchStart + UPLOAD_CONFIG.BATCH_REQUEST_SIZE)
|
||||
const batchIndexOffset = batchStart
|
||||
batches.push({ batchFiles, batchIndexOffset })
|
||||
}
|
||||
|
||||
logger.info(`Starting parallel processing of ${batches.length} batches`)
|
||||
|
||||
// Step 1: Get ALL presigned URLs in parallel
|
||||
const presignedPromises = batches.map(async ({ batchFiles }, batchIndex) => {
|
||||
logger.info(
|
||||
`Getting presigned URLs for batch ${batchIndex + 1}/${batches.length} (${batchFiles.length} files)`
|
||||
@@ -604,9 +843,8 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
const allPresignedData = await Promise.all(presignedPromises)
|
||||
logger.info(`Got all presigned URLs, starting uploads`)
|
||||
|
||||
// Step 2: Upload all files with global concurrency control
|
||||
const allUploads = allPresignedData.flatMap(({ batchFiles, presignedData, batchIndex }) => {
|
||||
const batchIndexOffset = batchIndex * BATCH_SIZE
|
||||
const batchIndexOffset = batchIndex * UPLOAD_CONFIG.BATCH_REQUEST_SIZE
|
||||
|
||||
return batchFiles.map((file, batchFileIndex) => {
|
||||
const fileIndex = batchIndexOffset + batchFileIndex
|
||||
@@ -616,16 +854,14 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
})
|
||||
})
|
||||
|
||||
// Process all uploads with concurrency control
|
||||
for (let i = 0; i < allUploads.length; i += UPLOAD_CONFIG.BATCH_SIZE) {
|
||||
const concurrentBatch = allUploads.slice(i, i + UPLOAD_CONFIG.BATCH_SIZE)
|
||||
|
||||
const uploadPromises = concurrentBatch.map(async ({ file, presigned, fileIndex }) => {
|
||||
const uploadResults = await runWithConcurrency(
|
||||
allUploads,
|
||||
UPLOAD_CONFIG.MAX_PARALLEL_UPLOADS,
|
||||
async ({ file, presigned, fileIndex }) => {
|
||||
if (!presigned) {
|
||||
throw new Error(`No presigned data for file ${file.name}`)
|
||||
}
|
||||
|
||||
// Mark as uploading
|
||||
setUploadProgress((prev) => ({
|
||||
...prev,
|
||||
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
|
||||
@@ -634,10 +870,8 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
}))
|
||||
|
||||
try {
|
||||
// Upload directly to storage
|
||||
const result = await uploadFileDirectly(file, presigned, fileIndex)
|
||||
const result = await uploadSingleFileWithRetry(file, 0, fileIndex, presigned)
|
||||
|
||||
// Mark as completed
|
||||
setUploadProgress((prev) => ({
|
||||
...prev,
|
||||
filesCompleted: prev.filesCompleted + 1,
|
||||
@@ -648,7 +882,6 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
|
||||
return result
|
||||
} catch (error) {
|
||||
// Mark as failed
|
||||
setUploadProgress((prev) => ({
|
||||
...prev,
|
||||
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
|
||||
@@ -656,30 +889,27 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
|
||||
? {
|
||||
...fs,
|
||||
status: 'failed' as const,
|
||||
error: error instanceof Error ? error.message : 'Upload failed',
|
||||
error: getErrorMessage(error),
|
||||
}
|
||||
: fs
|
||||
),
|
||||
}))
|
||||
throw error
|
||||
}
|
||||
})
|
||||
|
||||
const batchResults = await Promise.allSettled(uploadPromises)
|
||||
|
||||
for (let j = 0; j < batchResults.length; j++) {
|
||||
const result = batchResults[j]
|
||||
if (result.status === 'fulfilled') {
|
||||
results.push(result.value)
|
||||
} else {
|
||||
failedFiles.push({
|
||||
file: concurrentBatch[j].file,
|
||||
error:
|
||||
result.reason instanceof Error ? result.reason : new Error(String(result.reason)),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
uploadResults.forEach((result, idx) => {
|
||||
if (result?.status === 'fulfilled') {
|
||||
results.push(result.value)
|
||||
} else if (result?.status === 'rejected') {
|
||||
failedFiles.push({
|
||||
file: allUploads[idx].file,
|
||||
error:
|
||||
result.reason instanceof Error ? result.reason : new Error(String(result.reason)),
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
if (failedFiles.length > 0) {
|
||||
logger.error(`Failed to upload ${failedFiles.length} files`)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import {
|
||||
forwardRef,
|
||||
type KeyboardEvent,
|
||||
useCallback,
|
||||
useEffect,
|
||||
useImperativeHandle,
|
||||
useRef,
|
||||
@@ -41,7 +42,6 @@ import {
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuTrigger,
|
||||
Switch,
|
||||
Textarea,
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
@@ -49,6 +49,7 @@ import {
|
||||
TooltipTrigger,
|
||||
} from '@/components/ui'
|
||||
import { useSession } from '@/lib/auth-client'
|
||||
import { isHosted } from '@/lib/environment'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { useCopilotStore } from '@/stores/copilot/store'
|
||||
@@ -92,6 +93,7 @@ interface UserInputProps {
|
||||
onModeChange?: (mode: 'ask' | 'agent') => void
|
||||
value?: string // Controlled value from outside
|
||||
onChange?: (value: string) => void // Callback when value changes
|
||||
panelWidth?: number // Panel width to adjust truncation
|
||||
}
|
||||
|
||||
interface UserInputRef {
|
||||
@@ -112,6 +114,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
onModeChange,
|
||||
value: controlledValue,
|
||||
onChange: onControlledChange,
|
||||
panelWidth = 308,
|
||||
},
|
||||
ref
|
||||
) => {
|
||||
@@ -179,7 +182,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
const [isLoadingLogs, setIsLoadingLogs] = useState(false)
|
||||
|
||||
const { data: session } = useSession()
|
||||
const { currentChat, workflowId } = useCopilotStore()
|
||||
const { currentChat, workflowId, enabledModels, setEnabledModels } = useCopilotStore()
|
||||
const params = useParams()
|
||||
const workspaceId = params.workspaceId as string
|
||||
// Track per-chat preference for auto-adding workflow context
|
||||
@@ -224,6 +227,30 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
}
|
||||
}, [workflowId])
|
||||
|
||||
// Fetch enabled models when dropdown is opened for the first time
|
||||
const fetchEnabledModelsOnce = useCallback(async () => {
|
||||
if (!isHosted) return
|
||||
if (enabledModels !== null) return // Already loaded
|
||||
|
||||
try {
|
||||
const res = await fetch('/api/copilot/user-models')
|
||||
if (!res.ok) {
|
||||
logger.error('Failed to fetch enabled models')
|
||||
return
|
||||
}
|
||||
const data = await res.json()
|
||||
const modelsMap = data.enabledModels || {}
|
||||
|
||||
// Convert to array for store (API already merged with defaults)
|
||||
const enabledArray = Object.entries(modelsMap)
|
||||
.filter(([_, enabled]) => enabled)
|
||||
.map(([modelId]) => modelId)
|
||||
setEnabledModels(enabledArray)
|
||||
} catch (error) {
|
||||
logger.error('Error fetching enabled models', { error })
|
||||
}
|
||||
}, [enabledModels, setEnabledModels])
|
||||
|
||||
// Track the last chat ID we've seen to detect chat changes
|
||||
const [lastChatId, setLastChatId] = useState<string | undefined>(undefined)
|
||||
// Track if we just sent a message to avoid re-adding context after submit
|
||||
@@ -1780,7 +1807,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
const { selectedModel, agentPrefetch, setSelectedModel, setAgentPrefetch } = useCopilotStore()
|
||||
|
||||
// Model configurations with their display names
|
||||
const modelOptions = [
|
||||
const allModelOptions = [
|
||||
{ value: 'gpt-5-fast', label: 'gpt-5-fast' },
|
||||
{ value: 'gpt-5', label: 'gpt-5' },
|
||||
{ value: 'gpt-5-medium', label: 'gpt-5-medium' },
|
||||
@@ -1793,23 +1820,36 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
{ value: 'claude-4.1-opus', label: 'claude-4.1-opus' },
|
||||
] as const
|
||||
|
||||
// Filter models based on user preferences (only for hosted)
|
||||
const modelOptions =
|
||||
isHosted && enabledModels !== null
|
||||
? allModelOptions.filter((model) => enabledModels.includes(model.value))
|
||||
: allModelOptions
|
||||
|
||||
const getCollapsedModeLabel = () => {
|
||||
const model = modelOptions.find((m) => m.value === selectedModel)
|
||||
return model ? model.label : 'Claude 4.5 Sonnet'
|
||||
return model ? model.label : 'claude-4.5-sonnet'
|
||||
}
|
||||
|
||||
const getModelIcon = () => {
|
||||
const colorClass = !agentPrefetch
|
||||
? 'text-[var(--brand-primary-hover-hex)]'
|
||||
: 'text-muted-foreground'
|
||||
// Only Brain and BrainCircuit models show purple when agentPrefetch is false
|
||||
const isBrainModel = [
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
].includes(selectedModel)
|
||||
const isBrainCircuitModel = ['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(selectedModel)
|
||||
const colorClass =
|
||||
(isBrainModel || isBrainCircuitModel) && !agentPrefetch
|
||||
? 'text-[var(--brand-primary-hover-hex)]'
|
||||
: 'text-muted-foreground'
|
||||
|
||||
// Match the dropdown icon logic exactly
|
||||
if (['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(selectedModel)) {
|
||||
if (isBrainCircuitModel) {
|
||||
return <BrainCircuit className={`h-3 w-3 ${colorClass}`} />
|
||||
}
|
||||
if (
|
||||
['gpt-5', 'gpt-5-medium', 'claude-4-sonnet', 'claude-4.5-sonnet'].includes(selectedModel)
|
||||
) {
|
||||
if (isBrainModel) {
|
||||
return <Brain className={`h-3 w-3 ${colorClass}`} />
|
||||
}
|
||||
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)) {
|
||||
@@ -3068,7 +3108,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
variant='ghost'
|
||||
size='sm'
|
||||
disabled={!onModeChange}
|
||||
className='flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs'
|
||||
className='flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs focus-visible:ring-0 focus-visible:ring-offset-0'
|
||||
>
|
||||
{getModeIcon()}
|
||||
<span>{getModeText()}</span>
|
||||
@@ -3134,191 +3174,183 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
|
||||
</TooltipProvider>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
{
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
variant='ghost'
|
||||
size='sm'
|
||||
className={cn(
|
||||
'flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs',
|
||||
!agentPrefetch
|
||||
? 'border-[var(--brand-primary-hover-hex)] text-[var(--brand-primary-hover-hex)] hover:bg-[color-mix(in_srgb,var(--brand-primary-hover-hex)_8%,transparent)] hover:text-[var(--brand-primary-hover-hex)]'
|
||||
: 'border-border text-foreground'
|
||||
)}
|
||||
title='Choose mode'
|
||||
>
|
||||
{getModelIcon()}
|
||||
<span>
|
||||
{getCollapsedModeLabel()}
|
||||
{!agentPrefetch &&
|
||||
!['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel) && (
|
||||
<span className='ml-1 font-semibold'>MAX</span>
|
||||
)}
|
||||
</span>
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent align='start' side='top' className='max-h-[400px] p-0'>
|
||||
<TooltipProvider delayDuration={100} skipDelayDuration={0}>
|
||||
<div className='w-[220px]'>
|
||||
<div className='p-2 pb-0'>
|
||||
<div className='mb-2 flex items-center justify-between'>
|
||||
<div className='flex items-center gap-1.5'>
|
||||
<span className='font-medium text-xs'>MAX mode</span>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<button
|
||||
type='button'
|
||||
className='h-3.5 w-3.5 rounded text-muted-foreground transition-colors hover:text-foreground'
|
||||
aria-label='MAX mode info'
|
||||
>
|
||||
<Info className='h-3.5 w-3.5' />
|
||||
</button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent
|
||||
side='right'
|
||||
sideOffset={6}
|
||||
align='center'
|
||||
className='max-w-[220px] border bg-popover p-2 text-[11px] text-popover-foreground leading-snug shadow-md'
|
||||
>
|
||||
Significantly increases depth of reasoning
|
||||
<br />
|
||||
<span className='text-[10px] text-muted-foreground italic'>
|
||||
Only available for advanced models
|
||||
</span>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</div>
|
||||
<Switch
|
||||
checked={!agentPrefetch}
|
||||
disabled={['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)}
|
||||
title={
|
||||
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)
|
||||
? 'MAX mode is only available for advanced models'
|
||||
: undefined
|
||||
}
|
||||
onCheckedChange={(checked) => {
|
||||
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel))
|
||||
return
|
||||
setAgentPrefetch(!checked)
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<div className='my-1.5 flex justify-center'>
|
||||
<div className='h-px w-[100%] bg-border' />
|
||||
</div>
|
||||
</div>
|
||||
<div className='max-h-[280px] overflow-y-auto px-2 pb-2'>
|
||||
<div>
|
||||
<div className='mb-1'>
|
||||
<span className='font-medium text-xs'>Model</span>
|
||||
</div>
|
||||
<div className='space-y-2'>
|
||||
{/* Helper function to get icon for a model */}
|
||||
{(() => {
|
||||
const getModelIcon = (modelValue: string) => {
|
||||
if (
|
||||
['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(modelValue)
|
||||
) {
|
||||
return (
|
||||
<BrainCircuit className='h-3 w-3 text-muted-foreground' />
|
||||
)
|
||||
}
|
||||
if (
|
||||
[
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
].includes(modelValue)
|
||||
) {
|
||||
return <Brain className='h-3 w-3 text-muted-foreground' />
|
||||
}
|
||||
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(modelValue)) {
|
||||
return <Zap className='h-3 w-3 text-muted-foreground' />
|
||||
}
|
||||
return <div className='h-3 w-3' />
|
||||
}
|
||||
{(() => {
|
||||
const isBrainModel = [
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
].includes(selectedModel)
|
||||
const isBrainCircuitModel = ['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(
|
||||
selectedModel
|
||||
)
|
||||
const showPurple = (isBrainModel || isBrainCircuitModel) && !agentPrefetch
|
||||
|
||||
const renderModelOption = (
|
||||
option: (typeof modelOptions)[number]
|
||||
) => (
|
||||
<DropdownMenuItem
|
||||
key={option.value}
|
||||
onSelect={() => {
|
||||
setSelectedModel(option.value)
|
||||
// Automatically turn off max mode for fast models (Zap icon)
|
||||
if (
|
||||
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(
|
||||
option.value
|
||||
) &&
|
||||
!agentPrefetch
|
||||
) {
|
||||
setAgentPrefetch(true)
|
||||
}
|
||||
}}
|
||||
className={cn(
|
||||
'flex h-7 items-center gap-1.5 px-2 py-1 text-left text-xs',
|
||||
selectedModel === option.value ? 'bg-muted/50' : ''
|
||||
)}
|
||||
>
|
||||
{getModelIcon(option.value)}
|
||||
<span>{option.label}</span>
|
||||
</DropdownMenuItem>
|
||||
)
|
||||
return (
|
||||
<DropdownMenu
|
||||
onOpenChange={(open) => {
|
||||
if (open) {
|
||||
fetchEnabledModelsOnce()
|
||||
}
|
||||
}}
|
||||
>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
variant='ghost'
|
||||
size='sm'
|
||||
className={cn(
|
||||
'flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs focus-visible:ring-0 focus-visible:ring-offset-0',
|
||||
showPurple
|
||||
? 'border-[var(--brand-primary-hover-hex)] text-[var(--brand-primary-hover-hex)] hover:bg-[color-mix(in_srgb,var(--brand-primary-hover-hex)_8%,transparent)] hover:text-[var(--brand-primary-hover-hex)]'
|
||||
: 'border-border text-foreground'
|
||||
)}
|
||||
title='Choose mode'
|
||||
>
|
||||
{getModelIcon()}
|
||||
<span className={cn(panelWidth < 360 ? 'max-w-[72px] truncate' : '')}>
|
||||
{getCollapsedModeLabel()}
|
||||
{agentPrefetch &&
|
||||
!['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel) && (
|
||||
<span className='ml-1 font-semibold'>Lite</span>
|
||||
)}
|
||||
</span>
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent align='start' side='top' className='max-h-[400px] p-0'>
|
||||
<TooltipProvider delayDuration={100} skipDelayDuration={0}>
|
||||
<div className='w-[220px]'>
|
||||
<div className='max-h-[280px] overflow-y-auto p-2'>
|
||||
<div>
|
||||
<div className='mb-1'>
|
||||
<span className='font-medium text-xs'>Model</span>
|
||||
</div>
|
||||
<div className='space-y-2'>
|
||||
{/* Helper function to get icon for a model */}
|
||||
{(() => {
|
||||
const getModelIcon = (modelValue: string) => {
|
||||
if (
|
||||
['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(modelValue)
|
||||
) {
|
||||
return (
|
||||
<BrainCircuit className='h-3 w-3 text-muted-foreground' />
|
||||
)
|
||||
}
|
||||
if (
|
||||
[
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
].includes(modelValue)
|
||||
) {
|
||||
return <Brain className='h-3 w-3 text-muted-foreground' />
|
||||
}
|
||||
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(modelValue)) {
|
||||
return <Zap className='h-3 w-3 text-muted-foreground' />
|
||||
}
|
||||
return <div className='h-3 w-3' />
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* OpenAI Models */}
|
||||
<div>
|
||||
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
OpenAI
|
||||
</div>
|
||||
<div className='space-y-0.5'>
|
||||
{modelOptions
|
||||
.filter((option) =>
|
||||
[
|
||||
'gpt-5-fast',
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'gpt-5-high',
|
||||
'gpt-4o',
|
||||
'gpt-4.1',
|
||||
'o3',
|
||||
].includes(option.value)
|
||||
)
|
||||
.map(renderModelOption)}
|
||||
</div>
|
||||
</div>
|
||||
const renderModelOption = (
|
||||
option: (typeof modelOptions)[number]
|
||||
) => (
|
||||
<DropdownMenuItem
|
||||
key={option.value}
|
||||
onSelect={() => {
|
||||
setSelectedModel(option.value)
|
||||
// Automatically turn off Lite mode for fast models (Zap icon)
|
||||
if (
|
||||
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(
|
||||
option.value
|
||||
) &&
|
||||
agentPrefetch
|
||||
) {
|
||||
setAgentPrefetch(false)
|
||||
}
|
||||
}}
|
||||
className={cn(
|
||||
'flex h-7 items-center gap-1.5 px-2 py-1 text-left text-xs',
|
||||
selectedModel === option.value ? 'bg-muted/50' : ''
|
||||
)}
|
||||
>
|
||||
{getModelIcon(option.value)}
|
||||
<span>{option.label}</span>
|
||||
</DropdownMenuItem>
|
||||
)
|
||||
|
||||
{/* Anthropic Models */}
|
||||
<div>
|
||||
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
Anthropic
|
||||
return (
|
||||
<>
|
||||
{/* OpenAI Models */}
|
||||
<div>
|
||||
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
OpenAI
|
||||
</div>
|
||||
<div className='space-y-0.5'>
|
||||
{modelOptions
|
||||
.filter((option) =>
|
||||
[
|
||||
'gpt-5-fast',
|
||||
'gpt-5',
|
||||
'gpt-5-medium',
|
||||
'gpt-5-high',
|
||||
'gpt-4o',
|
||||
'gpt-4.1',
|
||||
'o3',
|
||||
].includes(option.value)
|
||||
)
|
||||
.map(renderModelOption)}
|
||||
</div>
|
||||
</div>
|
||||
<div className='space-y-0.5'>
|
||||
{modelOptions
|
||||
.filter((option) =>
|
||||
[
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
'claude-4.1-opus',
|
||||
].includes(option.value)
|
||||
)
|
||||
.map(renderModelOption)}
|
||||
|
||||
{/* Anthropic Models */}
|
||||
<div>
|
||||
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
Anthropic
|
||||
</div>
|
||||
<div className='space-y-0.5'>
|
||||
{modelOptions
|
||||
.filter((option) =>
|
||||
[
|
||||
'claude-4-sonnet',
|
||||
'claude-4.5-sonnet',
|
||||
'claude-4.1-opus',
|
||||
].includes(option.value)
|
||||
)
|
||||
.map(renderModelOption)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
})()}
|
||||
|
||||
{/* More Models Button (only for hosted) */}
|
||||
{isHosted && (
|
||||
<div className='mt-1 border-t pt-1'>
|
||||
<button
|
||||
type='button'
|
||||
onClick={() => {
|
||||
// Dispatch event to open settings modal on copilot tab
|
||||
window.dispatchEvent(
|
||||
new CustomEvent('open-settings', {
|
||||
detail: { tab: 'copilot' },
|
||||
})
|
||||
)
|
||||
}}
|
||||
className='w-full rounded-sm px-2 py-1.5 text-left text-muted-foreground text-xs transition-colors hover:bg-muted/50'
|
||||
>
|
||||
More Models...
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</TooltipProvider>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
}
|
||||
</TooltipProvider>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
)
|
||||
})()}
|
||||
<Button
|
||||
variant='ghost'
|
||||
size='icon'
|
||||
|
||||
@@ -440,6 +440,7 @@ export const Copilot = forwardRef<CopilotRef, CopilotProps>(({ panelWidth }, ref
|
||||
onModeChange={setMode}
|
||||
value={inputValue}
|
||||
onChange={setInputValue}
|
||||
panelWidth={panelWidth}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
|
||||
@@ -155,60 +155,30 @@ export function CreateMenu({ onCreateWorkflow, isCreatingWorkflow = false }: Cre
|
||||
workspaceId,
|
||||
})
|
||||
|
||||
// Load the imported workflow state into stores immediately (optimistic update)
|
||||
const { useWorkflowStore } = await import('@/stores/workflows/workflow/store')
|
||||
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
||||
|
||||
// Set the workflow as active in the registry to prevent reload
|
||||
useWorkflowRegistry.setState({ activeWorkflowId: newWorkflowId })
|
||||
|
||||
// Set the workflow state immediately
|
||||
useWorkflowStore.setState({
|
||||
blocks: workflowData.blocks || {},
|
||||
edges: workflowData.edges || [],
|
||||
loops: workflowData.loops || {},
|
||||
parallels: workflowData.parallels || {},
|
||||
lastSaved: Date.now(),
|
||||
})
|
||||
|
||||
// Initialize subblock store with the imported blocks
|
||||
useSubBlockStore.getState().initializeFromWorkflow(newWorkflowId, workflowData.blocks || {})
|
||||
|
||||
// Also set subblock values if they exist in the imported data
|
||||
const subBlockStore = useSubBlockStore.getState()
|
||||
Object.entries(workflowData.blocks).forEach(([blockId, block]: [string, any]) => {
|
||||
if (block.subBlocks) {
|
||||
Object.entries(block.subBlocks).forEach(([subBlockId, subBlock]: [string, any]) => {
|
||||
if (subBlock.value !== null && subBlock.value !== undefined) {
|
||||
subBlockStore.setValue(blockId, subBlockId, subBlock.value)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// Navigate to the new workflow after setting state
|
||||
router.push(`/workspace/${workspaceId}/w/${newWorkflowId}`)
|
||||
|
||||
logger.info('Workflow imported successfully from JSON')
|
||||
|
||||
// Save to database in the background (fire and forget)
|
||||
fetch(`/api/workflows/${newWorkflowId}/state`, {
|
||||
// Save workflow state to database first
|
||||
const response = await fetch(`/api/workflows/${newWorkflowId}/state`, {
|
||||
method: 'PUT',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(workflowData),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.ok) {
|
||||
logger.error('Failed to persist imported workflow to database')
|
||||
} else {
|
||||
logger.info('Imported workflow persisted to database')
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
logger.error('Failed to persist imported workflow:', error)
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
logger.error('Failed to persist imported workflow to database')
|
||||
throw new Error('Failed to save workflow')
|
||||
}
|
||||
|
||||
logger.info('Imported workflow persisted to database')
|
||||
|
||||
// Pre-load the workflow state before navigating
|
||||
const { setActiveWorkflow } = useWorkflowRegistry.getState()
|
||||
await setActiveWorkflow(newWorkflowId)
|
||||
|
||||
// Navigate to the new workflow (replace to avoid history entry)
|
||||
router.replace(`/workspace/${workspaceId}/w/${newWorkflowId}`)
|
||||
|
||||
logger.info('Workflow imported successfully from JSON')
|
||||
} catch (error) {
|
||||
logger.error('Failed to import workflow:', { error })
|
||||
} finally {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { Check, Copy, Plus, Search } from 'lucide-react'
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import { Brain, BrainCircuit, Check, Copy, Plus, Zap } from 'lucide-react'
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogAction,
|
||||
@@ -10,11 +10,12 @@ import {
|
||||
AlertDialogHeader,
|
||||
AlertDialogTitle,
|
||||
Button,
|
||||
Input,
|
||||
Label,
|
||||
Skeleton,
|
||||
Switch,
|
||||
} from '@/components/ui'
|
||||
import { isHosted } from '@/lib/environment'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import { useCopilotStore } from '@/stores/copilot/store'
|
||||
|
||||
const logger = createLogger('CopilotSettings')
|
||||
|
||||
@@ -23,26 +24,78 @@ interface CopilotKey {
|
||||
displayKey: string
|
||||
}
|
||||
|
||||
interface ModelOption {
|
||||
value: string
|
||||
label: string
|
||||
icon: 'brain' | 'brainCircuit' | 'zap'
|
||||
}
|
||||
|
||||
const OPENAI_MODELS: ModelOption[] = [
|
||||
// Zap models first
|
||||
{ value: 'gpt-4o', label: 'gpt-4o', icon: 'zap' },
|
||||
{ value: 'gpt-4.1', label: 'gpt-4.1', icon: 'zap' },
|
||||
{ value: 'gpt-5-fast', label: 'gpt-5-fast', icon: 'zap' },
|
||||
// Brain models
|
||||
{ value: 'gpt-5', label: 'gpt-5', icon: 'brain' },
|
||||
{ value: 'gpt-5-medium', label: 'gpt-5-medium', icon: 'brain' },
|
||||
// BrainCircuit models
|
||||
{ value: 'gpt-5-high', label: 'gpt-5-high', icon: 'brainCircuit' },
|
||||
{ value: 'o3', label: 'o3', icon: 'brainCircuit' },
|
||||
]
|
||||
|
||||
const ANTHROPIC_MODELS: ModelOption[] = [
|
||||
// Brain models
|
||||
{ value: 'claude-4-sonnet', label: 'claude-4-sonnet', icon: 'brain' },
|
||||
{ value: 'claude-4.5-sonnet', label: 'claude-4.5-sonnet', icon: 'brain' },
|
||||
// BrainCircuit models
|
||||
{ value: 'claude-4.1-opus', label: 'claude-4.1-opus', icon: 'brainCircuit' },
|
||||
]
|
||||
|
||||
const ALL_MODELS: ModelOption[] = [...OPENAI_MODELS, ...ANTHROPIC_MODELS]
|
||||
|
||||
// Default enabled/disabled state for all models
|
||||
const DEFAULT_ENABLED_MODELS: Record<string, boolean> = {
|
||||
'gpt-4o': false,
|
||||
'gpt-4.1': false,
|
||||
'gpt-5-fast': false,
|
||||
'gpt-5': true,
|
||||
'gpt-5-medium': true,
|
||||
'gpt-5-high': false,
|
||||
o3: true,
|
||||
'claude-4-sonnet': true,
|
||||
'claude-4.5-sonnet': true,
|
||||
'claude-4.1-opus': true,
|
||||
}
|
||||
|
||||
const getModelIcon = (iconType: 'brain' | 'brainCircuit' | 'zap') => {
|
||||
switch (iconType) {
|
||||
case 'brainCircuit':
|
||||
return <BrainCircuit className='h-3.5 w-3.5 text-muted-foreground' />
|
||||
case 'brain':
|
||||
return <Brain className='h-3.5 w-3.5 text-muted-foreground' />
|
||||
case 'zap':
|
||||
return <Zap className='h-3.5 w-3.5 text-muted-foreground' />
|
||||
}
|
||||
}
|
||||
|
||||
export function Copilot() {
|
||||
const [keys, setKeys] = useState<CopilotKey[]>([])
|
||||
const [isLoading, setIsLoading] = useState(true)
|
||||
const [searchTerm, setSearchTerm] = useState('')
|
||||
const [enabledModelsMap, setEnabledModelsMap] = useState<Record<string, boolean>>({})
|
||||
const [isModelsLoading, setIsModelsLoading] = useState(true)
|
||||
const hasFetchedModels = useRef(false)
|
||||
|
||||
const { setEnabledModels: setStoreEnabledModels } = useCopilotStore()
|
||||
|
||||
// Create flow state
|
||||
const [showNewKeyDialog, setShowNewKeyDialog] = useState(false)
|
||||
const [newKey, setNewKey] = useState<string | null>(null)
|
||||
const [isCreatingKey] = useState(false)
|
||||
const [newKeyCopySuccess, setNewKeyCopySuccess] = useState(false)
|
||||
|
||||
// Delete flow state
|
||||
const [deleteKey, setDeleteKey] = useState<CopilotKey | null>(null)
|
||||
const [showDeleteDialog, setShowDeleteDialog] = useState(false)
|
||||
|
||||
// Filter keys based on search term (by masked display value)
|
||||
const filteredKeys = keys.filter((key) =>
|
||||
key.displayKey.toLowerCase().includes(searchTerm.toLowerCase())
|
||||
)
|
||||
|
||||
const fetchKeys = useCallback(async () => {
|
||||
try {
|
||||
setIsLoading(true)
|
||||
@@ -58,9 +111,41 @@ export function Copilot() {
|
||||
}
|
||||
}, [])
|
||||
|
||||
const fetchEnabledModels = useCallback(async () => {
|
||||
if (hasFetchedModels.current) return
|
||||
hasFetchedModels.current = true
|
||||
|
||||
try {
|
||||
setIsModelsLoading(true)
|
||||
const res = await fetch('/api/copilot/user-models')
|
||||
if (!res.ok) throw new Error(`Failed to fetch: ${res.status}`)
|
||||
const data = await res.json()
|
||||
const modelsMap = data.enabledModels || DEFAULT_ENABLED_MODELS
|
||||
|
||||
setEnabledModelsMap(modelsMap)
|
||||
|
||||
// Convert to array for store (API already merged with defaults)
|
||||
const enabledArray = Object.entries(modelsMap)
|
||||
.filter(([_, enabled]) => enabled)
|
||||
.map(([modelId]) => modelId)
|
||||
setStoreEnabledModels(enabledArray)
|
||||
} catch (error) {
|
||||
logger.error('Failed to fetch enabled models', { error })
|
||||
setEnabledModelsMap(DEFAULT_ENABLED_MODELS)
|
||||
setStoreEnabledModels(
|
||||
Object.keys(DEFAULT_ENABLED_MODELS).filter((key) => DEFAULT_ENABLED_MODELS[key])
|
||||
)
|
||||
} finally {
|
||||
setIsModelsLoading(false)
|
||||
}
|
||||
}, [setStoreEnabledModels])
|
||||
|
||||
useEffect(() => {
|
||||
fetchKeys()
|
||||
}, [fetchKeys])
|
||||
if (isHosted) {
|
||||
fetchKeys()
|
||||
}
|
||||
fetchEnabledModels()
|
||||
}, [])
|
||||
|
||||
const onGenerate = async () => {
|
||||
try {
|
||||
@@ -102,63 +187,97 @@ export function Copilot() {
|
||||
}
|
||||
}
|
||||
|
||||
const onCopy = async (value: string, keyId?: string) => {
|
||||
const onCopy = async (value: string) => {
|
||||
try {
|
||||
await navigator.clipboard.writeText(value)
|
||||
if (!keyId) {
|
||||
setNewKeyCopySuccess(true)
|
||||
setTimeout(() => setNewKeyCopySuccess(false), 1500)
|
||||
}
|
||||
setNewKeyCopySuccess(true)
|
||||
setTimeout(() => setNewKeyCopySuccess(false), 1500)
|
||||
} catch (error) {
|
||||
logger.error('Copy failed', { error })
|
||||
}
|
||||
}
|
||||
|
||||
const toggleModel = async (modelValue: string, enabled: boolean) => {
|
||||
const newModelsMap = { ...enabledModelsMap, [modelValue]: enabled }
|
||||
setEnabledModelsMap(newModelsMap)
|
||||
|
||||
// Convert to array for store
|
||||
const enabledArray = Object.entries(newModelsMap)
|
||||
.filter(([_, isEnabled]) => isEnabled)
|
||||
.map(([modelId]) => modelId)
|
||||
setStoreEnabledModels(enabledArray)
|
||||
|
||||
try {
|
||||
const res = await fetch('/api/copilot/user-models', {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ enabledModels: newModelsMap }),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error('Failed to update models')
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Failed to update enabled models', { error })
|
||||
// Revert on error
|
||||
setEnabledModelsMap(enabledModelsMap)
|
||||
const revertedArray = Object.entries(enabledModelsMap)
|
||||
.filter(([_, isEnabled]) => isEnabled)
|
||||
.map(([modelId]) => modelId)
|
||||
setStoreEnabledModels(revertedArray)
|
||||
}
|
||||
}
|
||||
|
||||
const enabledCount = Object.values(enabledModelsMap).filter(Boolean).length
|
||||
const totalCount = ALL_MODELS.length
|
||||
|
||||
return (
|
||||
<div className='relative flex h-full flex-col'>
|
||||
{/* Fixed Header */}
|
||||
<div className='px-6 pt-4 pb-2'>
|
||||
{/* Search Input */}
|
||||
{isLoading ? (
|
||||
<Skeleton className='h-9 w-56 rounded-lg' />
|
||||
) : (
|
||||
<div className='flex h-9 w-56 items-center gap-2 rounded-lg border bg-transparent pr-2 pl-3'>
|
||||
<Search className='h-4 w-4 flex-shrink-0 text-muted-foreground' strokeWidth={2} />
|
||||
<Input
|
||||
placeholder='Search API keys...'
|
||||
value={searchTerm}
|
||||
onChange={(e) => setSearchTerm(e.target.value)}
|
||||
className='flex-1 border-0 bg-transparent px-0 font-[380] font-sans text-base text-foreground leading-none placeholder:text-muted-foreground focus-visible:ring-0 focus-visible:ring-offset-0'
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* Sticky Header with API Keys (only for hosted) */}
|
||||
{isHosted && (
|
||||
<div className='sticky top-0 z-10 border-b bg-background px-6 py-4'>
|
||||
<div className='space-y-3'>
|
||||
{/* API Keys Header */}
|
||||
<div className='flex items-center justify-between'>
|
||||
<div>
|
||||
<h3 className='font-semibold text-foreground text-sm'>API Keys</h3>
|
||||
<p className='text-muted-foreground text-xs'>
|
||||
Generate keys for programmatic access
|
||||
</p>
|
||||
</div>
|
||||
<Button
|
||||
onClick={onGenerate}
|
||||
variant='ghost'
|
||||
size='sm'
|
||||
className='h-8 rounded-[8px] border bg-background px-3 shadow-xs hover:bg-muted focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0'
|
||||
disabled={isLoading}
|
||||
>
|
||||
<Plus className='h-3.5 w-3.5 stroke-[2px]' />
|
||||
Create
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Scrollable Content */}
|
||||
<div className='scrollbar-thin scrollbar-thumb-muted scrollbar-track-transparent min-h-0 flex-1 overflow-y-auto px-6'>
|
||||
<div className='h-full space-y-2 py-2'>
|
||||
{isLoading ? (
|
||||
{/* API Keys List */}
|
||||
<div className='space-y-2'>
|
||||
<CopilotKeySkeleton />
|
||||
<CopilotKeySkeleton />
|
||||
<CopilotKeySkeleton />
|
||||
</div>
|
||||
) : keys.length === 0 ? (
|
||||
<div className='flex h-full items-center justify-center text-muted-foreground text-sm'>
|
||||
Click "Generate Key" below to get started
|
||||
</div>
|
||||
) : (
|
||||
<div className='space-y-2'>
|
||||
{filteredKeys.map((k) => (
|
||||
<div key={k.id} className='flex flex-col gap-2'>
|
||||
<Label className='font-normal text-muted-foreground text-xs uppercase'>
|
||||
Copilot API Key
|
||||
</Label>
|
||||
<div className='flex items-center justify-between gap-4'>
|
||||
<div className='flex items-center gap-3'>
|
||||
<div className='flex h-8 items-center rounded-[8px] bg-muted px-3'>
|
||||
<code className='font-mono text-foreground text-xs'>{k.displayKey}</code>
|
||||
</div>
|
||||
{isLoading ? (
|
||||
<>
|
||||
<CopilotKeySkeleton />
|
||||
<CopilotKeySkeleton />
|
||||
</>
|
||||
) : keys.length === 0 ? (
|
||||
<div className='py-3 text-center text-muted-foreground text-xs'>
|
||||
No API keys yet
|
||||
</div>
|
||||
) : (
|
||||
keys.map((k) => (
|
||||
<div
|
||||
key={k.id}
|
||||
className='flex items-center justify-between gap-4 rounded-lg border bg-muted/30 px-3 py-2'
|
||||
>
|
||||
<div className='flex min-w-0 items-center gap-3'>
|
||||
<code className='truncate font-mono text-foreground text-xs'>
|
||||
{k.displayKey}
|
||||
</code>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
@@ -168,44 +287,103 @@ export function Copilot() {
|
||||
setDeleteKey(k)
|
||||
setShowDeleteDialog(true)
|
||||
}}
|
||||
className='h-8 text-muted-foreground hover:text-foreground'
|
||||
className='h-7 flex-shrink-0 text-muted-foreground text-xs hover:text-foreground'
|
||||
>
|
||||
Delete
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
{/* Show message when search has no results but there are keys */}
|
||||
{searchTerm.trim() && filteredKeys.length === 0 && keys.length > 0 && (
|
||||
<div className='py-8 text-center text-muted-foreground text-sm'>
|
||||
No API keys found matching "{searchTerm}"
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Footer */}
|
||||
<div className='bg-background'>
|
||||
<div className='flex w-full items-center justify-between px-6 py-4'>
|
||||
{isLoading ? (
|
||||
<>
|
||||
<Skeleton className='h-9 w-[117px] rounded-[8px]' />
|
||||
<div className='w-[108px]' />
|
||||
</>
|
||||
{/* Scrollable Content - Models Section */}
|
||||
<div className='scrollbar-thin scrollbar-thumb-muted scrollbar-track-transparent flex-1 overflow-y-auto px-6 py-4'>
|
||||
<div className='space-y-3'>
|
||||
{/* Models Header */}
|
||||
<div>
|
||||
<h3 className='font-semibold text-foreground text-sm'>Models</h3>
|
||||
<div className='text-muted-foreground text-xs'>
|
||||
{isModelsLoading ? (
|
||||
<Skeleton className='mt-0.5 h-3 w-32' />
|
||||
) : (
|
||||
<span>
|
||||
{enabledCount} of {totalCount} enabled
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Models List */}
|
||||
{isModelsLoading ? (
|
||||
<div className='space-y-2'>
|
||||
{[1, 2, 3, 4, 5].map((i) => (
|
||||
<div key={i} className='flex items-center justify-between py-1.5'>
|
||||
<Skeleton className='h-4 w-32' />
|
||||
<Skeleton className='h-5 w-9 rounded-full' />
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<Button
|
||||
onClick={onGenerate}
|
||||
variant='ghost'
|
||||
className='h-9 rounded-[8px] border bg-background px-3 shadow-xs hover:bg-muted focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0'
|
||||
disabled={isLoading}
|
||||
>
|
||||
<Plus className='h-4 w-4 stroke-[2px]' />
|
||||
Create Key
|
||||
</Button>
|
||||
</>
|
||||
<div className='space-y-4'>
|
||||
{/* OpenAI Models */}
|
||||
<div>
|
||||
<div className='mb-2 px-2 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
OpenAI
|
||||
</div>
|
||||
<div className='space-y-1'>
|
||||
{OPENAI_MODELS.map((model) => {
|
||||
const isEnabled = enabledModelsMap[model.value] ?? false
|
||||
return (
|
||||
<div
|
||||
key={model.value}
|
||||
className='-mx-2 flex items-center justify-between rounded px-2 py-1.5 hover:bg-muted/50'
|
||||
>
|
||||
<div className='flex items-center gap-2'>
|
||||
{getModelIcon(model.icon)}
|
||||
<span className='text-foreground text-sm'>{model.label}</span>
|
||||
</div>
|
||||
<Switch
|
||||
checked={isEnabled}
|
||||
onCheckedChange={(checked) => toggleModel(model.value, checked)}
|
||||
className='scale-90'
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Anthropic Models */}
|
||||
<div>
|
||||
<div className='mb-2 px-2 font-medium text-[10px] text-muted-foreground uppercase'>
|
||||
Anthropic
|
||||
</div>
|
||||
<div className='space-y-1'>
|
||||
{ANTHROPIC_MODELS.map((model) => {
|
||||
const isEnabled = enabledModelsMap[model.value] ?? false
|
||||
return (
|
||||
<div
|
||||
key={model.value}
|
||||
className='-mx-2 flex items-center justify-between rounded px-2 py-1.5 hover:bg-muted/50'
|
||||
>
|
||||
<div className='flex items-center gap-2'>
|
||||
{getModelIcon(model.icon)}
|
||||
<span className='text-foreground text-sm'>{model.label}</span>
|
||||
</div>
|
||||
<Switch
|
||||
checked={isEnabled}
|
||||
onCheckedChange={(checked) => toggleModel(model.value, checked)}
|
||||
className='scale-90'
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
@@ -292,15 +470,9 @@ export function Copilot() {
|
||||
|
||||
function CopilotKeySkeleton() {
|
||||
return (
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Skeleton className='h-4 w-32' />
|
||||
<div className='flex items-center justify-between gap-4'>
|
||||
<div className='flex items-center gap-3'>
|
||||
<Skeleton className='h-8 w-20 rounded-[8px]' />
|
||||
<Skeleton className='h-4 w-24' />
|
||||
</div>
|
||||
<Skeleton className='h-8 w-16' />
|
||||
</div>
|
||||
<div className='flex items-center justify-between gap-4 rounded-lg border bg-muted/30 px-3 py-2'>
|
||||
<Skeleton className='h-4 w-48' />
|
||||
<Skeleton className='h-7 w-14' />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -96,7 +96,7 @@ const allNavigationItems: NavigationItem[] = [
|
||||
},
|
||||
{
|
||||
id: 'copilot',
|
||||
label: 'Copilot Keys',
|
||||
label: 'Copilot',
|
||||
icon: Bot,
|
||||
},
|
||||
{
|
||||
@@ -163,9 +163,6 @@ export function SettingsNavigation({
|
||||
}, [userId, isHosted])
|
||||
|
||||
const navigationItems = allNavigationItems.filter((item) => {
|
||||
if (item.id === 'copilot' && !isHosted) {
|
||||
return false
|
||||
}
|
||||
if (item.hideWhenBillingDisabled && !isBillingEnabled) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import { useEffect, useRef, useState } from 'react'
|
||||
import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui'
|
||||
import { getEnv, isTruthy } from '@/lib/env'
|
||||
import { isHosted } from '@/lib/environment'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import {
|
||||
Account,
|
||||
@@ -181,7 +180,7 @@ export function SettingsModal({ open, onOpenChange }: SettingsModalProps) {
|
||||
<SSO />
|
||||
</div>
|
||||
)}
|
||||
{isHosted && activeSection === 'copilot' && (
|
||||
{activeSection === 'copilot' && (
|
||||
<div className='h-full'>
|
||||
<Copilot />
|
||||
</div>
|
||||
|
||||
@@ -2,6 +2,7 @@ import { useCallback, useEffect, useRef } from 'react'
|
||||
import type { Edge } from 'reactflow'
|
||||
import { useSession } from '@/lib/auth-client'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
|
||||
import { getBlock } from '@/blocks'
|
||||
import { resolveOutputType } from '@/blocks/utils'
|
||||
import { useSocket } from '@/contexts/socket-context'
|
||||
@@ -761,7 +762,11 @@ export function useCollaborativeWorkflow() {
|
||||
})
|
||||
}
|
||||
|
||||
const outputs = resolveOutputType(blockConfig.outputs)
|
||||
// Get outputs based on trigger mode
|
||||
const isTriggerMode = triggerMode || false
|
||||
const outputs = isTriggerMode
|
||||
? getBlockOutputs(type, subBlocks, isTriggerMode)
|
||||
: resolveOutputType(blockConfig.outputs)
|
||||
|
||||
const completeBlockData = {
|
||||
id,
|
||||
@@ -775,7 +780,7 @@ export function useCollaborativeWorkflow() {
|
||||
horizontalHandles: true,
|
||||
isWide: false,
|
||||
advancedMode: false,
|
||||
triggerMode: triggerMode || false,
|
||||
triggerMode: isTriggerMode,
|
||||
height: 0, // Default height, will be set by the UI
|
||||
parentId,
|
||||
extent,
|
||||
|
||||
@@ -137,15 +137,29 @@ export async function handleInvoicePaymentSucceeded(event: Stripe.Event) {
|
||||
|
||||
/**
|
||||
* Handle invoice payment failed webhook
|
||||
* This is triggered when a user's payment fails for a usage billing invoice
|
||||
* This is triggered when a user's payment fails for any invoice (subscription or overage)
|
||||
*/
|
||||
export async function handleInvoicePaymentFailed(event: Stripe.Event) {
|
||||
try {
|
||||
const invoice = event.data.object as Stripe.Invoice
|
||||
|
||||
// Check if this is an overage billing invoice
|
||||
if (invoice.metadata?.type !== 'overage_billing') {
|
||||
logger.info('Ignoring non-overage billing invoice payment failure', { invoiceId: invoice.id })
|
||||
const isOverageInvoice = invoice.metadata?.type === 'overage_billing'
|
||||
let stripeSubscriptionId: string | undefined
|
||||
|
||||
if (isOverageInvoice) {
|
||||
// Overage invoices store subscription ID in metadata
|
||||
stripeSubscriptionId = invoice.metadata?.subscriptionId as string | undefined
|
||||
} else {
|
||||
// Regular subscription invoices have it in parent.subscription_details
|
||||
const subscription = invoice.parent?.subscription_details?.subscription
|
||||
stripeSubscriptionId = typeof subscription === 'string' ? subscription : subscription?.id
|
||||
}
|
||||
|
||||
if (!stripeSubscriptionId) {
|
||||
logger.info('No subscription found on invoice; skipping payment failed handler', {
|
||||
invoiceId: invoice.id,
|
||||
isOverageInvoice,
|
||||
})
|
||||
return
|
||||
}
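The two extraction paths above can be read as one small helper. This is a hedged sketch only: the helper name is hypothetical, while the field accesses mirror the diff exactly.

function getInvoiceSubscriptionId(invoice: Stripe.Invoice): string | undefined {
  if (invoice.metadata?.type === 'overage_billing') {
    // Overage invoices are created manually, so the subscription ID lives in metadata
    return invoice.metadata?.subscriptionId as string | undefined
  }
  // Regular subscription invoices carry it under parent.subscription_details
  const subscription = invoice.parent?.subscription_details?.subscription
  return typeof subscription === 'string' ? subscription : subscription?.id
}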
|
||||
|
||||
@@ -154,7 +168,7 @@ export async function handleInvoicePaymentFailed(event: Stripe.Event) {
|
||||
const billingPeriod = invoice.metadata?.billingPeriod || 'unknown'
|
||||
const attemptCount = invoice.attempt_count || 1
|
||||
|
||||
logger.warn('Overage billing invoice payment failed', {
|
||||
logger.warn('Invoice payment failed', {
|
||||
invoiceId: invoice.id,
|
||||
customerId,
|
||||
failedAmount,
|
||||
@@ -162,47 +176,59 @@ export async function handleInvoicePaymentFailed(event: Stripe.Event) {
|
||||
attemptCount,
|
||||
customerEmail: invoice.customer_email,
|
||||
hostedInvoiceUrl: invoice.hosted_invoice_url,
|
||||
isOverageInvoice,
|
||||
invoiceType: isOverageInvoice ? 'overage' : 'subscription',
|
||||
})
|
||||
|
||||
// Implement dunning management logic here
|
||||
// For example: suspend service after multiple failures, notify admins, etc.
|
||||
// Block users after first payment failure
|
||||
if (attemptCount >= 1) {
|
||||
logger.error('Multiple payment failures for overage billing', {
|
||||
logger.error('Payment failure - blocking users', {
|
||||
invoiceId: invoice.id,
|
||||
customerId,
|
||||
attemptCount,
|
||||
isOverageInvoice,
|
||||
stripeSubscriptionId,
|
||||
})
|
||||
// Block all users under this customer (org members or individual)
|
||||
// Overage invoices are manual invoices without parent.subscription_details
|
||||
// We store the subscription ID in metadata when creating them
|
||||
const stripeSubscriptionId = invoice.metadata?.subscriptionId as string | undefined
|
||||
if (stripeSubscriptionId) {
|
||||
const records = await db
|
||||
.select()
|
||||
.from(subscriptionTable)
|
||||
.where(eq(subscriptionTable.stripeSubscriptionId, stripeSubscriptionId))
|
||||
.limit(1)
|
||||
|
||||
if (records.length > 0) {
|
||||
const sub = records[0]
|
||||
if (sub.plan === 'team' || sub.plan === 'enterprise') {
|
||||
const members = await db
|
||||
.select({ userId: member.userId })
|
||||
.from(member)
|
||||
.where(eq(member.organizationId, sub.referenceId))
|
||||
for (const m of members) {
|
||||
await db
|
||||
.update(userStats)
|
||||
.set({ billingBlocked: true })
|
||||
.where(eq(userStats.userId, m.userId))
|
||||
}
|
||||
} else {
|
||||
const records = await db
|
||||
.select()
|
||||
.from(subscriptionTable)
|
||||
.where(eq(subscriptionTable.stripeSubscriptionId, stripeSubscriptionId))
|
||||
.limit(1)
|
||||
|
||||
if (records.length > 0) {
|
||||
const sub = records[0]
|
||||
if (sub.plan === 'team' || sub.plan === 'enterprise') {
|
||||
const members = await db
|
||||
.select({ userId: member.userId })
|
||||
.from(member)
|
||||
.where(eq(member.organizationId, sub.referenceId))
|
||||
for (const m of members) {
|
||||
await db
|
||||
.update(userStats)
|
||||
.set({ billingBlocked: true })
|
||||
.where(eq(userStats.userId, sub.referenceId))
|
||||
.where(eq(userStats.userId, m.userId))
|
||||
}
|
||||
logger.info('Blocked team/enterprise members due to payment failure', {
|
||||
organizationId: sub.referenceId,
|
||||
memberCount: members.length,
|
||||
isOverageInvoice,
|
||||
})
|
||||
} else {
|
||||
await db
|
||||
.update(userStats)
|
||||
.set({ billingBlocked: true })
|
||||
.where(eq(userStats.userId, sub.referenceId))
|
||||
logger.info('Blocked user due to payment failure', {
|
||||
userId: sub.referenceId,
|
||||
isOverageInvoice,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
logger.warn('Subscription not found in database for failed payment', {
|
||||
stripeSubscriptionId,
|
||||
invoiceId: invoice.id,
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
import fs from 'fs/promises'
|
||||
import path from 'path'
|
||||
import { generateEmbeddings } from '@/lib/embeddings/utils'
|
||||
import { isDev } from '@/lib/environment'
|
||||
import { TextChunker } from '@/lib/knowledge/documents/chunker'
|
||||
import type { DocChunk, DocsChunkerOptions, HeaderInfo } from '@/lib/knowledge/documents/types'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import { TextChunker } from './text-chunker'
|
||||
import type { DocChunk, DocsChunkerOptions } from './types'
|
||||
|
||||
interface HeaderInfo {
|
||||
level: number
|
||||
text: string
|
||||
slug?: string
|
||||
anchor?: string
|
||||
position?: number
|
||||
}
|
||||
|
||||
interface Frontmatter {
|
||||
title?: string
|
||||
@@ -29,7 +36,7 @@ export class DocsChunker {
|
||||
overlap: options.overlap ?? 50,
|
||||
})
|
||||
// Use localhost docs in development, production docs otherwise
|
||||
this.baseUrl = options.baseUrl ?? (isDev ? 'http://localhost:3001' : 'https://docs.sim.ai')
|
||||
this.baseUrl = options.baseUrl ?? 'https://docs.sim.ai'
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -108,9 +115,7 @@ export class DocsChunker {
|
||||
metadata: {
|
||||
startIndex: chunkStart,
|
||||
endIndex: chunkEnd,
|
||||
hasFrontmatter: i === 0 && content.startsWith('---'),
|
||||
documentTitle: frontmatter.title,
|
||||
documentDescription: frontmatter.description,
|
||||
title: frontmatter.title,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -200,7 +205,7 @@ export class DocsChunker {
|
||||
let relevantHeader: HeaderInfo | null = null
|
||||
|
||||
for (const header of headers) {
|
||||
if (header.position <= position) {
|
||||
if (header.position !== undefined && header.position <= position) {
|
||||
relevantHeader = header
|
||||
} else {
|
||||
break
|
||||
@@ -285,53 +290,6 @@ export class DocsChunker {
|
||||
return { data, content: markdownContent }
|
||||
}
|
||||
|
||||
/**
|
||||
* Split content by headers to respect document structure
|
||||
*/
|
||||
private splitByHeaders(
|
||||
content: string
|
||||
): Array<{ header: string | null; content: string; level: number }> {
|
||||
const lines = content.split('\n')
|
||||
const sections: Array<{ header: string | null; content: string; level: number }> = []
|
||||
|
||||
let currentHeader: string | null = null
|
||||
let currentLevel = 0
|
||||
let currentContent: string[] = []
|
||||
|
||||
for (const line of lines) {
|
||||
const headerMatch = line.match(/^(#{1,3})\s+(.+)$/) // Only split on H1-H3, not H4-H6
|
||||
|
||||
if (headerMatch) {
|
||||
// Save previous section
|
||||
if (currentContent.length > 0) {
|
||||
sections.push({
|
||||
header: currentHeader,
|
||||
content: currentContent.join('\n').trim(),
|
||||
level: currentLevel,
|
||||
})
|
||||
}
|
||||
|
||||
// Start new section
|
||||
currentHeader = line
|
||||
currentLevel = headerMatch[1].length
|
||||
currentContent = []
|
||||
} else {
|
||||
currentContent.push(line)
|
||||
}
|
||||
}
|
||||
|
||||
// Add final section
|
||||
if (currentContent.length > 0) {
|
||||
sections.push({
|
||||
header: currentHeader,
|
||||
content: currentContent.join('\n').trim(),
|
||||
level: currentLevel,
|
||||
})
|
||||
}
|
||||
|
||||
return sections.filter((section) => section.content.trim().length > 0)
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate token count (rough approximation)
|
||||
*/
|
||||
@@ -340,175 +298,6 @@ export class DocsChunker {
|
||||
return Math.ceil(text.length / 4)
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge small adjacent chunks to reach target size
|
||||
*/
|
||||
private mergeSmallChunks(chunks: string[]): string[] {
|
||||
const merged: string[] = []
|
||||
let currentChunk = ''
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const currentTokens = this.estimateTokens(currentChunk)
|
||||
const chunkTokens = this.estimateTokens(chunk)
|
||||
|
||||
// If adding this chunk would exceed target size, save current and start new
|
||||
if (currentTokens > 0 && currentTokens + chunkTokens > 500) {
|
||||
if (currentChunk.trim()) {
|
||||
merged.push(currentChunk.trim())
|
||||
}
|
||||
currentChunk = chunk
|
||||
} else {
|
||||
// Merge with current chunk
|
||||
currentChunk = currentChunk ? `${currentChunk}\n\n${chunk}` : chunk
|
||||
}
|
||||
}
|
||||
|
||||
// Add final chunk
|
||||
if (currentChunk.trim()) {
|
||||
merged.push(currentChunk.trim())
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk a section while preserving tables and structure
|
||||
*/
|
||||
private async chunkSection(section: {
|
||||
header: string | null
|
||||
content: string
|
||||
level: number
|
||||
}): Promise<string[]> {
|
||||
const content = section.content
|
||||
const header = section.header
|
||||
|
||||
// Check if content contains tables
|
||||
const hasTable = this.containsTable(content)
|
||||
|
||||
if (hasTable) {
|
||||
// Split by tables and handle each part
|
||||
return this.splitContentWithTables(content, header)
|
||||
}
|
||||
// Regular chunking for text-only content
|
||||
const chunks = await this.textChunker.chunk(content)
|
||||
return chunks.map((chunk, index) => {
|
||||
// Add header to first chunk only
|
||||
if (index === 0 && header) {
|
||||
return `${header}\n\n${chunk.text}`.trim()
|
||||
}
|
||||
return chunk.text
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if content contains markdown tables
|
||||
*/
|
||||
private containsTable(content: string): boolean {
|
||||
const lines = content.split('\n')
|
||||
return lines.some((line, index) => {
|
||||
if (line.includes('|') && line.split('|').length >= 3) {
|
||||
const nextLine = lines[index + 1]
|
||||
return nextLine?.includes('|') && nextLine.includes('-')
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Split content that contains tables, keeping tables intact
|
||||
*/
|
||||
private splitContentWithTables(content: string, header: string | null): string[] {
|
||||
const lines = content.split('\n')
|
||||
const chunks: string[] = []
|
||||
let currentChunk: string[] = []
|
||||
let inTable = false
|
||||
let tableLines: string[] = []
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i]
|
||||
|
||||
// Detect table start
|
||||
if (line.includes('|') && line.split('|').length >= 3 && !inTable) {
|
||||
const nextLine = lines[i + 1]
|
||||
if (nextLine?.includes('|') && nextLine.includes('-')) {
|
||||
inTable = true
|
||||
|
||||
// Save current chunk if it has content
|
||||
if (currentChunk.length > 0 && currentChunk.join('\n').trim().length > 50) {
|
||||
const chunkText = currentChunk.join('\n').trim()
|
||||
const withHeader =
|
||||
chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
|
||||
chunks.push(withHeader)
|
||||
currentChunk = []
|
||||
}
|
||||
|
||||
tableLines = [line]
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if (inTable) {
|
||||
tableLines.push(line)
|
||||
|
||||
// Detect table end
|
||||
if (!line.includes('|') || line.trim() === '') {
|
||||
inTable = false
|
||||
|
||||
// Save table as its own chunk
|
||||
const tableText = tableLines
|
||||
.filter((l) => l.trim())
|
||||
.join('\n')
|
||||
.trim()
|
||||
if (tableText.length > 0) {
|
||||
const withHeader =
|
||||
chunks.length === 0 && header ? `${header}\n\n${tableText}` : tableText
|
||||
chunks.push(withHeader)
|
||||
}
|
||||
|
||||
tableLines = []
|
||||
|
||||
// Start new chunk if current line has content
|
||||
if (line.trim() !== '') {
|
||||
currentChunk = [line]
|
||||
}
|
||||
}
|
||||
} else {
|
||||
currentChunk.push(line)
|
||||
|
||||
// If chunk is getting large, save it
|
||||
if (this.estimateTokens(currentChunk.join('\n')) > 250) {
|
||||
const chunkText = currentChunk.join('\n').trim()
|
||||
if (chunkText.length > 50) {
|
||||
const withHeader =
|
||||
chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
|
||||
chunks.push(withHeader)
|
||||
}
|
||||
currentChunk = []
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle remaining content
|
||||
if (inTable && tableLines.length > 0) {
|
||||
const tableText = tableLines
|
||||
.filter((l) => l.trim())
|
||||
.join('\n')
|
||||
.trim()
|
||||
if (tableText.length > 0) {
|
||||
const withHeader = chunks.length === 0 && header ? `${header}\n\n${tableText}` : tableText
|
||||
chunks.push(withHeader)
|
||||
}
|
||||
} else if (currentChunk.length > 0) {
|
||||
const chunkText = currentChunk.join('\n').trim()
|
||||
if (chunkText.length > 50) {
|
||||
const withHeader = chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
|
||||
chunks.push(withHeader)
|
||||
}
|
||||
}
|
||||
|
||||
return chunks.filter((chunk) => chunk.trim().length > 50)
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect table boundaries in markdown content to avoid splitting them
|
||||
*/
|
||||
5
apps/sim/lib/chunkers/index.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
export { DocsChunker } from './docs-chunker'
|
||||
export { JsonYamlChunker } from './json-yaml-chunker'
|
||||
export { StructuredDataChunker } from './structured-data-chunker'
|
||||
export { TextChunker } from './text-chunker'
|
||||
export * from './types'
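A hedged usage sketch for the new barrel file; the `@/lib/chunkers` alias is an assumption, chosen to match the app's existing `@/lib/...` imports.

import { DocsChunker, JsonYamlChunker, StructuredDataChunker, TextChunker } from '@/lib/chunkers'
import type { Chunk, ChunkerOptions } from '@/lib/chunkers'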
|
||||
317
apps/sim/lib/chunkers/json-yaml-chunker.ts
Normal file
@@ -0,0 +1,317 @@
|
||||
import { estimateTokenCount } from '@/lib/tokenization/estimators'
|
||||
import type { Chunk, ChunkerOptions } from './types'
|
||||
|
||||
function getTokenCount(text: string): number {
|
||||
const estimate = estimateTokenCount(text)
|
||||
return estimate.count
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration for JSON/YAML chunking
|
||||
*/
|
||||
const JSON_YAML_CHUNKING_CONFIG = {
|
||||
TARGET_CHUNK_SIZE: 2000, // Target tokens per chunk
|
||||
MIN_CHUNK_SIZE: 100, // Minimum tokens per chunk
|
||||
MAX_CHUNK_SIZE: 3000, // Maximum tokens per chunk
|
||||
MAX_DEPTH_FOR_SPLITTING: 5, // Maximum depth to traverse for splitting
|
||||
}
|
||||
|
||||
export class JsonYamlChunker {
|
||||
private chunkSize: number
|
||||
private minChunkSize: number
|
||||
|
||||
constructor(options: ChunkerOptions = {}) {
|
||||
this.chunkSize = options.chunkSize || JSON_YAML_CHUNKING_CONFIG.TARGET_CHUNK_SIZE
|
||||
this.minChunkSize = options.minChunkSize || JSON_YAML_CHUNKING_CONFIG.MIN_CHUNK_SIZE
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if content is structured JSON/YAML data
|
||||
*/
|
||||
static isStructuredData(content: string): boolean {
|
||||
try {
|
||||
JSON.parse(content)
|
||||
return true
|
||||
} catch {
|
||||
try {
|
||||
// Lazy require keeps js-yaml off the JSON-only path and lets this check stay synchronous
const yaml = require('js-yaml')
|
||||
yaml.load(content)
|
||||
return true
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk JSON/YAML content intelligently based on structure
|
||||
*/
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
try {
|
||||
const data = JSON.parse(content)
|
||||
return this.chunkStructuredData(data)
|
||||
} catch (error) {
|
||||
return this.chunkAsText(content)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk structured data based on its structure
|
||||
*/
|
||||
private chunkStructuredData(data: any, path: string[] = []): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
|
||||
if (Array.isArray(data)) {
|
||||
return this.chunkArray(data, path)
|
||||
}
|
||||
|
||||
if (typeof data === 'object' && data !== null) {
|
||||
return this.chunkObject(data, path)
|
||||
}
|
||||
|
||||
const content = JSON.stringify(data, null, 2)
|
||||
const tokenCount = getTokenCount(content)
|
||||
|
||||
if (tokenCount >= this.minChunkSize) {
|
||||
chunks.push({
|
||||
text: content,
|
||||
tokenCount,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: content.length,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk an array intelligently
|
||||
*/
|
||||
private chunkArray(arr: any[], path: string[]): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
let currentBatch: any[] = []
|
||||
let currentTokens = 0
|
||||
|
||||
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
|
||||
|
||||
for (let i = 0; i < arr.length; i++) {
|
||||
const item = arr[i]
|
||||
const itemStr = JSON.stringify(item, null, 2)
|
||||
const itemTokens = getTokenCount(itemStr)
|
||||
|
||||
if (itemTokens > this.chunkSize) {
|
||||
// Save current batch if it has items
|
||||
if (currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: getTokenCount(batchContent),
|
||||
metadata: {
|
||||
startIndex: i - currentBatch.length,
|
||||
endIndex: i - 1,
|
||||
},
|
||||
})
|
||||
currentBatch = []
|
||||
currentTokens = 0
|
||||
}
|
||||
|
||||
if (typeof item === 'object' && item !== null) {
|
||||
const subChunks = this.chunkStructuredData(item, [...path, `[${i}]`])
|
||||
chunks.push(...subChunks)
|
||||
} else {
|
||||
chunks.push({
|
||||
text: contextHeader + itemStr,
|
||||
tokenCount: itemTokens,
|
||||
metadata: {
|
||||
startIndex: i,
|
||||
endIndex: i,
|
||||
},
|
||||
})
|
||||
}
|
||||
} else if (currentTokens + itemTokens > this.chunkSize && currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex: i - currentBatch.length,
|
||||
endIndex: i - 1,
|
||||
},
|
||||
})
|
||||
currentBatch = [item]
|
||||
currentTokens = itemTokens
|
||||
} else {
|
||||
currentBatch.push(item)
|
||||
currentTokens += itemTokens
|
||||
}
|
||||
}
|
||||
|
||||
if (currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex: arr.length - currentBatch.length,
|
||||
endIndex: arr.length - 1,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk an object intelligently
|
||||
*/
|
||||
private chunkObject(obj: Record<string, any>, path: string[]): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
const entries = Object.entries(obj)
|
||||
|
||||
const fullContent = JSON.stringify(obj, null, 2)
|
||||
const fullTokens = getTokenCount(fullContent)
|
||||
|
||||
if (fullTokens <= this.chunkSize) {
|
||||
chunks.push({
|
||||
text: fullContent,
|
||||
tokenCount: fullTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: fullContent.length,
|
||||
},
|
||||
})
|
||||
return chunks
|
||||
}
|
||||
|
||||
let currentObj: Record<string, any> = {}
|
||||
let currentTokens = 0
|
||||
let currentKeys: string[] = []
|
||||
|
||||
for (const [key, value] of entries) {
|
||||
const valueStr = JSON.stringify({ [key]: value }, null, 2)
|
||||
const valueTokens = getTokenCount(valueStr)
|
||||
|
||||
if (valueTokens > this.chunkSize) {
|
||||
// Save current object if it has properties
|
||||
if (Object.keys(currentObj).length > 0) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
})
|
||||
currentObj = {}
|
||||
currentTokens = 0
|
||||
currentKeys = []
|
||||
}
|
||||
|
||||
if (typeof value === 'object' && value !== null) {
|
||||
const subChunks = this.chunkStructuredData(value, [...path, key])
|
||||
chunks.push(...subChunks)
|
||||
} else {
|
||||
chunks.push({
|
||||
text: valueStr,
|
||||
tokenCount: valueTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: valueStr.length,
|
||||
},
|
||||
})
|
||||
}
|
||||
} else if (
|
||||
currentTokens + valueTokens > this.chunkSize &&
|
||||
Object.keys(currentObj).length > 0
|
||||
) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
})
|
||||
currentObj = { [key]: value }
|
||||
currentTokens = valueTokens
|
||||
currentKeys = [key]
|
||||
} else {
|
||||
currentObj[key] = value
|
||||
currentTokens += valueTokens
|
||||
currentKeys.push(key)
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(currentObj).length > 0) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Fall back to text chunking if JSON parsing fails.
|
||||
*/
|
||||
private async chunkAsText(content: string): Promise<Chunk[]> {
|
||||
const chunks: Chunk[] = []
|
||||
const lines = content.split('\n')
|
||||
let currentChunk = ''
|
||||
let currentTokens = 0
|
||||
let startIndex = 0
|
||||
|
||||
for (const line of lines) {
|
||||
const lineTokens = getTokenCount(line)
|
||||
|
||||
if (currentTokens + lineTokens > this.chunkSize && currentChunk) {
|
||||
chunks.push({
|
||||
text: currentChunk,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex,
|
||||
endIndex: startIndex + currentChunk.length,
|
||||
},
|
||||
})
|
||||
|
||||
startIndex += currentChunk.length + 1
|
||||
currentChunk = line
|
||||
currentTokens = lineTokens
|
||||
} else {
|
||||
currentChunk = currentChunk ? `${currentChunk}\n${line}` : line
|
||||
currentTokens += lineTokens
|
||||
}
|
||||
}
|
||||
|
||||
if (currentChunk && currentTokens >= this.minChunkSize) {
|
||||
chunks.push({
|
||||
text: currentChunk,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex,
|
||||
endIndex: startIndex + currentChunk.length,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Static method for chunking JSON/YAML data with default options.
|
||||
*/
|
||||
static async chunkJsonYaml(content: string, options: ChunkerOptions = {}): Promise<Chunk[]> {
|
||||
const chunker = new JsonYamlChunker(options)
|
||||
return chunker.chunk(content)
|
||||
}
|
||||
}
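A hedged usage sketch for the chunker above; the payload and option values are illustrative, not taken from the repo.

import { JsonYamlChunker } from '@/lib/chunkers'

const content = JSON.stringify({ users: [{ id: 1, name: 'Ada' }, { id: 2, name: 'Grace' }] })

if (JsonYamlChunker.isStructuredData(content)) {
  // Static helper constructs a chunker with the configured target (~2000 tokens per chunk)
  const chunks = await JsonYamlChunker.chunkJsonYaml(content, { chunkSize: 2000 })
  // Each chunk carries text, tokenCount, and startIndex/endIndex metadata per the Chunk type
}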
|
||||
220
apps/sim/lib/chunkers/structured-data-chunker.ts
Normal file
@@ -0,0 +1,220 @@
|
||||
import type { Chunk, StructuredDataOptions } from './types'
|
||||
|
||||
// Configuration for structured data chunking (CSV, XLSX, etc.)
|
||||
const STRUCTURED_CHUNKING_CONFIG = {
|
||||
// Target 2000-3000 tokens per chunk for better semantic meaning
|
||||
TARGET_CHUNK_SIZE: 2500,
|
||||
MIN_CHUNK_SIZE: 500,
|
||||
MAX_CHUNK_SIZE: 4000,
|
||||
|
||||
// For spreadsheets, group rows together
|
||||
ROWS_PER_CHUNK: 100, // Start with 100 rows per chunk
|
||||
MIN_ROWS_PER_CHUNK: 20,
|
||||
MAX_ROWS_PER_CHUNK: 500,
|
||||
|
||||
// For better embeddings quality
|
||||
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
|
||||
MAX_HEADER_SIZE: 200, // tokens
|
||||
}
|
||||
|
||||
/**
|
||||
* Smart chunker for structured data (CSV, XLSX) that preserves semantic meaning
|
||||
*/
|
||||
export class StructuredDataChunker {
|
||||
/**
|
||||
* Chunk structured data intelligently based on rows and semantic boundaries
|
||||
*/
|
||||
static async chunkStructuredData(
|
||||
content: string,
|
||||
options: StructuredDataOptions = {}
|
||||
): Promise<Chunk[]> {
|
||||
const chunks: Chunk[] = []
|
||||
const lines = content.split('\n').filter((line) => line.trim())
|
||||
|
||||
if (lines.length === 0) {
|
||||
return chunks
|
||||
}
|
||||
|
||||
// Detect headers (first line or provided)
|
||||
const headerLine = options.headers?.join('\t') || lines[0]
|
||||
const dataStartIndex = options.headers ? 0 : 1
|
||||
|
||||
// Calculate optimal rows per chunk based on content
|
||||
const estimatedTokensPerRow = StructuredDataChunker.estimateTokensPerRow(
|
||||
lines.slice(dataStartIndex, Math.min(10, lines.length))
|
||||
)
|
||||
const optimalRowsPerChunk =
|
||||
StructuredDataChunker.calculateOptimalRowsPerChunk(estimatedTokensPerRow)
|
||||
|
||||
console.log(
|
||||
`Structured data chunking: ${lines.length} rows, ~${estimatedTokensPerRow} tokens/row, ${optimalRowsPerChunk} rows/chunk`
|
||||
)
|
||||
|
||||
let currentChunkRows: string[] = []
|
||||
let currentTokenEstimate = 0
|
||||
const headerTokens = StructuredDataChunker.estimateTokens(headerLine)
|
||||
let chunkStartRow = dataStartIndex
|
||||
|
||||
for (let i = dataStartIndex; i < lines.length; i++) {
|
||||
const row = lines[i]
|
||||
const rowTokens = StructuredDataChunker.estimateTokens(row)
|
||||
|
||||
// Check if adding this row would exceed our target
|
||||
const projectedTokens =
|
||||
currentTokenEstimate +
|
||||
rowTokens +
|
||||
(STRUCTURED_CHUNKING_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK ? headerTokens : 0)
|
||||
|
||||
const shouldCreateChunk =
|
||||
(projectedTokens > STRUCTURED_CHUNKING_CONFIG.TARGET_CHUNK_SIZE &&
|
||||
currentChunkRows.length >= STRUCTURED_CHUNKING_CONFIG.MIN_ROWS_PER_CHUNK) ||
|
||||
currentChunkRows.length >= optimalRowsPerChunk
|
||||
|
||||
if (shouldCreateChunk && currentChunkRows.length > 0) {
|
||||
// Create chunk with current rows
|
||||
const chunkContent = StructuredDataChunker.formatChunk(
|
||||
headerLine,
|
||||
currentChunkRows,
|
||||
options.sheetName
|
||||
)
|
||||
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, i - 1))
|
||||
|
||||
// Reset for next chunk
|
||||
currentChunkRows = []
|
||||
currentTokenEstimate = 0
|
||||
chunkStartRow = i
|
||||
}
|
||||
|
||||
currentChunkRows.push(row)
|
||||
currentTokenEstimate += rowTokens
|
||||
}
|
||||
|
||||
// Add remaining rows as final chunk
|
||||
if (currentChunkRows.length > 0) {
|
||||
const chunkContent = StructuredDataChunker.formatChunk(
|
||||
headerLine,
|
||||
currentChunkRows,
|
||||
options.sheetName
|
||||
)
|
||||
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, lines.length - 1))
|
||||
}
|
||||
|
||||
console.log(`Created ${chunks.length} chunks from ${lines.length} rows of structured data`)
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Format a chunk with headers and context
|
||||
*/
|
||||
private static formatChunk(headerLine: string, rows: string[], sheetName?: string): string {
|
||||
let content = ''
|
||||
|
||||
// Add sheet name context if available
|
||||
if (sheetName) {
|
||||
content += `=== ${sheetName} ===\n\n`
|
||||
}
|
||||
|
||||
// Add headers for context
|
||||
if (STRUCTURED_CHUNKING_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK) {
|
||||
content += `Headers: ${headerLine}\n`
|
||||
content += `${'-'.repeat(Math.min(80, headerLine.length))}\n`
|
||||
}
|
||||
|
||||
// Add data rows
|
||||
content += rows.join('\n')
|
||||
|
||||
// Add row count for context
|
||||
content += `\n\n[${rows.length} rows of data]`
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a chunk object with actual row indices
|
||||
*/
|
||||
private static createChunk(content: string, startRow: number, endRow: number): Chunk {
|
||||
const tokenCount = StructuredDataChunker.estimateTokens(content)
|
||||
|
||||
return {
|
||||
text: content,
|
||||
tokenCount,
|
||||
metadata: {
|
||||
startIndex: startRow,
|
||||
endIndex: endRow,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate tokens in text (rough approximation)
|
||||
*/
|
||||
private static estimateTokens(text: string): number {
|
||||
// Rough estimate: 1 token per 4 characters for English text
|
||||
// For structured data with numbers, it's closer to 1 token per 3 characters
|
||||
return Math.ceil(text.length / 3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate average tokens per row from sample
|
||||
*/
|
||||
private static estimateTokensPerRow(sampleRows: string[]): number {
|
||||
if (sampleRows.length === 0) return 50 // default estimate
|
||||
|
||||
const totalTokens = sampleRows.reduce(
|
||||
(sum, row) => sum + StructuredDataChunker.estimateTokens(row),
|
||||
0
|
||||
)
|
||||
return Math.ceil(totalTokens / sampleRows.length)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate optimal rows per chunk based on token estimates
|
||||
*/
|
||||
private static calculateOptimalRowsPerChunk(tokensPerRow: number): number {
|
||||
const optimal = Math.floor(STRUCTURED_CHUNKING_CONFIG.TARGET_CHUNK_SIZE / tokensPerRow)
|
||||
|
||||
return Math.min(
|
||||
Math.max(optimal, STRUCTURED_CHUNKING_CONFIG.MIN_ROWS_PER_CHUNK),
|
||||
STRUCTURED_CHUNKING_CONFIG.MAX_ROWS_PER_CHUNK
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if content appears to be structured data
|
||||
*/
|
||||
static isStructuredData(content: string, mimeType?: string): boolean {
|
||||
// Check mime type first
|
||||
if (mimeType) {
|
||||
const structuredMimeTypes = [
|
||||
'text/csv',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.ms-excel',
|
||||
'text/tab-separated-values',
|
||||
]
|
||||
if (structuredMimeTypes.includes(mimeType)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check content structure
|
||||
const lines = content.split('\n').slice(0, 10) // Check first 10 lines
|
||||
if (lines.length < 2) return false
|
||||
|
||||
// Check for consistent delimiters (comma, tab, pipe)
|
||||
const delimiters = [',', '\t', '|']
|
||||
for (const delimiter of delimiters) {
|
||||
const counts = lines.map(
|
||||
(line) => (line.match(new RegExp(`\\${delimiter}`, 'g')) || []).length
|
||||
)
|
||||
const avgCount = counts.reduce((a, b) => a + b, 0) / counts.length
|
||||
|
||||
// If most lines have similar delimiter counts, it's likely structured
|
||||
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= 2)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
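A hedged usage sketch for the structured-data chunker; the CSV text and sheet name are invented for illustration.

import { StructuredDataChunker } from '@/lib/chunkers'

const csv = 'id,name,plan\n1,Ada,team\n2,Grace,enterprise'

if (StructuredDataChunker.isStructuredData(csv, 'text/csv')) {
  // At ~50 tokens per row, floor(2500 / 50) = 50 rows per chunk, clamped to the 20-500 range
  const chunks = await StructuredDataChunker.chunkStructuredData(csv, { sheetName: 'Accounts' })
}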
|
||||
@@ -1,28 +1,4 @@
|
||||
export interface ChunkMetadata {
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
tokenCount: number
|
||||
}
|
||||
|
||||
export interface TextChunk {
|
||||
text: string
|
||||
metadata: ChunkMetadata
|
||||
}
|
||||
|
||||
export interface ChunkerOptions {
|
||||
chunkSize?: number
|
||||
minChunkSize?: number
|
||||
overlap?: number
|
||||
}
|
||||
|
||||
export interface Chunk {
|
||||
text: string
|
||||
tokenCount: number
|
||||
metadata: {
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
}
|
||||
}
|
||||
import type { Chunk, ChunkerOptions } from './types'
|
||||
|
||||
/**
|
||||
* Lightweight text chunker optimized for RAG applications
|
||||
53
apps/sim/lib/chunkers/types.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
export interface ChunkMetadata {
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
tokenCount: number
|
||||
}
|
||||
|
||||
export interface TextChunk {
|
||||
text: string
|
||||
metadata: ChunkMetadata
|
||||
}
|
||||
|
||||
export interface ChunkerOptions {
|
||||
chunkSize?: number
|
||||
minChunkSize?: number
|
||||
overlap?: number
|
||||
}
|
||||
|
||||
export interface Chunk {
|
||||
text: string
|
||||
tokenCount: number
|
||||
metadata: {
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
}
|
||||
}
|
||||
|
||||
export interface StructuredDataOptions {
|
||||
headers?: string[]
|
||||
totalRows?: number
|
||||
sheetName?: string
|
||||
}
|
||||
|
||||
export interface DocChunk {
|
||||
text: string
|
||||
tokenCount: number
|
||||
sourceDocument: string
|
||||
headerLink: string
|
||||
headerText: string
|
||||
headerLevel: number
|
||||
embedding: number[]
|
||||
embeddingModel: string
|
||||
metadata: {
|
||||
sourceUrl?: string
|
||||
headers?: string[]
|
||||
title?: string
|
||||
startIndex: number
|
||||
endIndex: number
|
||||
}
|
||||
}
|
||||
|
||||
export interface DocsChunkerOptions extends ChunkerOptions {
|
||||
baseUrl?: string
|
||||
}
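A hedged construction sketch using DocsChunkerOptions; the option values are illustrative, and the chunker's processing entry point is not shown in this diff.

import { DocsChunker } from '@/lib/chunkers'
import type { DocsChunkerOptions } from '@/lib/chunkers'

const options: DocsChunkerOptions = { chunkSize: 1024, overlap: 50, baseUrl: 'https://docs.sim.ai' }
const docsChunker = new DocsChunker(options)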
|
||||
@@ -0,0 +1,37 @@
|
||||
import { Loader2, MinusCircle, XCircle, Zap } from 'lucide-react'
|
||||
import {
|
||||
BaseClientTool,
|
||||
type BaseClientToolMetadata,
|
||||
ClientToolCallState,
|
||||
} from '@/lib/copilot/tools/client/base-tool'
|
||||
|
||||
export class GetOperationsExamplesClientTool extends BaseClientTool {
|
||||
static readonly id = 'get_operations_examples'
|
||||
|
||||
constructor(toolCallId: string) {
|
||||
super(toolCallId, GetOperationsExamplesClientTool.id, GetOperationsExamplesClientTool.metadata)
|
||||
}
|
||||
|
||||
static readonly metadata: BaseClientToolMetadata = {
|
||||
displayNames: {
|
||||
[ClientToolCallState.generating]: { text: 'Designing workflow component', icon: Loader2 },
|
||||
[ClientToolCallState.pending]: { text: 'Designing workflow component', icon: Loader2 },
|
||||
[ClientToolCallState.executing]: { text: 'Designing workflow component', icon: Loader2 },
|
||||
[ClientToolCallState.success]: { text: 'Designed workflow component', icon: Zap },
|
||||
[ClientToolCallState.error]: { text: 'Failed to design workflow component', icon: XCircle },
|
||||
[ClientToolCallState.aborted]: {
|
||||
text: 'Aborted designing workflow component',
|
||||
icon: MinusCircle,
|
||||
},
|
||||
[ClientToolCallState.rejected]: {
|
||||
text: 'Skipped designing workflow component',
|
||||
icon: MinusCircle,
|
||||
},
|
||||
},
|
||||
interrupt: undefined,
|
||||
}
|
||||
|
||||
async execute(): Promise<void> {
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -20,11 +20,11 @@ export class PlanClientTool extends BaseClientTool {
|
||||
|
||||
static readonly metadata: BaseClientToolMetadata = {
|
||||
displayNames: {
|
||||
[ClientToolCallState.generating]: { text: 'Crafting an approach', icon: Loader2 },
|
||||
[ClientToolCallState.pending]: { text: 'Crafting an approach', icon: Loader2 },
|
||||
[ClientToolCallState.executing]: { text: 'Crafting an approach', icon: Loader2 },
|
||||
[ClientToolCallState.success]: { text: 'Crafted an approach', icon: ListTodo },
|
||||
[ClientToolCallState.error]: { text: 'Failed to craft an approach', icon: X },
|
||||
[ClientToolCallState.generating]: { text: 'Planning', icon: Loader2 },
|
||||
[ClientToolCallState.pending]: { text: 'Planning', icon: Loader2 },
|
||||
[ClientToolCallState.executing]: { text: 'Planning an approach', icon: Loader2 },
|
||||
[ClientToolCallState.success]: { text: 'Finished planning', icon: ListTodo },
|
||||
[ClientToolCallState.error]: { text: 'Failed to plan', icon: X },
|
||||
[ClientToolCallState.aborted]: { text: 'Aborted planning', icon: XCircle },
|
||||
[ClientToolCallState.rejected]: { text: 'Skipped planning approach', icon: XCircle },
|
||||
},
|
||||
|
||||
@@ -4,6 +4,8 @@ import { workflow as workflowTable } from '@sim/db/schema'
|
||||
import { eq } from 'drizzle-orm'
|
||||
import type { BaseServerTool } from '@/lib/copilot/tools/server/base-tool'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
|
||||
import { extractAndPersistCustomTools } from '@/lib/workflows/custom-tools-persistence'
|
||||
import { loadWorkflowFromNormalizedTables } from '@/lib/workflows/db-helpers'
|
||||
import { validateWorkflowState } from '@/lib/workflows/validation'
|
||||
import { getAllBlocks } from '@/blocks/registry'
|
||||
@@ -22,12 +24,123 @@ interface EditWorkflowParams {
|
||||
currentUserWorkflow?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Topologically sort insert operations to ensure parents are created before children
|
||||
* Returns sorted array where parent inserts always come before child inserts
|
||||
*/
|
||||
function topologicalSortInserts(
|
||||
inserts: EditWorkflowOperation[],
|
||||
adds: EditWorkflowOperation[]
|
||||
): EditWorkflowOperation[] {
|
||||
if (inserts.length === 0) return []
|
||||
|
||||
// Build a map of blockId -> operation for quick lookup
|
||||
const insertMap = new Map<string, EditWorkflowOperation>()
|
||||
inserts.forEach((op) => insertMap.set(op.block_id, op))
|
||||
|
||||
// Build a set of blocks being added (potential parents)
|
||||
const addedBlocks = new Set(adds.map((op) => op.block_id))
|
||||
|
||||
// Build dependency graph: block -> blocks that depend on it
|
||||
const dependents = new Map<string, Set<string>>()
|
||||
const dependencies = new Map<string, Set<string>>()
|
||||
|
||||
inserts.forEach((op) => {
|
||||
const blockId = op.block_id
|
||||
const parentId = op.params?.subflowId
|
||||
|
||||
dependencies.set(blockId, new Set())
|
||||
|
||||
if (parentId) {
|
||||
// Track dependency if parent is being inserted OR being added
|
||||
// This ensures children wait for parents regardless of operation type
|
||||
const parentBeingCreated = insertMap.has(parentId) || addedBlocks.has(parentId)
|
||||
|
||||
if (parentBeingCreated) {
|
||||
// Only add dependency if parent is also being inserted (not added)
|
||||
// Because adds run before inserts, added parents are already created
|
||||
if (insertMap.has(parentId)) {
|
||||
dependencies.get(blockId)!.add(parentId)
|
||||
if (!dependents.has(parentId)) {
|
||||
dependents.set(parentId, new Set())
|
||||
}
|
||||
dependents.get(parentId)!.add(blockId)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Topological sort using Kahn's algorithm
|
||||
const sorted: EditWorkflowOperation[] = []
|
||||
const queue: string[] = []
|
||||
|
||||
// Start with nodes that have no dependencies (or depend only on added blocks)
|
||||
inserts.forEach((op) => {
|
||||
const deps = dependencies.get(op.block_id)!
|
||||
if (deps.size === 0) {
|
||||
queue.push(op.block_id)
|
||||
}
|
||||
})
|
||||
|
||||
while (queue.length > 0) {
|
||||
const blockId = queue.shift()!
|
||||
const op = insertMap.get(blockId)
|
||||
if (op) {
|
||||
sorted.push(op)
|
||||
}
|
||||
|
||||
// Remove this node from dependencies of others
|
||||
const children = dependents.get(blockId)
|
||||
if (children) {
|
||||
children.forEach((childId) => {
|
||||
const childDeps = dependencies.get(childId)!
|
||||
childDeps.delete(blockId)
|
||||
if (childDeps.size === 0) {
|
||||
queue.push(childId)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// If sorted length doesn't match input, there's a cycle (shouldn't happen with valid operations)
|
||||
// Just append remaining operations
|
||||
if (sorted.length < inserts.length) {
|
||||
inserts.forEach((op) => {
|
||||
if (!sorted.includes(op)) {
|
||||
sorted.push(op)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return sorted
|
||||
}
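A minimal usage sketch of the sort above. The operation literals are illustrative and assume only the fields the function reads (operation_type, block_id, params.subflowId); they are not taken from the repository's tests.

// Illustrative operations: a parallel is added, a loop is inserted into it,
// and an agent is inserted into that loop.
const adds: EditWorkflowOperation[] = [
  { operation_type: 'add', block_id: 'parallel-1', params: { type: 'parallel', name: 'Parallel' } },
]
const inserts: EditWorkflowOperation[] = [
  { operation_type: 'insert_into_subflow', block_id: 'agent-1', params: { subflowId: 'loop-1', type: 'agent', name: 'Agent' } },
  { operation_type: 'insert_into_subflow', block_id: 'loop-1', params: { subflowId: 'parallel-1', type: 'loop', name: 'Loop' } },
]

// 'loop-1' depends on the added 'parallel-1', but adds run before inserts, so no
// edge is recorded for it; only the insert-on-insert dependency orders the result:
// expected order is ['loop-1', 'agent-1'].
const orderedIds = topologicalSortInserts(inserts, adds).map((op) => op.block_id)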
|
||||
|
||||
/**
|
||||
* Helper to create a block state from operation params
|
||||
*/
|
||||
function createBlockFromParams(blockId: string, params: any, parentId?: string): any {
|
||||
const blockConfig = getAllBlocks().find((b) => b.type === params.type)
|
||||
|
||||
// Determine outputs based on trigger mode
|
||||
const triggerMode = params.triggerMode || false
|
||||
let outputs: Record<string, any>
|
||||
|
||||
if (params.outputs) {
|
||||
outputs = params.outputs
|
||||
} else if (blockConfig) {
|
||||
const subBlocks: Record<string, any> = {}
|
||||
if (params.inputs) {
|
||||
Object.entries(params.inputs).forEach(([key, value]) => {
|
||||
subBlocks[key] = { id: key, type: 'short-input', value: value }
|
||||
})
|
||||
}
|
||||
outputs = triggerMode
|
||||
? getBlockOutputs(params.type, subBlocks, triggerMode)
|
||||
: resolveOutputType(blockConfig.outputs)
|
||||
} else {
|
||||
outputs = {}
|
||||
}
|
||||
|
||||
const blockState: any = {
|
||||
id: blockId,
|
||||
type: params.type,
|
||||
@@ -38,19 +151,39 @@ function createBlockFromParams(blockId: string, params: any, parentId?: string):
|
||||
isWide: false,
|
||||
advancedMode: params.advancedMode || false,
|
||||
height: 0,
|
||||
triggerMode: params.triggerMode || false,
|
||||
triggerMode: triggerMode,
|
||||
subBlocks: {},
|
||||
outputs: params.outputs || (blockConfig ? resolveOutputType(blockConfig.outputs) : {}),
|
||||
outputs: outputs,
|
||||
data: parentId ? { parentId, extent: 'parent' as const } : {},
|
||||
}
|
||||
|
||||
// Add inputs as subBlocks
|
||||
if (params.inputs) {
|
||||
Object.entries(params.inputs).forEach(([key, value]) => {
|
||||
let sanitizedValue = value
|
||||
|
||||
// Special handling for inputFormat - ensure it's an array
|
||||
if (key === 'inputFormat' && value !== null && value !== undefined) {
|
||||
if (!Array.isArray(value)) {
|
||||
// Invalid format, default to empty array
|
||||
sanitizedValue = []
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for tools - normalize to restore sanitized fields
|
||||
if (key === 'tools' && Array.isArray(value)) {
|
||||
sanitizedValue = normalizeTools(value)
|
||||
}
|
||||
|
||||
// Special handling for responseFormat - normalize to ensure consistent format
|
||||
if (key === 'responseFormat' && value) {
|
||||
sanitizedValue = normalizeResponseFormat(value)
|
||||
}
|
||||
|
||||
blockState.subBlocks[key] = {
|
||||
id: key,
|
||||
type: 'short-input',
|
||||
value: value,
|
||||
value: sanitizedValue,
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -71,6 +204,90 @@ function createBlockFromParams(blockId: string, params: any, parentId?: string):
|
||||
return blockState
|
||||
}
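The same per-key sanitization recurs below for the edit and insert_into_subflow operations. A hypothetical helper, not part of this diff, that summarizes the dispatch in one place:

// Hypothetical helper, shown only to summarize the inline logic above and below.
function sanitizeInputValue(key: string, value: unknown): unknown {
  if (key === 'inputFormat' && value !== null && value !== undefined && !Array.isArray(value)) {
    return [] // inputFormat must be an array; any other shape is discarded
  }
  if (key === 'tools' && Array.isArray(value)) {
    return normalizeTools(value) // restore fields stripped during sanitization
  }
  if (key === 'responseFormat' && value) {
    return normalizeResponseFormat(value) // consistent pretty-printed JSON
  }
  return value
}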
|
||||
|
||||
/**
|
||||
* Normalize tools array by adding back fields that were sanitized for training
|
||||
*/
|
||||
function normalizeTools(tools: any[]): any[] {
|
||||
return tools.map((tool) => {
|
||||
if (tool.type === 'custom-tool') {
|
||||
// Reconstruct sanitized custom tool fields
|
||||
const normalized: any = {
|
||||
...tool,
|
||||
params: tool.params || {},
|
||||
isExpanded: tool.isExpanded ?? true,
|
||||
}
|
||||
|
||||
// Ensure schema has proper structure
|
||||
if (normalized.schema?.function) {
|
||||
normalized.schema = {
|
||||
type: 'function',
|
||||
function: {
|
||||
name: tool.title, // Derive name from title
|
||||
description: normalized.schema.function.description,
|
||||
parameters: normalized.schema.function.parameters,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
|
||||
// For other tool types, just ensure isExpanded exists
|
||||
return {
|
||||
...tool,
|
||||
isExpanded: tool.isExpanded ?? true,
|
||||
}
|
||||
})
|
||||
}
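An input/output example for the normalization above. The sanitized tool literal is illustrative; its field names follow the shape the function reads.

const sanitizedTool = {
  type: 'custom-tool',
  title: 'lookup_order',
  schema: {
    function: {
      description: 'Look up an order by id',
      parameters: { type: 'object', properties: { orderId: { type: 'string' } } },
    },
  },
}
const [restored] = normalizeTools([sanitizedTool])
// restored.params                 -> {}
// restored.isExpanded             -> true
// restored.schema.function.name   -> 'lookup_order' (derived from the title)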
|
||||
|
||||
/**
|
||||
* Normalize responseFormat to ensure consistent storage
|
||||
* Handles both string (JSON) and object formats
|
||||
* Returns pretty-printed JSON for better UI readability
|
||||
*/
|
||||
function normalizeResponseFormat(value: any): string {
|
||||
try {
|
||||
let obj = value
|
||||
|
||||
// If it's already a string, parse it first
|
||||
if (typeof value === 'string') {
|
||||
const trimmed = value.trim()
|
||||
if (!trimmed) {
|
||||
return ''
|
||||
}
|
||||
obj = JSON.parse(trimmed)
|
||||
}
|
||||
|
||||
// If it's an object, stringify it with consistent formatting
|
||||
if (obj && typeof obj === 'object') {
|
||||
// Sort keys recursively for consistent comparison
|
||||
const sortKeys = (item: any): any => {
|
||||
if (Array.isArray(item)) {
|
||||
return item.map(sortKeys)
|
||||
}
|
||||
if (item !== null && typeof item === 'object') {
|
||||
return Object.keys(item)
|
||||
.sort()
|
||||
.reduce((result: any, key: string) => {
|
||||
result[key] = sortKeys(item[key])
|
||||
return result
|
||||
}, {})
|
||||
}
|
||||
return item
|
||||
}
|
||||
|
||||
// Return pretty-printed with 2-space indentation for UI readability
|
||||
// The sanitizer will normalize it to minified format for comparison
|
||||
return JSON.stringify(sortKeys(obj), null, 2)
|
||||
}
|
||||
|
||||
return String(value)
|
||||
} catch (error) {
|
||||
// If parsing fails, return the original value as string
|
||||
return String(value)
|
||||
}
|
||||
}
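Two equivalent inputs, one minified string and one object with differently ordered keys, normalize to the same sorted, pretty-printed string, which is what keeps later comparisons stable (values are illustrative):

const fromString = normalizeResponseFormat('{"type":"object","properties":{"b":{},"a":{}}}')
const fromObject = normalizeResponseFormat({ properties: { a: {}, b: {} }, type: 'object' })
// fromString === fromObject, with keys sorted recursively and 2-space indentation.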
|
||||
|
||||
/**
|
||||
* Helper to add connections as edges for a block
|
||||
*/
|
||||
@@ -106,13 +323,13 @@ function applyOperationsToWorkflowState(
|
||||
|
||||
// Log initial state
|
||||
const logger = createLogger('EditWorkflowServerTool')
|
||||
logger.debug('Initial blocks before operations:', {
|
||||
blockCount: Object.keys(modifiedState.blocks || {}).length,
|
||||
blockTypes: Object.entries(modifiedState.blocks || {}).map(([id, block]: [string, any]) => ({
|
||||
id,
|
||||
type: block.type,
|
||||
hasType: block.type !== undefined,
|
||||
})),
|
||||
logger.info('Applying operations to workflow:', {
|
||||
totalOperations: operations.length,
|
||||
operationTypes: operations.reduce((acc: any, op) => {
|
||||
acc[op.operation_type] = (acc[op.operation_type] || 0) + 1
|
||||
return acc
|
||||
}, {}),
|
||||
initialBlockCount: Object.keys(modifiedState.blocks || {}).length,
|
||||
})
|
||||
|
||||
// Reorder operations: delete -> extract -> add -> insert -> edit
|
||||
@@ -121,17 +338,34 @@ function applyOperationsToWorkflowState(
|
||||
const adds = operations.filter((op) => op.operation_type === 'add')
|
||||
const inserts = operations.filter((op) => op.operation_type === 'insert_into_subflow')
|
||||
const edits = operations.filter((op) => op.operation_type === 'edit')
|
||||
|
||||
// Sort insert operations to ensure parents are inserted before children
|
||||
// This handles cases where a loop/parallel is being added along with its children
|
||||
const sortedInserts = topologicalSortInserts(inserts, adds)
|
||||
|
||||
const orderedOperations: EditWorkflowOperation[] = [
|
||||
...deletes,
|
||||
...extracts,
|
||||
...adds,
|
||||
...inserts,
|
||||
...sortedInserts,
|
||||
...edits,
|
||||
]
|
||||
|
||||
logger.info('Operations after reordering:', {
|
||||
order: orderedOperations.map(
|
||||
(op) =>
|
||||
`${op.operation_type}:${op.block_id}${op.params?.subflowId ? `(parent:${op.params.subflowId})` : ''}`
|
||||
),
|
||||
})
|
||||
|
||||
for (const operation of orderedOperations) {
|
||||
const { operation_type, block_id, params } = operation
|
||||
|
||||
logger.debug(`Executing operation: ${operation_type} for block ${block_id}`, {
|
||||
params: params ? Object.keys(params) : [],
|
||||
currentBlockCount: Object.keys(modifiedState.blocks).length,
|
||||
})
|
||||
|
||||
switch (operation_type) {
|
||||
case 'delete': {
|
||||
if (modifiedState.blocks[block_id]) {
|
||||
@@ -175,14 +409,34 @@ function applyOperationsToWorkflowState(
|
||||
if (params?.inputs) {
|
||||
if (!block.subBlocks) block.subBlocks = {}
|
||||
Object.entries(params.inputs).forEach(([key, value]) => {
|
||||
let sanitizedValue = value
|
||||
|
||||
// Special handling for inputFormat - ensure it's an array
|
||||
if (key === 'inputFormat' && value !== null && value !== undefined) {
|
||||
if (!Array.isArray(value)) {
|
||||
// Invalid format, default to empty array
|
||||
sanitizedValue = []
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for tools - normalize to restore sanitized fields
|
||||
if (key === 'tools' && Array.isArray(value)) {
|
||||
sanitizedValue = normalizeTools(value)
|
||||
}
|
||||
|
||||
// Special handling for responseFormat - normalize to ensure consistent format
|
||||
if (key === 'responseFormat' && value) {
|
||||
sanitizedValue = normalizeResponseFormat(value)
|
||||
}
|
||||
|
||||
if (!block.subBlocks[key]) {
|
||||
block.subBlocks[key] = {
|
||||
id: key,
|
||||
type: 'short-input',
|
||||
value: value,
|
||||
value: sanitizedValue,
|
||||
}
|
||||
} else {
|
||||
block.subBlocks[key].value = value
|
||||
block.subBlocks[key].value = sanitizedValue
|
||||
}
|
||||
})
|
||||
|
||||
@@ -335,18 +589,8 @@ function applyOperationsToWorkflowState(
|
||||
// Create new block with proper structure
|
||||
const newBlock = createBlockFromParams(block_id, params)
|
||||
|
||||
// Handle nested nodes (for loops/parallels created from scratch)
|
||||
// Set loop/parallel data on parent block BEFORE adding to blocks
|
||||
if (params.nestedNodes) {
|
||||
Object.entries(params.nestedNodes).forEach(([childId, childBlock]: [string, any]) => {
|
||||
const childBlockState = createBlockFromParams(childId, childBlock, block_id)
|
||||
modifiedState.blocks[childId] = childBlockState
|
||||
|
||||
if (childBlock.connections) {
|
||||
addConnectionsAsEdges(modifiedState, childId, childBlock.connections)
|
||||
}
|
||||
})
|
||||
|
||||
// Set loop/parallel data on parent block
|
||||
if (params.type === 'loop') {
|
||||
newBlock.data = {
|
||||
...newBlock.data,
|
||||
@@ -364,8 +608,22 @@ function applyOperationsToWorkflowState(
|
||||
}
|
||||
}
|
||||
|
||||
// Add parent block FIRST before adding children
|
||||
// This ensures children can reference valid parentId
|
||||
modifiedState.blocks[block_id] = newBlock
|
||||
|
||||
// Handle nested nodes (for loops/parallels created from scratch)
|
||||
if (params.nestedNodes) {
|
||||
Object.entries(params.nestedNodes).forEach(([childId, childBlock]: [string, any]) => {
|
||||
const childBlockState = createBlockFromParams(childId, childBlock, block_id)
|
||||
modifiedState.blocks[childId] = childBlockState
|
||||
|
||||
if (childBlock.connections) {
|
||||
addConnectionsAsEdges(modifiedState, childId, childBlock.connections)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Add connections as edges
|
||||
if (params.connections) {
|
||||
addConnectionsAsEdges(modifiedState, block_id, params.connections)
|
||||
@@ -377,15 +635,28 @@ function applyOperationsToWorkflowState(
|
||||
case 'insert_into_subflow': {
|
||||
const subflowId = params?.subflowId
|
||||
if (!subflowId || !params?.type || !params?.name) {
|
||||
logger.warn('Missing required params for insert_into_subflow', { block_id, params })
|
||||
logger.error('Missing required params for insert_into_subflow', { block_id, params })
|
||||
break
|
||||
}
|
||||
|
||||
const subflowBlock = modifiedState.blocks[subflowId]
|
||||
if (!subflowBlock || (subflowBlock.type !== 'loop' && subflowBlock.type !== 'parallel')) {
|
||||
logger.warn('Subflow block not found or invalid type', {
|
||||
if (!subflowBlock) {
|
||||
logger.error('Subflow block not found - parent must be created first', {
|
||||
subflowId,
|
||||
type: subflowBlock?.type,
|
||||
block_id,
|
||||
existingBlocks: Object.keys(modifiedState.blocks),
|
||||
operationType: 'insert_into_subflow',
|
||||
})
|
||||
// This is a critical error - the operation ordering is wrong
|
||||
// Skip this operation but don't break the entire workflow
|
||||
break
|
||||
}
|
||||
|
||||
if (subflowBlock.type !== 'loop' && subflowBlock.type !== 'parallel') {
|
||||
logger.error('Subflow block has invalid type', {
|
||||
subflowId,
|
||||
type: subflowBlock.type,
|
||||
block_id,
|
||||
})
|
||||
break
|
||||
}
|
||||
@@ -407,10 +678,32 @@ function applyOperationsToWorkflowState(
|
||||
// Update inputs if provided
|
||||
if (params.inputs) {
|
||||
Object.entries(params.inputs).forEach(([key, value]) => {
|
||||
let sanitizedValue = value
|
||||
|
||||
if (key === 'inputFormat' && value !== null && value !== undefined) {
|
||||
if (!Array.isArray(value)) {
|
||||
sanitizedValue = []
|
||||
}
|
||||
}
|
||||
|
||||
// Special handling for tools - normalize to restore sanitized fields
|
||||
if (key === 'tools' && Array.isArray(value)) {
|
||||
sanitizedValue = normalizeTools(value)
|
||||
}
|
||||
|
||||
// Special handling for responseFormat - normalize to ensure consistent format
|
||||
if (key === 'responseFormat' && value) {
|
||||
sanitizedValue = normalizeResponseFormat(value)
|
||||
}
|
||||
|
||||
if (!existingBlock.subBlocks[key]) {
|
||||
existingBlock.subBlocks[key] = { id: key, type: 'short-input', value }
|
||||
existingBlock.subBlocks[key] = {
|
||||
id: key,
|
||||
type: 'short-input',
|
||||
value: sanitizedValue,
|
||||
}
|
||||
} else {
|
||||
existingBlock.subBlocks[key].value = value
|
||||
existingBlock.subBlocks[key].value = sanitizedValue
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -553,7 +846,7 @@ async function getCurrentWorkflowStateFromDb(
|
||||
|
||||
export const editWorkflowServerTool: BaseServerTool<EditWorkflowParams, any> = {
|
||||
name: 'edit_workflow',
|
||||
async execute(params: EditWorkflowParams): Promise<any> {
|
||||
async execute(params: EditWorkflowParams, context?: { userId: string }): Promise<any> {
|
||||
const logger = createLogger('EditWorkflowServerTool')
|
||||
const { operations, workflowId, currentUserWorkflow } = params
|
||||
if (!operations || operations.length === 0) throw new Error('operations are required')
|
||||
@@ -599,6 +892,29 @@ export const editWorkflowServerTool: BaseServerTool<EditWorkflowParams, any> = {
|
||||
})
|
||||
}
|
||||
|
||||
// Extract and persist custom tools to database
|
||||
if (context?.userId) {
|
||||
try {
|
||||
const finalWorkflowState = validation.sanitizedState || modifiedWorkflowState
|
||||
const { saved, errors } = await extractAndPersistCustomTools(
|
||||
finalWorkflowState,
|
||||
context.userId
|
||||
)
|
||||
|
||||
if (saved > 0) {
|
||||
logger.info(`Persisted ${saved} custom tool(s) to database`, { workflowId })
|
||||
}
|
||||
|
||||
if (errors.length > 0) {
|
||||
logger.warn('Some custom tools failed to persist', { errors, workflowId })
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Failed to persist custom tools', { error, workflowId })
|
||||
}
|
||||
} else {
|
||||
logger.warn('No userId in context - skipping custom tools persistence', { workflowId })
|
||||
}
|
||||
|
||||
logger.info('edit_workflow successfully applied operations', {
|
||||
operationCount: operations.length,
|
||||
blocksCount: Object.keys(modifiedWorkflowState.blocks).length,
|
||||
|
||||
@@ -114,7 +114,8 @@ export async function generateEmbeddings(
|
||||
|
||||
logger.info(`Using ${config.useAzure ? 'Azure OpenAI' : 'OpenAI'} for embeddings generation`)
|
||||
|
||||
const batchSize = 100
|
||||
// Reduced batch size to prevent API timeouts and improve reliability
|
||||
const batchSize = 50 // Reduced from 100 to prevent issues with large documents
|
||||
const allEmbeddings: number[][] = []
|
||||
|
||||
for (let i = 0; i < texts.length; i += batchSize) {
|
||||
@@ -125,6 +126,11 @@ export async function generateEmbeddings(
|
||||
logger.info(
|
||||
`Generated embeddings for batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(texts.length / batchSize)}`
|
||||
)
|
||||
|
||||
// Add small delay between batches to avoid rate limiting
|
||||
if (i + batchSize < texts.length) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
}
|
||||
}
|
||||
|
||||
return allEmbeddings
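Rough arithmetic for the new batching, with illustrative numbers rather than measured ones:

// 5,000 chunk texts at a batch size of 50 means 100 embedding requests; the
// 100 ms pause between batches adds (100 - 1) * 100 ms, roughly 10 s of deliberate delay.
const textCount = 5_000
const requests = Math.ceil(textCount / 50) // 100
const addedDelayMs = (requests - 1) * 100 // 9,900 ms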
|
||||
|
||||
@@ -17,8 +17,6 @@ export const env = createEnv({
|
||||
server: {
|
||||
// Core Database & Authentication
|
||||
DATABASE_URL: z.string().url(), // Primary database connection string
|
||||
DATABASE_SSL: z.enum(['disable', 'prefer', 'require', 'verify-ca', 'verify-full']).optional(), // PostgreSQL SSL mode
|
||||
DATABASE_SSL_CA: z.string().optional(), // Base64-encoded CA certificate for SSL verification
|
||||
BETTER_AUTH_URL: z.string().url(), // Base URL for Better Auth service
|
||||
BETTER_AUTH_SECRET: z.string().min(32), // Secret key for Better Auth JWT signing
|
||||
DISABLE_REGISTRATION: z.boolean().optional(), // Flag to disable new user registration
|
||||
@@ -36,7 +34,6 @@ export const env = createEnv({
|
||||
AGENT_INDEXER_URL: z.string().url().optional(), // URL for agent training data indexer
|
||||
AGENT_INDEXER_API_KEY: z.string().min(1).optional(), // API key for agent indexer authentication
|
||||
|
||||
|
||||
// Database & Storage
|
||||
REDIS_URL: z.string().url().optional(), // Redis connection string for caching/sessions
|
||||
|
||||
@@ -92,7 +89,6 @@ export const env = createEnv({
|
||||
TELEMETRY_ENDPOINT: z.string().url().optional(), // Custom telemetry/analytics endpoint
|
||||
COST_MULTIPLIER: z.number().optional(), // Multiplier for cost calculations
|
||||
LOG_LEVEL: z.enum(['DEBUG', 'INFO', 'WARN', 'ERROR']).optional(), // Minimum log level to display (defaults to ERROR in production, DEBUG in development)
|
||||
POSTHOG_ENABLED: z.boolean().optional(), // Enable PostHog analytics and session recording
|
||||
|
||||
// External Services
|
||||
BROWSERBASE_API_KEY: z.string().min(1).optional(), // Browserbase API key for browser automation
|
||||
|
||||
@@ -1,108 +1,154 @@
|
||||
import { existsSync, readFileSync } from 'fs'
|
||||
import * as Papa from 'papaparse'
|
||||
import { createReadStream, existsSync } from 'fs'
|
||||
import { Readable } from 'stream'
|
||||
import { type Options, parse } from 'csv-parse'
|
||||
import type { FileParseResult, FileParser } from '@/lib/file-parsers/types'
|
||||
import { sanitizeTextForUTF8 } from '@/lib/file-parsers/utils'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
|
||||
const logger = createLogger('CsvParser')
|
||||
|
||||
const PARSE_OPTIONS = {
|
||||
header: true,
|
||||
skipEmptyLines: true,
|
||||
transformHeader: (header: string) => sanitizeTextForUTF8(String(header)),
|
||||
transform: (value: string) => sanitizeTextForUTF8(String(value || '')),
|
||||
const CONFIG = {
|
||||
MAX_PREVIEW_ROWS: 1000, // Only keep first 1000 rows for preview
|
||||
MAX_SAMPLE_ROWS: 100, // Sample for metadata
|
||||
MAX_ERRORS: 100, // Stop after 100 errors
|
||||
STREAM_CHUNK_SIZE: 16384, // 16KB chunks for streaming
|
||||
}
|
||||
|
||||
export class CsvParser implements FileParser {
|
||||
async parseFile(filePath: string): Promise<FileParseResult> {
|
||||
try {
|
||||
if (!filePath) {
|
||||
throw new Error('No file path provided')
|
||||
}
|
||||
|
||||
if (!existsSync(filePath)) {
|
||||
throw new Error(`File not found: ${filePath}`)
|
||||
}
|
||||
|
||||
const fileContent = readFileSync(filePath, 'utf8')
|
||||
|
||||
const parseResult = Papa.parse(fileContent, PARSE_OPTIONS)
|
||||
|
||||
if (parseResult.errors && parseResult.errors.length > 0) {
|
||||
const errorMessages = parseResult.errors.map((err) => err.message).join(', ')
|
||||
logger.error('CSV parsing errors:', parseResult.errors)
|
||||
throw new Error(`Failed to parse CSV file: ${errorMessages}`)
|
||||
}
|
||||
|
||||
const results = parseResult.data as Record<string, any>[]
|
||||
const headers = parseResult.meta.fields || []
|
||||
|
||||
let content = ''
|
||||
|
||||
if (headers.length > 0) {
|
||||
const cleanHeaders = headers.map((h) => sanitizeTextForUTF8(String(h)))
|
||||
content += `${cleanHeaders.join(', ')}\n`
|
||||
}
|
||||
|
||||
results.forEach((row) => {
|
||||
const cleanValues = Object.values(row).map((v) => sanitizeTextForUTF8(String(v || '')))
|
||||
content += `${cleanValues.join(', ')}\n`
|
||||
})
|
||||
|
||||
return {
|
||||
content: sanitizeTextForUTF8(content),
|
||||
metadata: {
|
||||
rowCount: results.length,
|
||||
headers: headers,
|
||||
rawData: results,
|
||||
},
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('CSV general error:', error)
|
||||
throw new Error(`Failed to process CSV file: ${(error as Error).message}`)
|
||||
if (!filePath) {
|
||||
throw new Error('No file path provided')
|
||||
}
|
||||
|
||||
if (!existsSync(filePath)) {
|
||||
throw new Error(`File not found: ${filePath}`)
|
||||
}
|
||||
|
||||
const stream = createReadStream(filePath, {
|
||||
highWaterMark: CONFIG.STREAM_CHUNK_SIZE,
|
||||
})
|
||||
|
||||
return this.parseStream(stream)
|
||||
}
|
||||
|
||||
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
|
||||
try {
|
||||
logger.info('Parsing buffer, size:', buffer.length)
|
||||
const bufferSize = buffer.length
|
||||
logger.info(
|
||||
`Parsing CSV buffer, size: ${bufferSize} bytes (${(bufferSize / 1024 / 1024).toFixed(2)} MB)`
|
||||
)
|
||||
|
||||
const fileContent = buffer.toString('utf8')
|
||||
const stream = Readable.from(buffer, {
|
||||
highWaterMark: CONFIG.STREAM_CHUNK_SIZE,
|
||||
})
|
||||
|
||||
const parseResult = Papa.parse(fileContent, PARSE_OPTIONS)
|
||||
return this.parseStream(stream)
|
||||
}
|
||||
|
||||
if (parseResult.errors && parseResult.errors.length > 0) {
|
||||
const errorMessages = parseResult.errors.map((err) => err.message).join(', ')
|
||||
logger.error('CSV parsing errors:', parseResult.errors)
|
||||
throw new Error(`Failed to parse CSV buffer: ${errorMessages}`)
|
||||
private parseStream(inputStream: NodeJS.ReadableStream): Promise<FileParseResult> {
|
||||
return new Promise((resolve, reject) => {
|
||||
let rowCount = 0
|
||||
let errorCount = 0
|
||||
let headers: string[] = []
|
||||
let processedContent = ''
|
||||
const sampledRows: any[] = []
|
||||
const errors: string[] = []
|
||||
let firstRowProcessed = false
|
||||
let aborted = false
|
||||
|
||||
const parserOptions: Options = {
|
||||
columns: true, // Use first row as headers
|
||||
skip_empty_lines: true, // Skip empty lines
|
||||
trim: true, // Trim whitespace
|
||||
relax_column_count: true, // Allow variable column counts
|
||||
relax_quotes: true, // Be lenient with quotes
|
||||
skip_records_with_error: true, // Skip bad records
|
||||
raw: false,
|
||||
cast: false,
|
||||
}
|
||||
const parser = parse(parserOptions)
|
||||
|
||||
const results = parseResult.data as Record<string, any>[]
|
||||
const headers = parseResult.meta.fields || []
|
||||
parser.on('readable', () => {
|
||||
let record
|
||||
while ((record = parser.read()) !== null && !aborted) {
|
||||
rowCount++
|
||||
|
||||
let content = ''
|
||||
if (!firstRowProcessed && record) {
|
||||
headers = Object.keys(record).map((h) => sanitizeTextForUTF8(String(h)))
|
||||
processedContent = `${headers.join(', ')}\n`
|
||||
firstRowProcessed = true
|
||||
}
|
||||
|
||||
if (headers.length > 0) {
|
||||
const cleanHeaders = headers.map((h) => sanitizeTextForUTF8(String(h)))
|
||||
content += `${cleanHeaders.join(', ')}\n`
|
||||
}
|
||||
if (rowCount <= CONFIG.MAX_PREVIEW_ROWS) {
|
||||
try {
|
||||
const cleanValues = Object.values(record).map((v: any) =>
|
||||
sanitizeTextForUTF8(String(v || ''))
|
||||
)
|
||||
processedContent += `${cleanValues.join(', ')}\n`
|
||||
|
||||
results.forEach((row) => {
|
||||
const cleanValues = Object.values(row).map((v) => sanitizeTextForUTF8(String(v || '')))
|
||||
content += `${cleanValues.join(', ')}\n`
|
||||
if (rowCount <= CONFIG.MAX_SAMPLE_ROWS) {
|
||||
sampledRows.push(record)
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn(`Error processing row ${rowCount}:`, err)
|
||||
}
|
||||
}
|
||||
|
||||
if (rowCount % 10000 === 0) {
|
||||
logger.info(`Processed ${rowCount} rows...`)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
content: sanitizeTextForUTF8(content),
|
||||
metadata: {
|
||||
rowCount: results.length,
|
||||
headers: headers,
|
||||
rawData: results,
|
||||
},
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('CSV buffer parsing error:', error)
|
||||
throw new Error(`Failed to process CSV buffer: ${(error as Error).message}`)
|
||||
}
|
||||
parser.on('skip', (err: any) => {
|
||||
errorCount++
|
||||
|
||||
if (errorCount <= 5) {
|
||||
const errorMsg = `Row ${err.lines || rowCount}: ${err.message || 'Unknown error'}`
|
||||
errors.push(errorMsg)
|
||||
logger.warn('CSV skip:', errorMsg)
|
||||
}
|
||||
|
||||
if (errorCount >= CONFIG.MAX_ERRORS) {
|
||||
aborted = true
|
||||
parser.destroy()
|
||||
reject(new Error(`Too many errors (${errorCount}). File may be corrupted.`))
|
||||
}
|
||||
})
|
||||
|
||||
parser.on('error', (err: Error) => {
|
||||
logger.error('CSV parser error:', err)
|
||||
reject(new Error(`CSV parsing failed: ${err.message}`))
|
||||
})
|
||||
|
||||
parser.on('end', () => {
|
||||
if (!aborted) {
|
||||
if (rowCount > CONFIG.MAX_PREVIEW_ROWS) {
|
||||
processedContent += `\n[... ${rowCount.toLocaleString()} total rows, showing first ${CONFIG.MAX_PREVIEW_ROWS} ...]\n`
|
||||
}
|
||||
|
||||
logger.info(`CSV parsing complete: ${rowCount} rows, ${errorCount} errors`)
|
||||
|
||||
resolve({
|
||||
content: sanitizeTextForUTF8(processedContent),
|
||||
metadata: {
|
||||
rowCount,
|
||||
headers,
|
||||
errorCount,
|
||||
errors: errors.slice(0, 10),
|
||||
truncated: rowCount > CONFIG.MAX_PREVIEW_ROWS,
|
||||
sampledData: sampledRows,
|
||||
},
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
inputStream.on('error', (err) => {
|
||||
logger.error('Input stream error:', err)
|
||||
parser.destroy()
|
||||
reject(new Error(`Stream error: ${err.message}`))
|
||||
})
|
||||
|
||||
inputStream.pipe(parser)
|
||||
})
|
||||
}
|
||||
}
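A usage sketch for the streaming parser. It assumes the import path used by the parser loader below and the metadata fields added here (rowCount, headers, errorCount, truncated, sampledData).

import { readFile } from 'fs/promises'
import { CsvParser } from '@/lib/file-parsers/csv-parser'

async function previewCsv(path: string): Promise<void> {
  const result = await new CsvParser().parseBuffer(await readFile(path))
  const { rowCount, headers, truncated } = result.metadata ?? {}
  console.log(`${rowCount} rows, headers: ${headers?.join(', ')}`)
  if (truncated) {
    // content holds only the first 1,000 rows; sampledData keeps up to 100 raw records
  }
}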
|
||||
|
||||
@@ -27,8 +27,9 @@ function getParserInstances(): Record<string, FileParser> {
|
||||
try {
|
||||
const { CsvParser } = require('@/lib/file-parsers/csv-parser')
|
||||
parserInstances.csv = new CsvParser()
|
||||
logger.info('Loaded streaming CSV parser with csv-parse library')
|
||||
} catch (error) {
|
||||
logger.error('Failed to load CSV parser:', error)
|
||||
logger.error('Failed to load streaming CSV parser:', error)
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -63,6 +64,7 @@ function getParserInstances(): Record<string, FileParser> {
|
||||
const { XlsxParser } = require('@/lib/file-parsers/xlsx-parser')
|
||||
parserInstances.xlsx = new XlsxParser()
|
||||
parserInstances.xls = new XlsxParser()
|
||||
logger.info('Loaded XLSX parser')
|
||||
} catch (error) {
|
||||
logger.error('Failed to load XLSX parser:', error)
|
||||
}
|
||||
@@ -82,6 +84,32 @@ function getParserInstances(): Record<string, FileParser> {
|
||||
} catch (error) {
|
||||
logger.error('Failed to load HTML parser:', error)
|
||||
}
|
||||
|
||||
try {
|
||||
const { parseJSON, parseJSONBuffer } = require('@/lib/file-parsers/json-parser')
|
||||
parserInstances.json = {
|
||||
parseFile: parseJSON,
|
||||
parseBuffer: parseJSONBuffer,
|
||||
}
|
||||
logger.info('Loaded JSON parser')
|
||||
} catch (error) {
|
||||
logger.error('Failed to load JSON parser:', error)
|
||||
}
|
||||
|
||||
try {
|
||||
const { parseYAML, parseYAMLBuffer } = require('@/lib/file-parsers/yaml-parser')
|
||||
parserInstances.yaml = {
|
||||
parseFile: parseYAML,
|
||||
parseBuffer: parseYAMLBuffer,
|
||||
}
|
||||
parserInstances.yml = {
|
||||
parseFile: parseYAML,
|
||||
parseBuffer: parseYAMLBuffer,
|
||||
}
|
||||
logger.info('Loaded YAML parser')
|
||||
} catch (error) {
|
||||
logger.error('Failed to load YAML parser:', error)
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Error loading file parsers:', error)
|
||||
}
|
||||
|
||||
74
apps/sim/lib/file-parsers/json-parser.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import type { FileParseResult } from './types'
|
||||
|
||||
/**
|
||||
* Parse JSON files
|
||||
*/
|
||||
export async function parseJSON(filePath: string): Promise<FileParseResult> {
|
||||
const fs = await import('fs/promises')
|
||||
const content = await fs.readFile(filePath, 'utf-8')
|
||||
|
||||
try {
|
||||
// Parse to validate JSON
|
||||
const jsonData = JSON.parse(content)
|
||||
|
||||
// Return pretty-printed JSON for better readability
|
||||
const formattedContent = JSON.stringify(jsonData, null, 2)
|
||||
|
||||
// Extract metadata about the JSON structure
|
||||
const metadata = {
|
||||
type: 'json',
|
||||
isArray: Array.isArray(jsonData),
|
||||
keys: Array.isArray(jsonData) ? [] : Object.keys(jsonData),
|
||||
itemCount: Array.isArray(jsonData) ? jsonData.length : undefined,
|
||||
depth: getJsonDepth(jsonData),
|
||||
}
|
||||
|
||||
return {
|
||||
content: formattedContent,
|
||||
metadata,
|
||||
}
|
||||
} catch (error) {
|
||||
throw new Error(`Invalid JSON: ${error instanceof Error ? error.message : 'Unknown error'}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse JSON from buffer
|
||||
*/
|
||||
export async function parseJSONBuffer(buffer: Buffer): Promise<FileParseResult> {
|
||||
const content = buffer.toString('utf-8')
|
||||
|
||||
try {
|
||||
const jsonData = JSON.parse(content)
|
||||
const formattedContent = JSON.stringify(jsonData, null, 2)
|
||||
|
||||
const metadata = {
|
||||
type: 'json',
|
||||
isArray: Array.isArray(jsonData),
|
||||
keys: Array.isArray(jsonData) ? [] : Object.keys(jsonData),
|
||||
itemCount: Array.isArray(jsonData) ? jsonData.length : undefined,
|
||||
depth: getJsonDepth(jsonData),
|
||||
}
|
||||
|
||||
return {
|
||||
content: formattedContent,
|
||||
metadata,
|
||||
}
|
||||
} catch (error) {
|
||||
throw new Error(`Invalid JSON: ${error instanceof Error ? error.message : 'Unknown error'}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the depth of a JSON object
|
||||
*/
|
||||
function getJsonDepth(obj: any): number {
|
||||
if (obj === null || typeof obj !== 'object') return 0
|
||||
|
||||
if (Array.isArray(obj)) {
|
||||
return obj.length > 0 ? 1 + Math.max(...obj.map(getJsonDepth)) : 1
|
||||
}
|
||||
|
||||
const depths = Object.values(obj).map(getJsonDepth)
|
||||
return depths.length > 0 ? 1 + Math.max(...depths) : 1
|
||||
}
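A few worked values for the depth metric defined above (scalars are 0, a flat object or array is 1, and each nesting level adds 1):

console.log(getJsonDepth(42)) // 0
console.log(getJsonDepth({ a: 1 })) // 1
console.log(getJsonDepth([])) // 1 (empty arrays and objects still count as one level)
console.log(getJsonDepth({ a: { b: [1, 2] } })) // 3 (object -> object -> array)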
|
||||
@@ -6,6 +6,15 @@ import { createLogger } from '@/lib/logs/console/logger'
|
||||
|
||||
const logger = createLogger('XlsxParser')
|
||||
|
||||
// Configuration for handling large XLSX files
|
||||
const CONFIG = {
|
||||
MAX_PREVIEW_ROWS: 1000, // Only keep first 1000 rows for preview
|
||||
MAX_SAMPLE_ROWS: 100, // Sample for metadata
|
||||
ROWS_PER_CHUNK: 50, // Aggregate 50 rows per chunk to reduce chunk count
|
||||
MAX_CELL_LENGTH: 1000, // Truncate very long cell values
|
||||
MAX_CONTENT_SIZE: 10 * 1024 * 1024, // 10MB max content size
|
||||
}
|
||||
|
||||
export class XlsxParser implements FileParser {
|
||||
async parseFile(filePath: string): Promise<FileParseResult> {
|
||||
try {
|
||||
@@ -19,7 +28,12 @@ export class XlsxParser implements FileParser {
|
||||
|
||||
logger.info(`Parsing XLSX file: ${filePath}`)
|
||||
|
||||
const workbook = XLSX.readFile(filePath)
|
||||
// Read with streaming option for large files
|
||||
const workbook = XLSX.readFile(filePath, {
|
||||
dense: true, // Use dense mode for better memory efficiency
|
||||
sheetStubs: false, // Don't create stub cells
|
||||
})
|
||||
|
||||
return this.processWorkbook(workbook)
|
||||
} catch (error) {
|
||||
logger.error('XLSX file parsing error:', error)
|
||||
@@ -29,13 +43,21 @@ export class XlsxParser implements FileParser {
|
||||
|
||||
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
|
||||
try {
|
||||
logger.info('Parsing XLSX buffer, size:', buffer.length)
|
||||
const bufferSize = buffer.length
|
||||
logger.info(
|
||||
`Parsing XLSX buffer, size: ${bufferSize} bytes (${(bufferSize / 1024 / 1024).toFixed(2)} MB)`
|
||||
)
|
||||
|
||||
if (!buffer || buffer.length === 0) {
|
||||
throw new Error('Empty buffer provided')
|
||||
}
|
||||
|
||||
const workbook = XLSX.read(buffer, { type: 'buffer' })
|
||||
const workbook = XLSX.read(buffer, {
|
||||
type: 'buffer',
|
||||
dense: true, // Use dense mode for better memory efficiency
|
||||
sheetStubs: false, // Don't create stub cells
|
||||
})
|
||||
|
||||
return this.processWorkbook(workbook)
|
||||
} catch (error) {
|
||||
logger.error('XLSX buffer parsing error:', error)
|
||||
@@ -45,44 +67,111 @@ export class XlsxParser implements FileParser {
|
||||
|
||||
private processWorkbook(workbook: XLSX.WorkBook): FileParseResult {
|
||||
const sheetNames = workbook.SheetNames
|
||||
const sheets: Record<string, any[]> = {}
|
||||
let content = ''
|
||||
let totalRows = 0
|
||||
let truncated = false
|
||||
let contentSize = 0
|
||||
const sampledData: any[] = []
|
||||
|
||||
for (const sheetName of sheetNames) {
|
||||
const worksheet = workbook.Sheets[sheetName]
|
||||
|
||||
const sheetData = XLSX.utils.sheet_to_json(worksheet, { header: 1 })
|
||||
sheets[sheetName] = sheetData
|
||||
totalRows += sheetData.length
|
||||
// Get sheet dimensions
|
||||
const range = XLSX.utils.decode_range(worksheet['!ref'] || 'A1')
|
||||
const rowCount = range.e.r - range.s.r + 1
|
||||
|
||||
logger.info(`Processing sheet: ${sheetName} with ${rowCount} rows`)
|
||||
|
||||
// Convert to JSON with header row
|
||||
const sheetData = XLSX.utils.sheet_to_json(worksheet, {
|
||||
header: 1,
|
||||
defval: '', // Default value for empty cells
|
||||
blankrows: false, // Skip blank rows
|
||||
})
|
||||
|
||||
const actualRowCount = sheetData.length
|
||||
totalRows += actualRowCount
|
||||
|
||||
// Store limited sample for metadata
|
||||
if (sampledData.length < CONFIG.MAX_SAMPLE_ROWS) {
|
||||
const sampleSize = Math.min(CONFIG.MAX_SAMPLE_ROWS - sampledData.length, actualRowCount)
|
||||
sampledData.push(...sheetData.slice(0, sampleSize))
|
||||
}
|
||||
|
||||
// Only process limited rows for preview
|
||||
const rowsToProcess = Math.min(actualRowCount, CONFIG.MAX_PREVIEW_ROWS)
|
||||
const cleanSheetName = sanitizeTextForUTF8(sheetName)
|
||||
content += `Sheet: ${cleanSheetName}\n`
|
||||
content += `=${'='.repeat(cleanSheetName.length + 6)}\n\n`
|
||||
|
||||
if (sheetData.length > 0) {
|
||||
sheetData.forEach((row: unknown, rowIndex: number) => {
|
||||
if (Array.isArray(row) && row.length > 0) {
|
||||
const rowString = row
|
||||
.map((cell) => {
|
||||
if (cell === null || cell === undefined) {
|
||||
return ''
|
||||
}
|
||||
return sanitizeTextForUTF8(String(cell))
|
||||
})
|
||||
.join('\t')
|
||||
// Add sheet header
|
||||
const sheetHeader = `\n=== Sheet: ${cleanSheetName} ===\n`
|
||||
content += sheetHeader
|
||||
contentSize += sheetHeader.length
|
||||
|
||||
content += `${rowString}\n`
|
||||
if (actualRowCount > 0) {
|
||||
// Get headers if available
|
||||
const headers = sheetData[0] as any[]
|
||||
if (headers && headers.length > 0) {
|
||||
const headerRow = headers.map((h) => this.truncateCell(h)).join('\t')
|
||||
content += `${headerRow}\n`
|
||||
content += `${'-'.repeat(Math.min(80, headerRow.length))}\n`
|
||||
contentSize += headerRow.length + 82
|
||||
}
|
||||
|
||||
// Process data rows in chunks
|
||||
let chunkContent = ''
|
||||
let chunkRowCount = 0
|
||||
|
||||
for (let i = 1; i < rowsToProcess; i++) {
|
||||
const row = sheetData[i] as any[]
|
||||
if (row && row.length > 0) {
|
||||
const rowString = row.map((cell) => this.truncateCell(cell)).join('\t')
|
||||
|
||||
chunkContent += `${rowString}\n`
|
||||
chunkRowCount++
|
||||
|
||||
// Flush the accumulated rows every ROWS_PER_CHUNK rows so the size check below runs periodically
|
||||
if (chunkRowCount >= CONFIG.ROWS_PER_CHUNK) {
|
||||
content += chunkContent
|
||||
contentSize += chunkContent.length
|
||||
chunkContent = ''
|
||||
chunkRowCount = 0
|
||||
|
||||
// Check content size limit
|
||||
if (contentSize > CONFIG.MAX_CONTENT_SIZE) {
|
||||
truncated = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Add remaining chunk content
|
||||
if (chunkContent && contentSize < CONFIG.MAX_CONTENT_SIZE) {
|
||||
content += chunkContent
|
||||
contentSize += chunkContent.length
|
||||
}
|
||||
|
||||
// Add truncation notice if needed
|
||||
if (actualRowCount > rowsToProcess) {
|
||||
const notice = `\n[... ${actualRowCount.toLocaleString()} total rows, showing first ${rowsToProcess.toLocaleString()} ...]\n`
|
||||
content += notice
|
||||
truncated = true
|
||||
}
|
||||
} else {
|
||||
content += '[Empty sheet]\n'
|
||||
}
|
||||
|
||||
content += '\n'
|
||||
// Stop processing if content is too large
|
||||
if (contentSize > CONFIG.MAX_CONTENT_SIZE) {
|
||||
content += '\n[... Content truncated due to size limits ...]\n'
|
||||
truncated = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`XLSX parsing completed: ${sheetNames.length} sheets, ${totalRows} total rows`)
|
||||
logger.info(
|
||||
`XLSX parsing completed: ${sheetNames.length} sheets, ${totalRows} total rows, truncated: ${truncated}`
|
||||
)
|
||||
|
||||
const cleanContent = sanitizeTextForUTF8(content).trim()
|
||||
|
||||
@@ -92,8 +181,25 @@ export class XlsxParser implements FileParser {
|
||||
sheetCount: sheetNames.length,
|
||||
sheetNames: sheetNames,
|
||||
totalRows: totalRows,
|
||||
sheets: sheets,
|
||||
truncated: truncated,
|
||||
sampledData: sampledData.slice(0, CONFIG.MAX_SAMPLE_ROWS),
|
||||
contentSize: contentSize,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
private truncateCell(cell: any): string {
|
||||
if (cell === null || cell === undefined) {
|
||||
return ''
|
||||
}
|
||||
|
||||
let cellStr = String(cell)
|
||||
|
||||
// Truncate very long cells
|
||||
if (cellStr.length > CONFIG.MAX_CELL_LENGTH) {
|
||||
cellStr = `${cellStr.substring(0, CONFIG.MAX_CELL_LENGTH)}...`
|
||||
}
|
||||
|
||||
return sanitizeTextForUTF8(cellStr)
|
||||
}
|
||||
}
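A worked example of the limits above; the values are illustrative. A 1,500-character cell is emitted as its first 1,000 characters plus an ellipsis, and once the accumulated content passes MAX_CONTENT_SIZE the remaining sheets are replaced by a truncation notice.

const longCell = 'x'.repeat(1_500)
const emitted = `${longCell.substring(0, 1_000)}...`
console.log(emitted.length) // 1003
// Because the size check runs when a 50-row chunk is flushed, the content can
// overshoot the 10 MB cap by at most one chunk before processing stops.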
|
||||
|
||||
75
apps/sim/lib/file-parsers/yaml-parser.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import * as yaml from 'js-yaml'
|
||||
import type { FileParseResult } from './types'
|
||||
|
||||
/**
|
||||
* Parse YAML files
|
||||
*/
|
||||
export async function parseYAML(filePath: string): Promise<FileParseResult> {
|
||||
const fs = await import('fs/promises')
|
||||
const content = await fs.readFile(filePath, 'utf-8')
|
||||
|
||||
try {
|
||||
// Parse YAML to validate and extract structure
|
||||
const yamlData = yaml.load(content)
|
||||
|
||||
// Convert to JSON for consistent processing
|
||||
const jsonContent = JSON.stringify(yamlData, null, 2)
|
||||
|
||||
// Extract metadata about the YAML structure
|
||||
const metadata = {
|
||||
type: 'yaml',
|
||||
isArray: Array.isArray(yamlData),
|
||||
keys: Array.isArray(yamlData) ? [] : Object.keys(yamlData || {}),
|
||||
itemCount: Array.isArray(yamlData) ? yamlData.length : undefined,
|
||||
depth: getYamlDepth(yamlData),
|
||||
}
|
||||
|
||||
return {
|
||||
content: jsonContent,
|
||||
metadata,
|
||||
}
|
||||
} catch (error) {
|
||||
throw new Error(`Invalid YAML: ${error instanceof Error ? error.message : 'Unknown error'}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse YAML from buffer
|
||||
*/
|
||||
export async function parseYAMLBuffer(buffer: Buffer): Promise<FileParseResult> {
|
||||
const content = buffer.toString('utf-8')
|
||||
|
||||
try {
|
||||
const yamlData = yaml.load(content)
|
||||
const jsonContent = JSON.stringify(yamlData, null, 2)
|
||||
|
||||
const metadata = {
|
||||
type: 'yaml',
|
||||
isArray: Array.isArray(yamlData),
|
||||
keys: Array.isArray(yamlData) ? [] : Object.keys(yamlData || {}),
|
||||
itemCount: Array.isArray(yamlData) ? yamlData.length : undefined,
|
||||
depth: getYamlDepth(yamlData),
|
||||
}
|
||||
|
||||
return {
|
||||
content: jsonContent,
|
||||
metadata,
|
||||
}
|
||||
} catch (error) {
|
||||
throw new Error(`Invalid YAML: ${error instanceof Error ? error.message : 'Unknown error'}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the depth of a YAML/JSON object
|
||||
*/
|
||||
function getYamlDepth(obj: any): number {
|
||||
if (obj === null || typeof obj !== 'object') return 0
|
||||
|
||||
if (Array.isArray(obj)) {
|
||||
return obj.length > 0 ? 1 + Math.max(...obj.map(getYamlDepth)) : 1
|
||||
}
|
||||
|
||||
const depths = Object.values(obj).map(getYamlDepth)
|
||||
return depths.length > 0 ? 1 + Math.max(...depths) : 1
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
import { type Chunk, JsonYamlChunker, StructuredDataChunker, TextChunker } from '@/lib/chunkers'
|
||||
import { env } from '@/lib/env'
|
||||
import { parseBuffer, parseFile } from '@/lib/file-parsers'
|
||||
import { type Chunk, TextChunker } from '@/lib/knowledge/documents/chunker'
|
||||
import { retryWithExponentialBackoff } from '@/lib/knowledge/documents/utils'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import {
|
||||
@@ -15,8 +15,8 @@ import { mistralParserTool } from '@/tools/mistral/parser'
|
||||
const logger = createLogger('DocumentProcessor')
|
||||
|
||||
const TIMEOUTS = {
|
||||
FILE_DOWNLOAD: 60000,
|
||||
MISTRAL_OCR_API: 90000,
|
||||
FILE_DOWNLOAD: 180000,
|
||||
MISTRAL_OCR_API: 120000,
|
||||
} as const
|
||||
|
||||
type OCRResult = {
|
||||
@@ -97,8 +97,32 @@ export async function processDocument(
|
||||
const { content, processingMethod } = parseResult
|
||||
const cloudUrl = 'cloudUrl' in parseResult ? parseResult.cloudUrl : undefined
|
||||
|
||||
const chunker = new TextChunker({ chunkSize, overlap: chunkOverlap, minChunkSize })
|
||||
const chunks = await chunker.chunk(content)
|
||||
let chunks: Chunk[]
|
||||
const metadata = 'metadata' in parseResult ? parseResult.metadata : {}
|
||||
|
||||
const isJsonYaml =
|
||||
metadata.type === 'json' ||
|
||||
metadata.type === 'yaml' ||
|
||||
mimeType.includes('json') ||
|
||||
mimeType.includes('yaml')
|
||||
|
||||
if (isJsonYaml && JsonYamlChunker.isStructuredData(content)) {
|
||||
logger.info('Using JSON/YAML chunker for structured data')
|
||||
chunks = await JsonYamlChunker.chunkJsonYaml(content, {
|
||||
chunkSize,
|
||||
minChunkSize,
|
||||
})
|
||||
} else if (StructuredDataChunker.isStructuredData(content, mimeType)) {
|
||||
logger.info('Using structured data chunker for spreadsheet/CSV content')
|
||||
chunks = await StructuredDataChunker.chunkStructuredData(content, {
|
||||
headers: metadata.headers,
|
||||
totalRows: metadata.totalRows || metadata.rowCount,
|
||||
sheetName: metadata.sheetNames?.[0],
|
||||
})
|
||||
} else {
|
||||
const chunker = new TextChunker({ chunkSize, overlap: chunkOverlap, minChunkSize })
|
||||
chunks = await chunker.chunk(content)
|
||||
}
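A hypothetical helper that summarizes the branch above; it assumes the chunker imports at the top of this file and is not part of the diff itself.

function pickChunker(content: string, mimeType: string, metadata: any): 'json-yaml' | 'structured' | 'text' {
  const isJsonYaml =
    metadata?.type === 'json' ||
    metadata?.type === 'yaml' ||
    mimeType.includes('json') ||
    mimeType.includes('yaml')
  if (isJsonYaml && JsonYamlChunker.isStructuredData(content)) return 'json-yaml'
  if (StructuredDataChunker.isStructuredData(content, mimeType)) return 'structured'
  return 'text' // plain TextChunker with chunkSize/overlap/minChunkSize
}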
|
||||
|
||||
const characterCount = content.length
|
||||
const tokenCount = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0)
|
||||
@@ -132,22 +156,23 @@ async function parseDocument(
|
||||
content: string
|
||||
processingMethod: 'file-parser' | 'mistral-ocr'
|
||||
cloudUrl?: string
|
||||
metadata?: any
|
||||
}> {
|
||||
const isPDF = mimeType === 'application/pdf'
|
||||
const hasAzureMistralOCR =
|
||||
env.OCR_AZURE_API_KEY && env.OCR_AZURE_ENDPOINT && env.OCR_AZURE_MODEL_NAME
|
||||
const hasMistralOCR = env.MISTRAL_API_KEY
|
||||
|
||||
// Check Azure Mistral OCR configuration
|
||||
if (isPDF && (hasAzureMistralOCR || hasMistralOCR)) {
|
||||
if (hasAzureMistralOCR) {
|
||||
logger.info(`Using Azure Mistral OCR: ${filename}`)
|
||||
return parseWithAzureMistralOCR(fileUrl, filename, mimeType)
|
||||
}
|
||||
|
||||
if (isPDF && hasAzureMistralOCR) {
|
||||
logger.info(`Using Azure Mistral OCR: ${filename}`)
|
||||
return parseWithAzureMistralOCR(fileUrl, filename, mimeType)
|
||||
}
|
||||
|
||||
if (isPDF && hasMistralOCR) {
|
||||
logger.info(`Using Mistral OCR: ${filename}`)
|
||||
return parseWithMistralOCR(fileUrl, filename, mimeType)
|
||||
if (hasMistralOCR) {
|
||||
logger.info(`Using Mistral OCR: ${filename}`)
|
||||
return parseWithMistralOCR(fileUrl, filename, mimeType)
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`Using file parser: ${filename}`)
|
||||
@@ -200,9 +225,7 @@ async function downloadFileWithTimeout(fileUrl: string): Promise<Buffer> {
|
||||
}
|
||||
|
||||
async function downloadFileForBase64(fileUrl: string): Promise<Buffer> {
|
||||
// Handle different URL types for Azure Mistral OCR base64 requirement
|
||||
if (fileUrl.startsWith('data:')) {
|
||||
// Extract base64 data from data URI
|
||||
const [, base64Data] = fileUrl.split(',')
|
||||
if (!base64Data) {
|
||||
throw new Error('Invalid data URI format')
|
||||
@@ -210,10 +233,8 @@ async function downloadFileForBase64(fileUrl: string): Promise<Buffer> {
|
||||
return Buffer.from(base64Data, 'base64')
|
||||
}
|
||||
if (fileUrl.startsWith('http')) {
|
||||
// Download from HTTP(S) URL
|
||||
return downloadFileWithTimeout(fileUrl)
|
||||
}
|
||||
// Local file - read it
|
||||
const fs = await import('fs/promises')
|
||||
return fs.readFile(fileUrl)
|
||||
}
|
||||
@@ -315,7 +336,6 @@ async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeT
|
||||
'Azure Mistral OCR'
|
||||
)
|
||||
|
||||
// Azure Mistral OCR accepts data URIs with base64 content
|
||||
const fileBuffer = await downloadFileForBase64(fileUrl)
|
||||
const base64Data = fileBuffer.toString('base64')
|
||||
const dataUri = `data:${mimeType};base64,${base64Data}`
|
||||
@@ -409,21 +429,25 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
|
||||
async function parseWithFileParser(fileUrl: string, filename: string, mimeType: string) {
|
||||
try {
|
||||
let content: string
|
||||
let metadata: any = {}
|
||||
|
||||
if (fileUrl.startsWith('data:')) {
|
||||
content = await parseDataURI(fileUrl, filename, mimeType)
|
||||
} else if (fileUrl.startsWith('http')) {
|
||||
content = await parseHttpFile(fileUrl, filename)
|
||||
const result = await parseHttpFile(fileUrl, filename)
|
||||
content = result.content
|
||||
metadata = result.metadata || {}
|
||||
} else {
|
||||
const result = await parseFile(fileUrl)
|
||||
content = result.content
|
||||
metadata = result.metadata || {}
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
throw new Error('File parser returned empty content')
|
||||
}
|
||||
|
||||
return { content, processingMethod: 'file-parser' as const, cloudUrl: undefined }
|
||||
return { content, processingMethod: 'file-parser' as const, cloudUrl: undefined, metadata }
|
||||
} catch (error) {
|
||||
logger.error(`File parser failed for ${filename}:`, error)
|
||||
throw error
|
||||
@@ -448,7 +472,10 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string)
|
||||
return result.content
|
||||
}
|
||||
|
||||
async function parseHttpFile(fileUrl: string, filename: string): Promise<string> {
|
||||
async function parseHttpFile(
|
||||
fileUrl: string,
|
||||
filename: string
|
||||
): Promise<{ content: string; metadata?: any }> {
|
||||
const buffer = await downloadFileWithTimeout(fileUrl)
|
||||
|
||||
const extension = filename.split('.').pop()?.toLowerCase()
|
||||
@@ -457,5 +484,5 @@ async function parseHttpFile(fileUrl: string, filename: string): Promise<string>
|
||||
}
|
||||
|
||||
const result = await parseBuffer(buffer, extension)
|
||||
return result.content
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -17,10 +17,18 @@ import type { DocumentSortField, SortOrder } from './types'
|
||||
const logger = createLogger('DocumentService')
|
||||
|
||||
const TIMEOUTS = {
|
||||
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 300) * 1000,
|
||||
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Increased to 10 minutes to match Trigger's timeout
|
||||
EMBEDDINGS_API: (env.KB_CONFIG_MAX_TIMEOUT || 10000) * 18,
|
||||
} as const
|
||||
|
||||
// Configuration for handling large documents
|
||||
const LARGE_DOC_CONFIG = {
|
||||
MAX_CHUNKS_PER_BATCH: 500, // Insert embeddings in batches of 500
|
||||
MAX_EMBEDDING_BATCH: 50, // Generate embeddings in batches of 50
|
||||
MAX_FILE_SIZE: 100 * 1024 * 1024, // 100MB max file size
|
||||
MAX_CHUNKS_PER_DOCUMENT: 100000, // Maximum chunks allowed per document
|
||||
}
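Rough implications of these caps, as illustrative arithmetic only:

// A document that hits the 100,000-chunk ceiling is embedded in
// ceil(100,000 / 50) = 2,000 batches and written in ceil(100,000 / 500) = 200 inserts.
const maxChunks = 100_000
const embeddingBatches = Math.ceil(maxChunks / LARGE_DOC_CONFIG.MAX_EMBEDDING_BATCH) // 2,000
const insertBatches = Math.ceil(maxChunks / LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH) // 200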
|
||||
|
||||
/**
|
||||
* Create a timeout wrapper for async operations
|
||||
*/
|
||||
@@ -448,14 +456,38 @@ export async function processDocumentAsync(
|
||||
processingOptions.minCharactersPerChunk || 1
|
||||
)
|
||||
|
||||
if (processed.chunks.length > LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT) {
|
||||
throw new Error(
|
||||
`Document has ${processed.chunks.length.toLocaleString()} chunks, exceeding maximum of ${LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT.toLocaleString()}. ` +
|
||||
`This document is unusually large and may need to be split into multiple files or preprocessed to reduce content.`
|
||||
)
|
||||
}
|
||||
|
||||
const now = new Date()
|
||||
|
||||
logger.info(
|
||||
`[${documentId}] Document parsed successfully, generating embeddings for ${processed.chunks.length} chunks`
|
||||
)
|
||||
|
||||
// Generate embeddings in batches for large documents
|
||||
const chunkTexts = processed.chunks.map((chunk) => chunk.text)
|
||||
const embeddings = chunkTexts.length > 0 ? await generateEmbeddings(chunkTexts) : []
|
||||
const embeddings: number[][] = []
|
||||
|
||||
if (chunkTexts.length > 0) {
|
||||
const batchSize = LARGE_DOC_CONFIG.MAX_EMBEDDING_BATCH
|
||||
const totalBatches = Math.ceil(chunkTexts.length / batchSize)
|
||||
|
||||
logger.info(`[${documentId}] Generating embeddings in ${totalBatches} batches`)
|
||||
|
||||
for (let i = 0; i < chunkTexts.length; i += batchSize) {
|
||||
const batch = chunkTexts.slice(i, i + batchSize)
|
||||
const batchNum = Math.floor(i / batchSize) + 1
|
||||
|
||||
logger.info(`[${documentId}] Processing embedding batch ${batchNum}/${totalBatches}`)
|
||||
const batchEmbeddings = await generateEmbeddings(batch)
|
||||
embeddings.push(...batchEmbeddings)
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`[${documentId}] Embeddings generated, fetching document tags`)
|
||||
|
||||
@@ -503,8 +535,24 @@ export async function processDocumentAsync(
|
||||
}))
|
||||
|
||||
await db.transaction(async (tx) => {
|
||||
// Insert embeddings in batches for large documents
|
||||
if (embeddingRecords.length > 0) {
|
||||
await tx.insert(embedding).values(embeddingRecords)
|
||||
const batchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH
|
||||
const totalBatches = Math.ceil(embeddingRecords.length / batchSize)
|
||||
|
||||
logger.info(
|
||||
`[${documentId}] Inserting ${embeddingRecords.length} embeddings in ${totalBatches} batches`
|
||||
)
|
||||
|
||||
for (let i = 0; i < embeddingRecords.length; i += batchSize) {
|
||||
const batch = embeddingRecords.slice(i, i + batchSize)
|
||||
const batchNum = Math.floor(i / batchSize) + 1
|
||||
|
||||
await tx.insert(embedding).values(batch)
|
||||
logger.info(
|
||||
`[${documentId}] Inserted batch ${batchNum}/${totalBatches} (${batch.length} records)`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
await tx
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
export const SIM_AGENT_API_URL_DEFAULT = 'https://copilot.sim.ai'
|
||||
export const SIM_AGENT_VERSION = '1.0.0'
|
||||
export const SIM_AGENT_VERSION = '1.0.1'
|
||||
|
||||
@@ -15,6 +15,9 @@ export const SUPPORTED_DOCUMENT_EXTENSIONS = [
|
||||
'pptx',
|
||||
'html',
|
||||
'htm',
|
||||
'json',
|
||||
'yaml',
|
||||
'yml',
|
||||
] as const
|
||||
|
||||
export type SupportedDocumentExtension = (typeof SUPPORTED_DOCUMENT_EXTENSIONS)[number]
|
||||
@@ -46,6 +49,9 @@ export const SUPPORTED_MIME_TYPES: Record<SupportedDocumentExtension, string[]>
|
||||
],
|
||||
html: ['text/html', 'application/xhtml+xml'],
|
||||
htm: ['text/html', 'application/xhtml+xml'],
|
||||
json: ['application/json', 'text/json', 'application/x-json'],
|
||||
yaml: ['text/yaml', 'text/x-yaml', 'application/yaml', 'application/x-yaml'],
|
||||
yml: ['text/yaml', 'text/x-yaml', 'application/yaml', 'application/x-yaml'],
|
||||
}
|
||||
|
||||
export const ACCEPTED_FILE_TYPES = Object.values(SUPPORTED_MIME_TYPES).flat()
|
||||
|
||||
@@ -4,6 +4,9 @@ import { assignLayers, groupByLayer } from './layering'
|
||||
import { calculatePositions } from './positioning'
|
||||
import type { Edge, LayoutOptions } from './types'
|
||||
import {
|
||||
CONTAINER_PADDING,
|
||||
CONTAINER_PADDING_X,
|
||||
CONTAINER_PADDING_Y,
|
||||
DEFAULT_CONTAINER_HEIGHT,
|
||||
DEFAULT_CONTAINER_WIDTH,
|
||||
getBlocksByParent,
|
||||
@@ -12,10 +15,6 @@ import {
|
||||
|
||||
const logger = createLogger('AutoLayout:Containers')
|
||||
|
||||
const CONTAINER_PADDING = 150
|
||||
const CONTAINER_HORIZONTAL_PADDING = 180
|
||||
const CONTAINER_VERTICAL_PADDING = 100
|
||||
|
||||
export function layoutContainers(
|
||||
blocks: Record<string, BlockState>,
|
||||
edges: Edge[],
|
||||
@@ -26,7 +25,7 @@ export function layoutContainers(
|
||||
const containerOptions: LayoutOptions = {
|
||||
horizontalSpacing: options.horizontalSpacing ? options.horizontalSpacing * 0.85 : 400,
|
||||
verticalSpacing: options.verticalSpacing ? options.verticalSpacing : 200,
|
||||
padding: { x: CONTAINER_HORIZONTAL_PADDING, y: CONTAINER_VERTICAL_PADDING },
|
||||
padding: { x: CONTAINER_PADDING_X, y: CONTAINER_PADDING_Y },
|
||||
alignment: options.alignment,
|
||||
}
|
||||
|
||||
@@ -68,8 +67,8 @@ export function layoutContainers(
|
||||
}
|
||||
|
||||
// Adjust all child positions to start at proper padding from container edges
|
||||
const xOffset = CONTAINER_HORIZONTAL_PADDING - minX
|
||||
const yOffset = CONTAINER_VERTICAL_PADDING - minY
|
||||
const xOffset = CONTAINER_PADDING_X - minX
|
||||
const yOffset = CONTAINER_PADDING_Y - minY
|
||||
|
||||
for (const node of childNodes.values()) {
|
||||
childBlocks[node.id].position = {
|
||||
|
||||
@@ -100,4 +100,6 @@ export function adjustForNewBlock(
|
||||
}
|
||||
|
||||
export type { LayoutOptions, LayoutResult, AdjustmentOptions, Edge, Loop, Parallel }
|
||||
export type { TargetedLayoutOptions } from './targeted'
|
||||
export { applyTargetedLayout, transferBlockHeights } from './targeted'
|
||||
export { getBlockMetrics, isContainerType } from './utils'
|
||||
|
||||
352
apps/sim/lib/workflows/autolayout/targeted.ts
Normal file
@@ -0,0 +1,352 @@
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import type { BlockState } from '@/stores/workflows/workflow/types'
|
||||
import { assignLayers, groupByLayer } from './layering'
|
||||
import { calculatePositions } from './positioning'
|
||||
import type { Edge, LayoutOptions } from './types'
|
||||
import {
|
||||
CONTAINER_PADDING,
|
||||
CONTAINER_PADDING_X,
|
||||
CONTAINER_PADDING_Y,
|
||||
DEFAULT_CONTAINER_HEIGHT,
|
||||
DEFAULT_CONTAINER_WIDTH,
|
||||
getBlockMetrics,
|
||||
getBlocksByParent,
|
||||
isContainerType,
|
||||
prepareBlockMetrics,
|
||||
ROOT_PADDING_X,
|
||||
ROOT_PADDING_Y,
|
||||
} from './utils'
|
||||
|
||||
const logger = createLogger('AutoLayout:Targeted')
|
||||
|
||||
export interface TargetedLayoutOptions extends LayoutOptions {
|
||||
changedBlockIds: string[]
|
||||
verticalSpacing?: number
|
||||
horizontalSpacing?: number
|
||||
}
|
||||
|
||||
export function applyTargetedLayout(
|
||||
blocks: Record<string, BlockState>,
|
||||
edges: Edge[],
|
||||
options: TargetedLayoutOptions
|
||||
): Record<string, BlockState> {
|
||||
const { changedBlockIds, verticalSpacing = 200, horizontalSpacing = 550 } = options
|
||||
|
||||
if (!changedBlockIds || changedBlockIds.length === 0) {
|
||||
return blocks
|
||||
}
|
||||
|
||||
const changedSet = new Set(changedBlockIds)
|
||||
const blocksCopy: Record<string, BlockState> = JSON.parse(JSON.stringify(blocks))
|
||||
|
||||
const groups = getBlocksByParent(blocksCopy)
|
||||
|
||||
layoutGroup(null, groups.root, blocksCopy, edges, changedSet, verticalSpacing, horizontalSpacing)
|
||||
|
||||
for (const [parentId, childIds] of groups.children.entries()) {
|
||||
layoutGroup(
|
||||
parentId,
|
||||
childIds,
|
||||
blocksCopy,
|
||||
edges,
|
||||
changedSet,
|
||||
verticalSpacing,
|
||||
horizontalSpacing
|
||||
)
|
||||
}
|
||||
|
||||
return blocksCopy
|
||||
}
|
||||
|
||||
function layoutGroup(
|
||||
parentId: string | null,
|
||||
childIds: string[],
|
||||
blocks: Record<string, BlockState>,
|
||||
edges: Edge[],
|
||||
changedSet: Set<string>,
|
||||
verticalSpacing: number,
|
||||
horizontalSpacing: number
|
||||
): void {
|
||||
if (childIds.length === 0) return
|
||||
|
||||
const parentBlock = parentId ? blocks[parentId] : undefined
|
||||
|
||||
const requestedLayout = childIds.filter((id) => {
|
||||
const block = blocks[id]
|
||||
if (!block) return false
|
||||
// Never reposition containers, only update their dimensions
|
||||
if (isContainerType(block.type)) return false
|
||||
return changedSet.has(id)
|
||||
})
|
||||
const missingPositions = childIds.filter((id) => {
|
||||
const block = blocks[id]
|
||||
if (!block) return false
|
||||
// Containers with missing positions should still get positioned
|
||||
return !hasPosition(block)
|
||||
})
|
||||
const needsLayoutSet = new Set([...requestedLayout, ...missingPositions])
|
||||
const needsLayout = Array.from(needsLayoutSet)
|
||||
|
||||
if (parentBlock) {
|
||||
updateContainerDimensions(parentBlock, childIds, blocks)
|
||||
}
|
||||
|
||||
// Always update container dimensions even if no blocks need repositioning
|
||||
// This ensures containers resize properly when children are added/removed
|
||||
if (needsLayout.length === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
const oldPositions = new Map<string, { x: number; y: number }>()
|
||||
|
||||
for (const id of childIds) {
|
||||
const block = blocks[id]
|
||||
if (!block) continue
|
||||
oldPositions.set(id, { ...block.position })
|
||||
}
|
||||
|
||||
const layoutPositions = computeLayoutPositions(
|
||||
childIds,
|
||||
blocks,
|
||||
edges,
|
||||
parentBlock,
|
||||
horizontalSpacing,
|
||||
verticalSpacing
|
||||
)
|
||||
|
||||
if (layoutPositions.size === 0) {
|
||||
// No layout positions computed, but still update container dimensions
|
||||
if (parentBlock) {
|
||||
updateContainerDimensions(parentBlock, childIds, blocks)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
let offsetX = 0
|
||||
let offsetY = 0
|
||||
|
||||
const anchorId = childIds.find((id) => !needsLayout.includes(id) && layoutPositions.has(id))
|
||||
|
||||
if (anchorId) {
|
||||
const oldPos = oldPositions.get(anchorId)
|
||||
const newPos = layoutPositions.get(anchorId)
|
||||
if (oldPos && newPos) {
|
||||
offsetX = oldPos.x - newPos.x
|
||||
offsetY = oldPos.y - newPos.y
|
||||
}
|
||||
} else {
|
||||
// No anchor - positions from calculatePositions are already correct relative to padding
|
||||
// Container positions are parent-relative, root positions are absolute
|
||||
// The normalization in computeLayoutPositions already handled the padding offset
|
||||
offsetX = 0
|
||||
offsetY = 0
|
||||
}
|
||||
|
||||
for (const id of needsLayout) {
|
||||
const block = blocks[id]
|
||||
const newPos = layoutPositions.get(id)
|
||||
if (!block || !newPos) continue
|
||||
block.position = {
|
||||
x: newPos.x + offsetX,
|
||||
y: newPos.y + offsetY,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function computeLayoutPositions(
|
||||
childIds: string[],
|
||||
blocks: Record<string, BlockState>,
|
||||
edges: Edge[],
|
||||
parentBlock: BlockState | undefined,
|
||||
horizontalSpacing: number,
|
||||
verticalSpacing: number
|
||||
): Map<string, { x: number; y: number }> {
|
||||
const subsetBlocks: Record<string, BlockState> = {}
|
||||
for (const id of childIds) {
|
||||
subsetBlocks[id] = blocks[id]
|
||||
}
|
||||
|
||||
const subsetEdges = edges.filter(
|
||||
(edge) => childIds.includes(edge.source) && childIds.includes(edge.target)
|
||||
)
|
||||
|
||||
if (Object.keys(subsetBlocks).length === 0) {
|
||||
return new Map()
|
||||
}
|
||||
|
||||
const nodes = assignLayers(subsetBlocks, subsetEdges)
|
||||
prepareBlockMetrics(nodes)
|
||||
|
||||
const layoutOptions: LayoutOptions = parentBlock
|
||||
? {
|
||||
horizontalSpacing: horizontalSpacing * 0.85,
|
||||
verticalSpacing,
|
||||
padding: { x: CONTAINER_PADDING_X, y: CONTAINER_PADDING_Y },
|
||||
alignment: 'center',
|
||||
}
|
||||
: {
|
||||
horizontalSpacing,
|
||||
verticalSpacing,
|
||||
padding: { x: ROOT_PADDING_X, y: ROOT_PADDING_Y },
|
||||
alignment: 'center',
|
||||
}
|
||||
|
||||
calculatePositions(groupByLayer(nodes), layoutOptions)
|
||||
|
||||
// Now normalize positions to start from 0,0 relative to the container/root
|
||||
let minX = Number.POSITIVE_INFINITY
|
||||
let minY = Number.POSITIVE_INFINITY
|
||||
let maxX = Number.NEGATIVE_INFINITY
|
||||
let maxY = Number.NEGATIVE_INFINITY
|
||||
|
||||
for (const node of nodes.values()) {
|
||||
minX = Math.min(minX, node.position.x)
|
||||
minY = Math.min(minY, node.position.y)
|
||||
maxX = Math.max(maxX, node.position.x + node.metrics.width)
|
||||
maxY = Math.max(maxY, node.position.y + node.metrics.height)
|
||||
}
|
||||
|
||||
// Adjust all positions to be relative to the padding offset
|
||||
const xOffset = (parentBlock ? CONTAINER_PADDING_X : ROOT_PADDING_X) - minX
|
||||
const yOffset = (parentBlock ? CONTAINER_PADDING_Y : ROOT_PADDING_Y) - minY
|
||||
|
||||
const positions = new Map<string, { x: number; y: number }>()
|
||||
for (const node of nodes.values()) {
|
||||
positions.set(node.id, {
|
||||
x: node.position.x + xOffset,
|
||||
y: node.position.y + yOffset,
|
||||
})
|
||||
}
|
||||
|
||||
if (parentBlock) {
|
||||
const calculatedWidth = maxX - minX + CONTAINER_PADDING * 2
|
||||
const calculatedHeight = maxY - minY + CONTAINER_PADDING * 2
|
||||
|
||||
parentBlock.data = {
|
||||
...parentBlock.data,
|
||||
width: Math.max(calculatedWidth, DEFAULT_CONTAINER_WIDTH),
|
||||
height: Math.max(calculatedHeight, DEFAULT_CONTAINER_HEIGHT),
|
||||
}
|
||||
}
|
||||
|
||||
return positions
|
||||
}
|
||||
|
||||
function getBounds(positions: Map<string, { x: number; y: number }>) {
|
||||
let minX = Number.POSITIVE_INFINITY
|
||||
let minY = Number.POSITIVE_INFINITY
|
||||
|
||||
for (const pos of positions.values()) {
|
||||
minX = Math.min(minX, pos.x)
|
||||
minY = Math.min(minY, pos.y)
|
||||
}
|
||||
|
||||
return { minX, minY }
|
||||
}
|
||||
|
||||
function updateContainerDimensions(
|
||||
parentBlock: BlockState,
|
||||
childIds: string[],
|
||||
blocks: Record<string, BlockState>
|
||||
): void {
|
||||
if (childIds.length === 0) {
|
||||
// No children - use minimum dimensions
|
||||
parentBlock.data = {
|
||||
...parentBlock.data,
|
||||
width: DEFAULT_CONTAINER_WIDTH,
|
||||
height: DEFAULT_CONTAINER_HEIGHT,
|
||||
}
|
||||
parentBlock.layout = {
|
||||
...parentBlock.layout,
|
||||
measuredWidth: DEFAULT_CONTAINER_WIDTH,
|
||||
measuredHeight: DEFAULT_CONTAINER_HEIGHT,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
let minX = Number.POSITIVE_INFINITY
|
||||
let minY = Number.POSITIVE_INFINITY
|
||||
let maxX = Number.NEGATIVE_INFINITY
|
||||
let maxY = Number.NEGATIVE_INFINITY
|
||||
|
||||
for (const id of childIds) {
|
||||
const child = blocks[id]
|
||||
if (!child) continue
|
||||
const metrics = getBlockMetrics(child)
|
||||
|
||||
minX = Math.min(minX, child.position.x)
|
||||
minY = Math.min(minY, child.position.y)
|
||||
maxX = Math.max(maxX, child.position.x + metrics.width)
|
||||
maxY = Math.max(maxY, child.position.y + metrics.height)
|
||||
}
|
||||
|
||||
if (!Number.isFinite(minX) || !Number.isFinite(minY)) {
|
||||
return
|
||||
}
|
||||
|
||||
// Match the regular autolayout's dimension calculation
|
||||
const calculatedWidth = maxX - minX + CONTAINER_PADDING * 2
|
||||
const calculatedHeight = maxY - minY + CONTAINER_PADDING * 2
|
||||
|
||||
parentBlock.data = {
|
||||
...parentBlock.data,
|
||||
width: Math.max(calculatedWidth, DEFAULT_CONTAINER_WIDTH),
|
||||
height: Math.max(calculatedHeight, DEFAULT_CONTAINER_HEIGHT),
|
||||
}
|
||||
|
||||
parentBlock.layout = {
|
||||
...parentBlock.layout,
|
||||
measuredWidth: parentBlock.data.width,
|
||||
measuredHeight: parentBlock.data.height,
|
||||
}
|
||||
}
|
||||
|
||||
function hasPosition(block: BlockState): boolean {
|
||||
if (!block.position) return false
|
||||
const { x, y } = block.position
|
||||
return Number.isFinite(x) && Number.isFinite(y)
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate block heights for diff view by using current workflow measurements
|
||||
* This provides better height estimates than using default values
|
||||
*/
|
||||
export function transferBlockHeights(
|
||||
sourceBlocks: Record<string, BlockState>,
|
||||
targetBlocks: Record<string, BlockState>
|
||||
): void {
|
||||
// Build a map of block type+name to heights from source
|
||||
const heightMap = new Map<string, { height: number; width: number; isWide: boolean }>()
|
||||
|
||||
for (const [id, block] of Object.entries(sourceBlocks)) {
|
||||
const key = `${block.type}:${block.name}`
|
||||
heightMap.set(key, {
|
||||
height: block.height || 100,
|
||||
width: block.layout?.measuredWidth || (block.isWide ? 480 : 350),
|
||||
isWide: block.isWide || false,
|
||||
})
|
||||
}
|
||||
|
||||
// Transfer heights to target blocks
|
||||
for (const block of Object.values(targetBlocks)) {
|
||||
const key = `${block.type}:${block.name}`
|
||||
const measurements = heightMap.get(key)
|
||||
|
||||
if (measurements) {
|
||||
block.height = measurements.height
|
||||
block.isWide = measurements.isWide
|
||||
|
||||
if (!block.layout) {
|
||||
block.layout = {}
|
||||
}
|
||||
block.layout.measuredHeight = measurements.height
|
||||
block.layout.measuredWidth = measurements.width
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug('Transferred block heights from source workflow', {
|
||||
sourceCount: Object.keys(sourceBlocks).length,
|
||||
targetCount: Object.keys(targetBlocks).length,
|
||||
heightsMapped: heightMap.size,
|
||||
})
|
||||
}
|
||||
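A minimal usage sketch of the new targeted-layout exports, based only on the signatures in the file above; the block map, edge list, and changed-block id below are illustrative placeholders rather than values from this repository.

import type { BlockState } from '@/stores/workflows/workflow/types'
import { applyTargetedLayout, transferBlockHeights, type Edge } from '@/lib/workflows/autolayout'

// Placeholders standing in for real workflow data.
declare const baselineBlocks: Record<string, BlockState>
declare const proposedBlocks: Record<string, BlockState>
declare const proposedEdges: Edge[]

// Reuse measured heights so the diff view does not fall back to default sizes.
transferBlockHeights(baselineBlocks, proposedBlocks)

// Re-position only the changed blocks; unchanged blocks keep their coordinates.
const layouted = applyTargetedLayout(proposedBlocks, proposedEdges, {
  changedBlockIds: ['new-block-id'], // hypothetical id
  horizontalSpacing: 550,
  verticalSpacing: 200,
})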
@@ -9,6 +9,12 @@ export const DEFAULT_CONTAINER_WIDTH = 500
export const DEFAULT_CONTAINER_HEIGHT = 300
const DEFAULT_PADDING = 40

export const CONTAINER_PADDING = 150
export const CONTAINER_PADDING_X = 180
export const CONTAINER_PADDING_Y = 100
export const ROOT_PADDING_X = 150
export const ROOT_PADDING_Y = 150

function resolveNumeric(value: number | undefined, fallback: number): number {
return typeof value === 'number' && Number.isFinite(value) ? value : fallback
}
@@ -1,16 +1,30 @@
import { getBlock } from '@/blocks'
import type { BlockConfig } from '@/blocks/types'
import { getTrigger } from '@/triggers'

/**
 * Get the effective outputs for a block, including dynamic outputs from inputFormat
 * and trigger outputs for blocks in trigger mode
 */
export function getBlockOutputs(
blockType: string,
subBlocks?: Record<string, any>
subBlocks?: Record<string, any>,
triggerMode?: boolean
): Record<string, any> {
const blockConfig = getBlock(blockType)
if (!blockConfig) return {}

// If block is in trigger mode, use trigger outputs instead of block outputs
if (triggerMode && blockConfig.triggers?.enabled) {
const triggerId = subBlocks?.triggerId?.value || blockConfig.triggers?.available?.[0]
if (triggerId) {
const trigger = getTrigger(triggerId)
if (trigger?.outputs) {
return trigger.outputs
}
}
}

// Start with the static outputs defined in the config
let outputs = { ...(blockConfig.outputs || {}) }

@@ -32,12 +46,20 @@ export function getBlockOutputs(
startWorkflowValue === 'manual'
) {
// API/manual mode - use inputFormat fields only
const inputFormatValue = subBlocks?.inputFormat?.value
let inputFormatValue = subBlocks?.inputFormat?.value
outputs = {}

if (
inputFormatValue !== null &&
inputFormatValue !== undefined &&
!Array.isArray(inputFormatValue)
) {
inputFormatValue = []
}

if (Array.isArray(inputFormatValue)) {
inputFormatValue.forEach((field: { name?: string; type?: string }) => {
if (field.name && field.name.trim() !== '') {
if (field?.name && field.name.trim() !== '') {
outputs[field.name] = {
type: (field.type || 'any') as any,
description: `Field from input format`,
@@ -52,7 +74,17 @@ export function getBlockOutputs(

// For blocks with inputFormat, add dynamic outputs
if (hasInputFormat(blockConfig) && subBlocks?.inputFormat?.value) {
const inputFormatValue = subBlocks.inputFormat.value
let inputFormatValue = subBlocks.inputFormat.value

// Sanitize inputFormat - ensure it's an array
if (
inputFormatValue !== null &&
inputFormatValue !== undefined &&
!Array.isArray(inputFormatValue)
) {
// Invalid format, default to empty array
inputFormatValue = []
}

if (Array.isArray(inputFormatValue)) {
// For API and Input triggers, only use inputFormat fields
@@ -61,7 +93,7 @@ export function getBlockOutputs(

// Add each field from inputFormat as an output at root level
inputFormatValue.forEach((field: { name?: string; type?: string }) => {
if (field.name && field.name.trim() !== '') {
if (field?.name && field.name.trim() !== '') {
outputs[field.name] = {
type: (field.type || 'any') as any,
description: `Field from input format`,
@@ -88,27 +120,66 @@ function hasInputFormat(blockConfig: BlockConfig): boolean {
/**
 * Get output paths for a block (for tag dropdown)
 */
export function getBlockOutputPaths(blockType: string, subBlocks?: Record<string, any>): string[] {
const outputs = getBlockOutputs(blockType, subBlocks)
return Object.keys(outputs)
export function getBlockOutputPaths(
blockType: string,
subBlocks?: Record<string, any>,
triggerMode?: boolean
): string[] {
const outputs = getBlockOutputs(blockType, subBlocks, triggerMode)

// Recursively collect all paths from nested outputs
const paths: string[] = []

function collectPaths(obj: Record<string, any>, prefix = ''): void {
for (const [key, value] of Object.entries(obj)) {
const path = prefix ? `${prefix}.${key}` : key

// If value has 'type' property, it's a leaf node (output definition)
if (value && typeof value === 'object' && 'type' in value) {
paths.push(path)
}
// If value is an object without 'type', recurse into it
else if (value && typeof value === 'object' && !Array.isArray(value)) {
collectPaths(value, path)
}
// Otherwise treat as a leaf node
else {
paths.push(path)
}
}
}

collectPaths(outputs)
return paths
}

/**
 * Get the type of a specific output path
 * Get the type of a specific output path (supports nested paths like "email.subject")
 */
export function getBlockOutputType(
blockType: string,
outputPath: string,
subBlocks?: Record<string, any>
subBlocks?: Record<string, any>,
triggerMode?: boolean
): string {
const outputs = getBlockOutputs(blockType, subBlocks)
const output = outputs[outputPath]
const outputs = getBlockOutputs(blockType, subBlocks, triggerMode)

if (!output) return 'any'
// Navigate through nested path
const pathParts = outputPath.split('.')
let current: any = outputs

if (typeof output === 'object' && 'type' in output) {
return output.type
for (const part of pathParts) {
if (!current || typeof current !== 'object') {
return 'any'
}
current = current[part]
}

return typeof output === 'string' ? output : 'any'
if (!current) return 'any'

if (typeof current === 'object' && 'type' in current) {
return current.type
}

return typeof current === 'string' ? current : 'any'
}
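A brief sketch of how the extended output helpers might be called, based on the new signatures above; the block type and inputFormat fields are hypothetical examples, and the two functions come from the module shown in this hunk.

// Hypothetical inputFormat value; real blocks define their own fields.
const exampleSubBlocks = {
  inputFormat: { value: [{ name: 'email', type: 'string' }] },
}

// Collect output paths (e.g. "email") for the tag dropdown, passing triggerMode
// so trigger outputs are used when the block runs as a trigger.
const paths = getBlockOutputPaths('example_block', exampleSubBlocks, false)

// Resolve the type of a dotted path such as "email".
const emailType = getBlockOutputType('example_block', 'email', exampleSubBlocks, false)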
203
apps/sim/lib/workflows/custom-tools-persistence.ts
Normal file
@@ -0,0 +1,203 @@
import { db } from '@sim/db'
import { customTools } from '@sim/db/schema'
import { eq } from 'drizzle-orm'
import { createLogger } from '@/lib/logs/console/logger'

const logger = createLogger('CustomToolsPersistence')

interface CustomTool {
id?: string
type: 'custom-tool'
title: string
toolId?: string
schema: {
function: {
name?: string
description: string
parameters: Record<string, any>
}
}
code: string
usageControl?: string
}

/**
 * Extract all custom tools from agent blocks in the workflow state
 */
export function extractCustomToolsFromWorkflowState(workflowState: any): CustomTool[] {
const customToolsMap = new Map<string, CustomTool>()

if (!workflowState?.blocks) {
return []
}

for (const [blockId, block] of Object.entries(workflowState.blocks)) {
try {
const blockData = block as any

// Only process agent blocks
if (!blockData || blockData.type !== 'agent') {
continue
}

const subBlocks = blockData.subBlocks || {}
const toolsSubBlock = subBlocks.tools

if (!toolsSubBlock?.value) {
continue
}

let tools = toolsSubBlock.value

// Parse if it's a string
if (typeof tools === 'string') {
try {
tools = JSON.parse(tools)
} catch (error) {
logger.warn(`Failed to parse tools in block ${blockId}`, { error })
continue
}
}

if (!Array.isArray(tools)) {
continue
}

// Extract custom tools
for (const tool of tools) {
if (
tool &&
typeof tool === 'object' &&
tool.type === 'custom-tool' &&
tool.title &&
tool.schema?.function &&
tool.code
) {
// Use toolId if available, otherwise generate one from title
const toolKey = tool.toolId || tool.title

// Deduplicate by toolKey (if same tool appears in multiple blocks)
if (!customToolsMap.has(toolKey)) {
customToolsMap.set(toolKey, tool as CustomTool)
}
}
}
} catch (error) {
logger.error(`Error extracting custom tools from block ${blockId}`, { error })
}
}

return Array.from(customToolsMap.values())
}

/**
 * Persist custom tools to the database
 * Creates new tools or updates existing ones
 */
export async function persistCustomToolsToDatabase(
customToolsList: CustomTool[],
userId: string
): Promise<{ saved: number; errors: string[] }> {
if (!customToolsList || customToolsList.length === 0) {
return { saved: 0, errors: [] }
}

const errors: string[] = []
let saved = 0

try {
await db.transaction(async (tx) => {
for (const tool of customToolsList) {
try {
// Extract the base identifier (without 'custom_' prefix) for database storage
// If toolId exists and has the prefix, strip it; otherwise use title as base
let baseId: string
if (tool.toolId) {
baseId = tool.toolId.startsWith('custom_')
? tool.toolId.replace('custom_', '')
: tool.toolId
} else {
// Use title as the base identifier (agent handler will add 'custom_' prefix)
baseId = tool.title
}

const nowTime = new Date()

// Check if tool already exists
const existingTool = await tx
.select()
.from(customTools)
.where(eq(customTools.id, baseId))
.limit(1)

if (existingTool.length === 0) {
// Create new tool
await tx.insert(customTools).values({
id: baseId,
userId,
title: tool.title,
schema: tool.schema,
code: tool.code,
createdAt: nowTime,
updatedAt: nowTime,
})

logger.info(`Created custom tool: ${tool.title}`, { toolId: baseId })
saved++
} else if (existingTool[0].userId === userId) {
// Update existing tool if it belongs to the user
await tx
.update(customTools)
.set({
title: tool.title,
schema: tool.schema,
code: tool.code,
updatedAt: nowTime,
})
.where(eq(customTools.id, baseId))

logger.info(`Updated custom tool: ${tool.title}`, { toolId: baseId })
saved++
} else {
// Tool exists but belongs to different user - skip
logger.warn(`Skipping custom tool - belongs to different user: ${tool.title}`, {
toolId: baseId,
})
errors.push(`Tool ${tool.title} belongs to a different user`)
}
} catch (error) {
const errorMsg = `Failed to persist tool ${tool.title}: ${error instanceof Error ? error.message : String(error)}`
logger.error(errorMsg, { error })
errors.push(errorMsg)
}
}
})
} catch (error) {
const errorMsg = `Transaction failed while persisting custom tools: ${error instanceof Error ? error.message : String(error)}`
logger.error(errorMsg, { error })
errors.push(errorMsg)
}

return { saved, errors }
}

/**
 * Extract and persist custom tools from workflow state in one operation
 */
export async function extractAndPersistCustomTools(
workflowState: any,
userId: string
): Promise<{ saved: number; errors: string[] }> {
const customToolsList = extractCustomToolsFromWorkflowState(workflowState)

if (customToolsList.length === 0) {
logger.debug('No custom tools found in workflow state')
return { saved: 0, errors: [] }
}

logger.info(`Found ${customToolsList.length} custom tool(s) to persist`, {
tools: customToolsList.map((t) => t.title),
})

return await persistCustomToolsToDatabase(customToolsList, userId)
}
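A short, hedged example of calling the new persistence helper from server-side code; the import path is assumed from the file location above, and the workflow state, user id, and wrapper function are placeholders.

import { extractAndPersistCustomTools } from '@/lib/workflows/custom-tools-persistence'

async function saveToolsFromWorkflow(workflowState: unknown, userId: string) {
  // Scans agent blocks for custom tools and upserts them for this user.
  const { saved, errors } = await extractAndPersistCustomTools(workflowState, userId)
  if (errors.length > 0) {
    console.warn('Some custom tools could not be persisted', errors)
  }
  return saved
}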
@@ -7,6 +7,206 @@ import type { BlockWithDiff } from './types'

const logger = createLogger('WorkflowDiffEngine')

type ParentIdentifier = string | null

function getParentId(block?: BlockState): ParentIdentifier {
return block?.data?.parentId ?? null
}

function buildEdgeKey(edge: Edge): string {
const sourceHandle = edge.sourceHandle ?? ''
const targetHandle = edge.targetHandle ?? ''
const edgeType = edge.type ?? ''
return `${edge.source}|${sourceHandle}->${edge.target}|${targetHandle}|${edgeType}`
}

function groupBlocksByParent(blocks: Record<string, BlockState>): {
root: string[]
children: Map<string, string[]>
} {
const root: string[] = []
const children = new Map<string, string[]>()

for (const [id, block] of Object.entries(blocks)) {
const parentId = getParentId(block)

if (!parentId) {
root.push(id)
continue
}

if (!children.has(parentId)) {
children.set(parentId, [])
}

children.get(parentId)!.push(id)
}

return { root, children }
}

function buildAdjacency(edges: Edge[]): Map<string, Set<string>> {
const adjacency = new Map<string, Set<string>>()

for (const edge of edges) {
if (!adjacency.has(edge.source)) {
adjacency.set(edge.source, new Set())
}
adjacency.get(edge.source)!.add(edge.target)
}

return adjacency
}

function expandImpactedBlocks(
seeds: Set<string>,
proposedBlocks: Record<string, BlockState>,
adjacency: Map<string, Set<string>>
): Set<string> {
const impacted = new Set<string>()

// Only expand to direct downstream neighbors (targets of impacted blocks)
// This ensures we make space for new/moved blocks without relocating unaffected ones
for (const seed of seeds) {
if (!proposedBlocks[seed]) continue
impacted.add(seed)

const seedBlock = proposedBlocks[seed]
const seedParent = getParentId(seedBlock)
const neighbors = adjacency.get(seed)

if (neighbors) {
for (const next of neighbors) {
const nextBlock = proposedBlocks[next]
if (!nextBlock) continue
// Only expand within same parent
if (getParentId(nextBlock) !== seedParent) continue
impacted.add(next)
}
}
}

return impacted
}

function computeStructuralLayoutImpact(params: {
baselineBlocks: Record<string, BlockState>
baselineEdges: Edge[]
proposedBlocks: Record<string, BlockState>
proposedEdges: Edge[]
}): {
impactedBlockIds: Set<string>
parentsToRelayout: Set<ParentIdentifier>
} {
const { baselineBlocks, baselineEdges, proposedBlocks, proposedEdges } = params
const impactedBlocks = new Set<string>()
const parentsToRelayout = new Set<ParentIdentifier>()

const baselineIds = new Set(Object.keys(baselineBlocks))
const proposedIds = new Set(Object.keys(proposedBlocks))

for (const id of proposedIds) {
if (!baselineIds.has(id)) {
impactedBlocks.add(id)
parentsToRelayout.add(getParentId(proposedBlocks[id]))
}
}

for (const id of baselineIds) {
if (!proposedIds.has(id)) {
parentsToRelayout.add(getParentId(baselineBlocks[id]))
}
}

for (const id of proposedIds) {
if (!baselineIds.has(id)) {
continue
}

const baselineBlock = baselineBlocks[id]
const proposedBlock = proposedBlocks[id]

const baselineParent = getParentId(baselineBlock)
const proposedParent = getParentId(proposedBlock)

if (baselineParent !== proposedParent) {
impactedBlocks.add(id)
parentsToRelayout.add(baselineParent)
parentsToRelayout.add(proposedParent)
}
}

const baselineEdgeMap = new Map<string, Edge>()
for (const edge of baselineEdges) {
baselineEdgeMap.set(buildEdgeKey(edge), edge)
}

const proposedEdgeMap = new Map<string, Edge>()
for (const edge of proposedEdges) {
proposedEdgeMap.set(buildEdgeKey(edge), edge)
}

for (const [key, edge] of proposedEdgeMap) {
if (baselineEdgeMap.has(key)) {
continue
}

if (proposedBlocks[edge.source]) {
impactedBlocks.add(edge.source)
}
if (proposedBlocks[edge.target]) {
impactedBlocks.add(edge.target)
}
}

for (const [key, edge] of baselineEdgeMap) {
if (proposedEdgeMap.has(key)) {
continue
}

if (proposedBlocks[edge.source]) {
impactedBlocks.add(edge.source)
}
if (proposedBlocks[edge.target]) {
impactedBlocks.add(edge.target)
}

parentsToRelayout.add(getParentId(baselineBlocks[edge.source]))
parentsToRelayout.add(getParentId(baselineBlocks[edge.target]))
}

const adjacency = buildAdjacency(proposedEdges)

const seedBlocks = new Set<string>()
for (const id of impactedBlocks) {
if (proposedBlocks[id]) {
seedBlocks.add(id)
}
}

const expandedImpacts = expandImpactedBlocks(seedBlocks, proposedBlocks, adjacency)

// Add parent containers to impacted set so their updated dimensions get transferred
const parentsWithImpactedChildren = new Set<string>()
for (const blockId of expandedImpacts) {
const block = proposedBlocks[blockId]
if (!block) continue
const parentId = getParentId(block)
if (parentId && proposedBlocks[parentId]) {
parentsWithImpactedChildren.add(parentId)
}
}

for (const parentId of parentsWithImpactedChildren) {
expandedImpacts.add(parentId)
}

return {
impactedBlockIds: expandedImpacts,
parentsToRelayout,
}
}

// Helper function to check if a block has changed
function hasBlockChanged(currentBlock: BlockState, proposedBlock: BlockState): boolean {
// Compare key fields that indicate a change
@@ -122,12 +322,12 @@ export class WorkflowDiffEngine {
private currentDiff: WorkflowDiff | undefined = undefined

/**
 * Create a diff from YAML content
 * Create a diff from workflow state
 */
async createDiffFromYaml(yamlContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
async createDiff(jsonContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
try {
logger.info('WorkflowDiffEngine.createDiffFromYaml called with:', {
yamlContentLength: yamlContent.length,
logger.info('WorkflowDiffEngine.createDiff called with:', {
jsonContentLength: jsonContent.length,
diffAnalysis: diffAnalysis,
diffAnalysisType: typeof diffAnalysis,
diffAnalysisUndefined: diffAnalysis === undefined,
@@ -163,7 +363,7 @@ export class WorkflowDiffEngine {

// Call the API route to create the diff
const body: any = {
yamlContent,
jsonContent,
currentWorkflowState: mergedBaseline,
}

@@ -211,7 +411,7 @@ export class WorkflowDiffEngine {

const result = await response.json()

logger.info('WorkflowDiffEngine.createDiffFromYaml response:', {
logger.info('WorkflowDiffEngine.createDiff response:', {
success: result.success,
hasDiff: !!result.diff,
errors: result.errors,
@@ -283,24 +483,45 @@ export class WorkflowDiffEngine {
hasDiffAnalysis: !!diffAnalysis,
})

// Get current workflow state for comparison
// Get baseline for comparison
// If we already have a diff, use it as baseline (editing on top of diff)
// Otherwise use the current workflow state
const { useWorkflowStore } = await import('@/stores/workflows/workflow/store')
const currentWorkflowState = useWorkflowStore.getState().getWorkflowState()

// Check if we're editing on top of an existing diff
const baselineForComparison = this.currentDiff?.proposedState || currentWorkflowState
const isEditingOnTopOfDiff = !!this.currentDiff

if (isEditingOnTopOfDiff) {
logger.info('Editing on top of existing diff - using diff as baseline for comparison', {
diffBlockCount: Object.keys(this.currentDiff!.proposedState.blocks).length,
})
}

// Merge subblock values from subblock store to ensure manual edits are included
let mergedBaseline: WorkflowState = currentWorkflowState
try {
mergedBaseline = {
...currentWorkflowState,
blocks: mergeSubblockState(currentWorkflowState.blocks),
let mergedBaseline: WorkflowState = baselineForComparison

// Only merge subblock values if we're comparing against original workflow
// If editing on top of diff, use the diff state as-is
if (!isEditingOnTopOfDiff) {
try {
mergedBaseline = {
...baselineForComparison,
blocks: mergeSubblockState(baselineForComparison.blocks),
}
logger.info('Merged subblock values into baseline for diff creation', {
blockCount: Object.keys(mergedBaseline.blocks || {}).length,
})
} catch (mergeError) {
logger.warn('Failed to merge subblock values into baseline; proceeding with raw state', {
error: mergeError instanceof Error ? mergeError.message : String(mergeError),
})
}
logger.info('Merged subblock values into baseline for diff creation', {
blockCount: Object.keys(mergedBaseline.blocks || {}).length,
})
} catch (mergeError) {
logger.warn('Failed to merge subblock values into baseline; proceeding with raw state', {
error: mergeError instanceof Error ? mergeError.message : String(mergeError),
})
} else {
logger.info(
'Using diff state as baseline without merging subblocks (editing on top of diff)'
)
}

// Build a map of existing blocks by type:name for matching
@@ -349,6 +570,14 @@ export class WorkflowDiffEngine {
id: finalId,
}

// Update parentId in data if it exists and has been remapped
if (finalBlock.data?.parentId && idMap[finalBlock.data.parentId]) {
finalBlock.data = {
...finalBlock.data,
parentId: idMap[finalBlock.data.parentId],
}
}

finalBlocks[finalId] = finalBlock
}

@@ -399,44 +628,153 @@ export class WorkflowDiffEngine {
finalProposedState.parallels = generateParallelBlocks(finalProposedState.blocks)
}

// Transfer block heights from baseline workflow for better measurements in diff view
// If editing on top of diff, this transfers from the diff (which already has good heights)
// Otherwise transfers from original workflow
logger.info('Transferring block heights from baseline workflow', {
isEditingOnTopOfDiff,
baselineBlockCount: Object.keys(mergedBaseline.blocks).length,
})
try {
const { transferBlockHeights } = await import('@/lib/workflows/autolayout')
transferBlockHeights(mergedBaseline.blocks, finalBlocks)
} catch (error) {
logger.warn('Failed to transfer block heights', {
error: error instanceof Error ? error.message : String(error),
})
}

// Apply autolayout to the proposed state
logger.info('Applying autolayout to proposed workflow state')
try {
const { applyAutoLayout: applyNativeAutoLayout } = await import(
'@/lib/workflows/autolayout'
)
// Compute diff analysis if not already provided to determine changed blocks
let tempComputed = diffAnalysis
if (!tempComputed) {
const currentIds = new Set(Object.keys(mergedBaseline.blocks))
const newBlocks: string[] = []
const editedBlocks: string[] = []

const autoLayoutOptions = {
horizontalSpacing: 550,
verticalSpacing: 200,
padding: {
x: 150,
y: 150,
},
alignment: 'center' as const,
for (const [id, block] of Object.entries(finalBlocks)) {
if (!currentIds.has(id)) {
newBlocks.push(id)
} else {
const currentBlock = mergedBaseline.blocks[id]
if (hasBlockChanged(currentBlock, block)) {
editedBlocks.push(id)
}
}
}

tempComputed = { new_blocks: newBlocks, edited_blocks: editedBlocks, deleted_blocks: [] }
}

const layoutResult = applyNativeAutoLayout(
finalBlocks,
finalProposedState.edges,
finalProposedState.loops || {},
finalProposedState.parallels || {},
autoLayoutOptions
)
const { impactedBlockIds } = computeStructuralLayoutImpact({
baselineBlocks: mergedBaseline.blocks,
baselineEdges: mergedBaseline.edges as Edge[],
proposedBlocks: finalBlocks,
proposedEdges: finalEdges,
})

if (layoutResult.success && layoutResult.blocks) {
Object.entries(layoutResult.blocks).forEach(([id, layoutBlock]) => {
const impactedBlockArray = Array.from(impactedBlockIds)
const totalBlocks = Object.keys(finalBlocks).length
const unchangedBlocks = totalBlocks - impactedBlockArray.length

if (impactedBlockArray.length === 0) {
logger.info('No structural changes detected; skipping autolayout', {
totalBlocks,
})
} else if (unchangedBlocks > 0) {
// Use targeted layout - preserves positions of unchanged blocks
logger.info('Using targeted layout for copilot edits (has unchanged blocks)', {
changedBlocks: impactedBlockArray.length,
unchangedBlocks: unchangedBlocks,
totalBlocks: totalBlocks,
percentChanged: Math.round((impactedBlockArray.length / totalBlocks) * 100),
})

const { applyTargetedLayout } = await import('@/lib/workflows/autolayout')

const layoutedBlocks = applyTargetedLayout(finalBlocks, finalProposedState.edges, {
changedBlockIds: impactedBlockArray,
horizontalSpacing: 550,
verticalSpacing: 200,
})

Object.entries(layoutedBlocks).forEach(([id, layoutBlock]) => {
if (finalBlocks[id]) {
finalBlocks[id].position = layoutBlock.position

if (layoutBlock.data) {
finalBlocks[id].data = {
...finalBlocks[id].data,
...layoutBlock.data,
}
}

if (layoutBlock.layout) {
finalBlocks[id].layout = {
...finalBlocks[id].layout,
...layoutBlock.layout,
}
}

if (typeof layoutBlock.height === 'number') {
finalBlocks[id].height = layoutBlock.height
}

if (typeof layoutBlock.isWide === 'boolean') {
finalBlocks[id].isWide = layoutBlock.isWide
}
}
})
logger.info('Successfully applied autolayout to proposed state', {
blocksLayouted: Object.keys(layoutResult.blocks).length,

logger.info('Successfully applied targeted layout to proposed state', {
blocksLayouted: Object.keys(layoutedBlocks).length,
changedBlocks: impactedBlockArray.length,
})
} else {
logger.warn('Autolayout failed, using default positions', {
error: layoutResult.error,
// Use full autolayout only when copilot built 100% of the workflow from scratch
logger.info('Using full autolayout (copilot built 100% of workflow)', {
totalBlocks: totalBlocks,
allBlocksAreNew: impactedBlockArray.length === totalBlocks,
})

const { applyAutoLayout: applyNativeAutoLayout } = await import(
'@/lib/workflows/autolayout'
)

const autoLayoutOptions = {
horizontalSpacing: 550,
verticalSpacing: 200,
padding: {
x: 150,
y: 150,
},
alignment: 'center' as const,
}

const layoutResult = applyNativeAutoLayout(
finalBlocks,
finalProposedState.edges,
finalProposedState.loops || {},
finalProposedState.parallels || {},
autoLayoutOptions
)

if (layoutResult.success && layoutResult.blocks) {
Object.entries(layoutResult.blocks).forEach(([id, layoutBlock]) => {
if (finalBlocks[id]) {
finalBlocks[id].position = layoutBlock.position
}
})
logger.info('Successfully applied full autolayout to proposed state', {
blocksLayouted: Object.keys(layoutResult.blocks).length,
})
} else {
logger.warn('Autolayout failed, using default positions', {
error: layoutResult.error,
})
}
}
} catch (layoutError) {
logger.warn('Error applying autolayout, using default positions', {
@@ -614,23 +952,23 @@ export class WorkflowDiffEngine {
}

/**
 * Merge new YAML content into existing diff
 * Merge new workflow state into existing diff
 * Used for cumulative updates within the same message
 */
async mergeDiffFromYaml(yamlContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
async mergeDiff(jsonContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
try {
logger.info('Merging diff from YAML content')
logger.info('Merging diff from workflow state')

// If no existing diff, create a new one
if (!this.currentDiff) {
logger.info('No existing diff, creating new diff')
return this.createDiffFromYaml(yamlContent, diffAnalysis)
return this.createDiff(jsonContent, diffAnalysis)
}

// Call the API route to merge the diff
const body: any = {
existingDiff: this.currentDiff,
yamlContent,
jsonContent,
}

if (diffAnalysis !== undefined && diffAnalysis !== null) {
@@ -18,7 +18,7 @@ export interface CopilotWorkflowState {
export interface CopilotBlockState {
type: string
name: string
inputs?: Record<string, string | number | string[][]>
inputs?: Record<string, string | number | string[][] | object>
outputs: BlockState['outputs']
connections?: Record<string, string | string[]>
nestedNodes?: Record<string, CopilotBlockState>
@@ -83,17 +83,127 @@ function isSensitiveSubBlock(key: string, subBlock: BlockState['subBlocks'][stri
return false
}

/**
 * Sanitize condition blocks by removing UI-specific metadata
 * Returns cleaned JSON string (not parsed array)
 */
function sanitizeConditions(conditionsJson: string): string {
try {
const conditions = JSON.parse(conditionsJson)
if (!Array.isArray(conditions)) return conditionsJson

// Keep only id, title, and value - remove UI state
const cleaned = conditions.map((cond: any) => ({
id: cond.id,
title: cond.title,
value: cond.value || '',
}))

return JSON.stringify(cleaned)
} catch {
return conditionsJson
}
}

/**
 * Sanitize tools array by removing UI state and redundant fields
 */
function sanitizeTools(tools: any[]): any[] {
return tools.map((tool) => {
if (tool.type === 'custom-tool') {
const sanitized: any = {
type: tool.type,
title: tool.title,
toolId: tool.toolId,
usageControl: tool.usageControl,
}

if (tool.schema?.function) {
sanitized.schema = {
function: {
description: tool.schema.function.description,
parameters: tool.schema.function.parameters,
},
}
}

if (tool.code) {
sanitized.code = tool.code
}

return sanitized
}

const { isExpanded, ...cleanTool } = tool
return cleanTool
})
}

/**
 * Sanitize subblocks by removing null values, secrets, and simplifying structure
 * Maps each subblock key directly to its value instead of the full object
 * Note: responseFormat is kept as an object for better copilot understanding
 */
function sanitizeSubBlocks(
subBlocks: BlockState['subBlocks']
): Record<string, string | number | string[][]> {
const sanitized: Record<string, string | number | string[][]> = {}
): Record<string, string | number | string[][] | object> {
const sanitized: Record<string, string | number | string[][] | object> = {}

Object.entries(subBlocks).forEach(([key, subBlock]) => {
// Skip null/undefined values
// Special handling for responseFormat - process BEFORE null check
// so we can detect when it's added/removed
if (key === 'responseFormat') {
try {
// Handle null/undefined - skip if no value
if (subBlock.value === null || subBlock.value === undefined) {
return
}

let obj = subBlock.value

// Handle string values - parse them first
if (typeof subBlock.value === 'string') {
const trimmed = subBlock.value.trim()
if (!trimmed) {
// Empty string - skip this field
return
}
obj = JSON.parse(trimmed)
}

// Handle object values - normalize keys and keep as object for copilot
if (obj && typeof obj === 'object') {
// Sort keys recursively for consistent comparison
const sortKeys = (item: any): any => {
if (Array.isArray(item)) {
return item.map(sortKeys)
}
if (item !== null && typeof item === 'object') {
return Object.keys(item)
.sort()
.reduce((result: any, key: string) => {
result[key] = sortKeys(item[key])
return result
}, {})
}
return item
}

// Keep as object (not stringified) for better copilot understanding
const normalized = sortKeys(obj)
sanitized[key] = normalized
return
}

// If we get here, obj is not an object (maybe null or primitive) - skip it
return
} catch (error) {
// Invalid JSON - skip this field to avoid crashes
return
}
}

// Skip null/undefined values for other fields
if (subBlock.value === null || subBlock.value === undefined) {
return
}
@@ -112,36 +222,24 @@ function sanitizeSubBlocks(
return
}

// For non-sensitive, non-null values, include them
// Special handling for condition-input type - clean UI metadata
if (subBlock.type === 'condition-input' && typeof subBlock.value === 'string') {
const cleanedConditions: string = sanitizeConditions(subBlock.value)
sanitized[key] = cleanedConditions
return
}

if (key === 'tools' && Array.isArray(subBlock.value)) {
sanitized[key] = sanitizeTools(subBlock.value)
return
}

sanitized[key] = subBlock.value
})

return sanitized
}

/**
 * Reconstruct full subBlock structure from simplified copilot format
 * Uses existing block structure as template for id and type fields
 */
function reconstructSubBlocks(
simplifiedSubBlocks: Record<string, string | number | string[][]>,
existingSubBlocks?: BlockState['subBlocks']
): BlockState['subBlocks'] {
const reconstructed: BlockState['subBlocks'] = {}

Object.entries(simplifiedSubBlocks).forEach(([key, value]) => {
const existingSubBlock = existingSubBlocks?.[key]

reconstructed[key] = {
id: existingSubBlock?.id || key,
type: existingSubBlock?.type || 'short-input',
value,
}
})

return reconstructed
}

/**
 * Extract connections for a block from edges and format as operations-style connections
 */
@@ -198,14 +296,16 @@ export function sanitizeForCopilot(state: WorkflowState): CopilotWorkflowState {
const connections = extractConnectionsForBlock(blockId, state.edges)

// For loop/parallel blocks, extract config from block.data instead of subBlocks
let inputs: Record<string, string | number | string[][]> = {}
let inputs: Record<string, string | number | string[][] | object>

if (block.type === 'loop' || block.type === 'parallel') {
// Extract configuration from block.data
if (block.data?.loopType) inputs.loopType = block.data.loopType
if (block.data?.count !== undefined) inputs.iterations = block.data.count
if (block.data?.collection !== undefined) inputs.collection = block.data.collection
if (block.data?.parallelType) inputs.parallelType = block.data.parallelType
const loopInputs: Record<string, string | number | string[][] | object> = {}
if (block.data?.loopType) loopInputs.loopType = block.data.loopType
if (block.data?.count !== undefined) loopInputs.iterations = block.data.count
if (block.data?.collection !== undefined) loopInputs.collection = block.data.collection
if (block.data?.parallelType) loopInputs.parallelType = block.data.parallelType
inputs = loopInputs
} else {
// For regular blocks, sanitize subBlocks
inputs = sanitizeSubBlocks(block.subBlocks)
@@ -277,14 +377,10 @@ export function sanitizeForExport(state: WorkflowState): ExportWorkflowState {
Object.values(clonedState.blocks).forEach((block: any) => {
if (block.subBlocks) {
Object.entries(block.subBlocks).forEach(([key, subBlock]: [string, any]) => {
// Clear OAuth credentials and API keys using regex patterns
// Clear OAuth credentials and API keys based on field name only
if (
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(key) ||
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(
subBlock.type || ''
) ||
(typeof subBlock.value === 'string' &&
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(subBlock.value))
subBlock.type === 'oauth-input'
) {
subBlock.value = ''
}
@@ -174,6 +174,17 @@ export function computeEditSequence(
if (!(blockId in startFlattened)) {
const { block, parentId } = endFlattened[blockId]
if (parentId) {
// Check if this block will be included in parent's nestedNodes
const parentData = endFlattened[parentId]
const parentIsNew = parentData && !(parentId in startFlattened)
const parentHasNestedNodes = parentData?.block?.nestedNodes?.[blockId]

// Skip if parent is new and will include this block in nestedNodes
if (parentIsNew && parentHasNestedNodes) {
// Parent's 'add' operation will include this child, skip separate operation
continue
}

// Block was added inside a subflow - include full block state
const addParams: EditOperation['params'] = {
subflowId: parentId,
@@ -181,8 +192,14 @@ export function computeEditSequence(
name: block.name,
outputs: block.outputs,
enabled: block.enabled !== undefined ? block.enabled : true,
...(block?.triggerMode !== undefined && { triggerMode: Boolean(block.triggerMode) }),
...(block?.advancedMode !== undefined && { advancedMode: Boolean(block.advancedMode) }),
}

// Only include triggerMode/advancedMode if true
if (block?.triggerMode === true) {
addParams.triggerMode = true
}
if (block?.advancedMode === true) {
addParams.advancedMode = true
}

// Add inputs if present
@@ -208,8 +225,14 @@ export function computeEditSequence(
const addParams: EditOperation['params'] = {
type: block.type,
name: block.name,
...(block?.triggerMode !== undefined && { triggerMode: Boolean(block.triggerMode) }),
...(block?.advancedMode !== undefined && { advancedMode: Boolean(block.advancedMode) }),
}

if (block?.triggerMode === true) {
addParams.triggerMode = true
}

if (block?.advancedMode === true) {
addParams.advancedMode = true
}

// Add inputs if present
@@ -224,10 +247,18 @@ export function computeEditSequence(
addParams.connections = connections
}

// Add nested nodes if present (for loops/parallels created from scratch)
// Add nested nodes if present AND all children are new
// This creates the loop/parallel with children in one operation
// If some children already exist, they'll have separate insert_into_subflow operations
if (block.nestedNodes && Object.keys(block.nestedNodes).length > 0) {
addParams.nestedNodes = block.nestedNodes
subflowsChanged++
const allChildrenNew = Object.keys(block.nestedNodes).every(
(childId) => !(childId in startFlattened)
)

if (allChildrenNew) {
addParams.nestedNodes = block.nestedNodes
subflowsChanged++
}
}

operations.push({
@@ -266,12 +297,14 @@ export function computeEditSequence(
name: endBlock.name,
outputs: endBlock.outputs,
enabled: endBlock.enabled !== undefined ? endBlock.enabled : true,
...(endBlock?.triggerMode !== undefined && {
triggerMode: Boolean(endBlock.triggerMode),
}),
...(endBlock?.advancedMode !== undefined && {
advancedMode: Boolean(endBlock.advancedMode),
}),
}

// Only include triggerMode/advancedMode if true
if (endBlock?.triggerMode === true) {
addParams.triggerMode = true
}
if (endBlock?.advancedMode === true) {
addParams.advancedMode = true
}

const inputs = extractInputValues(endBlock)
@@ -436,12 +469,13 @@ function computeBlockChanges(
hasChanges = true
}

// Check input value changes
// Check input value changes - only include changed fields
const startInputs = extractInputValues(startBlock)
const endInputs = extractInputValues(endBlock)

if (JSON.stringify(startInputs) !== JSON.stringify(endInputs)) {
changes.inputs = endInputs
const changedInputs = computeInputDelta(startInputs, endInputs)
if (Object.keys(changedInputs).length > 0) {
changes.inputs = changedInputs
hasChanges = true
}

@@ -457,6 +491,28 @@ function computeBlockChanges(
return hasChanges ? changes : null
}

/**
 * Compute delta between two input objects
 * Only returns fields that actually changed or were added
 */
function computeInputDelta(
startInputs: Record<string, any>,
endInputs: Record<string, any>
): Record<string, any> {
const delta: Record<string, any> = {}

for (const key in endInputs) {
if (
!(key in startInputs) ||
JSON.stringify(startInputs[key]) !== JSON.stringify(endInputs[key])
) {
delta[key] = endInputs[key]
}
}

return delta
}

/**
 * Format edit operations into a human-readable description
 */
@@ -239,10 +239,6 @@ const nextConfig: NextConfig = {
return redirects
},
async rewrites() {
if (!isTruthy(env.POSTHOG_ENABLED)) {
return []
}

return [
{
source: '/ingest/static/:path*',
@@ -70,6 +70,7 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "6.1.0",
"date-fns": "4.1.0",
"encoding": "0.1.13",
"entities": "6.0.1",
@@ -1,98 +0,0 @@
#!/usr/bin/env bun

import path from 'path'
import { DocsChunker } from '@/lib/knowledge/documents/docs-chunker'
import type { DocChunk } from '@/lib/knowledge/documents/types'
import { createLogger } from '@/lib/logs/console/logger'

const logger = createLogger('ChunkDocsScript')

/**
 * Script to chunk all .mdx files in the docs directory
 */
async function main() {
try {
// Initialize the docs chunker
const chunker = new DocsChunker({
chunkSize: 1024,
minChunkSize: 100,
overlap: 200,
baseUrl: 'https://docs.sim.ai',
})

// Path to the docs content directory
const docsPath = path.join(process.cwd(), '../../apps/docs/content/docs')

logger.info(`Processing docs from: ${docsPath}`)

// Process all .mdx files
const chunks = await chunker.chunkAllDocs(docsPath)

logger.info(`\n=== CHUNKING RESULTS ===`)
logger.info(`Total chunks: ${chunks.length}`)

// Group chunks by document
const chunksByDoc = chunks.reduce<Record<string, DocChunk[]>>((acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = []
}
acc[chunk.sourceDocument].push(chunk)
return acc
}, {})

// Display summary
logger.info(`\n=== DOCUMENT SUMMARY ===`)
for (const [doc, docChunks] of Object.entries(chunksByDoc)) {
logger.info(`${doc}: ${docChunks.length} chunks`)
}

// Display a few sample chunks
logger.info(`\n=== SAMPLE CHUNKS ===`)
chunks.slice(0, 3).forEach((chunk, index) => {
logger.info(`\nChunk ${index + 1}:`)
logger.info(` Source: ${chunk.sourceDocument}`)
logger.info(` Header: ${chunk.headerText} (Level ${chunk.headerLevel})`)
logger.info(` Link: ${chunk.headerLink}`)
logger.info(` Tokens: ${chunk.tokenCount}`)
logger.info(` Embedding: ${chunk.embedding.length} dimensions (${chunk.embeddingModel})`)
logger.info(
` Embedding Preview: [${chunk.embedding
.slice(0, 5)
.map((n) => n.toFixed(4))
.join(', ')}...]`
)
logger.info(` Text Preview: ${chunk.text.slice(0, 100)}...`)
})

// Calculate total token count
const totalTokens = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0)
const chunksWithEmbeddings = chunks.filter((chunk) => chunk.embedding.length > 0).length

logger.info(`\n=== STATISTICS ===`)
logger.info(`Total tokens: ${totalTokens}`)
logger.info(`Average tokens per chunk: ${Math.round(totalTokens / chunks.length)}`)
logger.info(`Chunks with embeddings: ${chunksWithEmbeddings}/${chunks.length}`)
if (chunks.length > 0 && chunks[0].embedding.length > 0) {
logger.info(`Embedding model: ${chunks[0].embeddingModel}`)
logger.info(`Embedding dimensions: ${chunks[0].embedding.length}`)
}

const headerLevels = chunks.reduce<Record<number, number>>((acc, chunk) => {
acc[chunk.headerLevel] = (acc[chunk.headerLevel] || 0) + 1
return acc
}, {})

logger.info(`Header level distribution:`)
Object.entries(headerLevels)
.sort(([a], [b]) => Number(a) - Number(b))
.forEach(([level, count]) => {
logger.info(` H${level}: ${count} chunks`)
})
} catch (error) {
logger.error('Error processing docs:', error)
process.exit(1)
}
}

// Run the script
main().catch(console.error)
@@ -1,215 +0,0 @@
#!/usr/bin/env bun

import path from 'path'
import { db } from '@sim/db'
import { docsEmbeddings } from '@sim/db/schema'
import { sql } from 'drizzle-orm'
import { isDev } from '@/lib/environment'
import { DocsChunker } from '@/lib/knowledge/documents/docs-chunker'
import { createLogger } from '@/lib/logs/console/logger'

const logger = createLogger('ProcessDocsEmbeddings')

interface ProcessingOptions {
/** Clear existing docs embeddings before processing */
clearExisting?: boolean
/** Path to docs directory */
docsPath?: string
/** Base URL for generating links */
baseUrl?: string
/** Chunk size in tokens */
chunkSize?: number
/** Minimum chunk size in tokens */
minChunkSize?: number
/** Overlap between chunks in tokens */
overlap?: number
}

/**
 * Production script to process documentation and save embeddings to database
 */
async function processDocsEmbeddings(options: ProcessingOptions = {}) {
const startTime = Date.now()
let processedChunks = 0
let failedChunks = 0

try {
// Configuration
const config = {
clearExisting: options.clearExisting ?? false,
docsPath: options.docsPath ?? path.join(process.cwd(), '../../apps/docs/content/docs/en'),
baseUrl: options.baseUrl ?? (isDev ? 'http://localhost:3001' : 'https://docs.sim.ai'),
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
minChunkSize: options.minChunkSize ?? 100,
overlap: options.overlap ?? 50,
}

logger.info('🚀 Starting docs embedding processing...')
logger.info(`Configuration:`, {
docsPath: config.docsPath,
baseUrl: config.baseUrl,
chunkSize: config.chunkSize,
clearExisting: config.clearExisting,
})

const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})

logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)

if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}

logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)

if (config.clearExisting) {
logger.info('🗑️ Clearing existing docs embeddings...')
try {
const deleteResult = await db.delete(docsEmbeddings)
logger.info(`✅ Successfully deleted existing embeddings`)
} catch (error) {
logger.error('❌ Failed to delete existing embeddings:', error)
throw new Error('Failed to clear existing embeddings')
}
}

const batchSize = 10
logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)

for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize)

try {
const batchData = batch.map((chunk) => ({
chunkText: chunk.text,
sourceDocument: chunk.sourceDocument,
sourceLink: chunk.headerLink,
headerText: chunk.headerText,
headerLevel: chunk.headerLevel,
tokenCount: chunk.tokenCount,
embedding: chunk.embedding,
embeddingModel: chunk.embeddingModel,
metadata: chunk.metadata,
}))

await db.insert(docsEmbeddings).values(batchData)

processedChunks += batch.length

if (i % (batchSize * 5) === 0 || i + batchSize >= chunks.length) {
logger.info(
` 💾 Saved ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`
)
}
} catch (error) {
logger.error(`❌ Failed to save batch ${Math.floor(i / batchSize) + 1}:`, error)
failedChunks += batch.length
}
}

const savedCount = await db
.select({ count: sql<number>`count(*)` })
.from(docsEmbeddings)
.then((result) => result[0]?.count || 0)

const duration = Date.now() - startTime

logger.info(`✅ Processing complete!`)
logger.info(`📊 Results:`)
logger.info(` • Total chunks processed: ${chunks.length}`)
logger.info(` • Successfully saved: ${processedChunks}`)
logger.info(` • Failed: ${failedChunks}`)
logger.info(` • Database total: ${savedCount}`)
logger.info(` • Duration: ${Math.round(duration / 1000)}s`)

const documentStats = chunks.reduce(
(acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = { chunks: 0, tokens: 0 }
}
acc[chunk.sourceDocument].chunks++
acc[chunk.sourceDocument].tokens += chunk.tokenCount
return acc
},
{} as Record<string, { chunks: number; tokens: number }>
)

logger.info(`📋 Document breakdown:`)
Object.entries(documentStats)
.sort(([, a], [, b]) => b.chunks - a.chunks)
.slice(0, 10)
.forEach(([doc, stats]) => {
logger.info(` • ${doc}: ${stats.chunks} chunks, ${stats.tokens} tokens`)
})

if (Object.keys(documentStats).length > 10) {
logger.info(` • ... and ${Object.keys(documentStats).length - 10} more documents`)
}

return {
success: failedChunks === 0,
processedChunks,
failedChunks,
totalChunks: chunks.length,
databaseCount: savedCount,
duration,
}
} catch (error) {
logger.error('💥 Fatal error during processing:', error)
return {
success: false,
processedChunks,
failedChunks,
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}

/**
 * Main function - handle command line arguments
 */
async function main() {
const args = process.argv.slice(2)
const options: ProcessingOptions = {}

if (args.includes('--clear')) {
options.clearExisting = true
}

if (args.includes('--help') || args.includes('-h')) {
console.log(`
Usage: bun run scripts/process-docs-embeddings.ts [options]

Options:
--clear Clear existing docs embeddings before processing
--help, -h Show this help message

Examples:
bun run scripts/process-docs-embeddings.ts
bun run scripts/process-docs-embeddings.ts --clear
`)
process.exit(0)
}

const result = await processDocsEmbeddings(options)

if (!result.success) {
process.exit(1)
}
}

if (import.meta.url.includes('process-docs-embeddings.ts')) {
main().catch((error) => {
logger.error('Script failed:', error)
process.exit(1)
})
}

export { processDocsEmbeddings }

256
apps/sim/scripts/process-docs.ts
Normal file
@@ -0,0 +1,256 @@
#!/usr/bin/env bun

import path from 'path'
import { db } from '@sim/db'
import { docsEmbeddings } from '@sim/db/schema'
import { sql } from 'drizzle-orm'
import { type DocChunk, DocsChunker } from '@/lib/chunkers'
import { isDev } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'

const logger = createLogger('ProcessDocs')

interface ProcessingOptions {
/** Clear existing docs embeddings before processing */
clearExisting?: boolean
/** Path to docs directory */
docsPath?: string
/** Base URL for generating links */
baseUrl?: string
/** Chunk size in tokens */
chunkSize?: number
/** Minimum chunk size */
minChunkSize?: number
/** Overlap between chunks */
overlap?: number
/** Dry run - only display results, don't save to DB */
dryRun?: boolean
/** Verbose output */
verbose?: boolean
}

/**
 * Process documentation files and optionally save embeddings to database
 */
async function processDocs(options: ProcessingOptions = {}) {
const config = {
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs'),
baseUrl: options.baseUrl || (isDev ? 'http://localhost:4000' : 'https://docs.sim.ai'),
chunkSize: options.chunkSize || 1024,
minChunkSize: options.minChunkSize || 100,
overlap: options.overlap || 200,
clearExisting: options.clearExisting ?? false,
dryRun: options.dryRun ?? false,
verbose: options.verbose ?? false,
}

let processedChunks = 0
let failedChunks = 0

try {
logger.info('🚀 Starting docs processing with config:', {
docsPath: config.docsPath,
baseUrl: config.baseUrl,
chunkSize: config.chunkSize,
clearExisting: config.clearExisting,
dryRun: config.dryRun,
})

// Initialize the chunker
const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})

// Process all .mdx files
logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)

if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}

logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)

// Group chunks by document for summary
const chunksByDoc = chunks.reduce<Record<string, DocChunk[]>>((acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = []
}
acc[chunk.sourceDocument].push(chunk)
return acc
}, {})

// Display summary
logger.info(`\n=== DOCUMENT SUMMARY ===`)
for (const [doc, docChunks] of Object.entries(chunksByDoc)) {
logger.info(`${doc}: ${docChunks.length} chunks`)
}

// Display sample chunks in verbose or dry-run mode
if (config.verbose || config.dryRun) {
logger.info(`\n=== SAMPLE CHUNKS ===`)
chunks.slice(0, 3).forEach((chunk, index) => {
logger.info(`\nChunk ${index + 1}:`)
logger.info(` Source: ${chunk.sourceDocument}`)
logger.info(` Header: ${chunk.headerText} (Level ${chunk.headerLevel})`)
logger.info(` Link: ${chunk.headerLink}`)
logger.info(` Tokens: ${chunk.tokenCount}`)
logger.info(` Embedding: ${chunk.embedding.length} dimensions (${chunk.embeddingModel})`)
if (config.verbose) {
logger.info(` Text Preview: ${chunk.text.substring(0, 200)}...`)
}
})
}

// If dry run, stop here
if (config.dryRun) {
logger.info('\n✅ Dry run complete - no data saved to database')
return { success: true, processedChunks: chunks.length, failedChunks: 0 }
}

// Clear existing embeddings if requested
if (config.clearExisting) {
logger.info('🗑️ Clearing existing docs embeddings...')
try {
await db.delete(docsEmbeddings)
logger.info(`✅ Successfully deleted existing embeddings`)
} catch (error) {
logger.error('❌ Failed to delete existing embeddings:', error)
throw new Error('Failed to clear existing embeddings')
}
}

// Save chunks to database in batches
const batchSize = 10
logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)

for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize)

try {
const batchData = batch.map((chunk) => ({
chunkText: chunk.text,
sourceDocument: chunk.sourceDocument,
sourceLink: chunk.headerLink,
headerText: chunk.headerText,
headerLevel: chunk.headerLevel,
tokenCount: chunk.tokenCount,
embedding: chunk.embedding,
embeddingModel: chunk.embeddingModel,
metadata: chunk.metadata,
}))

await db.insert(docsEmbeddings).values(batchData)
processedChunks += batch.length

if (i % (batchSize * 5) === 0 || i + batchSize >= chunks.length) {
logger.info(
` 💾 Saved ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`
)
}
} catch (error) {
logger.error(`❌ Failed to save batch ${Math.floor(i / batchSize) + 1}:`, error)
failedChunks += batch.length
}
}

// Verify final count
const savedCount = await db
.select({ count: sql<number>`count(*)` })
.from(docsEmbeddings)
.then((res) => res[0]?.count || 0)

logger.info(
`\n✅ Processing complete!\n` +
` 📊 Total chunks: ${chunks.length}\n` +
` ✅ Processed: ${processedChunks}\n` +
` ❌ Failed: ${failedChunks}\n` +
` 💾 Total in DB: ${savedCount}`
)

return { success: failedChunks === 0, processedChunks, failedChunks }
} catch (error) {
logger.error('❌ Fatal error during processing:', error)
return { success: false, processedChunks, failedChunks }
}
}

/**
 * Main entry point with CLI argument parsing
 */
async function main() {
const args = process.argv.slice(2)

const options: ProcessingOptions = {
clearExisting: args.includes('--clear'),
dryRun: args.includes('--dry-run'),
verbose: args.includes('--verbose'),
}

// Parse custom path if provided
const pathIndex = args.indexOf('--path')
if (pathIndex !== -1 && args[pathIndex + 1]) {
options.docsPath = args[pathIndex + 1]
}

// Parse custom base URL if provided
const urlIndex = args.indexOf('--url')
if (urlIndex !== -1 && args[urlIndex + 1]) {
options.baseUrl = args[urlIndex + 1]
}

// Parse chunk size if provided
const chunkSizeIndex = args.indexOf('--chunk-size')
if (chunkSizeIndex !== -1 && args[chunkSizeIndex + 1]) {
options.chunkSize = Number.parseInt(args[chunkSizeIndex + 1], 10)
}

// Show help if requested
if (args.includes('--help') || args.includes('-h')) {
console.log(`
📚 Process Documentation Script

Usage: bun run process-docs.ts [options]

Options:
--clear Clear existing embeddings before processing
--dry-run Process and display results without saving to DB
--verbose Show detailed output including text previews
--path <path> Custom path to docs directory
--url <url> Custom base URL for links
--chunk-size <n> Custom chunk size in tokens (default: 1024)
--help, -h Show this help message

Examples:
# Dry run to test chunking
bun run process-docs.ts --dry-run

# Process and save to database
bun run process-docs.ts

# Clear existing and reprocess
bun run process-docs.ts --clear

# Custom path with verbose output
bun run process-docs.ts --path ./my-docs --verbose
`)
process.exit(0)
}

const result = await processDocs(options)
process.exit(result.success ? 0 : 1)
}

// Run if executed directly
if (import.meta.url === `file://${process.argv[1]}`) {
main().catch((error) => {
logger.error('Fatal error:', error)
process.exit(1)
})
}

export { processDocs }
@@ -12,6 +12,7 @@ import { GetBlocksAndToolsClientTool } from '@/lib/copilot/tools/client/blocks/g
import { GetBlocksMetadataClientTool } from '@/lib/copilot/tools/client/blocks/get-blocks-metadata'
import { GetTriggerBlocksClientTool } from '@/lib/copilot/tools/client/blocks/get-trigger-blocks'
import { GetExamplesRagClientTool } from '@/lib/copilot/tools/client/examples/get-examples-rag'
import { GetOperationsExamplesClientTool } from '@/lib/copilot/tools/client/examples/get-operations-examples'
import { GetTriggerExamplesClientTool } from '@/lib/copilot/tools/client/examples/get-trigger-examples'
import { ListGDriveFilesClientTool } from '@/lib/copilot/tools/client/gdrive/list-files'
import { ReadGDriveFileClientTool } from '@/lib/copilot/tools/client/gdrive/read-file'
@@ -90,6 +91,7 @@ const CLIENT_TOOL_INSTANTIATORS: Record<string, (id: string) => any> = {
set_global_workflow_variables: (id) => new SetGlobalWorkflowVariablesClientTool(id),
get_trigger_examples: (id) => new GetTriggerExamplesClientTool(id),
get_examples_rag: (id) => new GetExamplesRagClientTool(id),
get_operations_examples: (id) => new GetOperationsExamplesClientTool(id),
}

// Read-only static metadata for class-based tools (no instances)
@@ -120,6 +122,7 @@ export const CLASS_TOOL_METADATA: Record<string, BaseClientToolMetadata | undefi
get_trigger_examples: (GetTriggerExamplesClientTool as any)?.metadata,
get_examples_rag: (GetExamplesRagClientTool as any)?.metadata,
oauth_request_access: (OAuthRequestAccessClientTool as any)?.metadata,
get_operations_examples: (GetOperationsExamplesClientTool as any)?.metadata,
}

function ensureClientToolInstance(toolName: string | undefined, toolCallId: string | undefined) {
@@ -1273,7 +1276,8 @@ async function* parseSSEStream(
const initialState = {
mode: 'agent' as const,
selectedModel: 'claude-4.5-sonnet' as CopilotStore['selectedModel'],
agentPrefetch: true,
agentPrefetch: false,
enabledModels: null as string[] | null, // Null means not loaded yet, empty array means all disabled
isCollapsed: false,
currentChat: null as CopilotChat | null,
chats: [] as CopilotChat[],
@@ -2181,6 +2185,7 @@ export const useCopilotStore = create<CopilotStore>()(

setSelectedModel: (model) => set({ selectedModel: model }),
setAgentPrefetch: (prefetch) => set({ agentPrefetch: prefetch }),
setEnabledModels: (models) => set({ enabledModels: models }),
}))
)

@@ -80,6 +80,7 @@ export interface CopilotState {
| 'claude-4.5-sonnet'
| 'claude-4.1-opus'
agentPrefetch: boolean
enabledModels: string[] | null // Null means not loaded yet, array of model IDs when loaded
isCollapsed: boolean

currentChat: CopilotChat | null
@@ -129,6 +130,7 @@ export interface CopilotActions {
setMode: (mode: CopilotMode) => void
setSelectedModel: (model: CopilotStore['selectedModel']) => void
setAgentPrefetch: (prefetch: boolean) => void
setEnabledModels: (models: string[] | null) => void

setWorkflowId: (workflowId: string | null) => Promise<void>
validateCurrentChat: () => boolean
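As a reading aid, a small sketch of how a consumer might interpret enabledModels given the comments above (null means preferences have not loaded, an array lists the enabled model IDs, an empty array disables everything). The helper name and the fallback-to-enabled behavior while loading are assumptions, not part of the diff:

// Hypothetical helper, not part of the store
function isModelSelectable(enabledModels: string[] | null, modelId: string): boolean {
  if (enabledModels === null) return true // preferences not loaded yet (assumed fallback)
  return enabledModels.includes(modelId) // empty array disables every model
}

isModelSelectable(null, 'claude-4.5-sonnet') // true while preferences load
isModelSelectable([], 'claude-4.5-sonnet') // false, all models disabled
isModelSelectable(['claude-4.5-sonnet'], 'claude-4.1-opus') // false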
@@ -60,8 +60,8 @@ interface WorkflowDiffState {
}

interface WorkflowDiffActions {
setProposedChanges: (yamlContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
mergeProposedChanges: (yamlContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
setProposedChanges: (jsonContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
mergeProposedChanges: (jsonContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
clearDiff: () => void
getCurrentWorkflowForCanvas: () => WorkflowState
toggleDiffView: () => void
@@ -131,10 +131,10 @@ export const useWorkflowDiffStore = create<WorkflowDiffState & WorkflowDiffActio

let result: { success: boolean; diff?: WorkflowDiff; errors?: string[] }

// Handle both YAML string and direct WorkflowState object
// Handle both JSON string and direct WorkflowState object
if (typeof proposedContent === 'string') {
// Legacy YAML path (for backward compatibility)
result = await diffEngine.createDiffFromYaml(proposedContent, diffAnalysis)
// JSON string path (for backward compatibility)
result = await diffEngine.createDiff(proposedContent, diffAnalysis)
} else {
// Direct WorkflowState path (new, more efficient)
result = await diffEngine.createDiffFromWorkflowState(proposedContent, diffAnalysis)
@@ -214,13 +214,13 @@ export const useWorkflowDiffStore = create<WorkflowDiffState & WorkflowDiffActio
}
},

mergeProposedChanges: async (yamlContent: string, diffAnalysis?: DiffAnalysis) => {
logger.info('Merging proposed changes via YAML')
mergeProposedChanges: async (jsonContent: string, diffAnalysis?: DiffAnalysis) => {
logger.info('Merging proposed changes from workflow state')

// First, set isDiffReady to false to prevent premature rendering
batchedUpdate({ isDiffReady: false, diffError: null })

const result = await diffEngine.mergeDiffFromYaml(yamlContent, diffAnalysis)
const result = await diffEngine.mergeDiff(jsonContent, diffAnalysis)

if (result.success && result.diff) {
// Validate proposed workflow using serializer round-trip to catch canvas-breaking issues

@@ -2,6 +2,7 @@ import type { Edge } from 'reactflow'
import { create } from 'zustand'
import { devtools } from 'zustand/middleware'
import { createLogger } from '@/lib/logs/console/logger'
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
import { getBlock } from '@/blocks'
import { resolveOutputType } from '@/blocks/utils'
import {
@@ -166,7 +167,11 @@ export const useWorkflowStore = create<WorkflowStoreWithHistory>()(
}
})

const outputs = resolveOutputType(blockConfig.outputs)
// Get outputs based on trigger mode
const triggerMode = blockProperties?.triggerMode ?? false
const outputs = triggerMode
? getBlockOutputs(type, subBlocks, triggerMode)
: resolveOutputType(blockConfig.outputs)

const newState = {
blocks: {
@@ -182,7 +187,7 @@ export const useWorkflowStore = create<WorkflowStoreWithHistory>()(
horizontalHandles: blockProperties?.horizontalHandles ?? true,
isWide: blockProperties?.isWide ?? false,
advancedMode: blockProperties?.advancedMode ?? false,
triggerMode: blockProperties?.triggerMode ?? false,
triggerMode: triggerMode,
height: blockProperties?.height ?? 0,
layout: {},
data: nodeData,

@@ -229,19 +229,6 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
}
}

// Log the request (with sensitive data redacted)
logger.info('Mistral OCR request:', {
url: url.toString(),
hasApiKey: !!params.apiKey,
model: requestBody.model,
options: {
includesImages: requestBody.include_image_base64 ?? 'not specified',
pages: requestBody.pages ?? 'all pages',
imageLimit: requestBody.image_limit ?? 'no limit',
imageMinSize: requestBody.image_min_size ?? 'no minimum',
},
})

return requestBody
},
},
3
bun.lock
@@ -104,6 +104,7 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "6.1.0",
"date-fns": "4.1.0",
"encoding": "0.1.13",
"entities": "6.0.1",
@@ -1664,6 +1665,8 @@

"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],

"csv-parse": ["csv-parse@6.1.0", "", {}, "sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw=="],

"d3-color": ["d3-color@3.1.0", "", {}, "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA=="],

"d3-dispatch": ["d3-dispatch@3.0.1", "", {}, "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg=="],

1
packages/db/migrations/0096_tranquil_arachne.sql
Normal file
@@ -0,0 +1 @@
ALTER TABLE "settings" ADD COLUMN "copilot_enabled_models" jsonb DEFAULT '{}' NOT NULL;

6966
packages/db/migrations/meta/0096_snapshot.json
Normal file
File diff suppressed because it is too large

@@ -666,6 +666,13 @@
"when": 1759182244521,
"tag": "0095_cheerful_albert_cleary",
"breakpoints": true
},
{
"idx": 96,
"version": "7",
"when": 1759534968812,
"tag": "0096_tranquil_arachne",
"breakpoints": true
}
]
}

@@ -375,6 +375,9 @@ export const settings = pgTable('settings', {
showFloatingControls: boolean('show_floating_controls').notNull().default(true),
showTrainingControls: boolean('show_training_controls').notNull().default(false),

// Copilot preferences - maps model_id to enabled/disabled boolean
copilotEnabledModels: jsonb('copilot_enabled_models').notNull().default('{}'),

updatedAt: timestamp('updated_at').notNull().defaultNow(),
})
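For clarity, a sketch of the value shape this jsonb column is expected to hold, based on the schema comment above; the specific model IDs and flags are illustrative only:

// Illustrative value for settings.copilot_enabled_models
const copilotEnabledModels: Record<string, boolean> = {
  'claude-4.5-sonnet': true,
  'claude-4.1-opus': true,
  'gpt-4o': false,
}
// The migration's DEFAULT '{}' is an empty object: no per-model overrides stored yet.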