Mirror of https://github.com/simstudioai/sim.git, synced 2026-01-09 23:17:59 -05:00
improvement(kb): optimize processes, add more robust fallbacks for large file ops
* stronger typing
* comments cleanup
* ack PR comments
* upgraded turborepo
* ack more PR comments
* fix failing test
* moved doc update inside tx for embeddings chunks upload (sketched below)
* ack more PR comments
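The bullet about moving the document update inside the transaction for the embeddings chunks upload is illustrated by the sketch below. This is a hypothetical minimal example, not the repository's service code: it assumes a Drizzle-style db client and made-up `document` and `embedding` tables, and only shows the shape of the change, namely that the parent document's status update now commits or rolls back together with the chunk inserts.

import { eq } from 'drizzle-orm'
import { db } from '@/db' // assumed Drizzle client
import { document, embedding } from '@/db/schema' // assumed table definitions

type EmbeddingChunkInsert = typeof embedding.$inferInsert

// Hypothetical sketch: write the embedding chunks and the parent document's
// status update in one transaction, so a failed chunk upload can no longer
// leave the document marked as processed.
export async function saveChunksAndMarkProcessed(
  documentId: string,
  chunks: EmbeddingChunkInsert[]
) {
  await db.transaction(async (tx) => {
    // If any insert fails, the whole block rolls back.
    await tx.insert(embedding).values(chunks)

    // The document update now lives inside the same transaction as the chunk upload.
    await tx
      .update(document)
      .set({ processingStatus: 'completed', processingCompletedAt: new Date() })
      .where(eq(document.id, documentId))
  })
}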
63 lines
1.8 KiB
TypeScript
import { createLogger } from '@sim/logger'
import { task } from '@trigger.dev/sdk'
import { env } from '@/lib/core/config/env'
import { processDocumentAsync } from '@/lib/knowledge/documents/service'

const logger = createLogger('TriggerKnowledgeProcessing')

export type DocumentProcessingPayload = {
  knowledgeBaseId: string
  documentId: string
  docData: {
    filename: string
    fileUrl: string
    fileSize: number
    mimeType: string
  }
  processingOptions: {
    chunkSize?: number
    minCharactersPerChunk?: number
    recipe?: string
    lang?: string
    chunkOverlap?: number
  }
  requestId: string
}

export const processDocument = task({
  id: 'knowledge-process-document',
  maxDuration: env.KB_CONFIG_MAX_DURATION || 600,
  machine: 'large-1x', // 2 vCPU, 2GB RAM - needed for large PDF processing
  retry: {
    maxAttempts: env.KB_CONFIG_MAX_ATTEMPTS || 3,
    factor: env.KB_CONFIG_RETRY_FACTOR || 2,
    minTimeoutInMs: env.KB_CONFIG_MIN_TIMEOUT || 1000,
    maxTimeoutInMs: env.KB_CONFIG_MAX_TIMEOUT || 10000,
  },
  queue: {
    concurrencyLimit: env.KB_CONFIG_CONCURRENCY_LIMIT || 20,
    name: 'document-processing-queue',
  },
  run: async (payload: DocumentProcessingPayload) => {
    const { knowledgeBaseId, documentId, docData, processingOptions, requestId } = payload

    logger.info(`[${requestId}] Starting Trigger.dev processing for document: ${docData.filename}`)

    try {
      await processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions)

      logger.info(`[${requestId}] Successfully processed document: ${docData.filename}`)

      return {
        success: true,
        documentId,
        filename: docData.filename,
        processingTime: Date.now(),
      }
    } catch (error) {
      logger.error(`[${requestId}] Failed to process document: ${docData.filename}`, error)
      throw error
    }
  },
})
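The duration, retry, and queue settings above all read optional KB_CONFIG_* values from the shared env module and fall back to hard-coded defaults via `||`. A hypothetical zod declaration of those knobs might look like the following; the actual module at '@/lib/core/config/env' may be structured differently.

import { z } from 'zod'

// Hypothetical shape of the KB_CONFIG_* overrides used by the task config.
// All fields are optional, which is why the task falls back to defaults
// (600 s max duration, 3 attempts, concurrency 20, ...) when a knob is unset.
const kbConfigSchema = z.object({
  KB_CONFIG_MAX_DURATION: z.coerce.number().int().positive().optional(),
  KB_CONFIG_MAX_ATTEMPTS: z.coerce.number().int().positive().optional(),
  KB_CONFIG_RETRY_FACTOR: z.coerce.number().positive().optional(),
  KB_CONFIG_MIN_TIMEOUT: z.coerce.number().int().positive().optional(),
  KB_CONFIG_MAX_TIMEOUT: z.coerce.number().int().positive().optional(),
  KB_CONFIG_CONCURRENCY_LIMIT: z.coerce.number().int().positive().optional(),
})

export type KbConfigOverrides = z.infer<typeof kbConfigSchema>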
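For context, a caller would typically enqueue this task from backend code with the trigger.dev SDK. The snippet below is a sketch, not code from this repository: it assumes the v4-style `tasks.trigger` helper, an assumed import path for this file, and placeholder ids and payload values.

import { tasks } from '@trigger.dev/sdk'
import type { processDocument } from '@/background/knowledge-processing' // assumed path to this file

// Enqueue one processing run; `handle.id` identifies the run for status checks.
export async function enqueueDocumentProcessing() {
  const handle = await tasks.trigger<typeof processDocument>('knowledge-process-document', {
    knowledgeBaseId: 'kb_123', // placeholder id
    documentId: 'doc_456', // placeholder id
    docData: {
      filename: 'report.pdf',
      fileUrl: 'https://example.com/report.pdf',
      fileSize: 1_048_576,
      mimeType: 'application/pdf',
    },
    processingOptions: { chunkSize: 1024, chunkOverlap: 200 },
    requestId: 'req_789', // placeholder id
  })

  return handle.id
}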