Files
sim/apps/sim/background/knowledge-processing.ts
Waleed 75aca00b6e improvement(kb): optimize processes, add more robust fallbacks for large file ops (#2684)
* improvement(kb): optimize processes, add more robust fallbacks for large file ops

* stronger typing

* comments cleanup

* ack PR comments

* upgraded turborepo

* ack more PR comments

* fix failing test

* moved doc update inside tx for embeddings chunks upload

* ack more PR comments
2026-01-05 20:26:16 -08:00

63 lines
1.8 KiB
TypeScript

import { createLogger } from '@sim/logger'
import { task } from '@trigger.dev/sdk'
import { env } from '@/lib/core/config/env'
import { processDocumentAsync } from '@/lib/knowledge/documents/service'
// Logger for this module, created under the name 'TriggerKnowledgeProcessing'.
const logger = createLogger('TriggerKnowledgeProcessing')
/**
 * Input accepted by the `processDocument` task's `run` handler.
 *
 * `knowledgeBaseId`, `documentId`, `docData` and `processingOptions` are handed,
 * in that order, to `processDocumentAsync`; `requestId` is only used in this file
 * as a prefix for log lines.
 */
export type DocumentProcessingPayload = {
  /** First argument passed to `processDocumentAsync`. */
  knowledgeBaseId: string

  /** Second argument passed to `processDocumentAsync`; also echoed back in the task result. */
  documentId: string

  /**
   * Description of the file to process. `filename` appears in every log line and
   * in the task result; the other fields are forwarded untouched.
   * NOTE(review): `fileSize` is presumably in bytes — confirm against the producer.
   */
  docData: {
    filename: string
    fileUrl: string
    fileSize: number
    mimeType: string
  }

  /**
   * Optional tuning knobs, all forwarded untouched to `processDocumentAsync`.
   * Any omitted field is left for the callee to default.
   */
  processingOptions: {
    chunkSize?: number
    chunkOverlap?: number
    minCharactersPerChunk?: number
    recipe?: string
    lang?: string
  }

  /** Correlation id used as the `[requestId]` prefix of this task's log messages. */
  requestId: string
}
/**
 * Trigger.dev task that processes a knowledge-base document in the background.
 *
 * Duration, retry and queue-concurrency settings are each read from an
 * `env.KB_CONFIG_*` value and fall back to the literal default whenever that
 * value is falsy (`||`, so an explicit `0` also selects the default).
 *
 * The handler forwards the payload to `processDocumentAsync`, logs start,
 * success and failure with the request id as a prefix, and rethrows any error
 * after logging it.
 *
 * @returns On success, `{ success: true, documentId, filename, processingTime }`.
 * @throws Whatever `processDocumentAsync` throws, unchanged.
 */
export const processDocument = task({
  id: 'knowledge-process-document',
  // NOTE(review): Trigger.dev documents `maxDuration` in seconds — confirm the env value uses the same unit.
  maxDuration: env.KB_CONFIG_MAX_DURATION || 600,
  // Bigger machine preset, needed for large PDF processing.
  // NOTE(review): Trigger.dev's machine table lists 'large-1x' as 4 vCPU / 8 GB RAM — confirm.
  machine: 'large-1x',
  retry: {
    maxAttempts: env.KB_CONFIG_MAX_ATTEMPTS || 3,
    factor: env.KB_CONFIG_RETRY_FACTOR || 2,
    minTimeoutInMs: env.KB_CONFIG_MIN_TIMEOUT || 1000,
    maxTimeoutInMs: env.KB_CONFIG_MAX_TIMEOUT || 10000,
  },
  queue: {
    name: 'document-processing-queue',
    concurrencyLimit: env.KB_CONFIG_CONCURRENCY_LIMIT || 20,
  },
  run: async (payload: DocumentProcessingPayload) => {
    const { requestId, knowledgeBaseId, documentId, docData, processingOptions } = payload
    const { filename } = docData
    const logPrefix = `[${requestId}]`

    logger.info(`${logPrefix} Starting Trigger.dev processing for document: ${filename}`)

    try {
      await processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions)
      logger.info(`${logPrefix} Successfully processed document: ${filename}`)

      const result = {
        success: true,
        documentId,
        filename,
        // NOTE(review): this is the `Date.now()` timestamp taken once processing
        // has finished, not an elapsed duration.
        processingTime: Date.now(),
      }
      return result
    } catch (error) {
      // Log with context, then rethrow — presumably so the retry policy above applies.
      logger.error(`${logPrefix} Failed to process document: ${filename}`, error)
      throw error
    }
  },
})