diff --git a/apps/docs/content/docs/tools/microsoft_excel.mdx b/apps/docs/content/docs/tools/microsoft_excel.mdx
index 4b4d0f1d7..2f7bb4240 100644
--- a/apps/docs/content/docs/tools/microsoft_excel.mdx
+++ b/apps/docs/content/docs/tools/microsoft_excel.mdx
@@ -109,7 +109,7 @@ Read data from a Microsoft Excel spreadsheet
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `spreadsheetId` | string | Yes | The ID of the spreadsheet to read from |
-| `range` | string | No | The range of cells to read from |
+| `range` | string | No | The range of cells to read from. Accepts "SheetName!A1:B2" for explicit ranges or just "SheetName" to read the used range of that sheet. If omitted, reads the used range of the first sheet. |
#### Output
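For reference, the three accepted `range` forms map to inputs like this (a sketch; spreadsheet IDs are illustrative, not from the source):

```ts
// Sketch of the three `range` forms described in the updated parameter table.
const explicitRange = { spreadsheetId: 'abc123', range: 'Sheet1!A1:B2' } // explicit cell range
const wholeSheet = { spreadsheetId: 'abc123', range: 'Sheet1' } // used range of a named sheet
const firstSheet = { spreadsheetId: 'abc123' } // range omitted: used range of the first sheet
```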
diff --git a/apps/docs/content/docs/tools/onedrive.mdx b/apps/docs/content/docs/tools/onedrive.mdx
index 1708434f0..0233aa87a 100644
--- a/apps/docs/content/docs/tools/onedrive.mdx
+++ b/apps/docs/content/docs/tools/onedrive.mdx
@@ -68,7 +68,7 @@ Upload a file to OneDrive
| `fileName` | string | Yes | The name of the file to upload |
| `content` | string | Yes | The content of the file to upload |
| `folderSelector` | string | No | Select the folder to upload the file to |
-| `folderId` | string | No | The ID of the folder to upload the file to \(internal use\) |
+| `manualFolderId` | string | No | Manually entered folder ID \(advanced mode\) |
#### Output
@@ -87,7 +87,7 @@ Create a new folder in OneDrive
| --------- | ---- | -------- | ----------- |
| `folderName` | string | Yes | Name of the folder to create |
| `folderSelector` | string | No | Select the parent folder to create the folder in |
-| `folderId` | string | No | ID of the parent folder \(internal use\) |
+| `manualFolderId` | string | No | Manually entered parent folder ID \(advanced mode\) |
#### Output
@@ -105,7 +105,7 @@ List files and folders in OneDrive
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `folderSelector` | string | No | Select the folder to list files from |
-| `folderId` | string | No | The ID of the folder to list files from \(internal use\) |
+| `manualFolderId` | string | No | Manually entered folder ID \(advanced mode\) |
| `query` | string | No | A query to filter the files |
| `pageSize` | number | No | The number of files to return |
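A sketch of how the renamed parameter is meant to be used (values hypothetical): `folderSelector` carries the picker selection, while `manualFolderId` covers advanced mode where the ID is typed directly.

```ts
// Hypothetical inputs for the list operation: picker selection vs. manually entered folder ID.
const viaPicker = { folderSelector: 'Documents/Reports', pageSize: 25 }
const viaManualId = { manualFolderId: '01BYE5RZ6QN3ZWBTUFOFD3GSPGOHDJD36K', query: 'budget' }
```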
diff --git a/apps/docs/content/docs/tools/outlook.mdx b/apps/docs/content/docs/tools/outlook.mdx
index f70725f13..d9aa94eeb 100644
--- a/apps/docs/content/docs/tools/outlook.mdx
+++ b/apps/docs/content/docs/tools/outlook.mdx
@@ -211,10 +211,27 @@ Read emails from Outlook
| Parameter | Type | Description |
| --------- | ---- | ----------- |
-| `success` | boolean | Email read operation success status |
-| `messageCount` | number | Number of emails retrieved |
-| `messages` | array | Array of email message objects |
| `message` | string | Success or status message |
+| `results` | array | Array of email message objects |
+
+### `outlook_forward`
+
+Forward an existing Outlook message to specified recipients
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `messageId` | string | Yes | The ID of the message to forward |
+| `to` | string | Yes | Recipient email address\(es\), comma-separated |
+| `comment` | string | No | Optional comment to include with the forwarded message |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `message` | string | Success or error message |
+| `results` | object | Delivery result details |
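A minimal input sketch for the new tool, assuming only the parameter shapes in the table (the message ID is illustrative):

```ts
const forwardInput = {
  messageId: 'AAMkAGI2TG93AAA=', // ID of the message to forward (illustrative)
  to: 'a@example.com, b@example.com', // comma-separated recipients
  comment: 'FYI, see the thread below', // optional note included with the forward
}
```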
diff --git a/apps/sim/app/(auth)/login/login-form.tsx b/apps/sim/app/(auth)/login/login-form.tsx
index 2c1d49729..16298b420 100644
--- a/apps/sim/app/(auth)/login/login-form.tsx
+++ b/apps/sim/app/(auth)/login/login-form.tsx
@@ -49,15 +49,12 @@ const PASSWORD_VALIDATIONS = {
},
}
-// Validate callback URL to prevent open redirect vulnerabilities
const validateCallbackUrl = (url: string): boolean => {
try {
- // If it's a relative URL, it's safe
if (url.startsWith('/')) {
return true
}
- // If absolute URL, check if it belongs to the same origin
const currentOrigin = typeof window !== 'undefined' ? window.location.origin : ''
if (url.startsWith(currentOrigin)) {
return true
@@ -70,7 +67,6 @@ const validateCallbackUrl = (url: string): boolean => {
}
}
-// Validate password and return array of error messages
const validatePassword = (passwordValue: string): string[] => {
const errors: string[] = []
@@ -521,9 +517,7 @@ export default function LoginPage({
{resetStatus.type && (
{resetStatus.message}
diff --git a/apps/sim/app/api/environment/variables/route.ts b/apps/sim/app/api/environment/variables/route.ts
index 6a794f566..1689b465f 100644
--- a/apps/sim/app/api/environment/variables/route.ts
+++ b/apps/sim/app/api/environment/variables/route.ts
@@ -109,7 +109,9 @@ export async function PUT(request: NextRequest) {
// If we can't decrypt the existing value, treat as changed and re-encrypt
logger.warn(
`[${requestId}] Could not decrypt existing variable ${key}, re-encrypting`,
- { error: decryptError }
+ {
+ error: decryptError,
+ }
)
variablesToEncrypt[key] = newValue
updatedVariables.push(key)
diff --git a/apps/sim/app/api/files/multipart/route.ts b/apps/sim/app/api/files/multipart/route.ts
index c7d11e4f8..9ac82c9bb 100644
--- a/apps/sim/app/api/files/multipart/route.ts
+++ b/apps/sim/app/api/files/multipart/route.ts
@@ -1,16 +1,8 @@
-import {
- AbortMultipartUploadCommand,
- CompleteMultipartUploadCommand,
- CreateMultipartUploadCommand,
- UploadPartCommand,
-} from '@aws-sdk/client-s3'
-import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { type NextRequest, NextResponse } from 'next/server'
-import { v4 as uuidv4 } from 'uuid'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { getStorageProvider, isUsingCloudStorage } from '@/lib/uploads'
-import { S3_KB_CONFIG } from '@/lib/uploads/setup'
+import { BLOB_KB_CONFIG } from '@/lib/uploads/setup'
const logger = createLogger('MultipartUploadAPI')
@@ -26,15 +18,6 @@ interface GetPartUrlsRequest {
partNumbers: number[]
}
-interface CompleteMultipartRequest {
- uploadId: string
- key: string
- parts: Array<{
- ETag: string
- PartNumber: number
- }>
-}
-
export async function POST(request: NextRequest) {
try {
const session = await getSession()
@@ -44,106 +27,214 @@ export async function POST(request: NextRequest) {
const action = request.nextUrl.searchParams.get('action')
- if (!isUsingCloudStorage() || getStorageProvider() !== 's3') {
+ if (!isUsingCloudStorage()) {
return NextResponse.json(
- { error: 'Multipart upload is only available with S3 storage' },
+ { error: 'Multipart upload is only available with cloud storage (S3 or Azure Blob)' },
{ status: 400 }
)
}
- const { getS3Client } = await import('@/lib/uploads/s3/s3-client')
- const s3Client = getS3Client()
+ const storageProvider = getStorageProvider()
switch (action) {
case 'initiate': {
const data: InitiateMultipartRequest = await request.json()
- const { fileName, contentType } = data
+ const { fileName, contentType, fileSize } = data
- const safeFileName = fileName.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9.-]/g, '_')
- const uniqueKey = `kb/${uuidv4()}-${safeFileName}`
+ if (storageProvider === 's3') {
+ const { initiateS3MultipartUpload } = await import('@/lib/uploads/s3/s3-client')
- const command = new CreateMultipartUploadCommand({
- Bucket: S3_KB_CONFIG.bucket,
- Key: uniqueKey,
- ContentType: contentType,
- Metadata: {
- originalName: fileName,
- uploadedAt: new Date().toISOString(),
- purpose: 'knowledge-base',
- },
- })
+ const result = await initiateS3MultipartUpload({
+ fileName,
+ contentType,
+ fileSize,
+ })
- const response = await s3Client.send(command)
+ logger.info(`Initiated S3 multipart upload for ${fileName}: ${result.uploadId}`)
- logger.info(`Initiated multipart upload for ${fileName}: ${response.UploadId}`)
+ return NextResponse.json({
+ uploadId: result.uploadId,
+ key: result.key,
+ })
+ }
+ if (storageProvider === 'blob') {
+ const { initiateMultipartUpload } = await import('@/lib/uploads/blob/blob-client')
- return NextResponse.json({
- uploadId: response.UploadId,
- key: uniqueKey,
- })
+ const result = await initiateMultipartUpload({
+ fileName,
+ contentType,
+ fileSize,
+ customConfig: {
+ containerName: BLOB_KB_CONFIG.containerName,
+ accountName: BLOB_KB_CONFIG.accountName,
+ accountKey: BLOB_KB_CONFIG.accountKey,
+ connectionString: BLOB_KB_CONFIG.connectionString,
+ },
+ })
+
+ logger.info(`Initiated Azure multipart upload for ${fileName}: ${result.uploadId}`)
+
+ return NextResponse.json({
+ uploadId: result.uploadId,
+ key: result.key,
+ })
+ }
+
+ return NextResponse.json(
+ { error: `Unsupported storage provider: ${storageProvider}` },
+ { status: 400 }
+ )
}
case 'get-part-urls': {
const data: GetPartUrlsRequest = await request.json()
const { uploadId, key, partNumbers } = data
- const presignedUrls = await Promise.all(
- partNumbers.map(async (partNumber) => {
- const command = new UploadPartCommand({
- Bucket: S3_KB_CONFIG.bucket,
- Key: key,
- PartNumber: partNumber,
- UploadId: uploadId,
- })
+ if (storageProvider === 's3') {
+ const { getS3MultipartPartUrls } = await import('@/lib/uploads/s3/s3-client')
- const url = await getSignedUrl(s3Client, command, { expiresIn: 3600 })
- return { partNumber, url }
+ const presignedUrls = await getS3MultipartPartUrls(key, uploadId, partNumbers)
+
+ return NextResponse.json({ presignedUrls })
+ }
+ if (storageProvider === 'blob') {
+ const { getMultipartPartUrls } = await import('@/lib/uploads/blob/blob-client')
+
+ const presignedUrls = await getMultipartPartUrls(key, uploadId, partNumbers, {
+ containerName: BLOB_KB_CONFIG.containerName,
+ accountName: BLOB_KB_CONFIG.accountName,
+ accountKey: BLOB_KB_CONFIG.accountKey,
+ connectionString: BLOB_KB_CONFIG.connectionString,
})
- )
- return NextResponse.json({ presignedUrls })
+ return NextResponse.json({ presignedUrls })
+ }
+
+ return NextResponse.json(
+ { error: `Unsupported storage provider: ${storageProvider}` },
+ { status: 400 }
+ )
}
case 'complete': {
- const data: CompleteMultipartRequest = await request.json()
+ const data = await request.json()
+
+ // Handle batch completion
+ if ('uploads' in data) {
+ const results = await Promise.all(
+ data.uploads.map(async (upload: any) => {
+ const { uploadId, key } = upload
+
+ if (storageProvider === 's3') {
+ const { completeS3MultipartUpload } = await import('@/lib/uploads/s3/s3-client')
+ const parts = upload.parts // S3 format: { ETag, PartNumber }
+
+ const result = await completeS3MultipartUpload(key, uploadId, parts)
+
+ return {
+ success: true,
+ location: result.location,
+ path: result.path,
+ key: result.key,
+ }
+ }
+ if (storageProvider === 'blob') {
+ const { completeMultipartUpload } = await import('@/lib/uploads/blob/blob-client')
+ const parts = upload.parts // Azure format: { blockId, partNumber }
+
+ const result = await completeMultipartUpload(key, uploadId, parts, {
+ containerName: BLOB_KB_CONFIG.containerName,
+ accountName: BLOB_KB_CONFIG.accountName,
+ accountKey: BLOB_KB_CONFIG.accountKey,
+ connectionString: BLOB_KB_CONFIG.connectionString,
+ })
+
+ return {
+ success: true,
+ location: result.location,
+ path: result.path,
+ key: result.key,
+ }
+ }
+
+ throw new Error(`Unsupported storage provider: ${storageProvider}`)
+ })
+ )
+
+ logger.info(`Completed ${data.uploads.length} multipart uploads`)
+ return NextResponse.json({ results })
+ }
+
+ // Handle single completion
const { uploadId, key, parts } = data
- const command = new CompleteMultipartUploadCommand({
- Bucket: S3_KB_CONFIG.bucket,
- Key: key,
- UploadId: uploadId,
- MultipartUpload: {
- Parts: parts.sort((a, b) => a.PartNumber - b.PartNumber),
- },
- })
+ if (storageProvider === 's3') {
+ const { completeS3MultipartUpload } = await import('@/lib/uploads/s3/s3-client')
- const response = await s3Client.send(command)
+ const result = await completeS3MultipartUpload(key, uploadId, parts)
- logger.info(`Completed multipart upload for key ${key}`)
+ logger.info(`Completed S3 multipart upload for key ${key}`)
- const finalPath = `/api/files/serve/s3/${encodeURIComponent(key)}`
+ return NextResponse.json({
+ success: true,
+ location: result.location,
+ path: result.path,
+ key: result.key,
+ })
+ }
+ if (storageProvider === 'blob') {
+ const { completeMultipartUpload } = await import('@/lib/uploads/blob/blob-client')
- return NextResponse.json({
- success: true,
- location: response.Location,
- path: finalPath,
- key,
- })
+ const result = await completeMultipartUpload(key, uploadId, parts, {
+ containerName: BLOB_KB_CONFIG.containerName,
+ accountName: BLOB_KB_CONFIG.accountName,
+ accountKey: BLOB_KB_CONFIG.accountKey,
+ connectionString: BLOB_KB_CONFIG.connectionString,
+ })
+
+ logger.info(`Completed Azure multipart upload for key ${key}`)
+
+ return NextResponse.json({
+ success: true,
+ location: result.location,
+ path: result.path,
+ key: result.key,
+ })
+ }
+
+ return NextResponse.json(
+ { error: `Unsupported storage provider: ${storageProvider}` },
+ { status: 400 }
+ )
}
case 'abort': {
const data = await request.json()
const { uploadId, key } = data
- const command = new AbortMultipartUploadCommand({
- Bucket: S3_KB_CONFIG.bucket,
- Key: key,
- UploadId: uploadId,
- })
+ if (storageProvider === 's3') {
+ const { abortS3MultipartUpload } = await import('@/lib/uploads/s3/s3-client')
- await s3Client.send(command)
+ await abortS3MultipartUpload(key, uploadId)
- logger.info(`Aborted multipart upload for key ${key}`)
+ logger.info(`Aborted S3 multipart upload for key ${key}`)
+ } else if (storageProvider === 'blob') {
+ const { abortMultipartUpload } = await import('@/lib/uploads/blob/blob-client')
+
+ await abortMultipartUpload(key, uploadId, {
+ containerName: BLOB_KB_CONFIG.containerName,
+ accountName: BLOB_KB_CONFIG.accountName,
+ accountKey: BLOB_KB_CONFIG.accountKey,
+ connectionString: BLOB_KB_CONFIG.connectionString,
+ })
+
+ logger.info(`Aborted Azure multipart upload for key ${key}`)
+ } else {
+ return NextResponse.json(
+ { error: `Unsupported storage provider: ${storageProvider}` },
+ { status: 400 }
+ )
+ }
return NextResponse.json({ success: true })
}
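For orientation, the client-side sequence against this route is the same for both providers: initiate, fetch part URLs, upload parts, complete (S3 parts are `{ ETag, PartNumber }`, Azure parts `{ blockId, partNumber }`, per the comments above). A minimal single-part sketch for the S3 case:

```ts
// Sketch: drive the multipart route end to end for one S3-style part.
async function uploadMultipart(file: File) {
  const init = await fetch('/api/files/multipart?action=initiate', {
    method: 'POST',
    body: JSON.stringify({ fileName: file.name, contentType: file.type, fileSize: file.size }),
  }).then((r) => r.json())

  const { presignedUrls } = await fetch('/api/files/multipart?action=get-part-urls', {
    method: 'POST',
    body: JSON.stringify({ uploadId: init.uploadId, key: init.key, partNumbers: [1] }),
  }).then((r) => r.json())

  // Upload the part and keep its ETag; S3 requires it at completion
  // (the ETag header must be exposed via CORS for this to work).
  const putRes = await fetch(presignedUrls[0].url, { method: 'PUT', body: file })
  const parts = [{ ETag: putRes.headers.get('ETag')!, PartNumber: 1 }]

  return fetch('/api/files/multipart?action=complete', {
    method: 'POST',
    body: JSON.stringify({ uploadId: init.uploadId, key: init.key, parts }),
  }).then((r) => r.json())
}
```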
diff --git a/apps/sim/app/api/files/presigned/batch/route.ts b/apps/sim/app/api/files/presigned/batch/route.ts
new file mode 100644
index 000000000..1e82f6107
--- /dev/null
+++ b/apps/sim/app/api/files/presigned/batch/route.ts
@@ -0,0 +1,361 @@
+import { PutObjectCommand } from '@aws-sdk/client-s3'
+import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
+import { type NextRequest, NextResponse } from 'next/server'
+import { v4 as uuidv4 } from 'uuid'
+import { getSession } from '@/lib/auth'
+import { createLogger } from '@/lib/logs/console/logger'
+import { getStorageProvider, isUsingCloudStorage } from '@/lib/uploads'
+import {
+ BLOB_CHAT_CONFIG,
+ BLOB_CONFIG,
+ BLOB_COPILOT_CONFIG,
+ BLOB_KB_CONFIG,
+ S3_CHAT_CONFIG,
+ S3_CONFIG,
+ S3_COPILOT_CONFIG,
+ S3_KB_CONFIG,
+} from '@/lib/uploads/setup'
+import { validateFileType } from '@/lib/uploads/validation'
+import { createErrorResponse, createOptionsResponse } from '@/app/api/files/utils'
+
+const logger = createLogger('BatchPresignedUploadAPI')
+
+interface BatchFileRequest {
+ fileName: string
+ contentType: string
+ fileSize: number
+}
+
+interface BatchPresignedUrlRequest {
+ files: BatchFileRequest[]
+}
+
+type UploadType = 'general' | 'knowledge-base' | 'chat' | 'copilot'
+
+export async function POST(request: NextRequest) {
+ try {
+ const session = await getSession()
+ if (!session?.user?.id) {
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
+ }
+
+ let data: BatchPresignedUrlRequest
+ try {
+ data = await request.json()
+ } catch {
+ return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 })
+ }
+
+ const { files } = data
+
+ if (!files || !Array.isArray(files) || files.length === 0) {
+ return NextResponse.json(
+ { error: 'files array is required and cannot be empty' },
+ { status: 400 }
+ )
+ }
+
+ if (files.length > 100) {
+ return NextResponse.json(
+ { error: 'Cannot process more than 100 files at once' },
+ { status: 400 }
+ )
+ }
+
+ const uploadTypeParam = request.nextUrl.searchParams.get('type')
+ const uploadType: UploadType =
+ uploadTypeParam === 'knowledge-base'
+ ? 'knowledge-base'
+ : uploadTypeParam === 'chat'
+ ? 'chat'
+ : uploadTypeParam === 'copilot'
+ ? 'copilot'
+ : 'general'
+
+ const MAX_FILE_SIZE = 100 * 1024 * 1024
+ for (const file of files) {
+ if (!file.fileName?.trim()) {
+ return NextResponse.json({ error: 'fileName is required for all files' }, { status: 400 })
+ }
+ if (!file.contentType?.trim()) {
+ return NextResponse.json(
+ { error: 'contentType is required for all files' },
+ { status: 400 }
+ )
+ }
+ if (!file.fileSize || file.fileSize <= 0) {
+ return NextResponse.json(
+ { error: 'fileSize must be positive for all files' },
+ { status: 400 }
+ )
+ }
+ if (file.fileSize > MAX_FILE_SIZE) {
+ return NextResponse.json(
+ { error: `File ${file.fileName} exceeds maximum size of ${MAX_FILE_SIZE} bytes` },
+ { status: 400 }
+ )
+ }
+
+ if (uploadType === 'knowledge-base') {
+ const fileValidationError = validateFileType(file.fileName, file.contentType)
+ if (fileValidationError) {
+ return NextResponse.json(
+ {
+ error: fileValidationError.message,
+ code: fileValidationError.code,
+ supportedTypes: fileValidationError.supportedTypes,
+ },
+ { status: 400 }
+ )
+ }
+ }
+ }
+
+ const sessionUserId = session.user.id
+
+ if (uploadType === 'copilot' && !sessionUserId?.trim()) {
+ return NextResponse.json(
+ { error: 'Authenticated user session is required for copilot uploads' },
+ { status: 400 }
+ )
+ }
+
+ if (!isUsingCloudStorage()) {
+ return NextResponse.json(
+ { error: 'Direct uploads are only available when cloud storage is enabled' },
+ { status: 400 }
+ )
+ }
+
+ const storageProvider = getStorageProvider()
+ logger.info(
+ `Generating batch ${uploadType} presigned URLs for ${files.length} files using ${storageProvider}`
+ )
+
+ const startTime = Date.now()
+
+ let result
+ switch (storageProvider) {
+ case 's3':
+ result = await handleBatchS3PresignedUrls(files, uploadType, sessionUserId)
+ break
+ case 'blob':
+ result = await handleBatchBlobPresignedUrls(files, uploadType, sessionUserId)
+ break
+ default:
+ return NextResponse.json(
+ { error: `Unknown storage provider: ${storageProvider}` },
+ { status: 500 }
+ )
+ }
+
+ const duration = Date.now() - startTime
+ logger.info(
+ `Generated ${files.length} presigned URLs in ${duration}ms (avg ${Math.round(duration / files.length)}ms per file)`
+ )
+
+ return NextResponse.json(result)
+ } catch (error) {
+ logger.error('Error generating batch presigned URLs:', error)
+ return createErrorResponse(
+ error instanceof Error ? error : new Error('Failed to generate batch presigned URLs')
+ )
+ }
+}
+
+async function handleBatchS3PresignedUrls(
+ files: BatchFileRequest[],
+ uploadType: UploadType,
+ userId?: string
+) {
+ const config =
+ uploadType === 'knowledge-base'
+ ? S3_KB_CONFIG
+ : uploadType === 'chat'
+ ? S3_CHAT_CONFIG
+ : uploadType === 'copilot'
+ ? S3_COPILOT_CONFIG
+ : S3_CONFIG
+
+ if (!config.bucket || !config.region) {
+ throw new Error(`S3 configuration missing for ${uploadType} uploads`)
+ }
+
+ const { getS3Client, sanitizeFilenameForMetadata } = await import('@/lib/uploads/s3/s3-client')
+ const s3Client = getS3Client()
+
+ let prefix = ''
+ if (uploadType === 'knowledge-base') {
+ prefix = 'kb/'
+ } else if (uploadType === 'chat') {
+ prefix = 'chat/'
+ } else if (uploadType === 'copilot') {
+ prefix = `${userId}/`
+ }
+
+ const baseMetadata: Record<string, string> = {
+ uploadedAt: new Date().toISOString(),
+ }
+
+ if (uploadType === 'knowledge-base') {
+ baseMetadata.purpose = 'knowledge-base'
+ } else if (uploadType === 'chat') {
+ baseMetadata.purpose = 'chat'
+ } else if (uploadType === 'copilot') {
+ baseMetadata.purpose = 'copilot'
+ baseMetadata.userId = userId || ''
+ }
+
+ const results = await Promise.all(
+ files.map(async (file) => {
+ const safeFileName = file.fileName.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9.-]/g, '_')
+ const uniqueKey = `${prefix}${uuidv4()}-${safeFileName}`
+ const sanitizedOriginalName = sanitizeFilenameForMetadata(file.fileName)
+
+ const metadata = {
+ ...baseMetadata,
+ originalName: sanitizedOriginalName,
+ }
+
+ const command = new PutObjectCommand({
+ Bucket: config.bucket,
+ Key: uniqueKey,
+ ContentType: file.contentType,
+ Metadata: metadata,
+ })
+
+ const presignedUrl = await getSignedUrl(s3Client, command, { expiresIn: 3600 })
+
+ const finalPath =
+ uploadType === 'chat'
+ ? `https://${config.bucket}.s3.${config.region}.amazonaws.com/${uniqueKey}`
+ : `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`
+
+ return {
+ fileName: file.fileName,
+ presignedUrl,
+ fileInfo: {
+ path: finalPath,
+ key: uniqueKey,
+ name: file.fileName,
+ size: file.fileSize,
+ type: file.contentType,
+ },
+ }
+ })
+ )
+
+ return {
+ files: results,
+ directUploadSupported: true,
+ }
+}
+
+async function handleBatchBlobPresignedUrls(
+ files: BatchFileRequest[],
+ uploadType: UploadType,
+ userId?: string
+) {
+ const config =
+ uploadType === 'knowledge-base'
+ ? BLOB_KB_CONFIG
+ : uploadType === 'chat'
+ ? BLOB_CHAT_CONFIG
+ : uploadType === 'copilot'
+ ? BLOB_COPILOT_CONFIG
+ : BLOB_CONFIG
+
+ if (
+ !config.accountName ||
+ !config.containerName ||
+ (!config.accountKey && !config.connectionString)
+ ) {
+ throw new Error(`Azure Blob configuration missing for ${uploadType} uploads`)
+ }
+
+ const { getBlobServiceClient } = await import('@/lib/uploads/blob/blob-client')
+ const { BlobSASPermissions, generateBlobSASQueryParameters, StorageSharedKeyCredential } =
+ await import('@azure/storage-blob')
+
+ const blobServiceClient = getBlobServiceClient()
+ const containerClient = blobServiceClient.getContainerClient(config.containerName)
+
+ let prefix = ''
+ if (uploadType === 'knowledge-base') {
+ prefix = 'kb/'
+ } else if (uploadType === 'chat') {
+ prefix = 'chat/'
+ } else if (uploadType === 'copilot') {
+ prefix = `${userId}/`
+ }
+
+ const baseUploadHeaders: Record<string, string> = {
+ 'x-ms-blob-type': 'BlockBlob',
+ 'x-ms-meta-uploadedat': new Date().toISOString(),
+ }
+
+ if (uploadType === 'knowledge-base') {
+ baseUploadHeaders['x-ms-meta-purpose'] = 'knowledge-base'
+ } else if (uploadType === 'chat') {
+ baseUploadHeaders['x-ms-meta-purpose'] = 'chat'
+ } else if (uploadType === 'copilot') {
+ baseUploadHeaders['x-ms-meta-purpose'] = 'copilot'
+ baseUploadHeaders['x-ms-meta-userid'] = encodeURIComponent(userId || '')
+ }
+
+ const results = await Promise.all(
+ files.map(async (file) => {
+ const safeFileName = file.fileName.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9.-]/g, '_')
+ const uniqueKey = `${prefix}${uuidv4()}-${safeFileName}`
+ const blockBlobClient = containerClient.getBlockBlobClient(uniqueKey)
+
+ const sasOptions = {
+ containerName: config.containerName,
+ blobName: uniqueKey,
+ permissions: BlobSASPermissions.parse('w'),
+ startsOn: new Date(),
+ expiresOn: new Date(Date.now() + 3600 * 1000),
+ }
+
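+ // NOTE: SAS signing below assumes accountKey is available; a connection-string-only
+ // config (which the check above permits) would need a different credential path.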
+ const sasToken = generateBlobSASQueryParameters(
+ sasOptions,
+ new StorageSharedKeyCredential(config.accountName, config.accountKey || '')
+ ).toString()
+
+ const presignedUrl = `${blockBlobClient.url}?${sasToken}`
+
+ const finalPath =
+ uploadType === 'chat'
+ ? blockBlobClient.url
+ : `/api/files/serve/blob/${encodeURIComponent(uniqueKey)}`
+
+ const uploadHeaders = {
+ ...baseUploadHeaders,
+ 'x-ms-blob-content-type': file.contentType,
+ 'x-ms-meta-originalname': encodeURIComponent(file.fileName),
+ }
+
+ return {
+ fileName: file.fileName,
+ presignedUrl,
+ fileInfo: {
+ path: finalPath,
+ key: uniqueKey,
+ name: file.fileName,
+ size: file.fileSize,
+ type: file.contentType,
+ },
+ uploadHeaders,
+ }
+ })
+ )
+
+ return {
+ files: results,
+ directUploadSupported: true,
+ }
+}
+
+export async function OPTIONS() {
+ return createOptionsResponse()
+}
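A usage sketch for the new batch endpoint, assuming the request and response shapes defined above (Azure entries additionally carry `uploadHeaders` that must be echoed on the PUT; for S3 the Content-Type must match what was signed):

```ts
// Sketch: request presigned URLs in one batch, then PUT each file directly to storage.
async function directUpload(files: File[]) {
  const res = await fetch('/api/files/presigned/batch?type=knowledge-base', {
    method: 'POST',
    body: JSON.stringify({
      files: files.map((f) => ({ fileName: f.name, contentType: f.type, fileSize: f.size })),
    }),
  }).then((r) => r.json())

  await Promise.all(
    res.files.map((entry: any, i: number) =>
      fetch(entry.presignedUrl, {
        method: 'PUT',
        // Azure responses include the required x-ms-* headers; S3 just needs the signed Content-Type.
        headers: entry.uploadHeaders ?? { 'Content-Type': files[i].type },
        body: files[i],
      })
    )
  )

  return res.files.map((entry: any) => entry.fileInfo) // paths/keys for subsequent processing
}
```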
diff --git a/apps/sim/app/api/files/presigned/route.ts b/apps/sim/app/api/files/presigned/route.ts
index bfb86796c..2775f96a6 100644
--- a/apps/sim/app/api/files/presigned/route.ts
+++ b/apps/sim/app/api/files/presigned/route.ts
@@ -16,6 +16,7 @@ import {
S3_COPILOT_CONFIG,
S3_KB_CONFIG,
} from '@/lib/uploads/setup'
+import { validateFileType } from '@/lib/uploads/validation'
import { createErrorResponse, createOptionsResponse } from '@/app/api/files/utils'
const logger = createLogger('PresignedUploadAPI')
@@ -96,6 +97,13 @@ export async function POST(request: NextRequest) {
? 'copilot'
: 'general'
+ if (uploadType === 'knowledge-base') {
+ const fileValidationError = validateFileType(fileName, contentType)
+ if (fileValidationError) {
+ throw new ValidationError(`${fileValidationError.message}`)
+ }
+ }
+
// Evaluate user id from session for copilot uploads
const sessionUserId = session.user.id
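From its use here and in the batch route, `validateFileType` appears to return `null` for accepted types or an error object carrying `message`, `code`, and `supportedTypes`; a hedged sketch of the guard:

```ts
// Inferred contract (not authoritative): null on success, descriptive object on rejection.
const err = validateFileType('notes.exe', 'application/octet-stream')
if (err) {
  throw new ValidationError(err.message) // err.code / err.supportedTypes explain the rejection
}
```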
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/[chunkId]/route.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/[chunkId]/route.ts
index 0367241c5..1df8cde31 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/[chunkId]/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/[chunkId]/route.ts
@@ -1,12 +1,10 @@
-import { createHash, randomUUID } from 'crypto'
-import { eq, sql } from 'drizzle-orm'
+import { randomUUID } from 'crypto'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import { deleteChunk, updateChunk } from '@/lib/knowledge/chunks/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkChunkAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, embedding } from '@/db/schema'
const logger = createLogger('ChunkByIdAPI')
@@ -102,33 +100,7 @@ export async function PUT(
try {
const validatedData = UpdateChunkSchema.parse(body)
- const updateData: Partial<{
- content: string
- contentLength: number
- tokenCount: number
- chunkHash: string
- enabled: boolean
- updatedAt: Date
- }> = {}
-
- if (validatedData.content) {
- updateData.content = validatedData.content
- updateData.contentLength = validatedData.content.length
- // Update token count estimation (rough approximation: 4 chars per token)
- updateData.tokenCount = Math.ceil(validatedData.content.length / 4)
- updateData.chunkHash = createHash('sha256').update(validatedData.content).digest('hex')
- }
-
- if (validatedData.enabled !== undefined) updateData.enabled = validatedData.enabled
-
- await db.update(embedding).set(updateData).where(eq(embedding.id, chunkId))
-
- // Fetch the updated chunk
- const updatedChunk = await db
- .select()
- .from(embedding)
- .where(eq(embedding.id, chunkId))
- .limit(1)
+ const updatedChunk = await updateChunk(chunkId, validatedData, requestId)
logger.info(
`[${requestId}] Chunk updated: ${chunkId} in document ${documentId} in knowledge base ${knowledgeBaseId}`
@@ -136,7 +108,7 @@ export async function PUT(
return NextResponse.json({
success: true,
- data: updatedChunk[0],
+ data: updatedChunk,
})
} catch (validationError) {
if (validationError instanceof z.ZodError) {
@@ -190,37 +162,7 @@ export async function DELETE(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Use transaction to atomically delete chunk and update document statistics
- await db.transaction(async (tx) => {
- // Get chunk data before deletion for statistics update
- const chunkToDelete = await tx
- .select({
- tokenCount: embedding.tokenCount,
- contentLength: embedding.contentLength,
- })
- .from(embedding)
- .where(eq(embedding.id, chunkId))
- .limit(1)
-
- if (chunkToDelete.length === 0) {
- throw new Error('Chunk not found')
- }
-
- const chunk = chunkToDelete[0]
-
- // Delete the chunk
- await tx.delete(embedding).where(eq(embedding.id, chunkId))
-
- // Update document statistics
- await tx
- .update(document)
- .set({
- chunkCount: sql`${document.chunkCount} - 1`,
- tokenCount: sql`${document.tokenCount} - ${chunk.tokenCount}`,
- characterCount: sql`${document.characterCount} - ${chunk.contentLength}`,
- })
- .where(eq(document.id, documentId))
- })
+ await deleteChunk(chunkId, documentId, requestId)
logger.info(
`[${requestId}] Chunk deleted: ${chunkId} from document ${documentId} in knowledge base ${knowledgeBaseId}`
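From the call sites above, the extracted chunk service contract looks roughly like this (signatures inferred from usage, not authoritative):

```ts
// Inferred signatures for the new service functions used by this route.
declare function updateChunk(
  chunkId: string,
  data: { content?: string; enabled?: boolean },
  requestId: string
): Promise<unknown> // resolves to the updated chunk row

declare function deleteChunk(
  chunkId: string,
  documentId: string,
  requestId: string
): Promise<void> // also decrements the document's chunk/token/character counters
```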
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.test.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.test.ts
deleted file mode 100644
index 3ebd69da2..000000000
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.test.ts
+++ /dev/null
@@ -1,378 +0,0 @@
-/**
- * Tests for knowledge document chunks API route
- *
- * @vitest-environment node
- */
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
-import {
- createMockRequest,
- mockAuth,
- mockConsoleLogger,
- mockDrizzleOrm,
- mockKnowledgeSchemas,
-} from '@/app/api/__test-utils__/utils'
-
-mockKnowledgeSchemas()
-mockDrizzleOrm()
-mockConsoleLogger()
-
-vi.mock('@/lib/tokenization/estimators', () => ({
- estimateTokenCount: vi.fn().mockReturnValue({ count: 452 }),
-}))
-
-vi.mock('@/providers/utils', () => ({
- calculateCost: vi.fn().mockReturnValue({
- input: 0.00000904,
- output: 0,
- total: 0.00000904,
- pricing: {
- input: 0.02,
- output: 0,
- updatedAt: '2025-07-10',
- },
- }),
-}))
-
-vi.mock('@/app/api/knowledge/utils', () => ({
- checkKnowledgeBaseAccess: vi.fn(),
- checkKnowledgeBaseWriteAccess: vi.fn(),
- checkDocumentAccess: vi.fn(),
- checkDocumentWriteAccess: vi.fn(),
- checkChunkAccess: vi.fn(),
- generateEmbeddings: vi.fn().mockResolvedValue([[0.1, 0.2, 0.3, 0.4, 0.5]]),
- processDocumentAsync: vi.fn(),
-}))
-
-describe('Knowledge Document Chunks API Route', () => {
- const mockAuth$ = mockAuth()
-
- const mockDbChain = {
- select: vi.fn().mockReturnThis(),
- from: vi.fn().mockReturnThis(),
- where: vi.fn().mockReturnThis(),
- orderBy: vi.fn().mockReturnThis(),
- limit: vi.fn().mockReturnThis(),
- offset: vi.fn().mockReturnThis(),
- insert: vi.fn().mockReturnThis(),
- values: vi.fn().mockResolvedValue(undefined),
- update: vi.fn().mockReturnThis(),
- set: vi.fn().mockReturnThis(),
- returning: vi.fn().mockResolvedValue([]),
- delete: vi.fn().mockReturnThis(),
- transaction: vi.fn(),
- }
-
- const mockGetUserId = vi.fn()
-
- beforeEach(async () => {
- vi.clearAllMocks()
-
- vi.doMock('@/db', () => ({
- db: mockDbChain,
- }))
-
- vi.doMock('@/app/api/auth/oauth/utils', () => ({
- getUserId: mockGetUserId,
- }))
-
- Object.values(mockDbChain).forEach((fn) => {
- if (typeof fn === 'function' && fn !== mockDbChain.values && fn !== mockDbChain.returning) {
- fn.mockClear().mockReturnThis()
- }
- })
-
- vi.stubGlobal('crypto', {
- randomUUID: vi.fn().mockReturnValue('mock-chunk-uuid-1234'),
- createHash: vi.fn().mockReturnValue({
- update: vi.fn().mockReturnThis(),
- digest: vi.fn().mockReturnValue('mock-hash-123'),
- }),
- })
- })
-
- afterEach(() => {
- vi.clearAllMocks()
- })
-
- describe('POST /api/knowledge/[id]/documents/[documentId]/chunks', () => {
- const validChunkData = {
- content: 'This is test chunk content for uploading to the knowledge base document.',
- enabled: true,
- }
-
- const mockDocumentAccess = {
- hasAccess: true,
- notFound: false,
- reason: '',
- document: {
- id: 'doc-123',
- processingStatus: 'completed',
- tag1: 'tag1-value',
- tag2: 'tag2-value',
- tag3: null,
- tag4: null,
- tag5: null,
- tag6: null,
- tag7: null,
- },
- }
-
- const mockParams = Promise.resolve({ id: 'kb-123', documentId: 'doc-123' })
-
- it('should create chunk successfully with cost tracking', async () => {
- const { checkDocumentWriteAccess, generateEmbeddings } = await import(
- '@/app/api/knowledge/utils'
- )
- const { estimateTokenCount } = await import('@/lib/tokenization/estimators')
- const { calculateCost } = await import('@/providers/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- ...mockDocumentAccess,
- knowledgeBase: { id: 'kb-123', userId: 'user-123' },
- } as any)
-
- // Mock generateEmbeddings
- vi.mocked(generateEmbeddings).mockResolvedValue([[0.1, 0.2, 0.3]])
-
- // Mock transaction
- const mockTx = {
- select: vi.fn().mockReturnThis(),
- from: vi.fn().mockReturnThis(),
- where: vi.fn().mockReturnThis(),
- orderBy: vi.fn().mockReturnThis(),
- limit: vi.fn().mockResolvedValue([{ chunkIndex: 0 }]),
- insert: vi.fn().mockReturnThis(),
- values: vi.fn().mockResolvedValue(undefined),
- update: vi.fn().mockReturnThis(),
- set: vi.fn().mockReturnThis(),
- }
-
- mockDbChain.transaction.mockImplementation(async (callback) => {
- return await callback(mockTx)
- })
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(200)
- expect(data.success).toBe(true)
-
- // Verify cost tracking
- expect(data.data.cost).toBeDefined()
- expect(data.data.cost.input).toBe(0.00000904)
- expect(data.data.cost.output).toBe(0)
- expect(data.data.cost.total).toBe(0.00000904)
- expect(data.data.cost.tokens).toEqual({
- prompt: 452,
- completion: 0,
- total: 452,
- })
- expect(data.data.cost.model).toBe('text-embedding-3-small')
- expect(data.data.cost.pricing).toEqual({
- input: 0.02,
- output: 0,
- updatedAt: '2025-07-10',
- })
-
- // Verify function calls
- expect(estimateTokenCount).toHaveBeenCalledWith(validChunkData.content, 'openai')
- expect(calculateCost).toHaveBeenCalledWith('text-embedding-3-small', 452, 0, false)
- })
-
- it('should handle workflow-based authentication', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- const workflowData = {
- ...validChunkData,
- workflowId: 'workflow-123',
- }
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- ...mockDocumentAccess,
- knowledgeBase: { id: 'kb-123', userId: 'user-123' },
- } as any)
-
- const mockTx = {
- select: vi.fn().mockReturnThis(),
- from: vi.fn().mockReturnThis(),
- where: vi.fn().mockReturnThis(),
- orderBy: vi.fn().mockReturnThis(),
- limit: vi.fn().mockResolvedValue([]),
- insert: vi.fn().mockReturnThis(),
- values: vi.fn().mockResolvedValue(undefined),
- update: vi.fn().mockReturnThis(),
- set: vi.fn().mockReturnThis(),
- }
-
- mockDbChain.transaction.mockImplementation(async (callback) => {
- return await callback(mockTx)
- })
-
- const req = createMockRequest('POST', workflowData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(200)
- expect(data.success).toBe(true)
- expect(mockGetUserId).toHaveBeenCalledWith(expect.any(String), 'workflow-123')
- })
-
- it.concurrent('should return unauthorized for unauthenticated request', async () => {
- mockGetUserId.mockResolvedValue(null)
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(401)
- expect(data.error).toBe('Unauthorized')
- })
-
- it('should return not found for workflow that does not exist', async () => {
- const workflowData = {
- ...validChunkData,
- workflowId: 'nonexistent-workflow',
- }
-
- mockGetUserId.mockResolvedValue(null)
-
- const req = createMockRequest('POST', workflowData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(404)
- expect(data.error).toBe('Workflow not found')
- })
-
- it.concurrent('should return not found for document access denied', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- hasAccess: false,
- notFound: true,
- reason: 'Document not found',
- })
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(404)
- expect(data.error).toBe('Document not found')
- })
-
- it('should return unauthorized for unauthorized document access', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- hasAccess: false,
- notFound: false,
- reason: 'Unauthorized access',
- })
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(401)
- expect(data.error).toBe('Unauthorized')
- })
-
- it('should reject chunks for failed documents', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- ...mockDocumentAccess,
- document: {
- ...mockDocumentAccess.document!,
- processingStatus: 'failed',
- },
- knowledgeBase: { id: 'kb-123', userId: 'user-123' },
- } as any)
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(400)
- expect(data.error).toBe('Cannot add chunks to failed document')
- })
-
- it.concurrent('should validate chunk data', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- ...mockDocumentAccess,
- knowledgeBase: { id: 'kb-123', userId: 'user-123' },
- } as any)
-
- const invalidData = {
- content: '', // Empty content
- enabled: true,
- }
-
- const req = createMockRequest('POST', invalidData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- const response = await POST(req, { params: mockParams })
- const data = await response.json()
-
- expect(response.status).toBe(400)
- expect(data.error).toBe('Invalid request data')
- expect(data.details).toBeDefined()
- })
-
- it('should inherit tags from parent document', async () => {
- const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
-
- mockGetUserId.mockResolvedValue('user-123')
- vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
- ...mockDocumentAccess,
- knowledgeBase: { id: 'kb-123', userId: 'user-123' },
- } as any)
-
- const mockTx = {
- select: vi.fn().mockReturnThis(),
- from: vi.fn().mockReturnThis(),
- where: vi.fn().mockReturnThis(),
- orderBy: vi.fn().mockReturnThis(),
- limit: vi.fn().mockResolvedValue([]),
- insert: vi.fn().mockReturnThis(),
- values: vi.fn().mockImplementation((data) => {
- // Verify that tags are inherited from document
- expect(data.tag1).toBe('tag1-value')
- expect(data.tag2).toBe('tag2-value')
- expect(data.tag3).toBe(null)
- return Promise.resolve(undefined)
- }),
- update: vi.fn().mockReturnThis(),
- set: vi.fn().mockReturnThis(),
- }
-
- mockDbChain.transaction.mockImplementation(async (callback) => {
- return await callback(mockTx)
- })
-
- const req = createMockRequest('POST', validChunkData)
- const { POST } = await import('@/app/api/knowledge/[id]/documents/[documentId]/chunks/route')
- await POST(req, { params: mockParams })
-
- expect(mockTx.values).toHaveBeenCalled()
- })
-
- // REMOVED: "should handle cost calculation with different content lengths" test - it was failing
- })
-})
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts
index f529e4f96..4ce12ff38 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts
@@ -1,18 +1,11 @@
import crypto from 'crypto'
-import { and, asc, eq, ilike, inArray, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import { batchChunkOperation, createChunk, queryChunks } from '@/lib/knowledge/chunks/service'
import { createLogger } from '@/lib/logs/console/logger'
-import { estimateTokenCount } from '@/lib/tokenization/estimators'
import { getUserId } from '@/app/api/auth/oauth/utils'
-import {
- checkDocumentAccess,
- checkDocumentWriteAccess,
- generateEmbeddings,
-} from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, embedding } from '@/db/schema'
+import { checkDocumentAccess, checkDocumentWriteAccess } from '@/app/api/knowledge/utils'
import { calculateCost } from '@/providers/utils'
const logger = createLogger('DocumentChunksAPI')
@@ -66,7 +59,6 @@ export async function GET(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if document processing is completed
const doc = accessCheck.document
if (!doc) {
logger.warn(
@@ -89,7 +81,6 @@ export async function GET(
)
}
- // Parse query parameters
const { searchParams } = new URL(req.url)
const queryParams = GetChunksQuerySchema.parse({
search: searchParams.get('search') || undefined,
@@ -98,67 +89,12 @@ export async function GET(
offset: searchParams.get('offset') || undefined,
})
- // Build query conditions
- const conditions = [eq(embedding.documentId, documentId)]
-
- // Add enabled filter
- if (queryParams.enabled === 'true') {
- conditions.push(eq(embedding.enabled, true))
- } else if (queryParams.enabled === 'false') {
- conditions.push(eq(embedding.enabled, false))
- }
-
- // Add search filter
- if (queryParams.search) {
- conditions.push(ilike(embedding.content, `%${queryParams.search}%`))
- }
-
- // Fetch chunks
- const chunks = await db
- .select({
- id: embedding.id,
- chunkIndex: embedding.chunkIndex,
- content: embedding.content,
- contentLength: embedding.contentLength,
- tokenCount: embedding.tokenCount,
- enabled: embedding.enabled,
- startOffset: embedding.startOffset,
- endOffset: embedding.endOffset,
- tag1: embedding.tag1,
- tag2: embedding.tag2,
- tag3: embedding.tag3,
- tag4: embedding.tag4,
- tag5: embedding.tag5,
- tag6: embedding.tag6,
- tag7: embedding.tag7,
- createdAt: embedding.createdAt,
- updatedAt: embedding.updatedAt,
- })
- .from(embedding)
- .where(and(...conditions))
- .orderBy(asc(embedding.chunkIndex))
- .limit(queryParams.limit)
- .offset(queryParams.offset)
-
- // Get total count for pagination
- const totalCount = await db
- .select({ count: sql`count(*)` })
- .from(embedding)
- .where(and(...conditions))
-
- logger.info(
- `[${requestId}] Retrieved ${chunks.length} chunks for document ${documentId} in knowledge base ${knowledgeBaseId}`
- )
+ const result = await queryChunks(documentId, queryParams, requestId)
return NextResponse.json({
success: true,
- data: chunks,
- pagination: {
- total: Number(totalCount[0]?.count || 0),
- limit: queryParams.limit,
- offset: queryParams.offset,
- hasMore: chunks.length === queryParams.limit,
- },
+ data: result.chunks,
+ pagination: result.pagination,
})
} catch (error) {
logger.error(`[${requestId}] Error fetching chunks`, error)
@@ -219,76 +155,27 @@ export async function POST(
try {
const validatedData = CreateChunkSchema.parse(searchParams)
- // Generate embedding for the content first (outside transaction for performance)
- logger.info(`[${requestId}] Generating embedding for manual chunk`)
- const embeddings = await generateEmbeddings([validatedData.content])
+ const docTags = {
+ tag1: doc.tag1 ?? null,
+ tag2: doc.tag2 ?? null,
+ tag3: doc.tag3 ?? null,
+ tag4: doc.tag4 ?? null,
+ tag5: doc.tag5 ?? null,
+ tag6: doc.tag6 ?? null,
+ tag7: doc.tag7 ?? null,
+ }
- // Calculate accurate token count for both database storage and cost calculation
- const tokenCount = estimateTokenCount(validatedData.content, 'openai')
+ const newChunk = await createChunk(
+ knowledgeBaseId,
+ documentId,
+ docTags,
+ validatedData,
+ requestId
+ )
- const chunkId = crypto.randomUUID()
- const now = new Date()
-
- // Use transaction to atomically get next index and insert chunk
- const newChunk = await db.transaction(async (tx) => {
- // Get the next chunk index atomically within the transaction
- const lastChunk = await tx
- .select({ chunkIndex: embedding.chunkIndex })
- .from(embedding)
- .where(eq(embedding.documentId, documentId))
- .orderBy(sql`${embedding.chunkIndex} DESC`)
- .limit(1)
-
- const nextChunkIndex = lastChunk.length > 0 ? lastChunk[0].chunkIndex + 1 : 0
-
- const chunkData = {
- id: chunkId,
- knowledgeBaseId,
- documentId,
- chunkIndex: nextChunkIndex,
- chunkHash: crypto.createHash('sha256').update(validatedData.content).digest('hex'),
- content: validatedData.content,
- contentLength: validatedData.content.length,
- tokenCount: tokenCount.count, // Use accurate token count
- embedding: embeddings[0],
- embeddingModel: 'text-embedding-3-small',
- startOffset: 0, // Manual chunks don't have document offsets
- endOffset: validatedData.content.length,
- // Inherit tags from parent document
- tag1: doc.tag1,
- tag2: doc.tag2,
- tag3: doc.tag3,
- tag4: doc.tag4,
- tag5: doc.tag5,
- tag6: doc.tag6,
- tag7: doc.tag7,
- enabled: validatedData.enabled,
- createdAt: now,
- updatedAt: now,
- }
-
- // Insert the new chunk
- await tx.insert(embedding).values(chunkData)
-
- // Update document statistics
- await tx
- .update(document)
- .set({
- chunkCount: sql`${document.chunkCount} + 1`,
- tokenCount: sql`${document.tokenCount} + ${chunkData.tokenCount}`,
- characterCount: sql`${document.characterCount} + ${chunkData.contentLength}`,
- })
- .where(eq(document.id, documentId))
-
- return chunkData
- })
-
- logger.info(`[${requestId}] Manual chunk created: ${chunkId} in document ${documentId}`)
-
- // Calculate cost for the embedding (with fallback if calculation fails)
let cost = null
try {
- cost = calculateCost('text-embedding-3-small', tokenCount.count, 0, false)
+ cost = calculateCost('text-embedding-3-small', newChunk.tokenCount, 0, false)
} catch (error) {
logger.warn(`[${requestId}] Failed to calculate cost for chunk upload`, {
error: error instanceof Error ? error.message : 'Unknown error',
@@ -307,9 +194,9 @@ export async function POST(
output: cost.output,
total: cost.total,
tokens: {
- prompt: tokenCount.count,
+ prompt: newChunk.tokenCount,
completion: 0,
- total: tokenCount.count,
+ total: newChunk.tokenCount,
},
model: 'text-embedding-3-small',
pricing: cost.pricing,
@@ -371,92 +258,16 @@ export async function PATCH(
const validatedData = BatchOperationSchema.parse(body)
const { operation, chunkIds } = validatedData
- logger.info(
- `[${requestId}] Starting batch ${operation} operation on ${chunkIds.length} chunks for document ${documentId}`
- )
-
- const results = []
- let successCount = 0
- const errorCount = 0
-
- if (operation === 'delete') {
- // Handle batch delete with transaction for consistency
- await db.transaction(async (tx) => {
- // Get chunks to delete for statistics update
- const chunksToDelete = await tx
- .select({
- id: embedding.id,
- tokenCount: embedding.tokenCount,
- contentLength: embedding.contentLength,
- })
- .from(embedding)
- .where(and(eq(embedding.documentId, documentId), inArray(embedding.id, chunkIds)))
-
- if (chunksToDelete.length === 0) {
- throw new Error('No valid chunks found to delete')
- }
-
- // Delete chunks
- await tx
- .delete(embedding)
- .where(and(eq(embedding.documentId, documentId), inArray(embedding.id, chunkIds)))
-
- // Update document statistics
- const totalTokens = chunksToDelete.reduce((sum, chunk) => sum + chunk.tokenCount, 0)
- const totalCharacters = chunksToDelete.reduce(
- (sum, chunk) => sum + chunk.contentLength,
- 0
- )
-
- await tx
- .update(document)
- .set({
- chunkCount: sql`${document.chunkCount} - ${chunksToDelete.length}`,
- tokenCount: sql`${document.tokenCount} - ${totalTokens}`,
- characterCount: sql`${document.characterCount} - ${totalCharacters}`,
- })
- .where(eq(document.id, documentId))
-
- successCount = chunksToDelete.length
- results.push({
- operation: 'delete',
- deletedCount: chunksToDelete.length,
- chunkIds: chunksToDelete.map((c) => c.id),
- })
- })
- } else {
- // Handle batch enable/disable
- const enabled = operation === 'enable'
-
- // Update chunks in a single query
- const updateResult = await db
- .update(embedding)
- .set({
- enabled,
- updatedAt: new Date(),
- })
- .where(and(eq(embedding.documentId, documentId), inArray(embedding.id, chunkIds)))
- .returning({ id: embedding.id })
-
- successCount = updateResult.length
- results.push({
- operation,
- updatedCount: updateResult.length,
- chunkIds: updateResult.map((r) => r.id),
- })
- }
-
- logger.info(
- `[${requestId}] Batch ${operation} operation completed: ${successCount} successful, ${errorCount} errors`
- )
+ const result = await batchChunkOperation(documentId, operation, chunkIds, requestId)
return NextResponse.json({
success: true,
data: {
operation,
- successCount,
- errorCount,
- results,
+ successCount: result.processed,
+ errorCount: result.errors.length,
+ processed: result.processed,
+ errors: result.errors,
},
})
} catch (validationError) {
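Likewise, the remaining service calls in this route imply contracts along these lines (shapes taken from the usage above; inferred, not authoritative):

```ts
// Inferred contracts for the chunk service functions introduced in this route.
declare function queryChunks(
  documentId: string,
  params: { search?: string; enabled?: string; limit: number; offset: number },
  requestId: string
): Promise<{
  chunks: unknown[]
  pagination: { total: number; limit: number; offset: number; hasMore: boolean }
}>

declare function createChunk(
  knowledgeBaseId: string,
  documentId: string,
  docTags: Record<string, string | null>,
  data: { content: string; enabled: boolean },
  requestId: string
): Promise<{ id: string; tokenCount: number }> // embedding + document stats handled inside

declare function batchChunkOperation(
  documentId: string,
  operation: 'enable' | 'disable' | 'delete',
  chunkIds: string[],
  requestId: string
): Promise<{ processed: number; errors: unknown[] }>
```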
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.test.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.test.ts
index 302d5f0b1..8d3449407 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.test.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.test.ts
@@ -24,7 +24,14 @@ vi.mock('@/app/api/knowledge/utils', () => ({
processDocumentAsync: vi.fn(),
}))
-// Setup common mocks
+vi.mock('@/lib/knowledge/documents/service', () => ({
+ updateDocument: vi.fn(),
+ deleteDocument: vi.fn(),
+ markDocumentAsFailedTimeout: vi.fn(),
+ retryDocumentProcessing: vi.fn(),
+ processDocumentAsync: vi.fn(),
+}))
+
mockDrizzleOrm()
mockConsoleLogger()
@@ -42,8 +49,6 @@ describe('Document By ID API Route', () => {
transaction: vi.fn(),
}
- // Mock functions will be imported dynamically in tests
-
const mockDocument = {
id: 'doc-123',
knowledgeBaseId: 'kb-123',
@@ -73,7 +78,6 @@ describe('Document By ID API Route', () => {
}
}
})
- // Mock functions are cleared automatically by vitest
}
beforeEach(async () => {
@@ -83,8 +87,6 @@ describe('Document By ID API Route', () => {
db: mockDbChain,
}))
- // Utils are mocked at the top level
-
vi.stubGlobal('crypto', {
randomUUID: vi.fn().mockReturnValue('mock-uuid-1234-5678'),
})
@@ -195,6 +197,7 @@ describe('Document By ID API Route', () => {
it('should update document successfully', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { updateDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
@@ -203,31 +206,12 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Create a sequence of mocks for the database operations
- const updateChain = {
- set: vi.fn().mockReturnValue({
- where: vi.fn().mockResolvedValue(undefined), // Update operation completes
- }),
+ const updatedDocument = {
+ ...mockDocument,
+ ...validUpdateData,
+ deletedAt: null,
}
-
- const selectChain = {
- from: vi.fn().mockReturnValue({
- where: vi.fn().mockReturnValue({
- limit: vi.fn().mockResolvedValue([{ ...mockDocument, ...validUpdateData }]),
- }),
- }),
- }
-
- // Mock transaction
- mockDbChain.transaction.mockImplementation(async (callback) => {
- const mockTx = {
- update: vi.fn().mockReturnValue(updateChain),
- }
- await callback(mockTx)
- })
-
- // Mock db operations in sequence
- mockDbChain.select.mockReturnValue(selectChain)
+ vi.mocked(updateDocument).mockResolvedValue(updatedDocument)
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
@@ -238,8 +222,11 @@ describe('Document By ID API Route', () => {
expect(data.success).toBe(true)
expect(data.data.filename).toBe('updated-document.pdf')
expect(data.data.enabled).toBe(false)
- expect(mockDbChain.transaction).toHaveBeenCalled()
- expect(mockDbChain.select).toHaveBeenCalled()
+ expect(vi.mocked(updateDocument)).toHaveBeenCalledWith(
+ 'doc-123',
+ validUpdateData,
+ expect.any(String)
+ )
})
it('should validate update data', async () => {
@@ -274,6 +261,7 @@ describe('Document By ID API Route', () => {
it('should mark document as failed due to timeout successfully', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { markDocumentAsFailedTimeout } = await import('@/lib/knowledge/documents/service')
const processingDocument = {
...mockDocument,
@@ -288,34 +276,11 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Create a sequence of mocks for the database operations
- const updateChain = {
- set: vi.fn().mockReturnValue({
- where: vi.fn().mockResolvedValue(undefined), // Update operation completes
- }),
- }
-
- const selectChain = {
- from: vi.fn().mockReturnValue({
- where: vi.fn().mockReturnValue({
- limit: vi
- .fn()
- .mockResolvedValue([{ ...processingDocument, processingStatus: 'failed' }]),
- }),
- }),
- }
-
- // Mock transaction
- mockDbChain.transaction.mockImplementation(async (callback) => {
- const mockTx = {
- update: vi.fn().mockReturnValue(updateChain),
- }
- await callback(mockTx)
+ vi.mocked(markDocumentAsFailedTimeout).mockResolvedValue({
+ success: true,
+ processingDuration: 200000,
})
- // Mock db operations in sequence
- mockDbChain.select.mockReturnValue(selectChain)
-
const req = createMockRequest('PUT', { markFailedDueToTimeout: true })
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
const response = await PUT(req, { params: mockParams })
@@ -323,13 +288,13 @@ describe('Document By ID API Route', () => {
expect(response.status).toBe(200)
expect(data.success).toBe(true)
- expect(mockDbChain.transaction).toHaveBeenCalled()
- expect(updateChain.set).toHaveBeenCalledWith(
- expect.objectContaining({
- processingStatus: 'failed',
- processingError: 'Processing timed out - background process may have been terminated',
- processingCompletedAt: expect.any(Date),
- })
+ expect(data.data.documentId).toBe('doc-123')
+ expect(data.data.status).toBe('failed')
+ expect(data.data.message).toBe('Document marked as failed due to timeout')
+ expect(vi.mocked(markDocumentAsFailedTimeout)).toHaveBeenCalledWith(
+ 'doc-123',
+ processingDocument.processingStartedAt,
+ expect.any(String)
)
})
@@ -354,6 +319,7 @@ describe('Document By ID API Route', () => {
it('should reject marking failed for recently started processing', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { markDocumentAsFailedTimeout } = await import('@/lib/knowledge/documents/service')
const recentProcessingDocument = {
...mockDocument,
@@ -368,6 +334,10 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
+ vi.mocked(markDocumentAsFailedTimeout).mockRejectedValue(
+ new Error('Document has not been processing long enough to be considered dead')
+ )
+
const req = createMockRequest('PUT', { markFailedDueToTimeout: true })
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
const response = await PUT(req, { params: mockParams })
@@ -382,9 +352,8 @@ describe('Document By ID API Route', () => {
const mockParams = Promise.resolve({ id: 'kb-123', documentId: 'doc-123' })
it('should retry processing successfully', async () => {
- const { checkDocumentWriteAccess, processDocumentAsync } = await import(
- '@/app/api/knowledge/utils'
- )
+ const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { retryDocumentProcessing } = await import('@/lib/knowledge/documents/service')
const failedDocument = {
...mockDocument,
@@ -399,23 +368,12 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock transaction
- mockDbChain.transaction.mockImplementation(async (callback) => {
- const mockTx = {
- delete: vi.fn().mockReturnValue({
- where: vi.fn().mockResolvedValue(undefined),
- }),
- update: vi.fn().mockReturnValue({
- set: vi.fn().mockReturnValue({
- where: vi.fn().mockResolvedValue(undefined),
- }),
- }),
- }
- return await callback(mockTx)
+ vi.mocked(retryDocumentProcessing).mockResolvedValue({
+ success: true,
+ status: 'pending',
+ message: 'Document retry processing started',
})
- vi.mocked(processDocumentAsync).mockResolvedValue(undefined)
-
const req = createMockRequest('PUT', { retryProcessing: true })
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
const response = await PUT(req, { params: mockParams })
@@ -425,8 +383,17 @@ describe('Document By ID API Route', () => {
expect(data.success).toBe(true)
expect(data.data.status).toBe('pending')
expect(data.data.message).toBe('Document retry processing started')
- expect(mockDbChain.transaction).toHaveBeenCalled()
- expect(vi.mocked(processDocumentAsync)).toHaveBeenCalled()
+ expect(vi.mocked(retryDocumentProcessing)).toHaveBeenCalledWith(
+ 'kb-123',
+ 'doc-123',
+ {
+ filename: failedDocument.filename,
+ fileUrl: failedDocument.fileUrl,
+ fileSize: failedDocument.fileSize,
+ mimeType: failedDocument.mimeType,
+ },
+ expect.any(String)
+ )
})
it('should reject retry for non-failed document', async () => {
@@ -486,6 +453,7 @@ describe('Document By ID API Route', () => {
it('should handle database errors during update', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { updateDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
@@ -494,8 +462,7 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock transaction to throw an error
- mockDbChain.transaction.mockRejectedValue(new Error('Database error'))
+ vi.mocked(updateDocument).mockRejectedValue(new Error('Database error'))
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
@@ -512,6 +479,7 @@ describe('Document By ID API Route', () => {
it('should delete document successfully', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { deleteDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
@@ -520,10 +488,10 @@ describe('Document By ID API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Properly chain the mock database operations for soft delete
- mockDbChain.update.mockReturnValue(mockDbChain)
- mockDbChain.set.mockReturnValue(mockDbChain)
- mockDbChain.where.mockResolvedValue(undefined) // Update operation resolves
+ vi.mocked(deleteDocument).mockResolvedValue({
+ success: true,
+ message: 'Document deleted successfully',
+ })
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
@@ -533,12 +501,7 @@ describe('Document By ID API Route', () => {
expect(response.status).toBe(200)
expect(data.success).toBe(true)
expect(data.data.message).toBe('Document deleted successfully')
- expect(mockDbChain.update).toHaveBeenCalled()
- expect(mockDbChain.set).toHaveBeenCalledWith(
- expect.objectContaining({
- deletedAt: expect.any(Date),
- })
- )
+ expect(vi.mocked(deleteDocument)).toHaveBeenCalledWith('doc-123', expect.any(String))
})
it('should return unauthorized for unauthenticated user', async () => {
@@ -592,6 +555,7 @@ describe('Document By ID API Route', () => {
it('should handle database errors during deletion', async () => {
const { checkDocumentWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { deleteDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkDocumentWriteAccess).mockResolvedValue({
@@ -599,7 +563,7 @@ describe('Document By ID API Route', () => {
document: mockDocument,
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- mockDbChain.set.mockRejectedValue(new Error('Database error'))
+ vi.mocked(deleteDocument).mockRejectedValue(new Error('Database error'))
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')
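These spec hunks call `vi.mocked(...)` on the service functions, which only works if the file hoists a module mock for `@/lib/knowledge/documents/service`. A minimal sketch of that setup follows; the factory mirrors the one added to the documents `route.test.ts` further down, but this exact member list for the `[documentId]` spec is an assumption, since the patch does not show that hunk:

```ts
// Hypothetical hoisted mock for the [documentId] spec file. Vitest hoists
// vi.mock() above imports, so every named export the route pulls in must be
// stubbed in the factory, or the route import will throw at test time.
import { vi } from 'vitest'

vi.mock('@/lib/knowledge/documents/service', () => ({
  updateDocument: vi.fn(),
  deleteDocument: vi.fn(),
  markDocumentAsFailedTimeout: vi.fn(),
  retryDocumentProcessing: vi.fn(),
}))
```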
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.ts
index 3d462f9bf..43f7f051b 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/route.ts
@@ -1,16 +1,14 @@
-import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
-import { TAG_SLOTS } from '@/lib/constants/knowledge'
-import { createLogger } from '@/lib/logs/console/logger'
import {
- checkDocumentAccess,
- checkDocumentWriteAccess,
- processDocumentAsync,
-} from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, embedding } from '@/db/schema'
+ deleteDocument,
+ markDocumentAsFailedTimeout,
+ retryDocumentProcessing,
+ updateDocument,
+} from '@/lib/knowledge/documents/service'
+import { createLogger } from '@/lib/logs/console/logger'
+import { checkDocumentAccess, checkDocumentWriteAccess } from '@/app/api/knowledge/utils'
const logger = createLogger('DocumentByIdAPI')
@@ -113,9 +111,7 @@ export async function PUT(
const updateData: any = {}
- // Handle special operations first
if (validatedData.markFailedDueToTimeout) {
- // Mark document as failed due to timeout (replaces mark-failed endpoint)
const doc = accessCheck.document
if (doc.processingStatus !== 'processing') {
@@ -132,58 +128,30 @@ export async function PUT(
)
}
- const now = new Date()
- const processingDuration = now.getTime() - new Date(doc.processingStartedAt).getTime()
- const DEAD_PROCESS_THRESHOLD_MS = 150 * 1000
+ try {
+ await markDocumentAsFailedTimeout(documentId, doc.processingStartedAt, requestId)
- if (processingDuration <= DEAD_PROCESS_THRESHOLD_MS) {
- return NextResponse.json(
- { error: 'Document has not been processing long enough to be considered dead' },
- { status: 400 }
- )
+ return NextResponse.json({
+ success: true,
+ data: {
+ documentId,
+ status: 'failed',
+ message: 'Document marked as failed due to timeout',
+ },
+ })
+ } catch (error) {
+ if (error instanceof Error) {
+ return NextResponse.json({ error: error.message }, { status: 400 })
+ }
+ throw error
}
-
- updateData.processingStatus = 'failed'
- updateData.processingError =
- 'Processing timed out - background process may have been terminated'
- updateData.processingCompletedAt = now
-
- logger.info(
- `[${requestId}] Marked document ${documentId} as failed due to dead process (processing time: ${Math.round(processingDuration / 1000)}s)`
- )
} else if (validatedData.retryProcessing) {
- // Retry processing (replaces retry endpoint)
const doc = accessCheck.document
if (doc.processingStatus !== 'failed') {
return NextResponse.json({ error: 'Document is not in failed state' }, { status: 400 })
}
- // Clear existing embeddings and reset document state
- await db.transaction(async (tx) => {
- await tx.delete(embedding).where(eq(embedding.documentId, documentId))
-
- await tx
- .update(document)
- .set({
- processingStatus: 'pending',
- processingStartedAt: null,
- processingCompletedAt: null,
- processingError: null,
- chunkCount: 0,
- tokenCount: 0,
- characterCount: 0,
- })
- .where(eq(document.id, documentId))
- })
-
- const processingOptions = {
- chunkSize: 1024,
- minCharactersPerChunk: 24,
- recipe: 'default',
- lang: 'en',
- }
-
const docData = {
filename: doc.filename,
fileUrl: doc.fileUrl,
@@ -191,80 +159,33 @@ export async function PUT(
mimeType: doc.mimeType,
}
- processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions).catch(
- (error: unknown) => {
- logger.error(`[${requestId}] Background retry processing error:`, error)
- }
+ const result = await retryDocumentProcessing(
+ knowledgeBaseId,
+ documentId,
+ docData,
+ requestId
)
- logger.info(`[${requestId}] Document retry initiated: ${documentId}`)
-
return NextResponse.json({
success: true,
data: {
documentId,
- status: 'pending',
- message: 'Document retry processing started',
+ status: result.status,
+ message: result.message,
},
})
} else {
- // Regular field updates
- if (validatedData.filename !== undefined) updateData.filename = validatedData.filename
- if (validatedData.enabled !== undefined) updateData.enabled = validatedData.enabled
- if (validatedData.chunkCount !== undefined) updateData.chunkCount = validatedData.chunkCount
- if (validatedData.tokenCount !== undefined) updateData.tokenCount = validatedData.tokenCount
- if (validatedData.characterCount !== undefined)
- updateData.characterCount = validatedData.characterCount
- if (validatedData.processingStatus !== undefined)
- updateData.processingStatus = validatedData.processingStatus
- if (validatedData.processingError !== undefined)
- updateData.processingError = validatedData.processingError
+ const updatedDocument = await updateDocument(documentId, validatedData, requestId)
- // Tag field updates
- TAG_SLOTS.forEach((slot) => {
- if ((validatedData as any)[slot] !== undefined) {
- ;(updateData as any)[slot] = (validatedData as any)[slot]
- }
+ logger.info(
+ `[${requestId}] Document updated: ${documentId} in knowledge base ${knowledgeBaseId}`
+ )
+
+ return NextResponse.json({
+ success: true,
+ data: updatedDocument,
})
}
-
- await db.transaction(async (tx) => {
- // Update the document
- await tx.update(document).set(updateData).where(eq(document.id, documentId))
-
- // If any tag fields were updated, also update the embeddings
- const hasTagUpdates = TAG_SLOTS.some((field) => (validatedData as any)[field] !== undefined)
-
- if (hasTagUpdates) {
- const embeddingUpdateData: Record<string, string | null> = {}
- TAG_SLOTS.forEach((field) => {
- if ((validatedData as any)[field] !== undefined) {
- embeddingUpdateData[field] = (validatedData as any)[field] || null
- }
- })
-
- await tx
- .update(embedding)
- .set(embeddingUpdateData)
- .where(eq(embedding.documentId, documentId))
- }
- })
-
- // Fetch the updated document
- const updatedDocument = await db
- .select()
- .from(document)
- .where(eq(document.id, documentId))
- .limit(1)
-
- logger.info(
- `[${requestId}] Document updated: ${documentId} in knowledge base ${knowledgeBaseId}`
- )
-
- return NextResponse.json({
- success: true,
- data: updatedDocument[0],
- })
} catch (validationError) {
if (validationError instanceof z.ZodError) {
logger.warn(`[${requestId}] Invalid document update data`, {
@@ -313,13 +234,7 @@ export async function DELETE(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Soft delete by setting deletedAt timestamp
- await db
- .update(document)
- .set({
- deletedAt: new Date(),
- })
- .where(eq(document.id, documentId))
+ const result = await deleteDocument(documentId, requestId)
logger.info(
`[${requestId}] Document deleted: ${documentId} from knowledge base ${knowledgeBaseId}`
@@ -327,7 +242,7 @@ export async function DELETE(
return NextResponse.json({
success: true,
- data: { message: 'Document deleted successfully' },
+ data: result,
})
} catch (error) {
logger.error(`[${requestId}] Error deleting document`, error)
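The PUT handler now treats the timeout check as a service concern and maps any thrown `Error` to a 400. A sketch of the contract this implies for `markDocumentAsFailedTimeout` is below; only the signature, the resolved shape, and the thrown message are pinned down by the route and its tests, while the threshold and error strings are carried over from the deleted inline code and the persistence step is elided, so the body as a whole is an assumption:

```ts
// Sketch only: reconstructed from the deleted inline logic, not the actual
// service implementation.
const DEAD_PROCESS_THRESHOLD_MS = 150 * 1000

export async function markDocumentAsFailedTimeout(
  documentId: string,
  processingStartedAt: Date,
  requestId: string
): Promise<{ success: boolean; processingDuration: number }> {
  const processingDuration = Date.now() - new Date(processingStartedAt).getTime()

  if (processingDuration <= DEAD_PROCESS_THRESHOLD_MS) {
    // The route converts this into a 400 response carrying the same message.
    throw new Error('Document has not been processing long enough to be considered dead')
  }

  // ...persist processingStatus: 'failed', processingError, processingCompletedAt...

  return { success: true, processingDuration }
}
```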
diff --git a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/tag-definitions/route.ts b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/tag-definitions/route.ts
index de013a3e3..2da59e975 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/[documentId]/tag-definitions/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/[documentId]/tag-definitions/route.ts
@@ -1,17 +1,17 @@
import { randomUUID } from 'crypto'
-import { and, eq, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import { SUPPORTED_FIELD_TYPES } from '@/lib/constants/knowledge'
import {
- getMaxSlotsForFieldType,
- getSlotsForFieldType,
- SUPPORTED_FIELD_TYPES,
-} from '@/lib/constants/knowledge'
+ cleanupUnusedTagDefinitions,
+ createOrUpdateTagDefinitionsBulk,
+ deleteAllTagDefinitions,
+ getDocumentTagDefinitions,
+} from '@/lib/knowledge/tags/service'
+import type { BulkTagDefinitionsData } from '@/lib/knowledge/tags/types'
import { createLogger } from '@/lib/logs/console/logger'
-import { checkKnowledgeBaseAccess, checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, knowledgeBaseTagDefinitions } from '@/db/schema'
+import { checkDocumentAccess, checkDocumentWriteAccess } from '@/app/api/knowledge/utils'
export const dynamic = 'force-dynamic'
@@ -29,106 +29,6 @@ const BulkTagDefinitionsSchema = z.object({
definitions: z.array(TagDefinitionSchema),
})
-// Helper function to get the next available slot for a knowledge base and field type
-async function getNextAvailableSlot(
- knowledgeBaseId: string,
- fieldType: string,
- existingBySlot?: Map<string, typeof knowledgeBaseTagDefinitions.$inferSelect>
-): Promise<string | null> {
- // Get available slots for this field type
- const availableSlots = getSlotsForFieldType(fieldType)
- let usedSlots: Set<string>
-
- if (existingBySlot) {
- // Use provided map if available (for performance in batch operations)
- // Filter by field type
- usedSlots = new Set(
- Array.from(existingBySlot.entries())
- .filter(([_, def]) => def.fieldType === fieldType)
- .map(([slot, _]) => slot)
- )
- } else {
- // Query database for existing tag definitions of the same field type
- const existingDefinitions = await db
- .select({ tagSlot: knowledgeBaseTagDefinitions.tagSlot })
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId),
- eq(knowledgeBaseTagDefinitions.fieldType, fieldType)
- )
- )
-
- usedSlots = new Set(existingDefinitions.map((def) => def.tagSlot))
- }
-
- // Find the first available slot for this field type
- for (const slot of availableSlots) {
- if (!usedSlots.has(slot)) {
- return slot
- }
- }
-
- return null // No available slots for this field type
-}
-
-// Helper function to clean up unused tag definitions
-async function cleanupUnusedTagDefinitions(knowledgeBaseId: string, requestId: string) {
- try {
- logger.info(`[${requestId}] Starting cleanup for KB ${knowledgeBaseId}`)
-
- // Get all tag definitions for this KB
- const allDefinitions = await db
- .select()
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
-
- logger.info(`[${requestId}] Found ${allDefinitions.length} tag definitions to check`)
-
- if (allDefinitions.length === 0) {
- return 0
- }
-
- let cleanedCount = 0
-
- // For each tag definition, check if any documents use that tag slot
- for (const definition of allDefinitions) {
- const slot = definition.tagSlot
-
- // Use raw SQL with proper column name injection
- const countResult = await db.execute(sql`
- SELECT count(*) as count
- FROM document
- WHERE knowledge_base_id = ${knowledgeBaseId}
- AND ${sql.raw(slot)} IS NOT NULL
- AND trim(${sql.raw(slot)}) != ''
- `)
- const count = Number(countResult[0]?.count) || 0
-
- logger.info(
- `[${requestId}] Tag ${definition.displayName} (${slot}): ${count} documents using it`
- )
-
- // If count is 0, remove this tag definition
- if (count === 0) {
- await db
- .delete(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.id, definition.id))
-
- cleanedCount++
- logger.info(
- `[${requestId}] Removed unused tag definition: ${definition.displayName} (${definition.tagSlot})`
- )
- }
- }
-
- return cleanedCount
- } catch (error) {
- logger.warn(`[${requestId}] Failed to cleanup unused tag definitions:`, error)
- return 0 // Don't fail the main operation if cleanup fails
- }
-}
-
// GET /api/knowledge/[id]/documents/[documentId]/tag-definitions - Get tag definitions for a document
export async function GET(
req: NextRequest,
@@ -145,35 +45,22 @@ export async function GET(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has access to the knowledge base
- const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
- if (!accessCheck.hasAccess) {
- return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
- }
-
// Verify document exists and belongs to the knowledge base
- const documentExists = await db
- .select({ id: document.id })
- .from(document)
- .where(and(eq(document.id, documentId), eq(document.knowledgeBaseId, knowledgeBaseId)))
- .limit(1)
-
- if (documentExists.length === 0) {
- return NextResponse.json({ error: 'Document not found' }, { status: 404 })
+ const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)
+ if (!accessCheck.hasAccess) {
+ if (accessCheck.notFound) {
+ logger.warn(
+ `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
+ )
+ return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
+ }
+ logger.warn(
+ `[${requestId}] User ${session.user.id} attempted unauthorized document access: ${accessCheck.reason}`
+ )
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Get tag definitions for the knowledge base
- const tagDefinitions = await db
- .select({
- id: knowledgeBaseTagDefinitions.id,
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- fieldType: knowledgeBaseTagDefinitions.fieldType,
- createdAt: knowledgeBaseTagDefinitions.createdAt,
- updatedAt: knowledgeBaseTagDefinitions.updatedAt,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
+ const tagDefinitions = await getDocumentTagDefinitions(knowledgeBaseId)
logger.info(`[${requestId}] Retrieved ${tagDefinitions.length} tag definitions`)
@@ -203,21 +90,19 @@ export async function POST(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has write access to the knowledge base
- const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, session.user.id)
+ // Verify document exists and user has write access
+ const accessCheck = await checkDocumentWriteAccess(knowledgeBaseId, documentId, session.user.id)
if (!accessCheck.hasAccess) {
- return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
- }
-
- // Verify document exists and belongs to the knowledge base
- const documentExists = await db
- .select({ id: document.id })
- .from(document)
- .where(and(eq(document.id, documentId), eq(document.knowledgeBaseId, knowledgeBaseId)))
- .limit(1)
-
- if (documentExists.length === 0) {
- return NextResponse.json({ error: 'Document not found' }, { status: 404 })
+ if (accessCheck.notFound) {
+ logger.warn(
+ `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
+ )
+ return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
+ }
+ logger.warn(
+ `[${requestId}] User ${session.user.id} attempted unauthorized document write access: ${accessCheck.reason}`
+ )
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
let body
@@ -238,197 +123,24 @@ export async function POST(
const validatedData = BulkTagDefinitionsSchema.parse(body)
- // Validate slots are valid for their field types
- for (const definition of validatedData.definitions) {
- const validSlots = getSlotsForFieldType(definition.fieldType)
- if (validSlots.length === 0) {
- return NextResponse.json(
- { error: `Unsupported field type: ${definition.fieldType}` },
- { status: 400 }
- )
- }
-
- if (!validSlots.includes(definition.tagSlot)) {
- return NextResponse.json(
- {
- error: `Invalid slot '${definition.tagSlot}' for field type '${definition.fieldType}'. Valid slots: ${validSlots.join(', ')}`,
- },
- { status: 400 }
- )
- }
+ const bulkData: BulkTagDefinitionsData = {
+ definitions: validatedData.definitions.map((def) => ({
+ tagSlot: def.tagSlot,
+ displayName: def.displayName,
+ fieldType: def.fieldType,
+ originalDisplayName: def._originalDisplayName,
+ })),
}
- // Validate no duplicate tag slots within the same field type
- const slotsByFieldType = new Map<string, Set<string>>()
- for (const definition of validatedData.definitions) {
- if (!slotsByFieldType.has(definition.fieldType)) {
- slotsByFieldType.set(definition.fieldType, new Set())
- }
- const slotsForType = slotsByFieldType.get(definition.fieldType)!
- if (slotsForType.has(definition.tagSlot)) {
- return NextResponse.json(
- {
- error: `Duplicate slot '${definition.tagSlot}' for field type '${definition.fieldType}'`,
- },
- { status: 400 }
- )
- }
- slotsForType.add(definition.tagSlot)
- }
-
- const now = new Date()
- const createdDefinitions: (typeof knowledgeBaseTagDefinitions.$inferSelect)[] = []
-
- // Get existing definitions
- const existingDefinitions = await db
- .select()
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
-
- // Group by field type for validation
- const existingByFieldType = new Map()
- for (const def of existingDefinitions) {
- existingByFieldType.set(def.fieldType, (existingByFieldType.get(def.fieldType) || 0) + 1)
- }
-
- // Validate we don't exceed limits per field type
- const newByFieldType = new Map()
- for (const definition of validatedData.definitions) {
- // Skip validation for edit operations - they don't create new slots
- if (definition._originalDisplayName) {
- continue
- }
-
- const existingTagNames = new Set(
- existingDefinitions
- .filter((def) => def.fieldType === definition.fieldType)
- .map((def) => def.displayName)
- )
-
- if (!existingTagNames.has(definition.displayName)) {
- newByFieldType.set(
- definition.fieldType,
- (newByFieldType.get(definition.fieldType) || 0) + 1
- )
- }
- }
-
- for (const [fieldType, newCount] of newByFieldType.entries()) {
- const existingCount = existingByFieldType.get(fieldType) || 0
- const maxSlots = getMaxSlotsForFieldType(fieldType)
-
- if (existingCount + newCount > maxSlots) {
- return NextResponse.json(
- {
- error: `Cannot create ${newCount} new '${fieldType}' tags. Knowledge base already has ${existingCount} '${fieldType}' tag definitions. Maximum is ${maxSlots} per field type.`,
- },
- { status: 400 }
- )
- }
- }
-
- // Use transaction to ensure consistency
- await db.transaction(async (tx) => {
- // Create maps for lookups
- const existingByName = new Map(existingDefinitions.map((def) => [def.displayName, def]))
- const existingBySlot = new Map(existingDefinitions.map((def) => [def.tagSlot, def]))
-
- // Process each definition
- for (const definition of validatedData.definitions) {
- if (definition._originalDisplayName) {
- // This is an EDIT operation - find by original name and update
- const originalDefinition = existingByName.get(definition._originalDisplayName)
-
- if (originalDefinition) {
- logger.info(
- `[${requestId}] Editing tag definition: ${definition._originalDisplayName} -> ${definition.displayName} (slot ${originalDefinition.tagSlot})`
- )
-
- await tx
- .update(knowledgeBaseTagDefinitions)
- .set({
- displayName: definition.displayName,
- fieldType: definition.fieldType,
- updatedAt: now,
- })
- .where(eq(knowledgeBaseTagDefinitions.id, originalDefinition.id))
-
- createdDefinitions.push({
- ...originalDefinition,
- displayName: definition.displayName,
- fieldType: definition.fieldType,
- updatedAt: now,
- })
- continue
- }
- logger.warn(
- `[${requestId}] Could not find original definition for: ${definition._originalDisplayName}`
- )
- }
-
- // Regular create/update logic
- const existingByDisplayName = existingByName.get(definition.displayName)
-
- if (existingByDisplayName) {
- // Display name exists - UPDATE operation
- logger.info(
- `[${requestId}] Updating existing tag definition: ${definition.displayName} (slot ${existingByDisplayName.tagSlot})`
- )
-
- await tx
- .update(knowledgeBaseTagDefinitions)
- .set({
- fieldType: definition.fieldType,
- updatedAt: now,
- })
- .where(eq(knowledgeBaseTagDefinitions.id, existingByDisplayName.id))
-
- createdDefinitions.push({
- ...existingByDisplayName,
- fieldType: definition.fieldType,
- updatedAt: now,
- })
- } else {
- // Display name doesn't exist - CREATE operation
- const targetSlot = await getNextAvailableSlot(
- knowledgeBaseId,
- definition.fieldType,
- existingBySlot
- )
-
- if (!targetSlot) {
- logger.error(
- `[${requestId}] No available slots for new tag definition: ${definition.displayName}`
- )
- continue
- }
-
- logger.info(
- `[${requestId}] Creating new tag definition: ${definition.displayName} -> ${targetSlot}`
- )
-
- const newDefinition = {
- id: randomUUID(),
- knowledgeBaseId,
- tagSlot: targetSlot as any,
- displayName: definition.displayName,
- fieldType: definition.fieldType,
- createdAt: now,
- updatedAt: now,
- }
-
- await tx.insert(knowledgeBaseTagDefinitions).values(newDefinition)
- existingBySlot.set(targetSlot as any, newDefinition)
- createdDefinitions.push(newDefinition as any)
- }
- }
- })
-
- logger.info(`[${requestId}] Created/updated ${createdDefinitions.length} tag definitions`)
+ const result = await createOrUpdateTagDefinitionsBulk(knowledgeBaseId, bulkData, requestId)
return NextResponse.json({
success: true,
- data: createdDefinitions,
+ data: {
+ created: result.created,
+ updated: result.updated,
+ errors: result.errors,
+ },
})
} catch (error) {
if (error instanceof z.ZodError) {
@@ -459,10 +171,19 @@ export async function DELETE(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has write access to the knowledge base
- const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, session.user.id)
+ // Verify document exists and user has write access
+ const accessCheck = await checkDocumentWriteAccess(knowledgeBaseId, documentId, session.user.id)
if (!accessCheck.hasAccess) {
- return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
+ if (accessCheck.notFound) {
+ logger.warn(
+ `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
+ )
+ return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
+ }
+ logger.warn(
+ `[${requestId}] User ${session.user.id} attempted unauthorized document write access: ${accessCheck.reason}`
+ )
+ return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
if (action === 'cleanup') {
@@ -478,13 +199,12 @@ export async function DELETE(
// Delete all tag definitions (original behavior)
logger.info(`[${requestId}] Deleting all tag definitions for KB ${knowledgeBaseId}`)
- const result = await db
- .delete(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
+ const deletedCount = await deleteAllTagDefinitions(knowledgeBaseId, requestId)
return NextResponse.json({
success: true,
message: 'Tag definitions deleted successfully',
+ data: { deleted: deletedCount },
})
} catch (error) {
logger.error(`[${requestId}] Error with tag definitions operation`, error)
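Both `getNextAvailableSlot` copies deleted in this patch share the same core loop, which now lives once in `@/lib/knowledge/tags/service`. Condensed, the allocation rule is simply first-free-slot-per-field-type; the helper name and parameter list below are illustrative, not the service's actual signature:

```ts
// Illustrative condensation of the deleted helper: given the slots a field
// type may use and the slots already taken for that type, return the first
// free slot, or null when the field type is exhausted.
function firstFreeSlot(availableSlots: string[], usedSlots: Set<string>): string | null {
  for (const slot of availableSlots) {
    if (!usedSlots.has(slot)) return slot
  }
  return null
}
```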
diff --git a/apps/sim/app/api/knowledge/[id]/documents/route.test.ts b/apps/sim/app/api/knowledge/[id]/documents/route.test.ts
index 61a702cc7..84ef5cf9b 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/route.test.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/route.test.ts
@@ -24,6 +24,19 @@ vi.mock('@/app/api/knowledge/utils', () => ({
processDocumentAsync: vi.fn(),
}))
+vi.mock('@/lib/knowledge/documents/service', () => ({
+ getDocuments: vi.fn(),
+ createSingleDocument: vi.fn(),
+ createDocumentRecords: vi.fn(),
+ processDocumentsWithQueue: vi.fn(),
+ getProcessingConfig: vi.fn(),
+ bulkDocumentOperation: vi.fn(),
+ updateDocument: vi.fn(),
+ deleteDocument: vi.fn(),
+ markDocumentAsFailedTimeout: vi.fn(),
+ retryDocumentProcessing: vi.fn(),
+}))
+
mockDrizzleOrm()
mockConsoleLogger()
@@ -72,7 +85,6 @@ describe('Knowledge Base Documents API Route', () => {
}
}
})
- // Clear all mocks - they will be set up in individual tests
}
beforeEach(async () => {
@@ -96,6 +108,7 @@ describe('Knowledge Base Documents API Route', () => {
it('should retrieve documents successfully for authenticated user', async () => {
const { checkKnowledgeBaseAccess } = await import('@/app/api/knowledge/utils')
+ const { getDocuments } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseAccess).mockResolvedValue({
@@ -103,11 +116,15 @@ describe('Knowledge Base Documents API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock the count query (first query)
- mockDbChain.where.mockResolvedValueOnce([{ count: 1 }])
-
- // Mock the documents query (second query)
- mockDbChain.offset.mockResolvedValue([mockDocument])
+ vi.mocked(getDocuments).mockResolvedValue({
+ documents: [mockDocument],
+ pagination: {
+ total: 1,
+ limit: 50,
+ offset: 0,
+ hasMore: false,
+ },
+ })
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/documents/route')
@@ -118,12 +135,22 @@ describe('Knowledge Base Documents API Route', () => {
expect(data.success).toBe(true)
expect(data.data.documents).toHaveLength(1)
expect(data.data.documents[0].id).toBe('doc-123')
- expect(mockDbChain.select).toHaveBeenCalled()
expect(vi.mocked(checkKnowledgeBaseAccess)).toHaveBeenCalledWith('kb-123', 'user-123')
+ expect(vi.mocked(getDocuments)).toHaveBeenCalledWith(
+ 'kb-123',
+ {
+ includeDisabled: false,
+ search: undefined,
+ limit: 50,
+ offset: 0,
+ },
+ expect.any(String)
+ )
})
it('should filter disabled documents by default', async () => {
const { checkKnowledgeBaseAccess } = await import('@/app/api/knowledge/utils')
+ const { getDocuments } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseAccess).mockResolvedValue({
@@ -131,22 +158,36 @@ describe('Knowledge Base Documents API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock the count query (first query)
- mockDbChain.where.mockResolvedValueOnce([{ count: 1 }])
-
- // Mock the documents query (second query)
- mockDbChain.offset.mockResolvedValue([mockDocument])
+ vi.mocked(getDocuments).mockResolvedValue({
+ documents: [mockDocument],
+ pagination: {
+ total: 1,
+ limit: 50,
+ offset: 0,
+ hasMore: false,
+ },
+ })
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/documents/route')
const response = await GET(req, { params: mockParams })
expect(response.status).toBe(200)
- expect(mockDbChain.where).toHaveBeenCalled()
+ expect(vi.mocked(getDocuments)).toHaveBeenCalledWith(
+ 'kb-123',
+ {
+ includeDisabled: false,
+ search: undefined,
+ limit: 50,
+ offset: 0,
+ },
+ expect.any(String)
+ )
})
it('should include disabled documents when requested', async () => {
const { checkKnowledgeBaseAccess } = await import('@/app/api/knowledge/utils')
+ const { getDocuments } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseAccess).mockResolvedValue({
@@ -154,11 +195,15 @@ describe('Knowledge Base Documents API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock the count query (first query)
- mockDbChain.where.mockResolvedValueOnce([{ count: 1 }])
-
- // Mock the documents query (second query)
- mockDbChain.offset.mockResolvedValue([mockDocument])
+ vi.mocked(getDocuments).mockResolvedValue({
+ documents: [mockDocument],
+ pagination: {
+ total: 1,
+ limit: 50,
+ offset: 0,
+ hasMore: false,
+ },
+ })
const url = 'http://localhost:3000/api/knowledge/kb-123/documents?includeDisabled=true'
const req = new Request(url, { method: 'GET' }) as any
@@ -167,6 +212,16 @@ describe('Knowledge Base Documents API Route', () => {
const response = await GET(req, { params: mockParams })
expect(response.status).toBe(200)
+ expect(vi.mocked(getDocuments)).toHaveBeenCalledWith(
+ 'kb-123',
+ {
+ includeDisabled: true,
+ search: undefined,
+ limit: 50,
+ offset: 0,
+ },
+ expect.any(String)
+ )
})
it('should return unauthorized for unauthenticated user', async () => {
@@ -216,13 +271,14 @@ describe('Knowledge Base Documents API Route', () => {
it('should handle database errors', async () => {
const { checkKnowledgeBaseAccess } = await import('@/app/api/knowledge/utils')
+ const { getDocuments } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseAccess).mockResolvedValue({
hasAccess: true,
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- mockDbChain.orderBy.mockRejectedValue(new Error('Database error'))
+ vi.mocked(getDocuments).mockRejectedValue(new Error('Database error'))
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/documents/route')
@@ -245,13 +301,35 @@ describe('Knowledge Base Documents API Route', () => {
it('should create single document successfully', async () => {
const { checkKnowledgeBaseWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { createSingleDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseWriteAccess).mockResolvedValue({
hasAccess: true,
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- mockDbChain.values.mockResolvedValue(undefined)
+
+ const createdDocument = {
+ id: 'doc-123',
+ knowledgeBaseId: 'kb-123',
+ filename: validDocumentData.filename,
+ fileUrl: validDocumentData.fileUrl,
+ fileSize: validDocumentData.fileSize,
+ mimeType: validDocumentData.mimeType,
+ chunkCount: 0,
+ tokenCount: 0,
+ characterCount: 0,
+ enabled: true,
+ uploadedAt: new Date(),
+ tag1: null,
+ tag2: null,
+ tag3: null,
+ tag4: null,
+ tag5: null,
+ tag6: null,
+ tag7: null,
+ }
+ vi.mocked(createSingleDocument).mockResolvedValue(createdDocument)
const req = createMockRequest('POST', validDocumentData)
const { POST } = await import('@/app/api/knowledge/[id]/documents/route')
@@ -262,7 +340,11 @@ describe('Knowledge Base Documents API Route', () => {
expect(data.success).toBe(true)
expect(data.data.filename).toBe(validDocumentData.filename)
expect(data.data.fileUrl).toBe(validDocumentData.fileUrl)
- expect(mockDbChain.insert).toHaveBeenCalled()
+ expect(vi.mocked(createSingleDocument)).toHaveBeenCalledWith(
+ validDocumentData,
+ 'kb-123',
+ expect.any(String)
+ )
})
it('should validate single document data', async () => {
@@ -320,9 +402,9 @@ describe('Knowledge Base Documents API Route', () => {
}
it('should create bulk documents successfully', async () => {
- const { checkKnowledgeBaseWriteAccess, processDocumentAsync } = await import(
- '@/app/api/knowledge/utils'
- )
+ const { checkKnowledgeBaseWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { createDocumentRecords, processDocumentsWithQueue, getProcessingConfig } =
+ await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseWriteAccess).mockResolvedValue({
@@ -330,17 +412,31 @@ describe('Knowledge Base Documents API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock transaction to return the created documents
- mockDbChain.transaction.mockImplementation(async (callback) => {
- const mockTx = {
- insert: vi.fn().mockReturnValue({
- values: vi.fn().mockResolvedValue(undefined),
- }),
- }
- return await callback(mockTx)
- })
+ const createdDocuments = [
+ {
+ documentId: 'doc-1',
+ filename: 'doc1.pdf',
+ fileUrl: 'https://example.com/doc1.pdf',
+ fileSize: 1024,
+ mimeType: 'application/pdf',
+ },
+ {
+ documentId: 'doc-2',
+ filename: 'doc2.pdf',
+ fileUrl: 'https://example.com/doc2.pdf',
+ fileSize: 2048,
+ mimeType: 'application/pdf',
+ },
+ ]
- vi.mocked(processDocumentAsync).mockResolvedValue(undefined)
+ vi.mocked(createDocumentRecords).mockResolvedValue(createdDocuments)
+ vi.mocked(processDocumentsWithQueue).mockResolvedValue(undefined)
+ vi.mocked(getProcessingConfig).mockReturnValue({
+ maxConcurrentDocuments: 8,
+ batchSize: 20,
+ delayBetweenBatches: 100,
+ delayBetweenDocuments: 0,
+ })
const req = createMockRequest('POST', validBulkData)
const { POST } = await import('@/app/api/knowledge/[id]/documents/route')
@@ -352,7 +448,12 @@ describe('Knowledge Base Documents API Route', () => {
expect(data.data.total).toBe(2)
expect(data.data.documentsCreated).toHaveLength(2)
expect(data.data.processingMethod).toBe('background')
- expect(mockDbChain.transaction).toHaveBeenCalled()
+ expect(vi.mocked(createDocumentRecords)).toHaveBeenCalledWith(
+ validBulkData.documents,
+ 'kb-123',
+ expect.any(String)
+ )
+ expect(vi.mocked(processDocumentsWithQueue)).toHaveBeenCalled()
})
it('should validate bulk document data', async () => {
@@ -394,9 +495,9 @@ describe('Knowledge Base Documents API Route', () => {
})
it('should handle processing errors gracefully', async () => {
- const { checkKnowledgeBaseWriteAccess, processDocumentAsync } = await import(
- '@/app/api/knowledge/utils'
- )
+ const { checkKnowledgeBaseWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { createDocumentRecords, processDocumentsWithQueue, getProcessingConfig } =
+ await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseWriteAccess).mockResolvedValue({
@@ -404,26 +505,30 @@ describe('Knowledge Base Documents API Route', () => {
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- // Mock transaction to succeed but processing to fail
- mockDbChain.transaction.mockImplementation(async (callback) => {
- const mockTx = {
- insert: vi.fn().mockReturnValue({
- values: vi.fn().mockResolvedValue(undefined),
- }),
- }
- return await callback(mockTx)
- })
+ const createdDocuments = [
+ {
+ documentId: 'doc-1',
+ filename: 'doc1.pdf',
+ fileUrl: 'https://example.com/doc1.pdf',
+ fileSize: 1024,
+ mimeType: 'application/pdf',
+ },
+ ]
- // Don't reject the promise - the processing is async and catches errors internally
- vi.mocked(processDocumentAsync).mockResolvedValue(undefined)
+ vi.mocked(createDocumentRecords).mockResolvedValue(createdDocuments)
+ vi.mocked(processDocumentsWithQueue).mockResolvedValue(undefined)
+ vi.mocked(getProcessingConfig).mockReturnValue({
+ maxConcurrentDocuments: 8,
+ batchSize: 20,
+ delayBetweenBatches: 100,
+ delayBetweenDocuments: 0,
+ })
const req = createMockRequest('POST', validBulkData)
const { POST } = await import('@/app/api/knowledge/[id]/documents/route')
const response = await POST(req, { params: mockParams })
const data = await response.json()
- // The endpoint should still return success since documents are created
- // and processing happens asynchronously
expect(response.status).toBe(200)
expect(data.success).toBe(true)
})
@@ -485,13 +590,14 @@ describe('Knowledge Base Documents API Route', () => {
it('should handle database errors during creation', async () => {
const { checkKnowledgeBaseWriteAccess } = await import('@/app/api/knowledge/utils')
+ const { createSingleDocument } = await import('@/lib/knowledge/documents/service')
mockAuth$.mockAuthenticatedUser()
vi.mocked(checkKnowledgeBaseWriteAccess).mockResolvedValue({
hasAccess: true,
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
- mockDbChain.values.mockRejectedValue(new Error('Database error'))
+ vi.mocked(createSingleDocument).mockRejectedValue(new Error('Database error'))
const req = createMockRequest('POST', validDocumentData)
const { POST } = await import('@/app/api/knowledge/[id]/documents/route')
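Read together, the `mockResolvedValue` payloads in this spec pin down the result shapes of the new service layer. A sketch of those types as the tests exercise them follows; the interface names are assumptions, since the diff imports the functions rather than their types:

```ts
// Result shapes inferred from the mocked resolutions above.
interface GetDocumentsResult {
  documents: unknown[] // document rows, same columns the route used to select
  pagination: { total: number; limit: number; offset: number; hasMore: boolean }
}

interface ProcessingConfig {
  maxConcurrentDocuments: number // stubbed as 8 (the deleted inline config used 3)
  batchSize: number // stubbed as 20 (was 5)
  delayBetweenBatches: number // ms pause between batches
  delayBetweenDocuments: number // ms stagger within a batch
}
```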
diff --git a/apps/sim/app/api/knowledge/[id]/documents/route.ts b/apps/sim/app/api/knowledge/[id]/documents/route.ts
index 4c9813a02..ee0712aed 100644
--- a/apps/sim/app/api/knowledge/[id]/documents/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/documents/route.ts
@@ -1,279 +1,22 @@
import { randomUUID } from 'crypto'
-import { and, desc, eq, inArray, isNull, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
-import { getSlotsForFieldType } from '@/lib/constants/knowledge'
+import {
+ bulkDocumentOperation,
+ createDocumentRecords,
+ createSingleDocument,
+ getDocuments,
+ getProcessingConfig,
+ processDocumentsWithQueue,
+} from '@/lib/knowledge/documents/service'
+import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserId } from '@/app/api/auth/oauth/utils'
-import {
- checkKnowledgeBaseAccess,
- checkKnowledgeBaseWriteAccess,
- processDocumentAsync,
-} from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, knowledgeBaseTagDefinitions } from '@/db/schema'
+import { checkKnowledgeBaseAccess, checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'
const logger = createLogger('DocumentsAPI')
-const PROCESSING_CONFIG = {
- maxConcurrentDocuments: 3,
- batchSize: 5,
- delayBetweenBatches: 1000,
- delayBetweenDocuments: 500,
-}
-
-// Helper function to get the next available slot for a knowledge base and field type
-async function getNextAvailableSlot(
- knowledgeBaseId: string,
- fieldType: string,
- existingBySlot?: Map<string, typeof knowledgeBaseTagDefinitions.$inferSelect>
-): Promise<string | null> {
- let usedSlots: Set<string>
-
- if (existingBySlot) {
- // Use provided map if available (for performance in batch operations)
- // Filter by field type
- usedSlots = new Set(
- Array.from(existingBySlot.entries())
- .filter(([_, def]) => def.fieldType === fieldType)
- .map(([slot, _]) => slot)
- )
- } else {
- // Query database for existing tag definitions of the same field type
- const existingDefinitions = await db
- .select({ tagSlot: knowledgeBaseTagDefinitions.tagSlot })
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId),
- eq(knowledgeBaseTagDefinitions.fieldType, fieldType)
- )
- )
-
- usedSlots = new Set(existingDefinitions.map((def) => def.tagSlot))
- }
-
- // Find the first available slot for this field type
- const availableSlots = getSlotsForFieldType(fieldType)
- for (const slot of availableSlots) {
- if (!usedSlots.has(slot)) {
- return slot
- }
- }
-
- return null // No available slots for this field type
-}
-
-// Helper function to process structured document tags
-async function processDocumentTags(
- knowledgeBaseId: string,
- tagData: Array<{ tagName: string; fieldType: string; value: string }>,
- requestId: string
-): Promise<Record<string, string | null>> {
- const result: Record<string, string | null> = {}
-
- // Initialize all text tag slots to null (only text type is supported currently)
- const textSlots = getSlotsForFieldType('text')
- textSlots.forEach((slot) => {
- result[slot] = null
- })
-
- if (!Array.isArray(tagData) || tagData.length === 0) {
- return result
- }
-
- try {
- // Get existing tag definitions
- const existingDefinitions = await db
- .select()
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
-
- const existingByName = new Map(existingDefinitions.map((def) => [def.displayName, def]))
- const existingBySlot = new Map(existingDefinitions.map((def) => [def.tagSlot, def]))
-
- // Process each tag
- for (const tag of tagData) {
- if (!tag.tagName?.trim() || !tag.value?.trim()) continue
-
- const tagName = tag.tagName.trim()
- const fieldType = tag.fieldType
- const value = tag.value.trim()
-
- let targetSlot: string | null = null
-
- // Check if tag definition already exists
- const existingDef = existingByName.get(tagName)
- if (existingDef) {
- targetSlot = existingDef.tagSlot
- } else {
- // Find next available slot using the helper function
- targetSlot = await getNextAvailableSlot(knowledgeBaseId, fieldType, existingBySlot)
-
- // Create new tag definition if we have a slot
- if (targetSlot) {
- const newDefinition = {
- id: randomUUID(),
- knowledgeBaseId,
- tagSlot: targetSlot as any,
- displayName: tagName,
- fieldType,
- createdAt: new Date(),
- updatedAt: new Date(),
- }
-
- await db.insert(knowledgeBaseTagDefinitions).values(newDefinition)
- existingBySlot.set(targetSlot as any, newDefinition)
-
- logger.info(`[${requestId}] Created tag definition: ${tagName} -> ${targetSlot}`)
- }
- }
-
- // Assign value to the slot
- if (targetSlot) {
- result[targetSlot] = value
- }
- }
-
- return result
- } catch (error) {
- logger.error(`[${requestId}] Error processing document tags:`, error)
- return result
- }
-}
-
-async function processDocumentsWithConcurrencyControl(
- createdDocuments: Array<{
- documentId: string
- filename: string
- fileUrl: string
- fileSize: number
- mimeType: string
- }>,
- knowledgeBaseId: string,
- processingOptions: {
- chunkSize: number
- minCharactersPerChunk: number
- recipe: string
- lang: string
- chunkOverlap: number
- },
- requestId: string
-): Promise {
- const totalDocuments = createdDocuments.length
- const batches = []
-
- for (let i = 0; i < totalDocuments; i += PROCESSING_CONFIG.batchSize) {
- batches.push(createdDocuments.slice(i, i + PROCESSING_CONFIG.batchSize))
- }
-
- logger.info(`[${requestId}] Processing ${totalDocuments} documents in ${batches.length} batches`)
-
- for (const [batchIndex, batch] of batches.entries()) {
- logger.info(
- `[${requestId}] Starting batch ${batchIndex + 1}/${batches.length} with ${batch.length} documents`
- )
-
- await processBatchWithConcurrency(batch, knowledgeBaseId, processingOptions, requestId)
-
- if (batchIndex < batches.length - 1) {
- await new Promise((resolve) => setTimeout(resolve, PROCESSING_CONFIG.delayBetweenBatches))
- }
- }
-
- logger.info(`[${requestId}] Completed processing initiation for all ${totalDocuments} documents`)
-}
-
-async function processBatchWithConcurrency(
- batch: Array<{
- documentId: string
- filename: string
- fileUrl: string
- fileSize: number
- mimeType: string
- }>,
- knowledgeBaseId: string,
- processingOptions: {
- chunkSize: number
- minCharactersPerChunk: number
- recipe: string
- lang: string
- chunkOverlap: number
- },
- requestId: string
-): Promise {
- const semaphore = new Array(PROCESSING_CONFIG.maxConcurrentDocuments).fill(0)
- const processingPromises = batch.map(async (doc, index) => {
- if (index > 0) {
- await new Promise((resolve) =>
- setTimeout(resolve, index * PROCESSING_CONFIG.delayBetweenDocuments)
- )
- }
-
- await new Promise<void>((resolve) => {
- const checkSlot = () => {
- const availableIndex = semaphore.findIndex((slot) => slot === 0)
- if (availableIndex !== -1) {
- semaphore[availableIndex] = 1
- resolve()
- } else {
- setTimeout(checkSlot, 100)
- }
- }
- checkSlot()
- })
-
- try {
- logger.info(`[${requestId}] Starting processing for document: ${doc.filename}`)
-
- await processDocumentAsync(
- knowledgeBaseId,
- doc.documentId,
- {
- filename: doc.filename,
- fileUrl: doc.fileUrl,
- fileSize: doc.fileSize,
- mimeType: doc.mimeType,
- },
- processingOptions
- )
-
- logger.info(`[${requestId}] Successfully initiated processing for document: ${doc.filename}`)
- } catch (error: unknown) {
- logger.error(`[${requestId}] Failed to process document: ${doc.filename}`, {
- documentId: doc.documentId,
- filename: doc.filename,
- error: error instanceof Error ? error.message : 'Unknown error',
- })
-
- try {
- await db
- .update(document)
- .set({
- processingStatus: 'failed',
- processingError:
- error instanceof Error ? error.message : 'Failed to initiate processing',
- processingCompletedAt: new Date(),
- })
- .where(eq(document.id, doc.documentId))
- } catch (dbError: unknown) {
- logger.error(
- `[${requestId}] Failed to update document status for failed document: ${doc.documentId}`,
- dbError
- )
- }
- } finally {
- const slotIndex = semaphore.findIndex((slot) => slot === 1)
- if (slotIndex !== -1) {
- semaphore[slotIndex] = 0
- }
- }
- })
-
- await Promise.allSettled(processingPromises)
-}
-
const CreateDocumentSchema = z.object({
filename: z.string().min(1, 'Filename is required'),
fileUrl: z.string().url('File URL must be valid'),
@@ -337,83 +80,50 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
const url = new URL(req.url)
const includeDisabled = url.searchParams.get('includeDisabled') === 'true'
- const search = url.searchParams.get('search')
+ const search = url.searchParams.get('search') || undefined
const limit = Number.parseInt(url.searchParams.get('limit') || '50')
const offset = Number.parseInt(url.searchParams.get('offset') || '0')
+ const sortByParam = url.searchParams.get('sortBy')
+ const sortOrderParam = url.searchParams.get('sortOrder')
- // Build where conditions
- const whereConditions = [
- eq(document.knowledgeBaseId, knowledgeBaseId),
- isNull(document.deletedAt),
+ // Validate sort parameters
+ const validSortFields: DocumentSortField[] = [
+ 'filename',
+ 'fileSize',
+ 'tokenCount',
+ 'chunkCount',
+ 'uploadedAt',
+ 'processingStatus',
]
+ const validSortOrders: SortOrder[] = ['asc', 'desc']
- // Filter out disabled documents unless specifically requested
- if (!includeDisabled) {
- whereConditions.push(eq(document.enabled, true))
- }
+ const sortBy =
+ sortByParam && validSortFields.includes(sortByParam as DocumentSortField)
+ ? (sortByParam as DocumentSortField)
+ : undefined
+ const sortOrder =
+ sortOrderParam && validSortOrders.includes(sortOrderParam as SortOrder)
+ ? (sortOrderParam as SortOrder)
+ : undefined
- // Add search condition if provided
- if (search) {
- whereConditions.push(
- // Search in filename
- sql`LOWER(${document.filename}) LIKE LOWER(${`%${search}%`})`
- )
- }
-
- // Get total count for pagination
- const totalResult = await db
- .select({ count: sql<number>`COUNT(*)` })
- .from(document)
- .where(and(...whereConditions))
-
- const total = totalResult[0]?.count || 0
- const hasMore = offset + limit < total
-
- const documents = await db
- .select({
- id: document.id,
- filename: document.filename,
- fileUrl: document.fileUrl,
- fileSize: document.fileSize,
- mimeType: document.mimeType,
- chunkCount: document.chunkCount,
- tokenCount: document.tokenCount,
- characterCount: document.characterCount,
- processingStatus: document.processingStatus,
- processingStartedAt: document.processingStartedAt,
- processingCompletedAt: document.processingCompletedAt,
- processingError: document.processingError,
- enabled: document.enabled,
- uploadedAt: document.uploadedAt,
- // Include tags in response
- tag1: document.tag1,
- tag2: document.tag2,
- tag3: document.tag3,
- tag4: document.tag4,
- tag5: document.tag5,
- tag6: document.tag6,
- tag7: document.tag7,
- })
- .from(document)
- .where(and(...whereConditions))
- .orderBy(desc(document.uploadedAt))
- .limit(limit)
- .offset(offset)
-
- logger.info(
- `[${requestId}] Retrieved ${documents.length} documents (${offset}-${offset + documents.length} of ${total}) for knowledge base ${knowledgeBaseId}`
+ const result = await getDocuments(
+ knowledgeBaseId,
+ {
+ includeDisabled,
+ search,
+ limit,
+ offset,
+ ...(sortBy && { sortBy }),
+ ...(sortOrder && { sortOrder }),
+ },
+ requestId
)
return NextResponse.json({
success: true,
data: {
- documents,
- pagination: {
- total,
- limit,
- offset,
- hasMore,
- },
+ documents: result.documents,
+ pagination: result.pagination,
},
})
} catch (error) {
@@ -462,80 +172,21 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if this is a bulk operation
if (body.bulk === true) {
- // Handle bulk processing (replaces process-documents endpoint)
try {
const validatedData = BulkCreateDocumentsSchema.parse(body)
- const createdDocuments = await db.transaction(async (tx) => {
- const documentPromises = validatedData.documents.map(async (docData) => {
- const documentId = randomUUID()
- const now = new Date()
-
- // Process documentTagsData if provided (for knowledge base block)
- let processedTags: Record<string, string | null> = {
- tag1: null,
- tag2: null,
- tag3: null,
- tag4: null,
- tag5: null,
- tag6: null,
- tag7: null,
- }
-
- if (docData.documentTagsData) {
- try {
- const tagData = JSON.parse(docData.documentTagsData)
- if (Array.isArray(tagData)) {
- processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
- }
- } catch (error) {
- logger.warn(
- `[${requestId}] Failed to parse documentTagsData for bulk document:`,
- error
- )
- }
- }
-
- const newDocument = {
- id: documentId,
- knowledgeBaseId,
- filename: docData.filename,
- fileUrl: docData.fileUrl,
- fileSize: docData.fileSize,
- mimeType: docData.mimeType,
- chunkCount: 0,
- tokenCount: 0,
- characterCount: 0,
- processingStatus: 'pending' as const,
- enabled: true,
- uploadedAt: now,
- // Use processed tags if available, otherwise fall back to individual tag fields
- tag1: processedTags.tag1 || docData.tag1 || null,
- tag2: processedTags.tag2 || docData.tag2 || null,
- tag3: processedTags.tag3 || docData.tag3 || null,
- tag4: processedTags.tag4 || docData.tag4 || null,
- tag5: processedTags.tag5 || docData.tag5 || null,
- tag6: processedTags.tag6 || docData.tag6 || null,
- tag7: processedTags.tag7 || docData.tag7 || null,
- }
-
- await tx.insert(document).values(newDocument)
- logger.info(
- `[${requestId}] Document record created: ${documentId} for file: ${docData.filename}`
- )
- return { documentId, ...docData }
- })
-
- return await Promise.all(documentPromises)
- })
+ const createdDocuments = await createDocumentRecords(
+ validatedData.documents,
+ knowledgeBaseId,
+ requestId
+ )
logger.info(
`[${requestId}] Starting controlled async processing of ${createdDocuments.length} documents`
)
- processDocumentsWithConcurrencyControl(
+ processDocumentsWithQueue(
createdDocuments,
knowledgeBaseId,
validatedData.processingOptions,
@@ -555,9 +206,9 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
})),
processingMethod: 'background',
processingConfig: {
- maxConcurrentDocuments: PROCESSING_CONFIG.maxConcurrentDocuments,
- batchSize: PROCESSING_CONFIG.batchSize,
- totalBatches: Math.ceil(createdDocuments.length / PROCESSING_CONFIG.batchSize),
+ maxConcurrentDocuments: getProcessingConfig().maxConcurrentDocuments,
+ batchSize: getProcessingConfig().batchSize,
+ totalBatches: Math.ceil(createdDocuments.length / getProcessingConfig().batchSize),
},
},
})
@@ -578,52 +229,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
try {
const validatedData = CreateDocumentSchema.parse(body)
- const documentId = randomUUID()
- const now = new Date()
-
- // Process structured tag data if provided
- let processedTags: Record<string, string | null> = {
- tag1: validatedData.tag1 || null,
- tag2: validatedData.tag2 || null,
- tag3: validatedData.tag3 || null,
- tag4: validatedData.tag4 || null,
- tag5: validatedData.tag5 || null,
- tag6: validatedData.tag6 || null,
- tag7: validatedData.tag7 || null,
- }
-
- if (validatedData.documentTagsData) {
- try {
- const tagData = JSON.parse(validatedData.documentTagsData)
- if (Array.isArray(tagData)) {
- // Process structured tag data and create tag definitions
- processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
- }
- } catch (error) {
- logger.warn(`[${requestId}] Failed to parse documentTagsData:`, error)
- }
- }
-
- const newDocument = {
- id: documentId,
- knowledgeBaseId,
- filename: validatedData.filename,
- fileUrl: validatedData.fileUrl,
- fileSize: validatedData.fileSize,
- mimeType: validatedData.mimeType,
- chunkCount: 0,
- tokenCount: 0,
- characterCount: 0,
- enabled: true,
- uploadedAt: now,
- ...processedTags,
- }
-
- await db.insert(document).values(newDocument)
-
- logger.info(
- `[${requestId}] Document created: ${documentId} in knowledge base ${knowledgeBaseId}`
- )
+ const newDocument = await createSingleDocument(validatedData, knowledgeBaseId, requestId)
return NextResponse.json({
success: true,
@@ -649,7 +255,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
}
export async function PATCH(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
- const requestId = crypto.randomUUID().slice(0, 8)
+ const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId } = await params
try {
@@ -678,89 +284,28 @@ export async function PATCH(req: NextRequest, { params }: { params: Promise<{ id
const validatedData = BulkUpdateDocumentsSchema.parse(body)
const { operation, documentIds } = validatedData
- logger.info(
- `[${requestId}] Starting bulk ${operation} operation on ${documentIds.length} documents in knowledge base ${knowledgeBaseId}`
- )
-
- // Verify all documents belong to this knowledge base and user has access
- const documentsToUpdate = await db
- .select({
- id: document.id,
- enabled: document.enabled,
- })
- .from(document)
- .where(
- and(
- eq(document.knowledgeBaseId, knowledgeBaseId),
- inArray(document.id, documentIds),
- isNull(document.deletedAt)
- )
- )
-
- if (documentsToUpdate.length === 0) {
- return NextResponse.json({ error: 'No valid documents found to update' }, { status: 404 })
- }
-
- if (documentsToUpdate.length !== documentIds.length) {
- logger.warn(
- `[${requestId}] Some documents not found or don't belong to knowledge base. Requested: ${documentIds.length}, Found: ${documentsToUpdate.length}`
- )
- }
-
- // Perform the bulk operation
- let updateResult: Array<{ id: string; enabled?: boolean; deletedAt?: Date | null }>
- let successCount: number
-
- if (operation === 'delete') {
- // Handle bulk soft delete
- updateResult = await db
- .update(document)
- .set({
- deletedAt: new Date(),
- })
- .where(
- and(
- eq(document.knowledgeBaseId, knowledgeBaseId),
- inArray(document.id, documentIds),
- isNull(document.deletedAt)
- )
- )
- .returning({ id: document.id, deletedAt: document.deletedAt })
-
- successCount = updateResult.length
- } else {
- // Handle bulk enable/disable
- const enabled = operation === 'enable'
-
- updateResult = await db
- .update(document)
- .set({
- enabled,
- })
- .where(
- and(
- eq(document.knowledgeBaseId, knowledgeBaseId),
- inArray(document.id, documentIds),
- isNull(document.deletedAt)
- )
- )
- .returning({ id: document.id, enabled: document.enabled })
-
- successCount = updateResult.length
- }
-
- logger.info(
- `[${requestId}] Bulk ${operation} operation completed: ${successCount} documents updated in knowledge base ${knowledgeBaseId}`
- )
-
- return NextResponse.json({
- success: true,
- data: {
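+ // bulkDocumentOperation throws when no documents match; translate that into a 404 below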
+ try {
+ const result = await bulkDocumentOperation(
+ knowledgeBaseId,
operation,
- successCount,
- updatedDocuments: updateResult,
- },
- })
+ documentIds,
+ requestId
+ )
+
+ return NextResponse.json({
+ success: true,
+ data: {
+ operation,
+ successCount: result.successCount,
+ updatedDocuments: result.updatedDocuments,
+ },
+ })
+ } catch (error) {
+ if (error instanceof Error && error.message === 'No valid documents found to update') {
+ return NextResponse.json({ error: 'No valid documents found to update' }, { status: 404 })
+ }
+ throw error
+ }
} catch (validationError) {
if (validationError instanceof z.ZodError) {
logger.warn(`[${requestId}] Invalid bulk operation data`, {
diff --git a/apps/sim/app/api/knowledge/[id]/next-available-slot/route.ts b/apps/sim/app/api/knowledge/[id]/next-available-slot/route.ts
index dbb8f775e..fc17e86fe 100644
--- a/apps/sim/app/api/knowledge/[id]/next-available-slot/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/next-available-slot/route.ts
@@ -1,12 +1,9 @@
import { randomUUID } from 'crypto'
-import { and, eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
-import { getMaxSlotsForFieldType, getSlotsForFieldType } from '@/lib/constants/knowledge'
+import { getNextAvailableSlot, getTagDefinitions } from '@/lib/knowledge/tags/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { knowledgeBaseTagDefinitions } from '@/db/schema'
const logger = createLogger('NextAvailableSlotAPI')
@@ -31,51 +28,36 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has read access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
- // Get available slots for this field type
- const availableSlots = getSlotsForFieldType(fieldType)
- const maxSlots = getMaxSlotsForFieldType(fieldType)
+ // Fetch existing tag definitions once and reuse them below
+ const existingDefinitions = await getTagDefinitions(knowledgeBaseId)
+ const usedSlots = existingDefinitions
+ .filter((def) => def.fieldType === fieldType)
+ .map((def) => def.tagSlot)
- // Get existing tag definitions to find used slots for this field type
- const existingDefinitions = await db
- .select({ tagSlot: knowledgeBaseTagDefinitions.tagSlot })
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId),
- eq(knowledgeBaseTagDefinitions.fieldType, fieldType)
- )
- )
-
- const usedSlots = new Set(existingDefinitions.map((def) => def.tagSlot as string))
-
- // Find the first available slot for this field type
- let nextAvailableSlot: string | null = null
- for (const slot of availableSlots) {
- if (!usedSlots.has(slot)) {
- nextAvailableSlot = slot
- break
- }
- }
+ // Build a slot-to-definition map and pass it in to avoid a redundant query
+ const existingBySlot = new Map(existingDefinitions.map((def) => [def.tagSlot as string, def]))
+ const nextAvailableSlot = await getNextAvailableSlot(knowledgeBaseId, fieldType, existingBySlot)
logger.info(
`[${requestId}] Next available slot for fieldType ${fieldType}: ${nextAvailableSlot}`
)
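+ // Assumes the fixed pool of seven tag slots (tag1-tag7) per field type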
+ const result = {
+ nextAvailableSlot,
+ fieldType,
+ usedSlots,
+ totalSlots: 7,
+ availableSlots: nextAvailableSlot ? 7 - usedSlots.length : 0,
+ }
+
return NextResponse.json({
success: true,
- data: {
- nextAvailableSlot,
- fieldType,
- usedSlots: Array.from(usedSlots),
- totalSlots: maxSlots,
- availableSlots: maxSlots - usedSlots.size,
- },
+ data: result,
})
} catch (error) {
logger.error(`[${requestId}] Error getting next available slot`, error)
diff --git a/apps/sim/app/api/knowledge/[id]/route.test.ts b/apps/sim/app/api/knowledge/[id]/route.test.ts
index 33150b8a5..66b9e544b 100644
--- a/apps/sim/app/api/knowledge/[id]/route.test.ts
+++ b/apps/sim/app/api/knowledge/[id]/route.test.ts
@@ -16,9 +16,26 @@ mockKnowledgeSchemas()
mockDrizzleOrm()
mockConsoleLogger()
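+// The route handlers now delegate to the knowledge service layer, so mock it directly instead of the db chain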
+vi.mock('@/lib/knowledge/service', () => ({
+ getKnowledgeBaseById: vi.fn(),
+ updateKnowledgeBase: vi.fn(),
+ deleteKnowledgeBase: vi.fn(),
+}))
+
+vi.mock('@/app/api/knowledge/utils', () => ({
+ checkKnowledgeBaseAccess: vi.fn(),
+ checkKnowledgeBaseWriteAccess: vi.fn(),
+}))
+
describe('Knowledge Base By ID API Route', () => {
const mockAuth$ = mockAuth()
+ let mockGetKnowledgeBaseById: any
+ let mockUpdateKnowledgeBase: any
+ let mockDeleteKnowledgeBase: any
+ let mockCheckKnowledgeBaseAccess: any
+ let mockCheckKnowledgeBaseWriteAccess: any
+
const mockDbChain = {
select: vi.fn().mockReturnThis(),
from: vi.fn().mockReturnThis(),
@@ -62,6 +79,15 @@ describe('Knowledge Base By ID API Route', () => {
vi.stubGlobal('crypto', {
randomUUID: vi.fn().mockReturnValue('mock-uuid-1234-5678'),
})
+
+ const knowledgeService = await import('@/lib/knowledge/service')
+ const knowledgeUtils = await import('@/app/api/knowledge/utils')
+
+ mockGetKnowledgeBaseById = knowledgeService.getKnowledgeBaseById as any
+ mockUpdateKnowledgeBase = knowledgeService.updateKnowledgeBase as any
+ mockDeleteKnowledgeBase = knowledgeService.deleteKnowledgeBase as any
+ mockCheckKnowledgeBaseAccess = knowledgeUtils.checkKnowledgeBaseAccess as any
+ mockCheckKnowledgeBaseWriteAccess = knowledgeUtils.checkKnowledgeBaseWriteAccess as any
})
afterEach(() => {
@@ -74,9 +100,12 @@ describe('Knowledge Base By ID API Route', () => {
it('should retrieve knowledge base successfully for authenticated user', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ mockCheckKnowledgeBaseAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
- mockDbChain.limit.mockResolvedValueOnce([mockKnowledgeBase])
+ mockGetKnowledgeBaseById.mockResolvedValueOnce(mockKnowledgeBase)
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/route')
@@ -87,7 +116,8 @@ describe('Knowledge Base By ID API Route', () => {
expect(data.success).toBe(true)
expect(data.data.id).toBe('kb-123')
expect(data.data.name).toBe('Test Knowledge Base')
- expect(mockDbChain.select).toHaveBeenCalled()
+ expect(mockCheckKnowledgeBaseAccess).toHaveBeenCalledWith('kb-123', 'user-123')
+ expect(mockGetKnowledgeBaseById).toHaveBeenCalledWith('kb-123')
})
it('should return unauthorized for unauthenticated user', async () => {
@@ -105,7 +135,10 @@ describe('Knowledge Base By ID API Route', () => {
it('should return not found for non-existent knowledge base', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockResolvedValueOnce([])
+ mockCheckKnowledgeBaseAccess.mockResolvedValueOnce({
+ hasAccess: false,
+ notFound: true,
+ })
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/route')
@@ -119,7 +152,10 @@ describe('Knowledge Base By ID API Route', () => {
it('should return unauthorized for knowledge base owned by different user', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'different-user' }])
+ mockCheckKnowledgeBaseAccess.mockResolvedValueOnce({
+ hasAccess: false,
+ notFound: false,
+ })
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/route')
@@ -130,9 +166,29 @@ describe('Knowledge Base By ID API Route', () => {
expect(data.error).toBe('Unauthorized')
})
+ it('should return not found when service returns null', async () => {
+ mockAuth$.mockAuthenticatedUser()
+
+ mockCheckKnowledgeBaseAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
+
+ mockGetKnowledgeBaseById.mockResolvedValueOnce(null)
+
+ const req = createMockRequest('GET')
+ const { GET } = await import('@/app/api/knowledge/[id]/route')
+ const response = await GET(req, { params: mockParams })
+ const data = await response.json()
+
+ expect(response.status).toBe(404)
+ expect(data.error).toBe('Knowledge base not found')
+ })
+
it('should handle database errors', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockRejectedValueOnce(new Error('Database error'))
+
+ mockCheckKnowledgeBaseAccess.mockRejectedValueOnce(new Error('Database error'))
const req = createMockRequest('GET')
const { GET } = await import('@/app/api/knowledge/[id]/route')
@@ -156,13 +212,13 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
- mockDbChain.where.mockResolvedValueOnce(undefined)
-
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([{ ...mockKnowledgeBase, ...validUpdateData }])
+ const updatedKnowledgeBase = { ...mockKnowledgeBase, ...validUpdateData }
+ mockUpdateKnowledgeBase.mockResolvedValueOnce(updatedKnowledgeBase)
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/route')
@@ -172,7 +228,16 @@ describe('Knowledge Base By ID API Route', () => {
expect(response.status).toBe(200)
expect(data.success).toBe(true)
expect(data.data.name).toBe('Updated Knowledge Base')
- expect(mockDbChain.update).toHaveBeenCalled()
+ expect(mockCheckKnowledgeBaseWriteAccess).toHaveBeenCalledWith('kb-123', 'user-123')
+ expect(mockUpdateKnowledgeBase).toHaveBeenCalledWith(
+ 'kb-123',
+ {
+ name: validUpdateData.name,
+ description: validUpdateData.description,
+ chunkingConfig: undefined,
+ },
+ expect.any(String)
+ )
})
it('should return unauthorized for unauthenticated user', async () => {
@@ -192,8 +257,10 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: false,
+ notFound: true,
+ })
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/route')
@@ -209,8 +276,10 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
const invalidData = {
name: '',
@@ -229,9 +298,13 @@ describe('Knowledge Base By ID API Route', () => {
it('should handle database errors during update', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ // Mock successful write access check
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
- mockDbChain.where.mockRejectedValueOnce(new Error('Database error'))
+ mockUpdateKnowledgeBase.mockRejectedValueOnce(new Error('Database error'))
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/route')
@@ -251,10 +324,12 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
- mockDbChain.where.mockResolvedValueOnce(undefined)
+ mockDeleteKnowledgeBase.mockResolvedValueOnce(undefined)
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/route')
@@ -264,7 +339,8 @@ describe('Knowledge Base By ID API Route', () => {
expect(response.status).toBe(200)
expect(data.success).toBe(true)
expect(data.data.message).toBe('Knowledge base deleted successfully')
- expect(mockDbChain.update).toHaveBeenCalled()
+ expect(mockCheckKnowledgeBaseWriteAccess).toHaveBeenCalledWith('kb-123', 'user-123')
+ expect(mockDeleteKnowledgeBase).toHaveBeenCalledWith('kb-123', expect.any(String))
})
it('should return unauthorized for unauthenticated user', async () => {
@@ -284,8 +360,10 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: false,
+ notFound: true,
+ })
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/route')
@@ -301,8 +379,10 @@ describe('Knowledge Base By ID API Route', () => {
resetMocks()
- mockDbChain.where.mockReturnValueOnce(mockDbChain) // Return this to continue chain
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'different-user' }])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: false,
+ notFound: false,
+ })
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/route')
@@ -316,9 +396,12 @@ describe('Knowledge Base By ID API Route', () => {
it('should handle database errors during delete', async () => {
mockAuth$.mockAuthenticatedUser()
- mockDbChain.limit.mockResolvedValueOnce([{ id: 'kb-123', userId: 'user-123' }])
+ mockCheckKnowledgeBaseWriteAccess.mockResolvedValueOnce({
+ hasAccess: true,
+ knowledgeBase: { id: 'kb-123', userId: 'user-123' },
+ })
- mockDbChain.where.mockRejectedValueOnce(new Error('Database error'))
+ mockDeleteKnowledgeBase.mockRejectedValueOnce(new Error('Database error'))
const req = createMockRequest('DELETE')
const { DELETE } = await import('@/app/api/knowledge/[id]/route')
diff --git a/apps/sim/app/api/knowledge/[id]/route.ts b/apps/sim/app/api/knowledge/[id]/route.ts
index fe517b949..a176df4fd 100644
--- a/apps/sim/app/api/knowledge/[id]/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/route.ts
@@ -1,11 +1,13 @@
-import { and, eq, isNull } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import {
+ deleteKnowledgeBase,
+ getKnowledgeBaseById,
+ updateKnowledgeBase,
+} from '@/lib/knowledge/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess, checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { knowledgeBase } from '@/db/schema'
const logger = createLogger('KnowledgeBaseByIdAPI')
@@ -48,13 +50,9 @@ export async function GET(_req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- const knowledgeBases = await db
- .select()
- .from(knowledgeBase)
- .where(and(eq(knowledgeBase.id, id), isNull(knowledgeBase.deletedAt)))
- .limit(1)
+ const knowledgeBaseData = await getKnowledgeBaseById(id)
- if (knowledgeBases.length === 0) {
+ if (!knowledgeBaseData) {
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
@@ -62,7 +60,7 @@ export async function GET(_req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({
success: true,
- data: knowledgeBases[0],
+ data: knowledgeBaseData,
})
} catch (error) {
logger.error(`[${requestId}] Error fetching knowledge base`, error)
@@ -99,42 +97,21 @@ export async function PUT(req: NextRequest, { params }: { params: Promise<{ id:
try {
const validatedData = UpdateKnowledgeBaseSchema.parse(body)
- const updateData: any = {
- updatedAt: new Date(),
- }
-
- if (validatedData.name !== undefined) updateData.name = validatedData.name
- if (validatedData.description !== undefined)
- updateData.description = validatedData.description
- if (validatedData.workspaceId !== undefined)
- updateData.workspaceId = validatedData.workspaceId
-
- // Handle embedding model and dimension together to ensure consistency
- if (
- validatedData.embeddingModel !== undefined ||
- validatedData.embeddingDimension !== undefined
- ) {
- updateData.embeddingModel = 'text-embedding-3-small'
- updateData.embeddingDimension = 1536
- }
-
- if (validatedData.chunkingConfig !== undefined)
- updateData.chunkingConfig = validatedData.chunkingConfig
-
- await db.update(knowledgeBase).set(updateData).where(eq(knowledgeBase.id, id))
-
- // Fetch the updated knowledge base
- const updatedKnowledgeBase = await db
- .select()
- .from(knowledgeBase)
- .where(eq(knowledgeBase.id, id))
- .limit(1)
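+ // The service applies the partial update and returns the updated row, removing the follow-up select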
+ const updatedKnowledgeBase = await updateKnowledgeBase(
+ id,
+ {
+ name: validatedData.name,
+ description: validatedData.description,
+ chunkingConfig: validatedData.chunkingConfig,
+ },
+ requestId
+ )
logger.info(`[${requestId}] Knowledge base updated: ${id} for user ${session.user.id}`)
return NextResponse.json({
success: true,
- data: updatedKnowledgeBase[0],
+ data: updatedKnowledgeBase,
})
} catch (validationError) {
if (validationError instanceof z.ZodError) {
@@ -178,14 +155,7 @@ export async function DELETE(_req: NextRequest, { params }: { params: Promise<{
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Soft delete by setting deletedAt timestamp
- await db
- .update(knowledgeBase)
- .set({
- deletedAt: new Date(),
- updatedAt: new Date(),
- })
- .where(eq(knowledgeBase.id, id))
+ await deleteKnowledgeBase(id, requestId)
logger.info(`[${requestId}] Knowledge base deleted: ${id} for user ${session.user.id}`)
diff --git a/apps/sim/app/api/knowledge/[id]/tag-definitions/[tagId]/route.ts b/apps/sim/app/api/knowledge/[id]/tag-definitions/[tagId]/route.ts
index caa044619..a0f18b54e 100644
--- a/apps/sim/app/api/knowledge/[id]/tag-definitions/[tagId]/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/tag-definitions/[tagId]/route.ts
@@ -1,11 +1,9 @@
import { randomUUID } from 'crypto'
-import { and, eq, isNotNull } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
+import { deleteTagDefinition } from '@/lib/knowledge/tags/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, embedding, knowledgeBaseTagDefinitions } from '@/db/schema'
export const dynamic = 'force-dynamic'
@@ -29,87 +27,16 @@ export async function DELETE(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
- // Get the tag definition to find which slot it uses
- const tagDefinition = await db
- .select({
- id: knowledgeBaseTagDefinitions.id,
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.id, tagId),
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId)
- )
- )
- .limit(1)
-
- if (tagDefinition.length === 0) {
- return NextResponse.json({ error: 'Tag definition not found' }, { status: 404 })
- }
-
- const tagDef = tagDefinition[0]
-
- // Delete the tag definition and clear all document tags in a transaction
- await db.transaction(async (tx) => {
- logger.info(`[${requestId}] Starting transaction to delete ${tagDef.tagSlot}`)
-
- try {
- // Clear the tag from documents that actually have this tag set
- logger.info(`[${requestId}] Clearing tag from documents...`)
- await tx
- .update(document)
- .set({ [tagDef.tagSlot]: null })
- .where(
- and(
- eq(document.knowledgeBaseId, knowledgeBaseId),
- isNotNull(document[tagDef.tagSlot as keyof typeof document.$inferSelect])
- )
- )
-
- logger.info(`[${requestId}] Documents updated successfully`)
-
- // Clear the tag from embeddings that actually have this tag set
- logger.info(`[${requestId}] Clearing tag from embeddings...`)
- await tx
- .update(embedding)
- .set({ [tagDef.tagSlot]: null })
- .where(
- and(
- eq(embedding.knowledgeBaseId, knowledgeBaseId),
- isNotNull(embedding[tagDef.tagSlot as keyof typeof embedding.$inferSelect])
- )
- )
-
- logger.info(`[${requestId}] Embeddings updated successfully`)
-
- // Delete the tag definition
- logger.info(`[${requestId}] Deleting tag definition...`)
- await tx
- .delete(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.id, tagId))
-
- logger.info(`[${requestId}] Tag definition deleted successfully`)
- } catch (error) {
- logger.error(`[${requestId}] Error in transaction:`, error)
- throw error
- }
- })
-
- logger.info(
- `[${requestId}] Successfully deleted tag definition ${tagDef.displayName} (${tagDef.tagSlot})`
- )
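+ // deleteTagDefinition is assumed to keep the transactional cleanup removed above:
+ // clear the slot on documents and embeddings, then drop the definition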
+ const deletedTag = await deleteTagDefinition(tagId, requestId)
return NextResponse.json({
success: true,
- message: `Tag definition "${tagDef.displayName}" deleted successfully`,
+ message: `Tag definition "${deletedTag.displayName}" deleted successfully`,
})
} catch (error) {
logger.error(`[${requestId}] Error deleting tag definition`, error)
diff --git a/apps/sim/app/api/knowledge/[id]/tag-definitions/route.ts b/apps/sim/app/api/knowledge/[id]/tag-definitions/route.ts
index af74e474a..f462f4aec 100644
--- a/apps/sim/app/api/knowledge/[id]/tag-definitions/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/tag-definitions/route.ts
@@ -1,11 +1,11 @@
import { randomUUID } from 'crypto'
-import { and, eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
+import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import { SUPPORTED_FIELD_TYPES } from '@/lib/constants/knowledge'
+import { createTagDefinition, getTagDefinitions } from '@/lib/knowledge/tags/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { knowledgeBaseTagDefinitions } from '@/db/schema'
export const dynamic = 'force-dynamic'
@@ -24,25 +24,12 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
- // Get tag definitions for the knowledge base
- const tagDefinitions = await db
- .select({
- id: knowledgeBaseTagDefinitions.id,
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- fieldType: knowledgeBaseTagDefinitions.fieldType,
- createdAt: knowledgeBaseTagDefinitions.createdAt,
- updatedAt: knowledgeBaseTagDefinitions.updatedAt,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
- .orderBy(knowledgeBaseTagDefinitions.tagSlot)
+ const tagDefinitions = await getTagDefinitions(knowledgeBaseId)
logger.info(`[${requestId}] Retrieved ${tagDefinitions.length} tag definitions`)
@@ -69,68 +56,43 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
const body = await req.json()
- const { tagSlot, displayName, fieldType } = body
- if (!tagSlot || !displayName || !fieldType) {
- return NextResponse.json(
- { error: 'tagSlot, displayName, and fieldType are required' },
- { status: 400 }
- )
- }
+ const CreateTagDefinitionSchema = z.object({
+ tagSlot: z.string().min(1, 'Tag slot is required'),
+ displayName: z.string().min(1, 'Display name is required'),
+ fieldType: z.enum(SUPPORTED_FIELD_TYPES as [string, ...string[]], {
+ errorMap: () => ({ message: 'Invalid field type' }),
+ }),
+ })
- // Check if tag slot is already used
- const existingTag = await db
- .select()
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId),
- eq(knowledgeBaseTagDefinitions.tagSlot, tagSlot)
+ let validatedData
+ try {
+ validatedData = CreateTagDefinitionSchema.parse(body)
+ } catch (error) {
+ if (error instanceof z.ZodError) {
+ return NextResponse.json(
+ { error: 'Invalid request data', details: error.errors },
+ { status: 400 }
)
- )
- .limit(1)
-
- if (existingTag.length > 0) {
- return NextResponse.json({ error: 'Tag slot is already in use' }, { status: 409 })
+ }
+ throw error
}
- // Check if display name is already used
- const existingName = await db
- .select()
- .from(knowledgeBaseTagDefinitions)
- .where(
- and(
- eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId),
- eq(knowledgeBaseTagDefinitions.displayName, displayName)
- )
- )
- .limit(1)
-
- if (existingName.length > 0) {
- return NextResponse.json({ error: 'Tag name is already in use' }, { status: 409 })
- }
-
- // Create the new tag definition
- const newTagDefinition = {
- id: randomUUID(),
- knowledgeBaseId,
- tagSlot,
- displayName,
- fieldType,
- createdAt: new Date(),
- updatedAt: new Date(),
- }
-
- await db.insert(knowledgeBaseTagDefinitions).values(newTagDefinition)
-
- logger.info(`[${requestId}] Successfully created tag definition ${displayName} (${tagSlot})`)
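+ // Slot and display-name uniqueness checks (formerly inline above) are assumed to live in createTagDefinition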
+ const newTagDefinition = await createTagDefinition(
+ {
+ knowledgeBaseId,
+ tagSlot: validatedData.tagSlot,
+ displayName: validatedData.displayName,
+ fieldType: validatedData.fieldType,
+ },
+ requestId
+ )
return NextResponse.json({
success: true,
diff --git a/apps/sim/app/api/knowledge/[id]/tag-usage/route.ts b/apps/sim/app/api/knowledge/[id]/tag-usage/route.ts
index bf2fc7e17..55ef74ef6 100644
--- a/apps/sim/app/api/knowledge/[id]/tag-usage/route.ts
+++ b/apps/sim/app/api/knowledge/[id]/tag-usage/route.ts
@@ -1,11 +1,9 @@
import { randomUUID } from 'crypto'
-import { and, eq, isNotNull } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
+import { getTagUsage } from '@/lib/knowledge/tags/service'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { document, knowledgeBaseTagDefinitions } from '@/db/schema'
export const dynamic = 'force-dynamic'
@@ -24,57 +22,15 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
- // Get all tag definitions for the knowledge base
- const tagDefinitions = await db
- .select({
- id: knowledgeBaseTagDefinitions.id,
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
-
- // Get usage statistics for each tag definition
- const usageStats = await Promise.all(
- tagDefinitions.map(async (tagDef) => {
- // Count documents using this tag slot
- const tagSlotColumn = tagDef.tagSlot as keyof typeof document.$inferSelect
-
- const documentsWithTag = await db
- .select({
- id: document.id,
- filename: document.filename,
- [tagDef.tagSlot]: document[tagSlotColumn as keyof typeof document.$inferSelect] as any,
- })
- .from(document)
- .where(
- and(
- eq(document.knowledgeBaseId, knowledgeBaseId),
- isNotNull(document[tagSlotColumn as keyof typeof document.$inferSelect])
- )
- )
-
- return {
- tagName: tagDef.displayName,
- tagSlot: tagDef.tagSlot,
- documentCount: documentsWithTag.length,
- documents: documentsWithTag.map((doc) => ({
- id: doc.id,
- name: doc.filename,
- tagValue: doc[tagDef.tagSlot],
- })),
- }
- })
- )
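+ // getTagUsage returns one stats entry per tag definition, replacing the inline per-tag scan above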
+ const usageStats = await getTagUsage(knowledgeBaseId, requestId)
logger.info(
- `[${requestId}] Retrieved usage statistics for ${tagDefinitions.length} tag definitions`
+ `[${requestId}] Retrieved usage statistics for ${usageStats.length} tag definitions`
)
return NextResponse.json({
diff --git a/apps/sim/app/api/knowledge/route.ts b/apps/sim/app/api/knowledge/route.ts
index a4f5b2dd0..06f42be61 100644
--- a/apps/sim/app/api/knowledge/route.ts
+++ b/apps/sim/app/api/knowledge/route.ts
@@ -1,11 +1,8 @@
-import { and, count, eq, isNotNull, isNull, or } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
+import { createKnowledgeBase, getKnowledgeBases } from '@/lib/knowledge/service'
import { createLogger } from '@/lib/logs/console/logger'
-import { getUserEntityPermissions } from '@/lib/permissions/utils'
-import { db } from '@/db'
-import { document, knowledgeBase, permissions } from '@/db/schema'
const logger = createLogger('KnowledgeBaseAPI')
@@ -41,60 +38,10 @@ export async function GET(req: NextRequest) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
- // Check for workspace filtering
const { searchParams } = new URL(req.url)
const workspaceId = searchParams.get('workspaceId')
- // Get knowledge bases that user can access through direct ownership OR workspace permissions
- const knowledgeBasesWithCounts = await db
- .select({
- id: knowledgeBase.id,
- name: knowledgeBase.name,
- description: knowledgeBase.description,
- tokenCount: knowledgeBase.tokenCount,
- embeddingModel: knowledgeBase.embeddingModel,
- embeddingDimension: knowledgeBase.embeddingDimension,
- chunkingConfig: knowledgeBase.chunkingConfig,
- createdAt: knowledgeBase.createdAt,
- updatedAt: knowledgeBase.updatedAt,
- workspaceId: knowledgeBase.workspaceId,
- docCount: count(document.id),
- })
- .from(knowledgeBase)
- .leftJoin(
- document,
- and(eq(document.knowledgeBaseId, knowledgeBase.id), isNull(document.deletedAt))
- )
- .leftJoin(
- permissions,
- and(
- eq(permissions.entityType, 'workspace'),
- eq(permissions.entityId, knowledgeBase.workspaceId),
- eq(permissions.userId, session.user.id)
- )
- )
- .where(
- and(
- isNull(knowledgeBase.deletedAt),
- workspaceId
- ? // When filtering by workspace
- or(
- // Knowledge bases belonging to the specified workspace (user must have workspace permissions)
- and(eq(knowledgeBase.workspaceId, workspaceId), isNotNull(permissions.userId)),
- // Fallback: User-owned knowledge bases without workspace (legacy)
- and(eq(knowledgeBase.userId, session.user.id), isNull(knowledgeBase.workspaceId))
- )
- : // When not filtering by workspace, use original logic
- or(
- // User owns the knowledge base directly
- eq(knowledgeBase.userId, session.user.id),
- // User has permissions on the knowledge base's workspace
- isNotNull(permissions.userId)
- )
- )
- )
- .groupBy(knowledgeBase.id)
- .orderBy(knowledgeBase.createdAt)
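+ // getKnowledgeBases encapsulates the ownership/workspace-permission filtering and doc-count join removed above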
+ const knowledgeBasesWithCounts = await getKnowledgeBases(session.user.id, workspaceId)
return NextResponse.json({
success: true,
@@ -121,49 +68,16 @@ export async function POST(req: NextRequest) {
try {
const validatedData = CreateKnowledgeBaseSchema.parse(body)
- // If creating in a workspace, check if user has write/admin permissions
- if (validatedData.workspaceId) {
- const userPermission = await getUserEntityPermissions(
- session.user.id,
- 'workspace',
- validatedData.workspaceId
- )
- if (userPermission !== 'write' && userPermission !== 'admin') {
- logger.warn(
- `[${requestId}] User ${session.user.id} denied permission to create knowledge base in workspace ${validatedData.workspaceId}`
- )
- return NextResponse.json(
- { error: 'Insufficient permissions to create knowledge base in this workspace' },
- { status: 403 }
- )
- }
- }
-
- const id = crypto.randomUUID()
- const now = new Date()
-
- const newKnowledgeBase = {
- id,
+ const createData = {
+ ...validatedData,
userId: session.user.id,
- workspaceId: validatedData.workspaceId || null,
- name: validatedData.name,
- description: validatedData.description || null,
- tokenCount: 0,
- embeddingModel: validatedData.embeddingModel,
- embeddingDimension: validatedData.embeddingDimension,
- chunkingConfig: validatedData.chunkingConfig || {
- maxSize: 1024,
- minSize: 100,
- overlap: 200,
- },
- docCount: 0,
- createdAt: now,
- updatedAt: now,
}
- await db.insert(knowledgeBase).values(newKnowledgeBase)
+ const newKnowledgeBase = await createKnowledgeBase(createData, requestId)
- logger.info(`[${requestId}] Knowledge base created: ${id} for user ${session.user.id}`)
+ logger.info(
+ `[${requestId}] Knowledge base created: ${newKnowledgeBase.id} for user ${session.user.id}`
+ )
return NextResponse.json({
success: true,
diff --git a/apps/sim/app/api/knowledge/search/route.ts b/apps/sim/app/api/knowledge/search/route.ts
index a34dc23a7..2177cb6f2 100644
--- a/apps/sim/app/api/knowledge/search/route.ts
+++ b/apps/sim/app/api/knowledge/search/route.ts
@@ -1,13 +1,11 @@
-import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
+import { getDocumentTagDefinitions } from '@/lib/knowledge/tags/service'
import { createLogger } from '@/lib/logs/console/logger'
import { estimateTokenCount } from '@/lib/tokenization/estimators'
import { getUserId } from '@/app/api/auth/oauth/utils'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
-import { db } from '@/db'
-import { knowledgeBaseTagDefinitions } from '@/db/schema'
import { calculateCost } from '@/providers/utils'
import {
generateSearchEmbedding,
@@ -94,13 +92,7 @@ export async function POST(request: NextRequest) {
try {
// Fetch tag definitions for the first accessible KB (since we're using single KB now)
const kbId = accessibleKbIds[0]
- const tagDefs = await db
- .select({
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, kbId))
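+ // getDocumentTagDefinitions yields { tagSlot, displayName } pairs, matching the select it replaces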
+ const tagDefs = await getDocumentTagDefinitions(kbId)
logger.debug(`[${requestId}] Found tag definitions:`, tagDefs)
logger.debug(`[${requestId}] Original filters:`, validatedData.filters)
@@ -224,13 +216,7 @@ export async function POST(request: NextRequest) {
const tagDefinitionsMap: Record<string, Record<string, string>> = {}
for (const kbId of accessibleKbIds) {
try {
- const tagDefs = await db
- .select({
- tagSlot: knowledgeBaseTagDefinitions.tagSlot,
- displayName: knowledgeBaseTagDefinitions.displayName,
- })
- .from(knowledgeBaseTagDefinitions)
- .where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, kbId))
+ const tagDefs = await getDocumentTagDefinitions(kbId)
tagDefinitionsMap[kbId] = {}
tagDefs.forEach((def) => {
diff --git a/apps/sim/app/api/knowledge/search/utils.test.ts b/apps/sim/app/api/knowledge/search/utils.test.ts
index 3fcd04db7..790b2e3fe 100644
--- a/apps/sim/app/api/knowledge/search/utils.test.ts
+++ b/apps/sim/app/api/knowledge/search/utils.test.ts
@@ -16,7 +16,7 @@ vi.mock('@/lib/logs/console/logger', () => ({
})),
}))
vi.mock('@/db')
-vi.mock('@/lib/documents/utils', () => ({
+vi.mock('@/lib/knowledge/documents/utils', () => ({
retryWithExponentialBackoff: (fn: any) => fn(),
}))
diff --git a/apps/sim/app/api/knowledge/utils.test.ts b/apps/sim/app/api/knowledge/utils.test.ts
index 0c5e84e63..a35ca9a76 100644
--- a/apps/sim/app/api/knowledge/utils.test.ts
+++ b/apps/sim/app/api/knowledge/utils.test.ts
@@ -21,11 +21,11 @@ vi.mock('@/lib/env', () => ({
typeof value === 'string' ? value === 'true' || value === '1' : Boolean(value),
}))
-vi.mock('@/lib/documents/utils', () => ({
+vi.mock('@/lib/knowledge/documents/utils', () => ({
retryWithExponentialBackoff: (fn: any) => fn(),
}))
-vi.mock('@/lib/documents/document-processor', () => ({
+vi.mock('@/lib/knowledge/documents/document-processor', () => ({
processDocument: vi.fn().mockResolvedValue({
chunks: [
{
@@ -149,12 +149,12 @@ vi.mock('@/db', () => {
}
})
+import { generateEmbeddings } from '@/lib/embeddings/utils'
+import { processDocumentAsync } from '@/lib/knowledge/documents/service'
import {
checkChunkAccess,
checkDocumentAccess,
checkKnowledgeBaseAccess,
- generateEmbeddings,
- processDocumentAsync,
} from '@/app/api/knowledge/utils'
describe('Knowledge Utils', () => {
diff --git a/apps/sim/app/api/knowledge/utils.ts b/apps/sim/app/api/knowledge/utils.ts
index df85c67df..215163878 100644
--- a/apps/sim/app/api/knowledge/utils.ts
+++ b/apps/sim/app/api/knowledge/utils.ts
@@ -1,35 +1,8 @@
-import crypto from 'crypto'
import { and, eq, isNull } from 'drizzle-orm'
-import { processDocument } from '@/lib/documents/document-processor'
-import { generateEmbeddings } from '@/lib/embeddings/utils'
-import { createLogger } from '@/lib/logs/console/logger'
import { getUserEntityPermissions } from '@/lib/permissions/utils'
import { db } from '@/db'
import { document, embedding, knowledgeBase } from '@/db/schema'
-const logger = createLogger('KnowledgeUtils')
-
-const TIMEOUTS = {
- OVERALL_PROCESSING: 150000, // 150 seconds (2.5 minutes)
- EMBEDDINGS_API: 60000, // 60 seconds per batch
-} as const
-
-/**
- * Create a timeout wrapper for async operations
- */
-function withTimeout<T>(
- promise: Promise<T>,
- timeoutMs: number,
- operation = 'Operation'
-): Promise<T> {
- return Promise.race([
- promise,
- new Promise<never>((_, reject) =>
- setTimeout(() => reject(new Error(`${operation} timed out after ${timeoutMs}ms`)), timeoutMs)
- ),
- ])
-}
-
export interface KnowledgeBaseData {
id: string
userId: string
@@ -380,154 +353,3 @@ export async function checkChunkAccess(
knowledgeBase: kbAccess.knowledgeBase!,
}
}
-
-// Export for external use
-export { generateEmbeddings }
-
-/**
- * Process a document asynchronously with full error handling
- */
-export async function processDocumentAsync(
- knowledgeBaseId: string,
- documentId: string,
- docData: {
- filename: string
- fileUrl: string
- fileSize: number
- mimeType: string
- },
- processingOptions: {
- chunkSize?: number
- minCharactersPerChunk?: number
- recipe?: string
- lang?: string
- chunkOverlap?: number
- }
-): Promise<void> {
- const startTime = Date.now()
- try {
- logger.info(`[${documentId}] Starting document processing: ${docData.filename}`)
-
- // Set status to processing
- await db
- .update(document)
- .set({
- processingStatus: 'processing',
- processingStartedAt: new Date(),
- processingError: null, // Clear any previous error
- })
- .where(eq(document.id, documentId))
-
- logger.info(`[${documentId}] Status updated to 'processing', starting document processor`)
-
- // Wrap the entire processing operation with the 150-second overall timeout
- await withTimeout(
- (async () => {
- const processed = await processDocument(
- docData.fileUrl,
- docData.filename,
- docData.mimeType,
- processingOptions.chunkSize || 1000,
- processingOptions.chunkOverlap || 200,
- processingOptions.minCharactersPerChunk || 1
- )
-
- const now = new Date()
-
- logger.info(
- `[${documentId}] Document parsed successfully, generating embeddings for ${processed.chunks.length} chunks`
- )
-
- const chunkTexts = processed.chunks.map((chunk) => chunk.text)
- const embeddings = chunkTexts.length > 0 ? await generateEmbeddings(chunkTexts) : []
-
- logger.info(`[${documentId}] Embeddings generated, fetching document tags`)
-
- // Fetch document to get tags
- const documentRecord = await db
- .select({
- tag1: document.tag1,
- tag2: document.tag2,
- tag3: document.tag3,
- tag4: document.tag4,
- tag5: document.tag5,
- tag6: document.tag6,
- tag7: document.tag7,
- })
- .from(document)
- .where(eq(document.id, documentId))
- .limit(1)
-
- const documentTags = documentRecord[0] || {}
-
- logger.info(`[${documentId}] Creating embedding records with tags`)
-
- const embeddingRecords = processed.chunks.map((chunk, chunkIndex) => ({
- id: crypto.randomUUID(),
- knowledgeBaseId,
- documentId,
- chunkIndex,
- chunkHash: crypto.createHash('sha256').update(chunk.text).digest('hex'),
- content: chunk.text,
- contentLength: chunk.text.length,
- tokenCount: Math.ceil(chunk.text.length / 4),
- embedding: embeddings[chunkIndex] || null,
- embeddingModel: 'text-embedding-3-small',
- startOffset: chunk.metadata.startIndex,
- endOffset: chunk.metadata.endIndex,
- // Copy tags from document
- tag1: documentTags.tag1,
- tag2: documentTags.tag2,
- tag3: documentTags.tag3,
- tag4: documentTags.tag4,
- tag5: documentTags.tag5,
- tag6: documentTags.tag6,
- tag7: documentTags.tag7,
- createdAt: now,
- updatedAt: now,
- }))
-
- await db.transaction(async (tx) => {
- if (embeddingRecords.length > 0) {
- await tx.insert(embedding).values(embeddingRecords)
- }
-
- await tx
- .update(document)
- .set({
- chunkCount: processed.metadata.chunkCount,
- tokenCount: processed.metadata.tokenCount,
- characterCount: processed.metadata.characterCount,
- processingStatus: 'completed',
- processingCompletedAt: now,
- processingError: null,
- })
- .where(eq(document.id, documentId))
- })
- })(),
- TIMEOUTS.OVERALL_PROCESSING,
- 'Document processing'
- )
-
- const processingTime = Date.now() - startTime
- logger.info(`[${documentId}] Successfully processed document in ${processingTime}ms`)
- } catch (error) {
- const processingTime = Date.now() - startTime
- logger.error(`[${documentId}] Failed to process document after ${processingTime}ms:`, {
- error: error instanceof Error ? error.message : 'Unknown error',
- stack: error instanceof Error ? error.stack : undefined,
- filename: docData.filename,
- fileUrl: docData.fileUrl,
- mimeType: docData.mimeType,
- })
-
- await db
- .update(document)
- .set({
- processingStatus: 'failed',
- processingError: error instanceof Error ? error.message : 'Unknown error',
- processingCompletedAt: new Date(),
- })
- .where(eq(document.id, documentId))
- }
-}
diff --git a/apps/sim/app/api/proxy/tts/route.ts b/apps/sim/app/api/proxy/tts/route.ts
index 3918ca53a..a54071e72 100644
--- a/apps/sim/app/api/proxy/tts/route.ts
+++ b/apps/sim/app/api/proxy/tts/route.ts
@@ -64,7 +64,9 @@ export async function POST(request: Request) {
return new NextResponse(
`Internal Server Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
- { status: 500 }
+ {
+ status: 500,
+ }
)
}
}
diff --git a/apps/sim/app/api/proxy/tts/stream/route.ts b/apps/sim/app/api/proxy/tts/stream/route.ts
index fdf7cfea9..2d8f3c6c6 100644
--- a/apps/sim/app/api/proxy/tts/stream/route.ts
+++ b/apps/sim/app/api/proxy/tts/stream/route.ts
@@ -112,7 +112,9 @@ export async function POST(request: NextRequest) {
return new Response(
`Internal Server Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
- { status: 500 }
+ {
+ status: 500,
+ }
)
}
}
diff --git a/apps/sim/app/api/webhooks/route.ts b/apps/sim/app/api/webhooks/route.ts
index 7f2bb1279..12fed5795 100644
--- a/apps/sim/app/api/webhooks/route.ts
+++ b/apps/sim/app/api/webhooks/route.ts
@@ -495,7 +495,9 @@ async function createAirtableWebhookSubscription(
} else {
logger.info(
`[${requestId}] Successfully created webhook in Airtable for webhook ${webhookData.id}.`,
- { airtableWebhookId: responseBody.id }
+ {
+ airtableWebhookId: responseBody.id,
+ }
)
// Store the airtableWebhookId (responseBody.id) within the providerConfig
try {
diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx
index 475933a15..99ec6b8c0 100644
--- a/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/knowledge/[id]/base.tsx
@@ -4,8 +4,10 @@ import { useCallback, useEffect, useState } from 'react'
import { format } from 'date-fns'
import {
AlertCircle,
+ ChevronDown,
ChevronLeft,
ChevronRight,
+ ChevronUp,
Circle,
CircleOff,
FileText,
@@ -29,6 +31,7 @@ import { Button } from '@/components/ui/button'
import { Checkbox } from '@/components/ui/checkbox'
import { SearchHighlight } from '@/components/ui/search-highlight'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
+import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types'
import { createLogger } from '@/lib/logs/console/logger'
import {
ActionBar,
@@ -47,7 +50,6 @@ import { type DocumentData, useKnowledgeStore } from '@/stores/knowledge/store'
const logger = createLogger('KnowledgeBase')
-// Constants
const DOCUMENTS_PER_PAGE = 50
interface KnowledgeBaseProps {
@@ -143,6 +145,8 @@ export function KnowledgeBase({
const [isDeleting, setIsDeleting] = useState(false)
const [isBulkOperating, setIsBulkOperating] = useState(false)
const [currentPage, setCurrentPage] = useState(1)
+ const [sortBy, setSortBy] = useState('uploadedAt')
+ const [sortOrder, setSortOrder] = useState('desc')
const {
knowledgeBase,
@@ -160,6 +164,8 @@ export function KnowledgeBase({
search: searchQuery || undefined,
limit: DOCUMENTS_PER_PAGE,
offset: (currentPage - 1) * DOCUMENTS_PER_PAGE,
+ sortBy,
+ sortOrder,
})
const router = useRouter()
@@ -194,6 +200,41 @@ export function KnowledgeBase({
}
}, [hasPrevPage])
+ const handleSort = useCallback(
+ (field: DocumentSortField) => {
+ if (sortBy === field) {
+ // Toggle sort order if same field
+ setSortOrder(sortOrder === 'asc' ? 'desc' : 'asc')
+ } else {
+ // Set new field with default desc order
+ setSortBy(field)
+ setSortOrder('desc')
+ }
+ // Reset to first page when sorting changes
+ setCurrentPage(1)
+ },
+ [sortBy, sortOrder]
+ )
+
+ // Helper function to render sortable header
+ const renderSortableHeader = (field: DocumentSortField, label: string, className = '') => (
+ <button type='button' onClick={() => handleSort(field)} className={className}>
+ {label}
+ {sortBy === field &&
+ (sortOrder === 'asc' ? <ChevronUp className='h-3 w-3' /> : <ChevronDown className='h-3 w-3' />)}
+ </button>
+ )
+
// Auto-refresh documents when there are processing documents
useEffect(() => {
const hasProcessingDocuments = documents.some(
@@ -677,6 +718,7 @@ export function KnowledgeBase({
value={searchQuery}
onChange={handleSearchChange}
placeholder='Search documents...'
+ isLoading={isLoadingDocuments}
/>