diff --git a/apps/sim/app/api/files/serve/[...path]/route.ts b/apps/sim/app/api/files/serve/[...path]/route.ts index 584c9b2e6..8f1462fed 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.ts @@ -1,11 +1,11 @@ import { readFile } from 'fs/promises' import type { NextRequest } from 'next/server' import { NextResponse } from 'next/server' +import { checkHybridAuth } from '@/lib/auth/hybrid' import { createLogger } from '@/lib/logs/console/logger' import { downloadFile, getStorageProvider, isUsingCloudStorage } from '@/lib/uploads' import { S3_KB_CONFIG } from '@/lib/uploads/setup' import '@/lib/uploads/setup.server' -import { getSession } from '@/lib/auth' import { createErrorResponse, createFileResponse, @@ -29,23 +29,19 @@ export async function GET( logger.info('File serve request:', { path }) - const session = await getSession() - if (!session?.user?.id) { - logger.warn('Unauthorized file access attempt', { path }) + const authResult = await checkHybridAuth(request, { requireWorkflowId: false }) + + if (!authResult.success) { + logger.warn('Unauthorized file access attempt', { path, error: authResult.error }) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) } - const userId = session.user.id + const userId = authResult.userId const fullPath = path.join('/') const isS3Path = path[0] === 's3' const isBlobPath = path[0] === 'blob' const isCloudPath = isS3Path || isBlobPath const cloudKey = isCloudPath ? 
/**
 * Build the `filename` parameter(s) of a Content-Disposition header.
 *
 * Pure-ASCII names yield a single quoted-string parameter
 * (`filename="report.pdf"`). Names containing non-ASCII characters yield an
 * ASCII fallback plus an RFC 5987 / RFC 8187 extended parameter
 * (`filename="r_sum_.pdf"; filename*=UTF-8''r%C3%A9sum%C3%A9.pdf`).
 *
 * The filename is treated as untrusted input:
 * - control characters (including CR/LF) are replaced with `_` so they can
 *   never reach the header value;
 * - `"` and `\` are backslash-escaped inside the quoted-string so the name
 *   cannot terminate the quote and smuggle in extra parameters;
 * - `'`, `(`, `)` and `*` are percent-encoded in the extended value because
 *   `encodeURIComponent` leaves them as-is but they are not valid `attr-char`.
 *
 * @param filename - Raw filename as stored/uploaded.
 * @returns The parameter string to append after `<disposition>; `.
 */
function encodeFilenameForHeader(filename: string): string {
  // Neutralize control characters up front; they are never legitimate in a filename.
  const cleaned = filename.replace(/[\x00-\x1F\x7F]/g, '_')

  // Quoted-string form: non-ASCII code units become `_`, then `"` and `\` are escaped.
  const asciiFallback = cleaned.replace(/[^\x00-\x7F]/g, '_').replace(/["\\]/g, '\\$&')
  const quotedParam = `filename="${asciiFallback}"`

  const hasNonAscii = /[^\x00-\x7F]/.test(cleaned)
  if (!hasNonAscii) {
    return quotedParam
  }

  let extendedValue: string
  try {
    extendedValue = encodeURIComponent(cleaned)
  } catch {
    // encodeURIComponent throws URIError on lone surrogates; degrade to the
    // ASCII fallback instead of failing the whole response.
    return quotedParam
  }

  // Percent-encode the characters encodeURIComponent keeps but attr-char forbids.
  extendedValue = extendedValue.replace(
    /['()*]/g,
    (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  )

  return `${quotedParam}; filename*=UTF-8''${extendedValue}`
}
NextResponse { status: 200, headers: { 'Content-Type': contentType, - 'Content-Disposition': `${disposition}; filename="${file.filename}"`, + 'Content-Disposition': `${disposition}; ${encodeFilenameForHeader(file.filename)}`, 'Cache-Control': 'public, max-age=31536000', // Cache for 1 year 'X-Content-Type-Options': 'nosniff', 'Content-Security-Policy': "default-src 'none'; style-src 'unsafe-inline'; sandbox;", diff --git a/apps/sim/background/knowledge-processing.ts b/apps/sim/background/knowledge-processing.ts index 02c4108ad..f5b2d6880 100644 --- a/apps/sim/background/knowledge-processing.ts +++ b/apps/sim/background/knowledge-processing.ts @@ -26,7 +26,7 @@ export type DocumentProcessingPayload = { export const processDocument = task({ id: 'knowledge-process-document', - maxDuration: env.KB_CONFIG_MAX_DURATION || 300, + maxDuration: env.KB_CONFIG_MAX_DURATION || 600, retry: { maxAttempts: env.KB_CONFIG_MAX_ATTEMPTS || 3, factor: env.KB_CONFIG_RETRY_FACTOR || 2, diff --git a/apps/sim/lib/env.ts b/apps/sim/lib/env.ts index 7f910a05a..eb3ebc852 100644 --- a/apps/sim/lib/env.ts +++ b/apps/sim/lib/env.ts @@ -146,7 +146,7 @@ export const env = createEnv({ RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('1000'), // Enterprise tier async API executions per minute // Knowledge Base Processing Configuration - Shared across all processing methods - KB_CONFIG_MAX_DURATION: z.number().optional().default(300), // Max processing duration in s + KB_CONFIG_MAX_DURATION: z.number().optional().default(600), // Max processing duration in seconds (10 minutes) KB_CONFIG_MAX_ATTEMPTS: z.number().optional().default(3), // Max retry attempts KB_CONFIG_RETRY_FACTOR: z.number().optional().default(2), // Retry backoff factor KB_CONFIG_MIN_TIMEOUT: z.number().optional().default(1000), // Min timeout in ms diff --git a/apps/sim/lib/knowledge/documents/document-processor.ts b/apps/sim/lib/knowledge/documents/document-processor.ts index 5efcb3608..4f9d7f81c 100644 --- 
a/apps/sim/lib/knowledge/documents/document-processor.ts +++ b/apps/sim/lib/knowledge/documents/document-processor.ts @@ -180,7 +180,9 @@ async function parseDocument( } async function handleFileForOCR(fileUrl: string, filename: string, mimeType: string) { - if (fileUrl.startsWith('https://')) { + const isExternalHttps = fileUrl.startsWith('https://') && !fileUrl.includes('/api/files/serve/') + + if (isExternalHttps) { return { httpsUrl: fileUrl } } @@ -207,7 +209,16 @@ async function downloadFileWithTimeout(fileUrl: string): Promise { const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD) try { - const response = await fetch(fileUrl, { signal: controller.signal }) + const isInternalFileServe = fileUrl.includes('/api/files/serve/') + const headers: HeadersInit = {} + + if (isInternalFileServe) { + const { generateInternalToken } = await import('@/lib/auth/internal') + const token = await generateInternalToken() + headers.Authorization = `Bearer ${token}` + } + + const response = await fetch(fileUrl, { signal: controller.signal, headers }) clearTimeout(timeoutId) if (!response.ok) { diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index af21f2b58..9694337cd 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -17,14 +17,14 @@ import type { DocumentSortField, SortOrder } from './types' const logger = createLogger('DocumentService') const TIMEOUTS = { - OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Increased to 10 minutes to match Trigger's timeout + OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Default 10 minutes for KB document processing EMBEDDINGS_API: (env.KB_CONFIG_MAX_TIMEOUT || 10000) * 18, } as const // Configuration for handling large documents const LARGE_DOC_CONFIG = { MAX_CHUNKS_PER_BATCH: 500, // Insert embeddings in batches of 500 - MAX_EMBEDDING_BATCH: 50, // Generate embeddings 
in batches of 50 + MAX_EMBEDDING_BATCH: 500, // Generate embeddings in batches of 500 MAX_FILE_SIZE: 100 * 1024 * 1024, // 100MB max file size MAX_CHUNKS_PER_DOCUMENT: 100000, // Maximum chunks allowed per document }