Files
sim/apps/sim/app/api/files/parse/route.ts
Vikhyath Mondreti 5b0c2156e0 improvement(files): pass user file objects around consistently (#3119)
* improvement(collab): do not refetch active workflow id

* progress on files

* more integrations

* separate server and client logic

* consolidate more code

* fix integrations

* fix types

* consolidate more code

* fix tests

* fix more bugbot comments

* fix type check

* fix circular import

* address more bugbot comments

* fix ocr integrations

* fix typing

* remove leftover type

* address bugbot comment

* fix file block adv mode

* fix

* normalize file input

* fix v2 blocks for ocr

* fix for v2 versions

* fix more v2 blocks

* update single file blocks

* make interface simpler

* cleanup fireflies

* remove file only annotation

* accept all types

* added wand to ssh block

* user files should be passed through

* improve docs

* fix slack to include successful execs

* fix dropbox upload file

* fix sendgrid

* fix dropbox

* fix

* fix

* update skills

* fix uploaded file

---------

Co-authored-by: waleed <walif6@gmail.com>
2026-02-03 19:50:23 -08:00

919 lines
27 KiB
TypeScript

import { Buffer } from 'buffer'
import { createHash } from 'crypto'
import fsPromises, { readFile } from 'fs/promises'
import path from 'path'
import { createLogger } from '@sim/logger'
import binaryExtensionsList from 'binary-extensions'
import { type NextRequest, NextResponse } from 'next/server'
import { checkHybridAuth } from '@/lib/auth/hybrid'
import {
secureFetchWithPinnedIP,
validateUrlWithDNS,
} from '@/lib/core/security/input-validation.server'
import { sanitizeUrlForLog } from '@/lib/core/utils/logging'
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads'
import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
import {
extractCleanFilename,
extractStorageKey,
extractWorkspaceIdFromExecutionKey,
getMimeTypeFromExtension,
getViewerUrl,
inferContextFromKey,
isInternalFileUrl,
} from '@/lib/uploads/utils/file-utils'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
import { verifyFileAccess } from '@/app/api/files/authorization'
import type { UserFile } from '@/executor/types'
import '@/lib/uploads/core/setup.server'
export const dynamic = 'force-dynamic'
const logger = createLogger('FilesParseAPI')
const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
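/*
 * Example request (illustrative values; the field names mirror the JSON body
 * destructured in POST below, and `filePath` may be a string or an array):
 *
 *   POST /api/files/parse
 *   {
 *     "filePath": "/api/files/serve/<key>",
 *     "fileType": "application/pdf",
 *     "workspaceId": "<workspace-id>",
 *     "workflowId": "<workflow-id>",
 *     "executionId": "<execution-id>"
 *   }
 */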
interface ExecutionContext {
workspaceId: string
workflowId: string
executionId: string
}
interface ParseResult {
success: boolean
content?: string
error?: string
filePath: string
originalName?: string // Original filename from database (for workspace files)
viewerUrl?: string | null // Viewer URL for the file if available
userFile?: UserFile // UserFile object for the raw file
metadata?: {
fileType: string
size: number
hash: string
processingTime: number
}
}
/**
* Main API route handler
*/
export async function POST(request: NextRequest) {
const startTime = Date.now()
try {
const authResult = await checkHybridAuth(request, { requireWorkflowId: true })
if (!authResult.success) {
logger.warn('Unauthorized file parse request', {
error: authResult.error || 'Authentication failed',
})
return NextResponse.json({ success: false, error: 'Unauthorized' }, { status: 401 })
}
if (!authResult.userId) {
logger.warn('File parse request missing userId', {
authType: authResult.authType,
})
return NextResponse.json({ success: false, error: 'User context required' }, { status: 401 })
}
const userId = authResult.userId
const requestData = await request.json()
const { filePath, fileType, workspaceId, workflowId, executionId } = requestData
if (!filePath || (typeof filePath === 'string' && filePath.trim() === '')) {
return NextResponse.json({ success: false, error: 'No file path provided' }, { status: 400 })
}
// Build execution context if all required fields are present
const executionContext: ExecutionContext | undefined =
workspaceId && workflowId && executionId
? { workspaceId, workflowId, executionId }
: undefined
logger.info('File parse request received:', {
filePath,
fileType,
workspaceId,
userId,
hasExecutionContext: !!executionContext,
})
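// `filePath` may be a single path or an array of paths; arrays are parsed
// sequentially and each successful entry is wrapped in the same output shape
// as the single-file response below.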
if (Array.isArray(filePath)) {
const results = []
for (const singlePath of filePath) {
if (!singlePath || (typeof singlePath === 'string' && singlePath.trim() === '')) {
results.push({
success: false,
error: 'Empty file path in array',
filePath: singlePath || '',
})
continue
}
const result = await parseFileSingle(
singlePath,
fileType,
workspaceId,
userId,
executionContext
)
if (result.metadata) {
result.metadata.processingTime = Date.now() - startTime
}
if (result.success) {
const displayName =
result.originalName || extractCleanFilename(result.filePath) || 'unknown'
results.push({
success: true,
output: {
content: result.content,
name: displayName,
fileType: result.metadata?.fileType || 'application/octet-stream',
size: result.metadata?.size || 0,
binary: false,
file: result.userFile,
},
filePath: result.filePath,
viewerUrl: result.viewerUrl,
})
} else {
results.push(result)
}
}
return NextResponse.json({
success: true,
results,
})
}
const result = await parseFileSingle(filePath, fileType, workspaceId, userId, executionContext)
if (result.metadata) {
result.metadata.processingTime = Date.now() - startTime
}
if (result.success) {
const displayName = result.originalName || extractCleanFilename(result.filePath) || 'unknown'
return NextResponse.json({
success: true,
output: {
content: result.content,
name: displayName,
fileType: result.metadata?.fileType || 'application/octet-stream',
size: result.metadata?.size || 0,
binary: false,
file: result.userFile,
},
filePath: result.filePath,
viewerUrl: result.viewerUrl,
})
}
return NextResponse.json(result)
} catch (error) {
logger.error('Error in file parse API:', error)
return NextResponse.json(
{
success: false,
error: error instanceof Error ? error.message : 'Unknown error occurred',
filePath: '',
},
{ status: 500 }
)
}
}
/**
* Parse a single file and return its content
*/
async function parseFileSingle(
filePath: string,
fileType: string,
workspaceId: string,
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
logger.info('Parsing file:', filePath)
if (!filePath || filePath.trim() === '') {
return {
success: false,
error: 'Empty file path provided',
filePath: filePath || '',
}
}
const pathValidation = validateFilePath(filePath)
if (!pathValidation.isValid) {
return {
success: false,
error: pathValidation.error || 'Invalid path',
filePath,
}
}
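// Route the path to the matching handler: internal /api/files/serve URLs go
// through cloud handling, external http(s) URLs are downloaded, and anything
// else is resolved against cloud storage when enabled or the local upload
// directory otherwise.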
if (isInternalFileUrl(filePath)) {
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
}
if (filePath.startsWith('http://') || filePath.startsWith('https://')) {
return handleExternalUrl(filePath, fileType, workspaceId, userId, executionContext)
}
if (isUsingCloudStorage()) {
return handleCloudFile(filePath, fileType, undefined, userId, executionContext)
}
return handleLocalFile(filePath, fileType, userId, executionContext)
}
/**
* Validate file path for security - prevents null byte injection and path traversal attacks
*/
function validateFilePath(filePath: string): { isValid: boolean; error?: string } {
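// Rejects null bytes, parent-directory ("..") and home-directory ("~")
// references, absolute POSIX paths outside the internal serve namespace, and
// Windows drive paths (e.g. "C:\").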
if (filePath.includes('\0')) {
return { isValid: false, error: 'Invalid path: null byte detected' }
}
if (filePath.includes('..')) {
return { isValid: false, error: 'Access denied: path traversal detected' }
}
if (filePath.includes('~')) {
return { isValid: false, error: 'Invalid path: tilde character not allowed' }
}
if (filePath.startsWith('/') && !isInternalFileUrl(filePath)) {
return { isValid: false, error: 'Path outside allowed directory' }
}
if (/^[A-Za-z]:\\/.test(filePath)) {
return { isValid: false, error: 'Path outside allowed directory' }
}
return { isValid: true }
}
/**
* Handle external URL
* If workspaceId is provided, checks if file already exists and saves to workspace if not
* If executionContext is provided, also stores the file in execution storage and returns UserFile
*/
async function handleExternalUrl(
url: string,
fileType: string,
workspaceId: string,
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
logger.info('Fetching external URL:', url)
logger.info('WorkspaceId for URL save:', workspaceId)
const urlValidation = await validateUrlWithDNS(url, 'fileUrl')
if (!urlValidation.isValid) {
logger.warn(`Blocked external URL request: ${urlValidation.error}`)
return {
success: false,
error: urlValidation.error || 'Invalid external URL',
filePath: url,
}
}
const urlPath = new URL(url).pathname
const filename = urlPath.split('/').pop() || 'download'
const extension = path.extname(filename).toLowerCase().substring(1)
logger.info(`Extracted filename: ${filename}, workspaceId: ${workspaceId}`)
const {
S3_EXECUTION_FILES_CONFIG,
BLOB_EXECUTION_FILES_CONFIG,
USE_S3_STORAGE,
USE_BLOB_STORAGE,
} = await import('@/lib/uploads/config')
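// Detect whether the URL already points at the execution-files bucket (S3) or
// container (blob storage); such files skip the workspace deduplication below
// because they are already managed by execution storage.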
let isExecutionFile = false
try {
const parsedUrl = new URL(url)
if (USE_S3_STORAGE && S3_EXECUTION_FILES_CONFIG.bucket) {
const bucketInHost = parsedUrl.hostname.startsWith(S3_EXECUTION_FILES_CONFIG.bucket)
const bucketInPath = parsedUrl.pathname.startsWith(`/${S3_EXECUTION_FILES_CONFIG.bucket}/`)
isExecutionFile = bucketInHost || bucketInPath
} else if (USE_BLOB_STORAGE && BLOB_EXECUTION_FILES_CONFIG.containerName) {
isExecutionFile = url.includes(`/${BLOB_EXECUTION_FILES_CONFIG.containerName}/`)
}
} catch (error) {
logger.warn('Failed to parse URL for execution file check:', error)
isExecutionFile = false
}
// Only apply workspace deduplication if:
// 1. WorkspaceId is provided
// 2. URL is NOT from execution files bucket/container
const shouldCheckWorkspace = workspaceId && !isExecutionFile
if (shouldCheckWorkspace) {
const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
if (permission === null) {
logger.warn('User does not have workspace access for file parse', {
userId,
workspaceId,
filename,
})
return {
success: false,
error: 'File not found',
filePath: url,
}
}
const { fileExistsInWorkspace, listWorkspaceFiles } = await import(
'@/lib/uploads/contexts/workspace'
)
const exists = await fileExistsInWorkspace(workspaceId, filename)
if (exists) {
logger.info(`File ${filename} already exists in workspace, using existing file`)
const workspaceFiles = await listWorkspaceFiles(workspaceId)
const existingFile = workspaceFiles.find((f) => f.name === filename)
if (existingFile) {
const storageFilePath = `/api/files/serve/${existingFile.key}`
return handleCloudFile(storageFilePath, fileType, 'workspace', userId, executionContext)
}
}
}
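// Download through the IP-pinned fetch helper using the DNS-validated address
// (presumably to avoid rebinding between validation and fetch), enforcing
// MAX_DOWNLOAD_SIZE_BYTES against both the Content-Length header and the
// actual buffer size.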
const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP!, {
timeout: DOWNLOAD_TIMEOUT_MS,
})
if (!response.ok) {
throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`)
}
const contentLength = response.headers.get('content-length')
if (contentLength && Number.parseInt(contentLength) > MAX_DOWNLOAD_SIZE_BYTES) {
throw new Error(`File too large: ${contentLength} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`)
}
const buffer = Buffer.from(await response.arrayBuffer())
if (buffer.length > MAX_DOWNLOAD_SIZE_BYTES) {
throw new Error(`File too large: ${buffer.length} bytes (max: ${MAX_DOWNLOAD_SIZE_BYTES})`)
}
logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`)
let userFile: UserFile | undefined
const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension)
if (executionContext) {
try {
userFile = await uploadExecutionFile(executionContext, buffer, filename, mimeType, userId)
logger.info(`Stored file in execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to store file in execution storage:`, uploadError)
// Continue without userFile - parsing can still work
}
}
if (shouldCheckWorkspace) {
try {
const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId)
if (permission !== 'admin' && permission !== 'write') {
logger.warn('User does not have write permission for workspace file save', {
userId,
workspaceId,
filename,
permission,
})
} else {
const { uploadWorkspaceFile } = await import('@/lib/uploads/contexts/workspace')
await uploadWorkspaceFile(workspaceId, userId, buffer, filename, mimeType)
logger.info(`Saved URL file to workspace storage: ${filename}`)
}
} catch (saveError) {
logger.warn(`Failed to save URL file to workspace:`, saveError)
}
}
let parseResult: ParseResult
if (extension === 'pdf') {
parseResult = await handlePdfBuffer(buffer, filename, fileType, url)
} else if (extension === 'csv') {
parseResult = await handleCsvBuffer(buffer, filename, fileType, url)
} else if (isSupportedFileType(extension)) {
parseResult = await handleGenericTextBuffer(buffer, filename, extension, fileType, url)
} else {
parseResult = handleGenericBuffer(buffer, filename, extension, fileType)
}
// Attach userFile to the result
if (userFile) {
parseResult.userFile = userFile
}
return parseResult
} catch (error) {
logger.error(`Error handling external URL ${sanitizeUrlForLog(url)}:`, error)
return {
success: false,
error: `Error fetching URL: ${(error as Error).message}`,
filePath: url,
}
}
}
/**
* Handle file stored in cloud storage
* If executionContext is provided and file is not already from execution storage,
* copies the file to execution storage and returns UserFile
*/
async function handleCloudFile(
filePath: string,
fileType: string,
explicitContext: string | undefined,
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
const cloudKey = extractStorageKey(filePath)
logger.info('Extracted cloud key:', cloudKey)
const context = (explicitContext as StorageContext) || inferContextFromKey(cloudKey)
const hasAccess = await verifyFileAccess(
cloudKey,
userId,
undefined, // customConfig
context, // context
false // isLocal
)
if (!hasAccess) {
logger.warn('Unauthorized cloud file parse attempt', { userId, key: cloudKey, context })
return {
success: false,
error: 'File not found',
filePath,
}
}
let originalFilename: string | undefined
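// For workspace files, look up the original filename in the metadata store so
// results report the user-facing name rather than the storage key.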
if (context === 'workspace') {
try {
const fileRecord = await getFileMetadataByKey(cloudKey, 'workspace')
if (fileRecord) {
originalFilename = fileRecord.originalName
logger.debug(`Found original filename for workspace file: ${originalFilename}`)
}
} catch (dbError) {
logger.debug(`Failed to lookup original filename for ${cloudKey}:`, dbError)
}
}
const fileBuffer = await StorageService.downloadFile({ key: cloudKey, context })
logger.info(
`Downloaded file from ${context} storage (${explicitContext ? 'explicit' : 'inferred'}): ${cloudKey}, size: ${fileBuffer.length} bytes`
)
const filename = originalFilename || cloudKey.split('/').pop() || cloudKey
const extension = path.extname(filename).toLowerCase().substring(1)
const mimeType = getMimeTypeFromExtension(extension)
const normalizedFilePath = `/api/files/serve/${encodeURIComponent(cloudKey)}?context=${context}`
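// Derive the owning workspace for the viewer URL: execution keys embed the
// workspace id, while workspace keys begin with a UUID-shaped workspace
// segment.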
let workspaceIdFromKey: string | undefined
if (context === 'execution') {
workspaceIdFromKey = extractWorkspaceIdFromExecutionKey(cloudKey) || undefined
} else if (context === 'workspace') {
const segments = cloudKey.split('/')
if (segments.length >= 2 && /^[a-f0-9-]{36}$/.test(segments[0])) {
workspaceIdFromKey = segments[0]
}
}
const viewerUrl = getViewerUrl(cloudKey, workspaceIdFromKey)
// Store file in execution storage if executionContext is provided
let userFile: UserFile | undefined
if (executionContext) {
// If file is already from execution context, create UserFile reference without re-uploading
if (context === 'execution') {
userFile = {
id: `file_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`,
name: filename,
url: normalizedFilePath,
size: fileBuffer.length,
type: mimeType,
key: cloudKey,
context: 'execution',
}
logger.info(`Created UserFile reference for existing execution file: ${filename}`)
} else {
// Copy from workspace/other storage to execution storage
try {
userFile = await uploadExecutionFile(
executionContext,
fileBuffer,
filename,
mimeType,
userId
)
logger.info(`Copied file to execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to copy file to execution storage:`, uploadError)
}
}
}
let parseResult: ParseResult
if (extension === 'pdf') {
parseResult = await handlePdfBuffer(fileBuffer, filename, fileType, normalizedFilePath)
} else if (extension === 'csv') {
parseResult = await handleCsvBuffer(fileBuffer, filename, fileType, normalizedFilePath)
} else if (isSupportedFileType(extension)) {
parseResult = await handleGenericTextBuffer(
fileBuffer,
filename,
extension,
fileType,
normalizedFilePath
)
} else {
parseResult = handleGenericBuffer(fileBuffer, filename, extension, fileType)
parseResult.filePath = normalizedFilePath
}
if (originalFilename) {
parseResult.originalName = originalFilename
}
parseResult.viewerUrl = viewerUrl
// Attach userFile to the result
if (userFile) {
parseResult.userFile = userFile
}
return parseResult
} catch (error) {
logger.error(`Error handling cloud file ${filePath}:`, error)
const errorMessage = (error as Error).message
if (errorMessage.includes('Access denied') || errorMessage.includes('Forbidden')) {
throw new Error(`Error accessing file from cloud storage: ${errorMessage}`)
}
return {
success: false,
error: `Error accessing file from cloud storage: ${errorMessage}`,
filePath,
}
}
}
/**
* Handle local file
*/
async function handleLocalFile(
filePath: string,
fileType: string,
userId: string,
executionContext?: ExecutionContext
): Promise<ParseResult> {
try {
const filename = filePath.split('/').pop() || filePath
const context = inferContextFromKey(filename)
const hasAccess = await verifyFileAccess(
filename,
userId,
undefined, // customConfig
context, // context
true // isLocal
)
if (!hasAccess) {
logger.warn('Unauthorized local file parse attempt', { userId, filename })
return {
success: false,
error: 'File not found',
filePath,
}
}
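// Only the basename is used here, resolved strictly under UPLOAD_DIR_SERVER;
// traversal sequences were already rejected by validateFilePath.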
const fullPath = path.join(UPLOAD_DIR_SERVER, filename)
logger.info('Processing local file:', fullPath)
try {
await fsPromises.access(fullPath)
} catch {
throw new Error(`File not found: ${filename}`)
}
const result = await parseFile(fullPath)
const stats = await fsPromises.stat(fullPath)
const fileBuffer = await readFile(fullPath)
const hash = createHash('md5').update(fileBuffer).digest('hex')
const extension = path.extname(filename).toLowerCase().substring(1)
const mimeType = fileType || getMimeTypeFromExtension(extension)
// Store file in execution storage if executionContext is provided
let userFile: UserFile | undefined
if (executionContext) {
try {
userFile = await uploadExecutionFile(
executionContext,
fileBuffer,
filename,
mimeType,
userId
)
logger.info(`Stored local file in execution storage: ${filename}`, { key: userFile.key })
} catch (uploadError) {
logger.warn(`Failed to store local file in execution storage:`, uploadError)
}
}
return {
success: true,
content: result.content,
filePath,
userFile,
metadata: {
fileType: mimeType,
size: stats.size,
hash,
processingTime: 0,
},
}
} catch (error) {
logger.error(`Error handling local file ${filePath}:`, error)
return {
success: false,
error: `Error processing local file: ${(error as Error).message}`,
filePath,
}
}
}
/**
* Handle a PDF buffer directly in memory
*/
async function handlePdfBuffer(
fileBuffer: Buffer,
filename: string,
fileType?: string,
originalPath?: string
): Promise<ParseResult> {
try {
logger.info(`Parsing PDF in memory: ${filename}`)
const result = await parseBufferAsPdf(fileBuffer)
const content =
result.content ||
createPdfFallbackMessage(result.metadata?.pageCount || 0, fileBuffer.length, originalPath)
return {
success: true,
content,
filePath: originalPath || filename,
metadata: {
fileType: fileType || 'application/pdf',
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
} catch (error) {
logger.error('Failed to parse PDF in memory:', error)
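// PDF parse failures are non-fatal: return a readable failure message as the
// content (still success: true) so downstream blocks receive usable output.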
const content = createPdfFailureMessage(
0,
fileBuffer.length,
originalPath || filename,
(error as Error).message
)
return {
success: true,
content,
filePath: originalPath || filename,
metadata: {
fileType: fileType || 'application/pdf',
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
}
}
/**
* Handle a CSV buffer directly in memory
*/
async function handleCsvBuffer(
fileBuffer: Buffer,
filename: string,
fileType?: string,
originalPath?: string
): Promise<ParseResult> {
try {
logger.info(`Parsing CSV in memory: ${filename}`)
const { parseBuffer } = await import('@/lib/file-parsers')
const result = await parseBuffer(fileBuffer, 'csv')
return {
success: true,
content: result.content,
filePath: originalPath || filename,
metadata: {
fileType: fileType || 'text/csv',
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
} catch (error) {
logger.error('Failed to parse CSV in memory:', error)
return {
success: false,
error: `Failed to parse CSV: ${(error as Error).message}`,
filePath: originalPath || filename,
metadata: {
fileType: 'text/csv',
size: 0,
hash: '',
processingTime: 0,
},
}
}
}
/**
* Handle a generic text file buffer in memory
*/
async function handleGenericTextBuffer(
fileBuffer: Buffer,
filename: string,
extension: string,
fileType?: string,
originalPath?: string
): Promise<ParseResult> {
try {
logger.info(`Parsing text file in memory: ${filename}`)
try {
const { parseBuffer, isSupportedFileType } = await import('@/lib/file-parsers')
if (isSupportedFileType(extension)) {
const result = await parseBuffer(fileBuffer, extension)
return {
success: true,
content: result.content,
filePath: originalPath || filename,
metadata: {
fileType: fileType || getMimeTypeFromExtension(extension),
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
}
} catch (parserError) {
logger.warn('Specialized parser failed, falling back to generic parsing:', parserError)
}
const content = fileBuffer.toString('utf-8')
return {
success: true,
content,
filePath: originalPath || filename,
metadata: {
fileType: fileType || getMimeTypeFromExtension(extension),
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
} catch (error) {
logger.error('Failed to parse text file in memory:', error)
return {
success: false,
error: `Failed to parse file: ${(error as Error).message}`,
filePath: originalPath || filename,
metadata: {
fileType: 'text/plain',
size: 0,
hash: '',
processingTime: 0,
},
}
}
}
/**
* Handle a generic binary buffer
*/
function handleGenericBuffer(
fileBuffer: Buffer,
filename: string,
extension: string,
fileType?: string
): ParseResult {
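// Use the binary-extensions list to decide between a UTF-8 decode and a
// placeholder string describing the binary payload.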
const isBinary = binaryExtensionsList.includes(extension)
const content = isBinary
? `[Binary ${extension.toUpperCase()} file - ${fileBuffer.length} bytes]`
: fileBuffer.toString('utf-8')
return {
success: true,
content,
filePath: filename,
metadata: {
fileType: fileType || getMimeTypeFromExtension(extension),
size: fileBuffer.length,
hash: createHash('md5').update(fileBuffer).digest('hex'),
processingTime: 0,
},
}
}
/**
* Parse a PDF buffer
*/
async function parseBufferAsPdf(buffer: Buffer) {
try {
const { PdfParser } = await import('@/lib/file-parsers/pdf-parser')
const parser = new PdfParser()
logger.info('Using main PDF parser for buffer')
return await parser.parseBuffer(buffer)
} catch (error) {
throw new Error(`PDF parsing failed: ${(error as Error).message}`)
}
}
/**
* Format bytes as a human-readable size
*/
function prettySize(bytes: number): string {
if (bytes === 0) return '0 Bytes'
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']
const i = Math.floor(Math.log(bytes) / Math.log(1024))
return `${Number.parseFloat((bytes / 1024 ** i).toFixed(2))} ${sizes[i]}`
}
/**
* Create a formatted message for PDF content
*/
function createPdfFallbackMessage(pageCount: number, size: number, path?: string): string {
const formattedPath = path || 'Unknown path'
return `PDF document - ${pageCount} page(s), ${prettySize(size)}
Path: ${formattedPath}
This file appears to be a PDF document that could not be fully processed as text.
Please use a PDF viewer for best results.`
}
/**
* Create a readable error message for a PDF parsing failure
*/
function createPdfFailureMessage(
pageCount: number,
size: number,
path: string,
error: string
): string {
return `PDF document - Processing failed, ${prettySize(size)}
Path: ${path}
Error: ${error}
This file appears to be a PDF document that could not be processed.
Please use a PDF viewer for best results.`
}