feat(files): local upload, s3 upload for pdf/docx/csv parsing (#213)

* feat(file-parse): local upload; new block

* feat(file-parse): complete for local usage; need to integrate S3 for hosted version

* fix(file-parse): pdf parsing

* added support for multiple file upload & delete

* added s3 file upload/fetch for hosted platform

* swapped console for logger

* added unit tests for files

* add s3 client, resolved build error

---------

Co-authored-by: Emir Karabeg <emirkarabeg@berkeley.edu>
Author: Waleed Latif
Committed by GitHub on 2025-04-01 03:52:50 -07:00
parent 275183dc88 · commit 92b20455f1
35 changed files with 4715 additions and 27 deletions


@@ -13,4 +13,19 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 64` to generate
# If left commented out, emails will be logged to console instead
# StackBlitz (Webcontainer) API Key (Optional, for handling sandboxed code execution for functions/custom-tools)
# WEBCONTAINER_CLIENT_ID= # Uncomment and add your key from https://stackblitz.com/docs/webcontainer-api#webcontainer-client-id
# S3 Storage Configuration (Optional)
# Set USE_S3=true to enable S3 storage in development
# USE_S3=true
# AWS Credentials (Required when USE_S3=true)
# AWS_ACCESS_KEY_ID=your-access-key-id
# AWS_SECRET_ACCESS_KEY=your-secret-access-key
# S3 Configuration (Required when USE_S3=true)
# S3_BUCKET_NAME=your-bucket-name
# AWS_REGION=us-east-1
# Optional: Custom S3 Base URL (for custom domains or non-AWS S3-compatible storage)
# S3_BASE_URL=https://your-custom-domain.com
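
These variables are consumed by the uploads setup module (`@/lib/uploads/setup`), which the diff references throughout but does not show. A minimal sketch consistent with the exports the tests below mock (UPLOAD_DIR, USE_S3_STORAGE, S3_CONFIG, ensureUploadsDirectory); defaulting to S3 in the hosted/production build is an assumption:

import { mkdir } from 'fs/promises'
import { join } from 'path'

// Local uploads land here when S3 is disabled
export const UPLOAD_DIR = join(process.cwd(), 'uploads')

// Assumption: S3 is used when USE_S3=true or in the hosted (production) build
export const USE_S3_STORAGE =
  process.env.USE_S3 === 'true' || process.env.NODE_ENV === 'production'

export const S3_CONFIG = {
  bucket: process.env.S3_BUCKET_NAME || '',
  region: process.env.AWS_REGION || 'us-east-1',
  // S3_BASE_URL overrides the default bucket URL for custom domains
  // or non-AWS S3-compatible storage
  baseUrl:
    process.env.S3_BASE_URL ||
    `https://${process.env.S3_BUCKET_NAME}.s3.${process.env.AWS_REGION}.amazonaws.com`,
}

export async function ensureUploadsDirectory(): Promise<boolean> {
  await mkdir(UPLOAD_DIR, { recursive: true })
  return true
}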

sim/.gitignore

@@ -52,3 +52,7 @@ next-env.d.ts
# cursorrules
.cursorrules
# file uploads
uploads/
uploads/*


@@ -0,0 +1,174 @@
/**
* Tests for file delete API route
*
* @vitest-environment node
*/
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { createMockRequest } from '@/app/api/__test-utils__/utils'
describe('File Delete API Route', () => {
// Mock file system modules
const mockUnlink = vi.fn().mockResolvedValue(undefined)
const mockExistsSync = vi.fn().mockReturnValue(true)
const mockDeleteFromS3 = vi.fn().mockResolvedValue(undefined)
const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)
beforeEach(() => {
vi.resetModules()
// Mock filesystem operations
vi.doMock('fs', () => ({
existsSync: mockExistsSync,
}))
vi.doMock('fs/promises', () => ({
unlink: mockUnlink,
}))
// Mock the S3 client
vi.doMock('@/lib/uploads/s3-client', () => ({
deleteFromS3: mockDeleteFromS3,
}))
// Mock the logger
vi.doMock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}),
}))
// Configure upload directory and S3 mode with all required exports
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
ensureUploadsDirectory: mockEnsureUploadsDirectory,
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
},
}))
// Skip setup.server.ts side effects
vi.doMock('@/lib/uploads/setup.server', () => ({}))
})
afterEach(() => {
vi.clearAllMocks()
})
it('should handle local file deletion successfully', async () => {
// Configure upload directory and S3 mode for this test
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
}))
// Create request with file path
const req = createMockRequest('POST', {
filePath: '/api/files/serve/test-file.txt',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('message', 'File deleted successfully')
// Verify unlink was called with correct path
expect(mockUnlink).toHaveBeenCalledWith('/test/uploads/test-file.txt')
})
it('should handle file not found gracefully', async () => {
// Mock file not existing
mockExistsSync.mockReturnValueOnce(false)
// Create request with file path
const req = createMockRequest('POST', {
filePath: '/api/files/serve/nonexistent.txt',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('message', "File not found, but that's okay")
// Verify unlink was not called
expect(mockUnlink).not.toHaveBeenCalled()
})
it('should handle S3 file deletion successfully', async () => {
// Configure upload directory and S3 mode for this test
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Create request with S3 file path
const req = createMockRequest('POST', {
filePath: '/api/files/serve/s3/1234567890-test-file.txt',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('message', 'File deleted successfully from S3')
// Verify deleteFromS3 was called with correct key
expect(mockDeleteFromS3).toHaveBeenCalledWith('1234567890-test-file.txt')
})
it('should handle missing file path', async () => {
// Create request with no file path
const req = createMockRequest('POST', {})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify error response
expect(response.status).toBe(400)
expect(data).toHaveProperty('error', 'InvalidRequestError')
expect(data).toHaveProperty('message', 'No file path provided')
})
it('should handle CORS preflight requests', async () => {
// Import the handler after mocks are set up
const { OPTIONS } = await import('./route')
// Call the handler
const response = await OPTIONS()
// Verify response
expect(response.status).toBe(204)
expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS')
expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type')
})
})
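
The tests above import createMockRequest from a shared `__test-utils__` module that is not part of this diff. A minimal sketch matching how it is called here (HTTP method plus an optional JSON body); the URL is a placeholder:

import { NextRequest } from 'next/server'

export function createMockRequest(method: string, body?: Record<string, unknown>): NextRequest {
  return new NextRequest('http://localhost:3000/api/test', {
    method,
    headers: body ? { 'Content-Type': 'application/json' } : undefined,
    body: body ? JSON.stringify(body) : undefined,
  })
}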


@@ -0,0 +1,115 @@
import { NextRequest } from 'next/server'
import { existsSync } from 'fs'
import { unlink } from 'fs/promises'
import { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'
import { deleteFromS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import {
createErrorResponse,
createOptionsResponse,
createSuccessResponse,
extractFilename,
extractS3Key,
InvalidRequestError,
isS3Path,
} from '../utils'
const logger = createLogger('FilesDeleteAPI')
/**
* Main API route handler for file deletion
*/
export async function POST(request: NextRequest) {
try {
const requestData = await request.json()
const { filePath } = requestData
logger.info('File delete request received:', { filePath })
if (!filePath) {
throw new InvalidRequestError('No file path provided')
}
try {
// Use appropriate handler based on path and environment
const result =
isS3Path(filePath) || USE_S3_STORAGE
? await handleS3FileDelete(filePath)
: await handleLocalFileDelete(filePath)
// Return success response
return createSuccessResponse(result)
} catch (error) {
logger.error('Error deleting file:', error)
return createErrorResponse(
error instanceof Error ? error : new Error('Failed to delete file')
)
}
} catch (error) {
logger.error('Error parsing request:', error)
return createErrorResponse(error instanceof Error ? error : new Error('Invalid request'))
}
}
/**
* Handle S3 file deletion
*/
async function handleS3FileDelete(filePath: string) {
// Extract the S3 key from the path
const s3Key = extractS3Key(filePath)
logger.info(`Deleting file from S3: ${s3Key}`)
try {
// Delete from S3
await deleteFromS3(s3Key)
logger.info(`File successfully deleted from S3: ${s3Key}`)
return {
success: true as const,
message: 'File deleted successfully from S3',
}
} catch (error) {
logger.error('Error deleting file from S3:', error)
throw error
}
}
/**
* Handle local file deletion
*/
async function handleLocalFileDelete(filePath: string) {
// Extract the filename from the path
const filename = extractFilename(filePath)
logger.info('Extracted filename for deletion:', filename)
const fullPath = join(UPLOAD_DIR, filename)
logger.info('Full file path for deletion:', fullPath)
// Check if file exists
if (!existsSync(fullPath)) {
logger.info(`File not found for deletion at path: ${fullPath}`)
return {
success: true as const,
message: "File not found, but that's okay",
}
}
// Delete the file
await unlink(fullPath)
logger.info(`File successfully deleted: ${fullPath}`)
return {
success: true as const,
message: 'File deleted successfully',
}
}
/**
* Handle CORS preflight requests
*/
export async function OPTIONS() {
return createOptionsResponse()
}
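
For reference, a minimal client call to this endpoint (the FileUpload component later in this diff makes the same request; the wrapper function name is illustrative):

async function deleteUploadedFile(filePath: string) {
  const response = await fetch('/api/files/delete', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ filePath }),
  })
  // Success: { success: true, message: 'File deleted successfully' }
  // Missing path: 400 with { error: 'InvalidRequestError', message: 'No file path provided' }
  return response.json()
}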


@@ -0,0 +1,249 @@
/**
* Tests for file parse API route
*
* @vitest-environment node
*/
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { createMockRequest } from '@/app/api/__test-utils__/utils'
describe('File Parse API Route', () => {
// Mock file system and parser modules
const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test file content'))
const mockWriteFile = vi.fn().mockResolvedValue(undefined)
const mockUnlink = vi.fn().mockResolvedValue(undefined)
const mockExistsSync = vi.fn().mockReturnValue(true)
const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content'))
const mockParseFile = vi.fn().mockResolvedValue({
content: 'parsed content',
metadata: { pageCount: 1 },
})
const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)
beforeEach(() => {
vi.resetModules()
// Mock filesystem operations
vi.doMock('fs', () => ({
existsSync: mockExistsSync,
}))
vi.doMock('fs/promises', () => ({
readFile: mockReadFile,
writeFile: mockWriteFile,
unlink: mockUnlink,
}))
// Mock the S3 client
vi.doMock('@/lib/uploads/s3-client', () => ({
downloadFromS3: mockDownloadFromS3,
}))
// Mock file parsers
vi.doMock('@/lib/file-parsers', () => ({
isSupportedFileType: vi.fn().mockReturnValue(true),
parseFile: mockParseFile,
}))
// Mock the logger
vi.doMock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}),
}))
// Configure upload directory and S3 mode with all required exports
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
ensureUploadsDirectory: mockEnsureUploadsDirectory,
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
},
}))
// Skip setup.server.ts side effects
vi.doMock('@/lib/uploads/setup.server', () => ({}))
})
afterEach(() => {
vi.clearAllMocks()
})
it('should parse local file successfully', async () => {
// Create request with file path
const req = createMockRequest('POST', {
filePath: '/api/files/serve/test-file.txt',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('output')
expect(data.output).toHaveProperty('content', 'parsed content')
expect(data.output).toHaveProperty('name', 'test-file.txt')
// Verify readFile was called with correct path
expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt')
})
it('should parse S3 file successfully', async () => {
// Configure S3 storage mode
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Create request with S3 file path
const req = createMockRequest('POST', {
filePath: '/api/files/serve/s3/1234567890-test-file.pdf',
fileType: 'application/pdf',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('output')
expect(data.output).toHaveProperty('content', 'parsed content')
expect(data.output).toHaveProperty('metadata')
expect(data.output.metadata).toHaveProperty('pageCount', 1)
// Verify S3 download was called with correct key
expect(mockDownloadFromS3).toHaveBeenCalledWith('1234567890-test-file.pdf')
// Verify temporary file was created and cleaned up
expect(mockWriteFile).toHaveBeenCalled()
expect(mockUnlink).toHaveBeenCalled()
})
it('should handle multiple files', async () => {
// Create request with multiple file paths
const req = createMockRequest('POST', {
filePath: ['/api/files/serve/file1.txt', '/api/files/serve/file2.txt'],
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('results')
expect(Array.isArray(data.results)).toBe(true)
expect(data.results).toHaveLength(2)
expect(data.results[0]).toHaveProperty('success', true)
expect(data.results[1]).toHaveProperty('success', true)
})
it('should handle file not found', async () => {
// Mock file not existing for this test
mockExistsSync.mockReturnValueOnce(false)
// Create request with nonexistent file
const req = createMockRequest('POST', {
filePath: '/api/files/serve/nonexistent.txt',
})
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
expect(response.status).toBe(200)
if (data.success === true) {
expect(data).toHaveProperty('output')
expect(data.output).toHaveProperty('content')
} else {
expect(data).toHaveProperty('error')
expect(data.error).toContain('File not found')
}
})
it('should handle unsupported file types with generic parser', async () => {
// Mock file not being a supported type
vi.doMock('@/lib/file-parsers', () => ({
isSupportedFileType: vi.fn().mockReturnValue(false),
parseFile: mockParseFile,
}))
// Create request with unsupported file type
const req = createMockRequest('POST', {
filePath: '/api/files/serve/test-file.xyz',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response uses generic handling
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data).toHaveProperty('output')
expect(data.output).toHaveProperty('binary', false)
})
it('should handle missing file path', async () => {
// Create request with no file path
const req = createMockRequest('POST', {})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify error response
expect(response.status).toBe(400)
expect(data).toHaveProperty('error', 'No file path provided')
})
it('should handle parser errors gracefully', async () => {
// Mock parser error
mockParseFile.mockRejectedValueOnce(new Error('Parser failure'))
// Create request with file that will fail parsing
const req = createMockRequest('POST', {
filePath: '/api/files/serve/error-file.txt',
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify error was handled
expect(response.status).toBe(200)
expect(data).toHaveProperty('success', true)
expect(data.output).toHaveProperty('content')
})
})
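
Both this test file and the route below depend on `@/lib/file-parsers` (isSupportedFileType, parseFile), which is not included in this excerpt. A hedged sketch of its contract; the supported-extension list follows the PR title (pdf/docx/csv), and the concrete parser backends are assumptions:

import { readFile } from 'fs/promises'
import path from 'path'

export interface FileParseResult {
  content: string
  metadata?: Record<string, any>
}

// Per the PR title, dedicated parsers exist for pdf, docx, and csv
const SUPPORTED_EXTENSIONS = ['pdf', 'docx', 'csv']

export function isSupportedFileType(extension: string): boolean {
  return SUPPORTED_EXTENSIONS.includes(extension.toLowerCase())
}

export async function parseFile(filePath: string): Promise<FileParseResult> {
  const extension = path.extname(filePath).toLowerCase().substring(1)
  if (!isSupportedFileType(extension)) {
    throw new Error(`Unsupported file type: ${extension}`)
  }
  // The real module would dispatch to a per-format backend here
  // (e.g. a PDF text extractor for pdf, a DOCX converter for docx);
  // reading the raw bytes is only a stand-in for this sketch.
  const buffer = await readFile(filePath)
  return { content: buffer.toString('utf-8'), metadata: {} }
}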


@@ -0,0 +1,427 @@
import { NextRequest, NextResponse } from 'next/server'
import { existsSync } from 'fs'
import { readFile, unlink, writeFile } from 'fs/promises'
import path, { join } from 'path'
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
import { createLogger } from '@/lib/logs/console-logger'
import { downloadFromS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
import '@/lib/uploads/setup.server'
const logger = createLogger('FilesParseAPI')
interface ParseSuccessResult {
success: true
output: {
content: string
fileType: string
size: number
name: string
binary: boolean
metadata?: Record<string, any>
}
filePath?: string
}
interface ParseErrorResult {
success: false
error: string
filePath?: string
}
type ParseResult = ParseSuccessResult | ParseErrorResult
// MIME type mapping for various file extensions
const fileTypeMap: Record<string, string> = {
// Text formats
txt: 'text/plain',
csv: 'text/csv',
json: 'application/json',
xml: 'application/xml',
md: 'text/markdown',
html: 'text/html',
css: 'text/css',
js: 'application/javascript',
ts: 'application/typescript',
// Document formats
pdf: 'application/pdf',
doc: 'application/msword',
docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
// Spreadsheet formats
xls: 'application/vnd.ms-excel',
xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
// Presentation formats
ppt: 'application/vnd.ms-powerpoint',
pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
// Image formats
png: 'image/png',
jpg: 'image/jpeg',
jpeg: 'image/jpeg',
gif: 'image/gif',
// Archive formats
zip: 'application/zip',
}
// Binary file extensions
const binaryExtensions = [
'doc',
'docx',
'xls',
'xlsx',
'ppt',
'pptx',
'zip',
'png',
'jpg',
'jpeg',
'gif',
]
/**
* Main API route handler
*/
export async function POST(request: NextRequest) {
try {
const requestData = await request.json()
const { filePath, fileType } = requestData
logger.info('File parse request received:', { filePath, fileType })
if (!filePath) {
return NextResponse.json({ error: 'No file path provided' }, { status: 400 })
}
// Handle both single file path and array of file paths
const filePaths = Array.isArray(filePath) ? filePath : [filePath]
// Parse each file
const results = await Promise.all(
filePaths.map(async (singleFilePath) => {
try {
return await parseFileSingle(singleFilePath, fileType)
} catch (error) {
logger.error(`Error parsing file ${singleFilePath}:`, error)
return {
success: false,
error: (error as Error).message,
filePath: singleFilePath,
} as ParseErrorResult
}
})
)
// If it was a single file request, return a single result
// Otherwise return an array of results
if (!Array.isArray(filePath)) {
// Single file was requested
const result = results[0]
if (!result.success) {
return NextResponse.json({ error: result.error }, { status: 400 })
}
return NextResponse.json(result)
}
// Multiple files were requested
return NextResponse.json({
success: true,
results,
})
} catch (error) {
logger.error('Error parsing file(s):', error)
return NextResponse.json(
{ error: 'Failed to parse file(s)', message: (error as Error).message },
{ status: 500 }
)
}
}
/**
* Parse a single file and return its content
*/
async function parseFileSingle(filePath: string, fileType?: string): Promise<ParseResult> {
logger.info('Parsing file:', filePath)
// Check if this is an S3 path
const isS3Path = filePath.includes('/api/files/serve/s3/')
// Use S3 handler if it's an S3 path or we're in S3 mode
if (isS3Path || USE_S3_STORAGE) {
return handleS3File(filePath, fileType)
}
// Use local handler for local files
return handleLocalFile(filePath, fileType)
}
/**
* Handle file stored in S3
*/
async function handleS3File(filePath: string, fileType?: string): Promise<ParseResult> {
try {
// Extract the S3 key from the path
const isS3Path = filePath.includes('/api/files/serve/s3/')
const s3Key = isS3Path
? decodeURIComponent(filePath.split('/api/files/serve/s3/')[1])
: filePath
logger.info('Extracted S3 key:', s3Key)
// Download the file from S3
const fileBuffer = await downloadFromS3(s3Key)
logger.info(`Downloaded file from S3: ${s3Key}, size: ${fileBuffer.length} bytes`)
// Extract the filename from the S3 key
const filename = s3Key.split('/').pop() || s3Key
const extension = path.extname(filename).toLowerCase().substring(1)
// Create a temporary file path
const tempFilePath = join(UPLOAD_DIR, `temp-${Date.now()}-${filename}`)
try {
// Save to a temporary file so we can use existing parsers
await writeFile(tempFilePath, fileBuffer)
// Process the file based on its type
const result = isSupportedFileType(extension)
? await processWithSpecializedParser(tempFilePath, filename, extension, fileType, filePath)
: await handleGenericFile(tempFilePath, filename, extension, fileType)
return result
} finally {
// Clean up the temporary file regardless of outcome
if (existsSync(tempFilePath)) {
await unlink(tempFilePath).catch((err) => logger.error('Error removing temp file:', err))
}
}
} catch (error) {
logger.error(`Error handling S3 file ${filePath}:`, error)
return {
success: false,
error: `Error accessing file from S3: ${(error as Error).message}`,
filePath,
}
}
}
/**
* Handle file stored locally
*/
async function handleLocalFile(filePath: string, fileType?: string): Promise<ParseResult> {
// Extract the filename from the path
const filename = filePath.startsWith('/api/files/serve/')
? filePath.substring('/api/files/serve/'.length)
: path.basename(filePath)
logger.info('Processing local file:', filename)
// Try several possible file paths
const possiblePaths = [join(UPLOAD_DIR, filename), join(process.cwd(), 'uploads', filename)]
// Find the actual file path
let actualPath = ''
for (const p of possiblePaths) {
if (existsSync(p)) {
actualPath = p
logger.info(`Found file at: ${actualPath}`)
break
}
}
if (!actualPath) {
return {
success: false,
error: `File not found: ${filename}`,
filePath,
}
}
const extension = path.extname(filename).toLowerCase().substring(1)
// Process the file based on its type
return isSupportedFileType(extension)
? await processWithSpecializedParser(actualPath, filename, extension, fileType, filePath)
: await handleGenericFile(actualPath, filename, extension, fileType)
}
/**
* Process a file with a specialized parser
*/
async function processWithSpecializedParser(
filePath: string,
filename: string,
extension: string,
fileType?: string,
originalPath?: string
): Promise<ParseResult> {
try {
logger.info(`Parsing ${filename} with specialized parser for ${extension}`)
const result = await parseFile(filePath)
// Get file stats
const fileBuffer = await readFile(filePath)
const fileSize = fileBuffer.length
// Handle PDF-specific validation
if (
extension === 'pdf' &&
(result.content.includes('\u0000') ||
result.content.match(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]{10,}/g))
) {
result.content = createPdfFallbackMessage(result.metadata?.pageCount, fileSize, originalPath)
}
return {
success: true,
output: {
content: result.content,
fileType: fileType || getMimeType(extension),
size: fileSize,
name: filename,
binary: false,
metadata: result.metadata || {},
},
filePath: originalPath || filePath,
}
} catch (error) {
logger.error(`Specialized parser failed for ${extension} file:`, error)
// Special handling for PDFs
if (extension === 'pdf') {
const fileBuffer = await readFile(filePath)
const fileSize = fileBuffer.length
// Get page count using a simple regex pattern
let pageCount = 0
const pdfContent = fileBuffer.toString('utf-8')
const pageMatches = pdfContent.match(/\/Type\s*\/Page\b/gi)
if (pageMatches) {
pageCount = pageMatches.length
}
const content = createPdfFailureMessage(
pageCount,
fileSize,
originalPath || filePath,
(error as Error).message
)
return {
success: true,
output: {
content,
fileType: fileType || getMimeType(extension),
size: fileSize,
name: filename,
binary: false,
},
filePath: originalPath || filePath,
}
}
// For other file types, fall back to generic handling
return handleGenericFile(filePath, filename, extension, fileType)
}
}
/**
* Handle generic file types with basic parsing
*/
async function handleGenericFile(
filePath: string,
filename: string,
extension: string,
fileType?: string
): Promise<ParseResult> {
try {
// Read the file
const fileBuffer = await readFile(filePath)
const fileSize = fileBuffer.length
// Determine if file should be treated as binary
const isBinary = binaryExtensions.includes(extension)
// Parse content based on binary status
const fileContent = isBinary
? `[Binary ${extension.toUpperCase()} file - ${fileSize} bytes]`
: await parseTextFile(fileBuffer)
return {
success: true,
output: {
content: fileContent,
fileType: fileType || getMimeType(extension),
size: fileSize,
name: filename,
binary: isBinary,
},
}
} catch (error) {
logger.error('Error handling generic file:', error)
return {
success: false,
error: `Failed to parse file: ${(error as Error).message}`,
}
}
}
/**
* Parse a text file buffer to string
*/
async function parseTextFile(fileBuffer: Buffer): Promise<string> {
try {
return fileBuffer.toString('utf-8')
} catch (error) {
return `[Unable to parse file as text: ${(error as Error).message}]`
}
}
/**
* Get MIME type from file extension
*/
function getMimeType(extension: string): string {
return fileTypeMap[extension] || 'application/octet-stream'
}
/**
* Create a fallback message for PDF files that couldn't be parsed properly
*/
function createPdfFallbackMessage(
pageCount: number | undefined,
fileSize: number,
filePath?: string
): string {
return `This PDF document could not be parsed for text content. It contains ${pageCount || 'unknown number of'} pages. File size: ${fileSize} bytes.
To view this PDF properly, you can:
1. Download it directly using this URL: ${filePath}
2. Try a dedicated PDF text extraction service or tool
3. Open it with a PDF reader like Adobe Acrobat
PDF parsing failed because the document appears to use an encoding or compression method that our parser cannot handle.`
}
/**
* Create an error message for PDF files that failed to parse
*/
function createPdfFailureMessage(
pageCount: number,
fileSize: number,
filePath: string,
errorMessage: string
): string {
return `PDF parsing failed: ${errorMessage}
This PDF document contains ${pageCount || 'an unknown number of'} pages and is ${fileSize} bytes in size.
To view this PDF properly, you can:
1. Download it directly using this URL: ${filePath}
2. Try a dedicated PDF text extraction service or tool
3. Open it with a PDF reader like Adobe Acrobat
Common causes of PDF parsing failures:
- The PDF uses an unsupported compression algorithm
- The PDF is protected or encrypted
- The PDF content uses non-standard encodings
- The PDF was created with features our parser doesn't support`
}
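
The handler accepts a single path or an array: one path returns `output` directly, an array returns `results`. A minimal client sketch (paths are illustrative):

async function parseUploadedFiles(filePath: string | string[]) {
  const response = await fetch('/api/files/parse', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ filePath }),
  })
  return response.json()
}

// Single file: { success: true, output: { content, fileType, size, name, binary, metadata } }
// Multiple files: { success: true, results: [ ...one ParseResult per path ] }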


@@ -0,0 +1,275 @@
/**
* Tests for file serve API route
*
* @vitest-environment node
*/
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
describe('File Serve API Route', () => {
// Mock file system and S3 client modules
const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test file content'))
const mockExistsSync = vi.fn().mockReturnValue(true)
const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content'))
const mockGetPresignedUrl = vi.fn().mockResolvedValue('https://example-s3.com/presigned-url')
const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)
beforeEach(() => {
vi.resetModules()
// Mock filesystem operations
vi.doMock('fs', () => ({
existsSync: mockExistsSync,
}))
vi.doMock('fs/promises', () => ({
readFile: mockReadFile,
}))
// Mock the S3 client
vi.doMock('@/lib/uploads/s3-client', () => ({
downloadFromS3: mockDownloadFromS3,
getPresignedUrl: mockGetPresignedUrl,
}))
// Mock the logger
vi.doMock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}),
}))
// Configure upload directory and S3 mode with all required exports
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
ensureUploadsDirectory: mockEnsureUploadsDirectory,
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
},
}))
// Skip setup.server.ts side effects
vi.doMock('@/lib/uploads/setup.server', () => ({}))
})
afterEach(() => {
vi.clearAllMocks()
})
it('should serve local file successfully', async () => {
// Create mock request
const req = new NextRequest('http://localhost:3000/api/files/serve/test-file.txt')
// Create params similar to what Next.js would provide
const params = { path: ['test-file.txt'] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify response
expect(response.status).toBe(200)
expect(response.headers.get('Content-Type')).toBe('text/plain')
expect(response.headers.get('Content-Disposition')).toBe('inline; filename="test-file.txt"')
expect(response.headers.get('Cache-Control')).toBe('public, max-age=31536000')
// Verify file was read from correct path
expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt')
// Verify response content
const buffer = await response.arrayBuffer()
const content = Buffer.from(buffer).toString()
expect(content).toBe('test file content')
})
it('should handle nested paths correctly', async () => {
// Create mock request
const req = new NextRequest('http://localhost:3000/api/files/serve/nested/path/file.txt')
// Create params similar to what Next.js would provide
const params = { path: ['nested', 'path', 'file.txt'] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify success and that the file was read from the correct nested path
expect(response.status).toBe(200)
expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/nested/path/file.txt')
})
it('should serve S3 file with presigned URL redirect', async () => {
// Configure S3 storage mode
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Create mock request
const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-file.pdf')
// Create params similar to what Next.js would provide
const params = { path: ['s3', '1234567890-file.pdf'] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify redirect to presigned URL
expect(response.status).toBe(307) // Temporary redirect
expect(response.headers.get('Location')).toBe('https://example-s3.com/presigned-url')
// Verify presigned URL was generated for correct S3 key
expect(mockGetPresignedUrl).toHaveBeenCalledWith('1234567890-file.pdf')
})
it('should handle S3 file download fallback if presigned URL fails', async () => {
// Configure S3 storage mode
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Mock presigned URL to fail
mockGetPresignedUrl.mockRejectedValueOnce(new Error('Presigned URL failed'))
// Create mock request
const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-image.png')
// Create params similar to what Next.js would provide
const params = { path: ['s3', '1234567890-image.png'] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify response falls back to downloading and proxying the file
expect(response.status).toBe(200)
expect(response.headers.get('Content-Type')).toBe('image/png')
expect(mockDownloadFromS3).toHaveBeenCalledWith('1234567890-image.png')
})
it('should return 404 when file not found', async () => {
// Mock file not existing
mockExistsSync.mockReturnValue(false)
// Create mock request
const req = new NextRequest('http://localhost:3000/api/files/serve/nonexistent.txt')
// Create params similar to what Next.js would provide
const params = { path: ['nonexistent.txt'] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify 404 response
expect(response.status).toBe(404)
const data = await response.json()
// Updated to match actual error format
expect(data).toHaveProperty('error', 'FileNotFoundError')
expect(data).toHaveProperty('message')
expect(data.message).toContain('File not found')
})
// Instead of testing all content types in one test, let's separate them
describe('content type detection', () => {
const contentTypeTests = [
{ ext: 'pdf', contentType: 'application/pdf' },
{ ext: 'json', contentType: 'application/json' },
{ ext: 'jpg', contentType: 'image/jpeg' },
{ ext: 'txt', contentType: 'text/plain' },
{ ext: 'unknown', contentType: 'application/octet-stream' },
]
for (const test of contentTypeTests) {
it(`should serve ${test.ext} file with correct content type`, async () => {
// Reset modules for this test
vi.resetModules()
// Re-apply all mocks
vi.doMock('fs', () => ({
existsSync: mockExistsSync.mockReturnValue(true),
}))
vi.doMock('fs/promises', () => ({
readFile: mockReadFile,
}))
vi.doMock('@/lib/uploads/s3-client', () => ({
downloadFromS3: mockDownloadFromS3,
getPresignedUrl: mockGetPresignedUrl,
}))
vi.doMock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}),
}))
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
ensureUploadsDirectory: mockEnsureUploadsDirectory,
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
},
}))
vi.doMock('@/lib/uploads/setup.server', () => ({}))
// Mock utils functions that determine content type
vi.doMock('@/app/api/files/utils', () => ({
getContentType: () => test.contentType,
findLocalFile: () => '/test/uploads/file.' + test.ext,
createFileResponse: (obj: { buffer: Buffer; contentType: string; filename: string }) =>
new Response(obj.buffer, {
status: 200,
headers: {
'Content-Type': obj.contentType,
'Content-Disposition': `inline; filename="${obj.filename}"`,
'Cache-Control': 'public, max-age=31536000',
},
}),
createErrorResponse: () => new Response(null, { status: 404 }),
}))
// Create mock request with this extension
const req = new NextRequest(`http://localhost:3000/api/files/serve/file.${test.ext}`)
// Create params
const params = { path: [`file.${test.ext}`] }
// Import the handler after mocks are set up
const { GET } = await import('./route')
// Call the handler
const response = await GET(req, { params })
// Verify correct content type
expect(response.headers.get('Content-Type')).toBe(test.contentType)
})
}
})
})


@@ -0,0 +1,141 @@
import { NextRequest, NextResponse } from 'next/server'
import { readFile } from 'fs/promises'
import { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'
import { downloadFromS3, getPresignedUrl } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import {
createErrorResponse,
createFileResponse,
FileNotFoundError,
findLocalFile,
getContentType,
} from '../../utils'
const logger = createLogger('FilesServeAPI')
/**
* Main API route handler for serving files
*/
export async function GET(
request: NextRequest,
{ params }: { params: Promise<{ path: string[] }> }
) {
try {
// Extract params
const { path } = await params
// Join the path segments to get the filename or S3 key
const pathString = path.join('/')
logger.info(`Serving file: ${pathString}`)
// Check if this is an S3 file (path starts with 's3/')
const isS3Path = path[0] === 's3'
try {
// Use S3 handler if in production or path explicitly specifies S3
if (USE_S3_STORAGE || isS3Path) {
return await handleS3File(path, isS3Path, pathString)
}
// Use local handler for local files
return await handleLocalFile(path)
} catch (error) {
logger.error('Error serving file:', error)
return createErrorResponse(error as Error)
}
} catch (error) {
logger.error('Error serving file:', error)
return createErrorResponse(error as Error)
}
}
/**
* Handle S3 file serving
*/
async function handleS3File(
path: string[],
isS3Path: boolean,
pathString: string
): Promise<NextResponse> {
// If path starts with s3/, remove that prefix to get the actual key
const s3Key = isS3Path ? decodeURIComponent(path.slice(1).join('/')) : pathString
logger.info(`Serving file from S3: ${s3Key}`)
try {
// First try direct access via presigned URL (most efficient)
return await handleS3PresignedUrl(s3Key)
} catch (error) {
logger.info('Falling back to proxy method for S3 file')
// Fall back to proxy method if presigned URL fails
return await handleS3Proxy(s3Key)
}
}
/**
* Generate a presigned URL and redirect to it
*/
async function handleS3PresignedUrl(s3Key: string): Promise<NextResponse> {
try {
// Generate a presigned URL for direct S3 access
const presignedUrl = await getPresignedUrl(s3Key)
// Redirect to the presigned URL for direct S3 access
return NextResponse.redirect(presignedUrl)
} catch (error) {
logger.error('Error generating presigned URL:', error)
throw error
}
}
/**
* Proxy S3 file through our server
*/
async function handleS3Proxy(s3Key: string): Promise<NextResponse> {
try {
const fileBuffer = await downloadFromS3(s3Key)
// Extract the original filename from the key (last part after last /)
const originalFilename = s3Key.split('/').pop() || 'download'
const contentType = getContentType(originalFilename)
return createFileResponse({
buffer: fileBuffer,
contentType,
filename: originalFilename,
})
} catch (error) {
logger.error('Error downloading from S3:', error)
throw error
}
}
/**
* Handle local file serving
*/
async function handleLocalFile(path: string[]): Promise<NextResponse> {
// Join as a path for findLocalFile
const pathString = path.join('/')
const filePath = findLocalFile(pathString)
// Handle file not found
if (!filePath) {
logger.error(`File not found in any checked paths for: ${pathString}`)
throw new FileNotFoundError(`File not found: ${pathString}`)
}
// Read the file
const fileBuffer = await readFile(filePath)
// Get filename for content type detection and response
const filename = path[path.length - 1]
const contentType = getContentType(filename)
return createFileResponse({
buffer: fileBuffer,
contentType,
filename,
})
}
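
The s3-client module this commit adds ("add s3 client" in the message above) is not shown in this excerpt. A minimal sketch of the four functions the routes and tests rely on, assuming the AWS SDK v3 packages (@aws-sdk/client-s3 and @aws-sdk/s3-request-presigner) and the `Date.now()`-prefixed key format the upload tests mock:

import {
  DeleteObjectCommand,
  GetObjectCommand,
  PutObjectCommand,
  S3Client,
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { S3_CONFIG } from '@/lib/uploads/setup'

const client = new S3Client({ region: S3_CONFIG.region })

export async function uploadToS3(buffer: Buffer, fileName: string, contentType: string, size?: number) {
  // Timestamp prefix keeps keys unique, matching the shape the tests expect
  const key = `${Date.now()}-${fileName}`
  await client.send(
    new PutObjectCommand({ Bucket: S3_CONFIG.bucket, Key: key, Body: buffer, ContentType: contentType })
  )
  return {
    path: `/api/files/serve/s3/${encodeURIComponent(key)}`,
    key,
    name: fileName,
    size: size ?? buffer.length,
    type: contentType,
  }
}

export async function downloadFromS3(key: string): Promise<Buffer> {
  const result = await client.send(new GetObjectCommand({ Bucket: S3_CONFIG.bucket, Key: key }))
  return Buffer.from(await result.Body!.transformToByteArray())
}

export async function getPresignedUrl(key: string, expiresIn = 3600): Promise<string> {
  return getSignedUrl(client, new GetObjectCommand({ Bucket: S3_CONFIG.bucket, Key: key }), { expiresIn })
}

export async function deleteFromS3(key: string): Promise<void> {
  await client.send(new DeleteObjectCommand({ Bucket: S3_CONFIG.bucket, Key: key }))
}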


@@ -0,0 +1,261 @@
/**
* Tests for file upload API route
*
* @vitest-environment node
*/
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
describe('File Upload API Route', () => {
// Mock file system and S3 client modules
const mockWriteFile = vi.fn().mockResolvedValue(undefined)
const mockUploadToS3 = vi.fn().mockImplementation((buffer, fileName) => {
return Promise.resolve({
path: `/api/files/serve/s3/${Date.now()}-${fileName}`,
key: `${Date.now()}-${fileName}`,
name: fileName,
size: buffer.length,
type: 'text/plain',
})
})
const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)
// Mock form data
const createMockFormData = (files: File[]): FormData => {
const formData = new FormData()
files.forEach((file) => {
formData.append('file', file)
})
return formData
}
// Mock file
const createMockFile = (
name = 'test.txt',
type = 'text/plain',
content = 'test content'
): File => {
return new File([content], name, { type })
}
beforeEach(() => {
vi.resetModules()
// Mock filesystem operations
vi.doMock('fs/promises', () => ({
writeFile: mockWriteFile,
}))
// Mock the S3 client
vi.doMock('@/lib/uploads/s3-client', () => ({
uploadToS3: mockUploadToS3,
}))
// Mock the logger
vi.doMock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}),
}))
// Mock UUID generation
vi.doMock('uuid', () => ({
v4: vi.fn().mockReturnValue('mock-uuid'),
}))
// Configure upload directory and S3 mode with all required exports
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: false,
ensureUploadsDirectory: mockEnsureUploadsDirectory,
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
},
}))
// Skip setup.server.ts side effects
vi.doMock('@/lib/uploads/setup.server', () => ({}))
})
afterEach(() => {
vi.clearAllMocks()
})
it('should upload a file to local storage', async () => {
// Create a mock request with file
const mockFile = createMockFile()
const formData = createMockFormData([mockFile])
// Create mock request object
const req = new NextRequest('http://localhost:3000/api/files/upload', {
method: 'POST',
body: formData,
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('path', '/api/files/serve/mock-uuid.txt')
expect(data).toHaveProperty('name', 'test.txt')
expect(data).toHaveProperty('size')
expect(data).toHaveProperty('type', 'text/plain')
// Verify file was written to local storage
expect(mockWriteFile).toHaveBeenCalledWith('/test/uploads/mock-uuid.txt', expect.any(Buffer))
})
it('should upload a file to S3 when in S3 mode', async () => {
// Configure S3 storage mode
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Create a mock request with file
const mockFile = createMockFile('document.pdf', 'application/pdf')
const formData = createMockFormData([mockFile])
// Create mock request object
const req = new NextRequest('http://localhost:3000/api/files/upload', {
method: 'POST',
body: formData,
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response
expect(response.status).toBe(200)
expect(data).toHaveProperty('path')
expect(data.path).toContain('/api/files/serve/s3/')
expect(data).toHaveProperty('key')
expect(data).toHaveProperty('name', 'document.pdf')
// Verify uploadToS3 was called with correct parameters
expect(mockUploadToS3).toHaveBeenCalledWith(
expect.any(Buffer),
'document.pdf',
'application/pdf',
expect.any(Number)
)
// Verify local write was NOT called
expect(mockWriteFile).not.toHaveBeenCalled()
})
it('should handle multiple file uploads', async () => {
// Create multiple mock files
const mockFiles = [
createMockFile('file1.txt', 'text/plain'),
createMockFile('file2.jpg', 'image/jpeg'),
]
const formData = createMockFormData(mockFiles)
// Create mock request object
const req = new NextRequest('http://localhost:3000/api/files/upload', {
method: 'POST',
body: formData,
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify response has multiple results
expect(response.status).toBe(200)
expect(Array.isArray(data)).toBe(true)
expect(data).toHaveLength(2)
expect(data[0]).toHaveProperty('name', 'file1.txt')
expect(data[1]).toHaveProperty('name', 'file2.jpg')
// Verify files were written
expect(mockWriteFile).toHaveBeenCalledTimes(2)
})
it('should handle missing files', async () => {
// Create empty form data
const formData = new FormData()
// Create mock request object
const req = new NextRequest('http://localhost:3000/api/files/upload', {
method: 'POST',
body: formData,
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify error response
expect(response.status).toBe(400)
expect(data).toHaveProperty('error', 'InvalidRequestError')
expect(data).toHaveProperty('message', 'No files provided')
})
it('should handle S3 upload errors', async () => {
// Configure S3 storage mode
vi.doMock('@/lib/uploads/setup', () => ({
UPLOAD_DIR: '/test/uploads',
USE_S3_STORAGE: true,
}))
// Mock S3 upload failure
mockUploadToS3.mockRejectedValueOnce(new Error('S3 upload failed'))
// Create a mock request with file
const mockFile = createMockFile()
const formData = createMockFormData([mockFile])
// Create mock request object
const req = new NextRequest('http://localhost:3000/api/files/upload', {
method: 'POST',
body: formData,
})
// Import the handler after mocks are set up
const { POST } = await import('./route')
// Call the handler
const response = await POST(req)
const data = await response.json()
// Verify error response
expect(response.status).toBe(500)
expect(data).toHaveProperty('error', 'Error')
expect(data).toHaveProperty('message', 'S3 upload failed')
})
it('should handle CORS preflight requests', async () => {
// Import the handler after mocks are set up
const { OPTIONS } = await import('./route')
// Call the handler
const response = await OPTIONS()
// Verify response
expect(response.status).toBe(204)
expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS')
expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type')
})
})


@@ -0,0 +1,77 @@
import { NextRequest, NextResponse } from 'next/server'
import { writeFile } from 'fs/promises'
import { join } from 'path'
import { v4 as uuidv4 } from 'uuid'
import { createLogger } from '@/lib/logs/console-logger'
import { uploadToS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import { createErrorResponse, createOptionsResponse, InvalidRequestError } from '../utils'
const logger = createLogger('FilesUploadAPI')
export async function POST(request: NextRequest) {
try {
const formData = await request.formData()
// Check if multiple files are being uploaded or a single file
const files = formData.getAll('file') as File[]
if (!files || files.length === 0) {
throw new InvalidRequestError('No files provided')
}
// Log storage mode
logger.info(`Using storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'} for file upload`)
const uploadResults = []
// Process each file
for (const file of files) {
const originalName = file.name
const bytes = await file.arrayBuffer()
const buffer = Buffer.from(bytes)
if (USE_S3_STORAGE) {
// Upload to S3 in production
try {
logger.info(`Uploading file to S3: ${originalName}`)
const result = await uploadToS3(buffer, originalName, file.type, file.size)
logger.info(`Successfully uploaded to S3: ${result.key}`)
uploadResults.push(result)
} catch (error) {
logger.error('Error uploading to S3:', error)
throw error
}
} else {
// Upload to local file system in development
const extension = originalName.split('.').pop() || ''
const uniqueFilename = `${uuidv4()}.${extension}`
const filePath = join(UPLOAD_DIR, uniqueFilename)
logger.info(`Uploading file to local storage: ${filePath}`)
await writeFile(filePath, buffer)
logger.info(`Successfully wrote file to: ${filePath}`)
uploadResults.push({
path: `/api/files/serve/${uniqueFilename}`,
name: originalName,
size: file.size,
type: file.type,
})
}
}
// Return all file information
return NextResponse.json(files.length === 1 ? uploadResults[0] : uploadResults)
} catch (error) {
logger.error('Error uploading files:', error)
return createErrorResponse(error instanceof Error ? error : new Error('Failed to upload files'))
}
}
// Handle preflight requests
export async function OPTIONS() {
return createOptionsResponse()
}
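
A minimal client-side upload consistent with this route: append each file under the 'file' field; a single file returns one result object, multiple files return an array.

async function uploadFiles(files: File[]) {
  const formData = new FormData()
  for (const file of files) {
    formData.append('file', file)
  }
  const response = await fetch('/api/files/upload', { method: 'POST', body: formData })
  if (!response.ok) {
    throw new Error(`Upload failed: ${response.status}`)
  }
  return response.json()
}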

sim/app/api/files/utils.ts

@@ -0,0 +1,193 @@
import { NextResponse } from 'next/server'
import { existsSync } from 'fs'
import { join } from 'path'
import { UPLOAD_DIR } from '@/lib/uploads/setup'
/**
* Response type definitions
*/
export interface ApiSuccessResponse {
success: true
[key: string]: any
}
export interface ApiErrorResponse {
error: string
message?: string
}
export interface FileResponse {
buffer: Buffer
contentType: string
filename: string
}
/**
* Custom error types
*/
export class FileNotFoundError extends Error {
constructor(message: string) {
super(message)
this.name = 'FileNotFoundError'
}
}
export class InvalidRequestError extends Error {
constructor(message: string) {
super(message)
this.name = 'InvalidRequestError'
}
}
/**
* Maps file extensions to MIME types
*/
export const contentTypeMap: Record<string, string> = {
// Text formats
txt: 'text/plain',
csv: 'text/csv',
json: 'application/json',
xml: 'application/xml',
md: 'text/markdown',
html: 'text/html',
css: 'text/css',
js: 'application/javascript',
ts: 'application/typescript',
// Document formats
pdf: 'application/pdf',
doc: 'application/msword',
docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
// Spreadsheet formats
xls: 'application/vnd.ms-excel',
xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
// Presentation formats
ppt: 'application/vnd.ms-powerpoint',
pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
// Image formats
png: 'image/png',
jpg: 'image/jpeg',
jpeg: 'image/jpeg',
gif: 'image/gif',
svg: 'image/svg+xml',
// Archive formats
zip: 'application/zip',
}
/**
* List of binary file extensions
*/
export const binaryExtensions = [
'doc',
'docx',
'xls',
'xlsx',
'ppt',
'pptx',
'zip',
'png',
'jpg',
'jpeg',
'gif',
'pdf',
]
/**
* Determine content type from file extension
*/
export function getContentType(filename: string): string {
const extension = filename.split('.').pop()?.toLowerCase() || ''
return contentTypeMap[extension] || 'application/octet-stream'
}
/**
* Check if a path is an S3 path
*/
export function isS3Path(path: string): boolean {
return path.includes('/api/files/serve/s3/')
}
/**
* Extract S3 key from a path
*/
export function extractS3Key(path: string): string {
if (isS3Path(path)) {
return decodeURIComponent(path.split('/api/files/serve/s3/')[1])
}
return path
}
/**
* Extract filename from a serve path
*/
export function extractFilename(path: string): string {
if (path.startsWith('/api/files/serve/')) {
return path.substring('/api/files/serve/'.length)
}
return path.split('/').pop() || path
}
/**
* Find a file in possible local storage locations
*/
export function findLocalFile(filename: string): string | null {
const possiblePaths = [join(UPLOAD_DIR, filename), join(process.cwd(), 'uploads', filename)]
for (const path of possiblePaths) {
if (existsSync(path)) {
return path
}
}
return null
}
/**
* Create a file response with appropriate headers
*/
export function createFileResponse(file: FileResponse): NextResponse {
return new NextResponse(file.buffer, {
status: 200,
headers: {
'Content-Type': file.contentType,
'Content-Disposition': `inline; filename="${file.filename}"`,
'Cache-Control': 'public, max-age=31536000', // Cache for 1 year
},
})
}
/**
* Create a standardized error response
*/
export function createErrorResponse(error: Error, status: number = 500): NextResponse {
// Map error types to appropriate status codes
const statusCode =
error instanceof FileNotFoundError ? 404 : error instanceof InvalidRequestError ? 400 : status
return NextResponse.json(
{
error: error.name,
message: error.message,
},
{ status: statusCode }
)
}
/**
* Create a standardized success response
*/
export function createSuccessResponse(data: ApiSuccessResponse): NextResponse {
return NextResponse.json(data)
}
/**
* Handle CORS preflight requests
*/
export function createOptionsResponse(): NextResponse {
return new NextResponse(null, {
status: 204,
headers: {
'Access-Control-Allow-Methods': 'GET, POST, DELETE, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type',
},
})
}
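
Concrete behavior of the path helpers as the routes above use them (example paths are illustrative):

import { extractFilename, extractS3Key, getContentType, isS3Path } from '@/app/api/files/utils'

isS3Path('/api/files/serve/s3/1234567890-report.pdf')     // true
extractS3Key('/api/files/serve/s3/1234567890-report.pdf') // '1234567890-report.pdf'
extractS3Key('plain-key.txt')                             // 'plain-key.txt' (non-S3 paths pass through)
extractFilename('/api/files/serve/nested/file.txt')       // 'nested/file.txt'
getContentType('diagram.svg')                             // 'image/svg+xml'
getContentType('archive.bin')                             // 'application/octet-stream' (fallback)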


@@ -0,0 +1,508 @@
'use client'
import { useRef, useState } from 'react'
import { Info, Upload, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
import { useNotificationStore } from '@/stores/notifications/store'
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
import { useWorkflowStore } from '@/stores/workflows/workflow/store'
import { useSubBlockValue } from '../hooks/use-sub-block-value'
interface FileUploadProps {
blockId: string
subBlockId: string
maxSize?: number // in MB
acceptedTypes?: string // comma separated MIME types
multiple?: boolean // whether to allow multiple file uploads
}
interface UploadedFile {
name: string
path: string
size: number
type: string
}
export function FileUpload({
blockId,
subBlockId,
maxSize = 10, // Default 10MB
acceptedTypes = '*',
multiple = false, // Default to single file for backward compatibility
}: FileUploadProps) {
// State management - handle both single file and array of files
const [value, setValue] = useSubBlockValue<UploadedFile | UploadedFile[] | null>(
blockId,
subBlockId,
true
)
const [isUploading, setIsUploading] = useState(false)
const [uploadProgress, setUploadProgress] = useState(0)
// For file deletion status
const [deletingFiles, setDeletingFiles] = useState<Record<string, boolean>>({})
// Refs
const fileInputRef = useRef<HTMLInputElement>(null)
// Stores
const { addNotification } = useNotificationStore()
const { activeWorkflowId } = useWorkflowRegistry()
/**
* Opens file dialog
* Prevents event propagation to avoid ReactFlow capturing the event
*/
const handleOpenFileDialog = (e: React.MouseEvent) => {
e.preventDefault()
e.stopPropagation()
if (fileInputRef.current) {
fileInputRef.current.value = ''
fileInputRef.current.click()
}
}
/**
* Formats file size for display in a human-readable format
*/
const formatFileSize = (bytes: number): string => {
if (bytes < 1024) return `${bytes} B`
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`
}
/**
* Handles file upload when new file(s) are selected
*/
const handleFileChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
e.stopPropagation()
const files = e.target.files
if (!files || files.length === 0) return
// Get existing files and their total size
const existingFiles = Array.isArray(value) ? value : value ? [value] : []
const existingTotalSize = existingFiles.reduce((sum, file) => sum + file.size, 0)
// Validate file sizes
const maxSizeInBytes = maxSize * 1024 * 1024
const validFiles: File[] = []
let totalNewSize = 0
for (let i = 0; i < files.length; i++) {
const file = files[i]
// Check if adding this file would exceed the total limit
if (existingTotalSize + totalNewSize + file.size > maxSizeInBytes) {
addNotification(
'error',
`Adding ${file.name} would exceed the maximum size limit of ${maxSize}MB`,
activeWorkflowId
)
} else {
validFiles.push(file)
totalNewSize += file.size
}
}
if (validFiles.length === 0) return
setIsUploading(true)
setUploadProgress(0)
// Track progress simulation interval
let progressInterval: NodeJS.Timeout | null = null
try {
// Simulate upload progress
progressInterval = setInterval(() => {
setUploadProgress((prev) => {
const newProgress = prev + Math.random() * 10
return newProgress > 90 ? 90 : newProgress
})
}, 200)
const uploadedFiles: UploadedFile[] = []
const uploadErrors: string[] = []
// Upload each file separately
for (const file of validFiles) {
// Create FormData for upload
const formData = new FormData()
formData.append('file', file)
// Upload the file
const response = await fetch('/api/files/upload', {
method: 'POST',
body: formData,
})
// Handle error response
if (!response.ok) {
const errorData = await response.json().catch(() => ({ error: response.statusText }))
const errorMessage = errorData.error || `Failed to upload file: ${response.status}`
uploadErrors.push(`${file.name}: ${errorMessage}`)
continue
}
// Process successful upload
const data = await response.json()
uploadedFiles.push({
name: file.name,
path: data.path,
size: file.size,
type: file.type,
})
}
// Clear progress interval
if (progressInterval) {
clearInterval(progressInterval)
progressInterval = null
}
setUploadProgress(100)
// Send consolidated notification about uploaded files
if (uploadedFiles.length > 0) {
if (uploadedFiles.length === 1) {
addNotification(
'console',
`${uploadedFiles[0].name} was uploaded successfully`,
activeWorkflowId
)
} else {
addNotification(
'console',
`Uploaded ${uploadedFiles.length} files successfully: ${uploadedFiles.map((f) => f.name).join(', ')}`,
activeWorkflowId
)
}
}
// Send consolidated error notification if any
if (uploadErrors.length > 0) {
if (uploadErrors.length === 1) {
addNotification('error', uploadErrors[0], activeWorkflowId)
} else {
addNotification(
'error',
`Failed to upload ${uploadErrors.length} files: ${uploadErrors.join('; ')}`,
activeWorkflowId
)
}
}
// Update the file value in state based on multiple setting
if (multiple) {
// For multiple files: Append to existing files if any
const existingFiles = Array.isArray(value) ? value : value ? [value] : []
const newFiles = [...existingFiles, ...uploadedFiles]
setValue(newFiles)
// Make sure to update the subblock store value for the workflow execution
useSubBlockStore.getState().setValue(blockId, subBlockId, newFiles)
useWorkflowStore.getState().triggerUpdate()
} else {
// For single file: replace the value with the first successfully uploaded file
setValue(uploadedFiles[0] || null)
// Make sure to update the subblock store value for the workflow execution
useSubBlockStore.getState().setValue(blockId, subBlockId, uploadedFiles[0] || null)
useWorkflowStore.getState().triggerUpdate()
}
} catch (error) {
addNotification(
'error',
error instanceof Error ? error.message : 'Failed to upload file(s)',
activeWorkflowId
)
} finally {
// Clean up and reset upload state
if (progressInterval) {
clearInterval(progressInterval)
}
setTimeout(() => {
setIsUploading(false)
setUploadProgress(0)
}, 500)
}
}
/**
* Handles deletion of a single file
*/
const handleRemoveFile = async (file: UploadedFile, e?: React.MouseEvent) => {
if (e) {
e.preventDefault()
e.stopPropagation()
}
// Mark this file as being deleted
setDeletingFiles((prev) => ({ ...prev, [file.path]: true }))
try {
// Call API to delete the file from server
const response = await fetch('/api/files/delete', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ filePath: file.path }),
})
if (!response.ok) {
const errorData = await response.json().catch(() => ({ error: response.statusText }))
const errorMessage = errorData.error || `Failed to delete file: ${response.status}`
throw new Error(errorMessage)
}
// Update the UI state
if (multiple) {
// For multiple files: Remove the specific file
const filesArray = Array.isArray(value) ? value : value ? [value] : []
const updatedFiles = filesArray.filter((f) => f.path !== file.path)
setValue(updatedFiles.length > 0 ? updatedFiles : null)
// Make sure to update the subblock store value for the workflow execution
useSubBlockStore
.getState()
.setValue(blockId, subBlockId, updatedFiles.length > 0 ? updatedFiles : null)
} else {
// For single file: Clear the value
setValue(null)
// Make sure to update the subblock store
useSubBlockStore.getState().setValue(blockId, subBlockId, null)
}
addNotification('console', `${file.name} was deleted successfully`, activeWorkflowId)
useWorkflowStore.getState().triggerUpdate()
} catch (error) {
addNotification(
'error',
error instanceof Error ? error.message : 'Failed to delete file from server',
activeWorkflowId
)
} finally {
// Remove file from the deleting state
setDeletingFiles((prev) => {
const updated = { ...prev }
delete updated[file.path]
return updated
})
}
}
/**
* Handles deletion of all files (for multiple mode)
*/
const handleRemoveAllFiles = async (e: React.MouseEvent) => {
e.preventDefault()
e.stopPropagation()
if (!value) return
const filesToDelete = Array.isArray(value) ? value : [value]
const fileCount = filesToDelete.length
// Mark all files as deleting
const deletingStatus: Record<string, boolean> = {}
filesToDelete.forEach((file) => {
deletingStatus[file.path] = true
})
setDeletingFiles(deletingStatus)
// Clear input state immediately for better UX
setValue(null)
useSubBlockStore.getState().setValue(blockId, subBlockId, null)
useWorkflowStore.getState().triggerUpdate()
if (fileInputRef.current) {
fileInputRef.current.value = ''
}
// Track successful and failed deletions
const deletionResults = {
success: 0,
failures: [] as string[],
}
// Delete each file
for (const file of filesToDelete) {
try {
const response = await fetch('/api/files/delete', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ filePath: file.path }),
})
if (response.ok) {
deletionResults.success++
} else {
const errorData = await response.json().catch(() => ({ error: response.statusText }))
const errorMessage = errorData.error || `Failed to delete file: ${response.status}`
deletionResults.failures.push(`${file.name}: ${errorMessage}`)
}
} catch (error) {
console.error(`Failed to delete file ${file.name}:`, error)
deletionResults.failures.push(
`${file.name}: ${error instanceof Error ? error.message : 'Unknown error'}`
)
}
}
// Show a single consolidated notification about the deletions
if (deletionResults.success > 0) {
if (fileCount === 1) {
addNotification('console', `File was deleted successfully`, activeWorkflowId)
} else {
addNotification(
'console',
`${deletionResults.success} of ${fileCount} files were deleted successfully`,
activeWorkflowId
)
}
}
// Show error notification if any deletions failed
if (deletionResults.failures.length > 0) {
if (deletionResults.failures.length === 1) {
addNotification(
'error',
`Failed to delete file: ${deletionResults.failures[0]}`,
activeWorkflowId
)
} else {
addNotification(
'error',
`Failed to delete ${deletionResults.failures.length} files: ${deletionResults.failures.join('; ')}`,
activeWorkflowId
)
}
}
setDeletingFiles({})
}
// Helper to render a single file item
const renderFileItem = (file: UploadedFile) => {
const isDeleting = deletingFiles[file.path]
return (
<div
key={file.path}
className="flex items-center justify-between p-2 rounded border border-border bg-secondary/30 mb-2"
>
<div className="flex-1 truncate pr-2">
<div className="font-medium text-sm truncate">{file.name}</div>
<div className="text-xs text-muted-foreground">{formatFileSize(file.size)}</div>
</div>
<Button
type="button"
variant="ghost"
size="icon"
className="h-8 w-8 shrink-0"
onClick={(e) => handleRemoveFile(file, e)}
disabled={isDeleting}
>
{isDeleting ? (
<div className="h-4 w-4 animate-spin rounded-full border-2 border-current border-t-transparent" />
) : (
<X className="h-4 w-4" />
)}
</Button>
</div>
)
}
// Get files array regardless of multiple setting
const filesArray = Array.isArray(value) ? value : value ? [value] : []
const hasFiles = filesArray.length > 0
return (
<div className="w-full" onClick={(e) => e.stopPropagation()}>
<input
type="file"
ref={fileInputRef}
onChange={handleFileChange}
style={{ display: 'none' }}
accept={acceptedTypes}
multiple={multiple}
data-testid="file-input-element"
/>
{isUploading ? (
<div className="w-full p-4 border border-border rounded-md">
<Progress value={uploadProgress} className="w-full h-2 mb-2" />
<div className="text-xs text-center text-muted-foreground">
{uploadProgress < 100 ? 'Uploading...' : 'Upload complete!'}
</div>
</div>
) : (
<>
{hasFiles && (
<div className="mb-3">
{/* File list */}
<div className="space-y-1">{filesArray.map(renderFileItem)}</div>
{/* Action buttons */}
<div className="flex space-x-2 mt-2">
<Button
type="button"
variant="outline"
size="sm"
className="flex-1"
onClick={handleRemoveAllFiles}
>
Remove All
</Button>
{multiple && (
<Button
type="button"
variant="outline"
size="sm"
className="flex-1"
onClick={handleOpenFileDialog}
>
Add More
</Button>
)}
</div>
</div>
)}
{/* Show upload button if no files or if not in multiple mode */}
{(!hasFiles || !multiple) && (
<Button
type="button"
variant="outline"
className="w-full justify-center text-center font-normal"
onClick={handleOpenFileDialog}
>
<Upload className="mr-2 h-4 w-4" />
{multiple ? 'Upload Files' : 'Upload File'}
<Tooltip>
<TooltipTrigger className="ml-1">
<Info className="h-4 w-4 text-muted-foreground" />
</TooltipTrigger>
<TooltipContent>
<p>Max file size: {maxSize}MB</p>
{multiple && <p>You can select multiple files at once</p>}
</TooltipContent>
</Tooltip>
</Button>
)}
</>
)}
</div>
)
}
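// A minimal sketch of the upload round-trip performed above, assuming the
// /api/files/upload route responds with `{ path: string }` on success. The
// `uploadSingleFile` helper and `UploadedFileInfo` names are hypothetical and
// only illustrate the contract.
interface UploadedFileInfo {
  name: string
  path: string
  size: number
  type: string
}
async function uploadSingleFile(file: File): Promise<UploadedFileInfo> {
  const formData = new FormData()
  formData.append('file', file)
  const response = await fetch('/api/files/upload', { method: 'POST', body: formData })
  if (!response.ok) {
    // Fall back to the status text when the error body is not JSON
    const errorData = await response.json().catch(() => ({ error: response.statusText }))
    throw new Error(errorData.error || `Failed to upload file: ${response.status}`)
  }
  const data = await response.json()
  // Mirror the UploadedFile shape the component stores in the sub-block value
  return { name: file.name, path: data.path, size: file.size, type: file.type }
}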

View File

@@ -1,7 +1,17 @@
import { useCallback, useEffect, useRef } from 'react'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
import { useWorkflowStore } from '@/stores/workflows/workflow/store'
import { isEqual } from 'lodash'
/**
 * Custom hook to get and set values for a sub-block in a workflow.
 * Handles complex object values properly by using deep equality comparison.
 *
 * @param blockId The ID of the block containing the sub-block
 * @param subBlockId The ID of the sub-block
 * @param triggerWorkflowUpdate Whether to trigger a workflow update when the value changes
 * @returns A tuple containing the current value and a setter function
 */
export function useSubBlockValue<T = any>(
  blockId: string,
  subBlockId: string,
@@ -15,23 +25,48 @@ export function useSubBlockValue<T = any>(
    )
  )
  // Keep a ref to the latest value to prevent unnecessary re-renders
  const valueRef = useRef<T | null>(null)
  // Get value from subblock store
  const storeValue = useSubBlockStore(
    useCallback(
      (state) => state.getValue(blockId, subBlockId),
      [blockId, subBlockId]
    )
  )
  // Update the ref if the store value changes
  // This ensures we're always working with the latest value
  useEffect(() => {
    // Use deep comparison for objects to prevent unnecessary updates
    if (!isEqual(valueRef.current, storeValue)) {
      valueRef.current = storeValue !== undefined ? storeValue : initialValue
    }
  }, [storeValue, initialValue])
  // Set value function that handles deep equality for complex objects
  const setValue = useCallback(
    (newValue: T) => {
      // Use deep comparison to avoid unnecessary updates for complex objects
      if (!isEqual(valueRef.current, newValue)) {
        valueRef.current = newValue
        // Ensure we're passing the actual value, not a reference that might change
        const valueCopy =
          newValue === null
            ? null
            : typeof newValue === 'object'
              ? JSON.parse(JSON.stringify(newValue))
              : newValue
        useSubBlockStore.getState().setValue(blockId, subBlockId, valueCopy)
        if (triggerWorkflowUpdate) {
          useWorkflowStore.getState().triggerUpdate()
        }
      }
    },
    [blockId, subBlockId, triggerWorkflowUpdate]
  )
  // Return the current value and setter
  return [valueRef.current as T | null, setValue] as const
}
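// A minimal usage sketch of the hook above; the component and sub-block ID are
// hypothetical. Passing `true` as the third argument triggers a workflow
// update whenever the value changes.
function ExampleInput({ blockId }: { blockId: string }) {
  const [text, setText] = useSubBlockValue<string>(blockId, 'exampleField', true)
  return <input value={text ?? ''} onChange={(e) => setText(e.target.value)} />
}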

View File

@@ -12,6 +12,7 @@ import { DateInput } from './components/date-input'
import { Dropdown } from './components/dropdown'
import { EvalInput } from './components/eval-input'
import { FileSelectorInput } from './components/file-selector/file-selector-input'
import { FileUpload } from './components/file-upload'
import { FolderSelectorInput } from './components/folder-selector/components/folder-selector-input'
import { LongInput } from './components/long-input'
import { ScheduleConfig } from './components/schedule/schedule-config'
@@ -129,6 +130,15 @@ export function SubBlock({ blockId, config, isConnecting }: SubBlockProps) {
return (
<TimeInput blockId={blockId} subBlockId={config.id} placeholder={config.placeholder} />
)
case 'file-upload':
return (
<FileUpload
blockId={blockId}
subBlockId={config.id}
acceptedTypes={config.acceptedTypes || '*'}
multiple={config.multiple === true}
/>
)
case 'webhook-config':
return (
<WebhookConfig blockId={blockId} subBlockId={config.id} isConnecting={isConnecting} />

sim/blocks/blocks/file.ts Normal file
View File

@@ -0,0 +1,66 @@
import { DocumentIcon } from '@/components/icons'
import { FileParserOutput } from '@/tools/file/parser'
import { BlockConfig } from '../types'
export const FileBlock: BlockConfig<FileParserOutput> = {
type: 'file',
name: 'File',
description: 'Read and parse multiple files',
longDescription:
'Upload and extract contents from structured file formats including PDFs, CSV spreadsheets, and Word documents (DOCX). Specialized parsers extract text and metadata from each format. You can upload multiple files at once and access them individually or as a combined document.',
category: 'tools',
bgColor: '#40916C',
icon: DocumentIcon,
subBlocks: [
{
id: 'file',
title: 'Upload Files',
type: 'file-upload',
layout: 'full',
acceptedTypes: '.pdf,.csv,.docx',
multiple: true,
},
],
tools: {
access: ['file_parser'],
config: {
tool: () => 'file_parser',
params: (params) => {
// Handle case where 'file' is an array (multiple files)
if (params.file && Array.isArray(params.file) && params.file.length > 0) {
// Process all files by sending array of paths
const filePaths = params.file.map((file) => file.path)
return {
filePath: filePaths.length === 1 ? filePaths[0] : filePaths,
fileType: params.fileType || 'auto',
}
}
// Handle case where 'file' is a single file object
if (params.file && params.file.path) {
return {
filePath: params.file.path,
fileType: params.fileType || 'auto',
}
}
// If no files, return empty params
return { filePath: '', fileType: params.fileType || 'auto' }
},
},
},
inputs: {
fileType: { type: 'string', required: false },
file: { type: 'json', required: true },
},
outputs: {
response: {
type: {
files: 'json',
combinedContent: 'string',
},
},
},
}
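// For illustration, the params() mapping above yields (paths hypothetical):
//   { file: { path: '/api/files/serve/a.pdf' } }
//     -> { filePath: '/api/files/serve/a.pdf', fileType: 'auto' }
//   { file: [{ path: '/api/files/serve/a.pdf' }, { path: '/api/files/serve/b.csv' }] }
//     -> { filePath: ['/api/files/serve/a.pdf', '/api/files/serve/b.csv'], fileType: 'auto' }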

View File

@@ -32,13 +32,13 @@ export const StarterBlock: BlockConfig<StarterBlockOutput> = {
value: () => 'manual',
},
// Structured Input format - visible if manual run is selected
// {
// id: 'inputFormat',
// title: 'Input Format (for API calls)',
// type: 'input-format',
// layout: 'full',
// condition: { field: 'startWorkflow', value: 'manual' },
// },
// Webhook configuration
{
id: 'webhookProvider',

View File

@@ -7,6 +7,7 @@ import { GoogleDocsBlock } from './blocks/docs'
import { GoogleDriveBlock } from './blocks/drive'
import { EvaluatorBlock } from './blocks/evaluator'
import { ExaBlock } from './blocks/exa'
import { FileBlock } from './blocks/file'
import { FirecrawlBlock } from './blocks/firecrawl'
import { FunctionBlock } from './blocks/function'
import { GitHubBlock } from './blocks/github'
@@ -38,6 +39,7 @@ import { BlockConfig } from './types'
export {
AgentBlock,
ApiBlock,
FileBlock,
FunctionBlock,
VisionBlock,
FirecrawlBlock,
@@ -79,6 +81,7 @@ const blocks: Record<string, BlockConfig> = {
confluence: ConfluenceBlock,
evaluator: EvaluatorBlock,
exa: ExaBlock,
file: FileBlock,
firecrawl: FirecrawlBlock,
function: FunctionBlock,
github: GitHubBlock,

View File

@@ -31,6 +31,7 @@ export type SubBlockType =
| 'file-selector' // File selector for Google Drive, etc.
| 'folder-selector' // Folder selector for Gmail, etc.
| 'input-format' // Input structure format
| 'file-upload' // File uploader
// Component width setting
export type SubBlockLayout = 'full' | 'half'
@@ -111,6 +112,9 @@ export interface SubBlockConfig {
requiredScopes?: string[]
// File selector specific properties
mimeType?: string
// File upload specific properties
acceptedTypes?: string
multiple?: boolean
}
// Main block definition

View File

@@ -1771,3 +1771,25 @@ export function TypeformIcon(props: SVGProps<SVGSVGElement>) {
</svg>
)
}
export function DocumentIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg
{...props}
width="20"
height="24"
viewBox="0 0 20 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M18.5 8.2L11.5 1.2C11.4 1.1 11.3 1.05 11.2 1C11.1 0.95 11 0.92 10.9 0.9C10.85 0.88 10.82 0.85 10.8 0.85H3C1.9 0.85 1 1.75 1 2.85V21.15C1 22.25 1.9 23.15 3 23.15H17C18.1 23.15 19 22.25 19 21.15V8.5C19 8.4 18.95 8.3 18.5 8.2ZM11.5 3.5L16.5 8.5H11.5V3.5ZM3 21.15V2.85H9.5V8.5C9.5 9.05 9.95 9.5 10.5 9.5H17V21.15H3Z"
fill="currentColor"
/>
<path
d="M5 12.5H14V13.5H5V12.5ZM5 17.3H14V18.3H5V17.3ZM5 7.5H7V8.5H5V7.5Z"
fill="currentColor"
/>
</svg>
)
}

View File

@@ -0,0 +1,68 @@
import { createReadStream, existsSync } from 'fs';
import { FileParseResult, FileParser } from './types';
import csvParser from 'csv-parser';
export class CsvParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
return new Promise((resolve, reject) => {
try {
// Validate input
if (!filePath) {
return reject(new Error('No file path provided'));
}
// Check if file exists
if (!existsSync(filePath)) {
return reject(new Error(`File not found: ${filePath}`));
}
const results: Record<string, any>[] = [];
const headers: string[] = [];
createReadStream(filePath)
.on('error', (error: Error) => {
console.error('CSV stream error:', error);
reject(new Error(`Failed to read CSV file: ${error.message}`));
})
.pipe(csvParser())
.on('headers', (headerList: string[]) => {
headers.push(...headerList);
})
.on('data', (data: Record<string, any>) => {
results.push(data);
})
.on('end', () => {
// Convert CSV data to a formatted string representation
let content = '';
// Add headers
if (headers.length > 0) {
content += headers.join(', ') + '\n';
}
// Add rows
results.forEach(row => {
const rowValues = Object.values(row).join(', ');
content += rowValues + '\n';
});
resolve({
content,
metadata: {
rowCount: results.length,
headers: headers,
rawData: results
}
});
})
.on('error', (error: Error) => {
console.error('CSV parsing error:', error);
reject(new Error(`Failed to parse CSV file: ${error.message}`));
});
} catch (error) {
console.error('CSV general error:', error);
reject(new Error(`Failed to process CSV file: ${(error as Error).message}`));
}
});
}
}
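// A minimal usage sketch of the parser above; the file path is illustrative.
async function exampleCsv(): Promise<void> {
  const parser = new CsvParser();
  const result = await parser.parseFile('/uploads/data.csv');
  // content is the comma-joined header row followed by one line per record
  console.log(result.content);
  // metadata carries rowCount, headers, and the raw parsed records
  console.log(result.metadata?.rowCount, result.metadata?.headers);
}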

View File

@@ -0,0 +1,45 @@
import { readFile } from 'fs/promises';
import mammoth from 'mammoth';
import { FileParseResult, FileParser } from './types';
// Define interface for mammoth result
interface MammothResult {
value: string;
messages: any[];
}
export class DocxParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
try {
// Validate input
if (!filePath) {
throw new Error('No file path provided');
}
// Read the file
const buffer = await readFile(filePath);
// Extract text with mammoth
const result = await mammoth.extractRawText({ buffer });
// Extract HTML for metadata (optional - won't fail if this fails)
let htmlResult: MammothResult = { value: '', messages: [] };
try {
htmlResult = await mammoth.convertToHtml({ buffer });
} catch (htmlError) {
console.warn('HTML conversion warning:', htmlError);
}
return {
content: result.value,
metadata: {
messages: [...result.messages, ...htmlResult.messages],
html: htmlResult.value
}
};
} catch (error) {
console.error('DOCX Parser error:', error);
throw new Error(`Failed to parse DOCX file: ${(error as Error).message}`);
}
}
}
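// A minimal usage sketch of the parser above; the file path is illustrative.
async function exampleDocx(): Promise<void> {
  const parser = new DocxParser();
  const { content, metadata } = await parser.parseFile('/uploads/report.docx');
  console.log(content.slice(0, 200));
  // html is empty when the optional HTML conversion failed
  console.log(metadata?.html ? 'HTML rendering available' : 'raw text only');
}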

View File

@@ -0,0 +1,278 @@
/**
* Unit tests for file parsers
*
* @vitest-environment node
*/
import path from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import type { FileParser, FileParseResult } from './types'
// Mock file system modules
const mockExistsSync = vi.fn().mockReturnValue(true)
const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test content'))
// Mock parser functions
const mockPdfParseFile = vi.fn().mockResolvedValue({
content: 'Parsed PDF content',
metadata: {
info: { Title: 'Test PDF' },
pageCount: 5,
version: '1.7',
},
})
const mockCsvParseFile = vi.fn().mockResolvedValue({
content: 'Parsed CSV content',
metadata: {
headers: ['column1', 'column2'],
rowCount: 10,
},
})
const mockDocxParseFile = vi.fn().mockResolvedValue({
content: 'Parsed DOCX content',
metadata: {
pages: 3,
author: 'Test Author',
},
})
// Create mock module implementation
const createMockModule = () => {
// Create mock parsers
const mockParsers: Record<string, FileParser> = {
pdf: { parseFile: mockPdfParseFile },
csv: { parseFile: mockCsvParseFile },
docx: { parseFile: mockDocxParseFile },
}
// Create the mock module implementation
return {
parseFile: async (filePath: string): Promise<FileParseResult> => {
if (!filePath) {
throw new Error('No file path provided')
}
if (!mockExistsSync(filePath)) {
throw new Error(`File not found: ${filePath}`)
}
const extension = path.extname(filePath).toLowerCase().substring(1)
if (!Object.keys(mockParsers).includes(extension)) {
throw new Error(
`Unsupported file type: ${extension}. Supported types are: ${Object.keys(mockParsers).join(', ')}`
)
}
return mockParsers[extension].parseFile(filePath)
},
isSupportedFileType: (extension: string): boolean => {
if (!extension) return false
return Object.keys(mockParsers).includes(extension.toLowerCase())
},
}
}
describe('File Parsers', () => {
// Setup required mocks before each test
beforeEach(() => {
vi.resetModules()
// Mock file system modules
vi.doMock('fs', () => ({
existsSync: mockExistsSync,
}))
vi.doMock('fs/promises', () => ({
readFile: mockReadFile,
}))
// Mock the file parser module with our implementation
vi.doMock('./index', () => createMockModule())
// Mock parser classes
vi.doMock('./pdf-parser', () => ({
PdfParser: vi.fn().mockImplementation(() => ({
parseFile: mockPdfParseFile,
})),
}))
vi.doMock('./csv-parser', () => ({
CsvParser: vi.fn().mockImplementation(() => ({
parseFile: mockCsvParseFile,
})),
}))
vi.doMock('./docx-parser', () => ({
DocxParser: vi.fn().mockImplementation(() => ({
parseFile: mockDocxParseFile,
})),
}))
vi.doMock('./raw-pdf-parser', () => ({
RawPdfParser: vi.fn().mockImplementation(() => ({
parseFile: vi.fn().mockResolvedValue({
content: 'Raw parsed PDF content',
metadata: {
pageCount: 3,
},
}),
})),
}))
// Silence console output during tests
global.console = {
...console,
log: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn(),
}
})
afterEach(() => {
vi.clearAllMocks()
vi.resetAllMocks()
vi.restoreAllMocks()
})
describe('parseFile', () => {
it('should validate file existence', async () => {
// Mock file not existing for this test only
mockExistsSync.mockReturnValueOnce(false)
// Dynamically import the module after mocks are set up
const { parseFile } = await import('./index')
const testFilePath = '/test/files/test.pdf'
await expect(parseFile(testFilePath)).rejects.toThrow('File not found')
expect(mockExistsSync).toHaveBeenCalledWith(testFilePath)
})
it('should throw error if file path is empty', async () => {
const { parseFile } = await import('./index')
await expect(parseFile('')).rejects.toThrow('No file path provided')
})
it('should parse PDF files successfully', async () => {
const expectedResult = {
content: 'Parsed PDF content',
metadata: {
info: { Title: 'Test PDF' },
pageCount: 5,
version: '1.7',
},
}
mockPdfParseFile.mockResolvedValueOnce(expectedResult)
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const result = await parseFile('/test/files/document.pdf')
expect(result).toEqual(expectedResult)
})
it('should parse CSV files successfully', async () => {
const expectedResult = {
content: 'Parsed CSV content',
metadata: {
headers: ['column1', 'column2'],
rowCount: 10,
},
}
mockCsvParseFile.mockResolvedValueOnce(expectedResult)
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const result = await parseFile('/test/files/data.csv')
expect(result).toEqual(expectedResult)
})
it('should parse DOCX files successfully', async () => {
const expectedResult = {
content: 'Parsed DOCX content',
metadata: {
pages: 3,
author: 'Test Author',
},
}
mockDocxParseFile.mockResolvedValueOnce(expectedResult)
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const result = await parseFile('/test/files/document.docx')
expect(result).toEqual(expectedResult)
})
it('should throw error for unsupported file types', async () => {
// Make sure the file "exists" for this test
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const unsupportedFilePath = '/test/files/image.png'
await expect(parseFile(unsupportedFilePath)).rejects.toThrow('Unsupported file type')
})
it('should handle errors during parsing', async () => {
// Make sure the file "exists" for this test
mockExistsSync.mockReturnValue(true)
const parsingError = new Error('CSV parsing failed')
mockCsvParseFile.mockRejectedValueOnce(parsingError)
const { parseFile } = await import('./index')
await expect(parseFile('/test/files/data.csv')).rejects.toThrow('CSV parsing failed')
})
})
describe('isSupportedFileType', () => {
it('should return true for supported file types', async () => {
const { isSupportedFileType } = await import('./index')
expect(isSupportedFileType('pdf')).toBe(true)
expect(isSupportedFileType('csv')).toBe(true)
expect(isSupportedFileType('docx')).toBe(true)
})
it('should return false for unsupported file types', async () => {
const { isSupportedFileType } = await import('./index')
expect(isSupportedFileType('png')).toBe(false)
expect(isSupportedFileType('txt')).toBe(false)
expect(isSupportedFileType('unknown')).toBe(false)
})
it('should handle uppercase extensions', async () => {
const { isSupportedFileType } = await import('./index')
expect(isSupportedFileType('PDF')).toBe(true)
expect(isSupportedFileType('CSV')).toBe(true)
})
it('should propagate errors from the parser registry', async () => {
// Create a mock that throws an error when called
const errorMockModule = {
isSupportedFileType: () => {
throw new Error('Failed to get parsers')
},
}
// Mock the module with our error-throwing implementation
vi.doMock('./index', () => errorMockModule)
// Import and test
const { isSupportedFileType } = await import('./index')
// The mocked implementation throws, so the error propagates to the caller
expect(() => isSupportedFileType('pdf')).toThrow('Failed to get parsers')
})
})
})

View File

@@ -0,0 +1,125 @@
import path from 'path';
import { FileParser, SupportedFileType, FileParseResult } from './types';
import { existsSync } from 'fs';
import { readFile } from 'fs/promises';
import { RawPdfParser } from './raw-pdf-parser';
// Lazy-loaded parsers to avoid initialization issues
let parserInstances: Record<string, FileParser> | null = null;
/**
* Get parser instances with lazy initialization
*/
function getParserInstances(): Record<string, FileParser> {
if (parserInstances === null) {
parserInstances = {};
try {
// Import parsers only when needed - with try/catch for each one
try {
console.log('Attempting to load PDF parser...');
try {
// First try to use the pdf-parse library
// Load the PdfParser lazily via require so pdf-parse is only initialized when needed and its test-file access at import time is avoided
const { PdfParser } = require('./pdf-parser');
parserInstances['pdf'] = new PdfParser();
console.log('PDF parser loaded successfully');
} catch (pdfParseError) {
// If that fails, fallback to our raw PDF parser
console.error('Failed to load primary PDF parser:', pdfParseError);
console.log('Falling back to raw PDF parser');
parserInstances['pdf'] = new RawPdfParser();
console.log('Raw PDF parser loaded successfully');
}
} catch (error) {
console.error('Failed to load any PDF parser:', error);
// Create a simple fallback that just returns the file size and a message
parserInstances['pdf'] = {
async parseFile(filePath: string): Promise<FileParseResult> {
const buffer = await readFile(filePath);
return {
content: `PDF parsing is not available. File size: ${buffer.length} bytes`,
metadata: {
info: { Error: 'PDF parsing unavailable' },
pageCount: 0,
version: 'unknown'
}
};
}
};
}
try {
const { CsvParser } = require('./csv-parser');
parserInstances['csv'] = new CsvParser();
} catch (error) {
console.error('Failed to load CSV parser:', error);
}
try {
const { DocxParser } = require('./docx-parser');
parserInstances['docx'] = new DocxParser();
} catch (error) {
console.error('Failed to load DOCX parser:', error);
}
} catch (error) {
console.error('Error loading file parsers:', error);
}
}
console.log('Available parsers:', Object.keys(parserInstances));
return parserInstances;
}
/**
* Parse a file based on its extension
* @param filePath Path to the file
* @returns Parsed content and metadata
*/
export async function parseFile(filePath: string): Promise<FileParseResult> {
try {
// Validate input
if (!filePath) {
throw new Error('No file path provided');
}
// Check if file exists
if (!existsSync(filePath)) {
throw new Error(`File not found: ${filePath}`);
}
const extension = path.extname(filePath).toLowerCase().substring(1);
console.log('Attempting to parse file with extension:', extension);
const parsers = getParserInstances();
if (!Object.keys(parsers).includes(extension)) {
console.log('No parser found for extension:', extension);
throw new Error(`Unsupported file type: ${extension}. Supported types are: ${Object.keys(parsers).join(', ')}`);
}
console.log('Using parser for extension:', extension);
const parser = parsers[extension];
return await parser.parseFile(filePath);
} catch (error) {
console.error('File parsing error:', error);
throw error;
}
}
/**
* Check if a file type is supported
* @param extension File extension without the dot
* @returns true if supported, false otherwise
*/
export function isSupportedFileType(extension: string): extension is SupportedFileType {
try {
return Object.keys(getParserInstances()).includes(extension.toLowerCase());
} catch (error) {
console.error('Error checking supported file type:', error);
return false;
}
}
// Type exports
export type { FileParseResult, FileParser, SupportedFileType };
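// A minimal sketch of calling the public API above; the path argument is illustrative.
async function parseIfSupported(filePath: string): Promise<FileParseResult> {
  const ext = path.extname(filePath).toLowerCase().substring(1);
  if (!isSupportedFileType(ext)) {
    throw new Error(`Cannot parse ${filePath}: unsupported extension "${ext}"`);
  }
  return parseFile(filePath);
}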

View File

@@ -0,0 +1,113 @@
import { readFile } from 'fs/promises';
// @ts-ignore
import * as pdfParseLib from 'pdf-parse/lib/pdf-parse.js';
import { FileParseResult, FileParser } from './types';
export class PdfParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
try {
console.log('PDF Parser: Starting to parse file:', filePath);
// Make sure we're only parsing the provided file path
if (!filePath) {
throw new Error('No file path provided');
}
// Read the file
console.log('PDF Parser: Reading file...');
const dataBuffer = await readFile(filePath);
console.log('PDF Parser: File read successfully, size:', dataBuffer.length);
// Try to parse with pdf-parse library first
try {
console.log('PDF Parser: Attempting to parse with pdf-parse library...');
// Parse PDF with direct function call to avoid test file access
console.log('PDF Parser: Starting PDF parsing...');
const data = await pdfParseLib.default(dataBuffer);
console.log('PDF Parser: PDF parsed successfully with pdf-parse, pages:', data.numpages);
return {
content: data.text,
metadata: {
pageCount: data.numpages,
info: data.info,
version: data.version
}
};
} catch (pdfParseError) {
console.error('PDF-parse library failed:', pdfParseError);
// Fallback to manual text extraction
console.log('PDF Parser: Falling back to manual text extraction...');
// Extract basic PDF info from raw content
const rawContent = dataBuffer.toString('utf-8', 0, Math.min(10000, dataBuffer.length));
let version = 'Unknown';
let pageCount = 0;
// Try to extract PDF version
const versionMatch = rawContent.match(/%PDF-(\d+\.\d+)/);
if (versionMatch && versionMatch[1]) {
version = versionMatch[1];
}
// Try to get page count
const pageMatches = rawContent.match(/\/Type\s*\/Page\b/g);
if (pageMatches) {
pageCount = pageMatches.length;
}
// Try to extract text by looking for text-related operators in the PDF
let extractedText = '';
// Look for text in the PDF content using common patterns
const textMatches = rawContent.match(/BT[\s\S]*?ET/g);
if (textMatches && textMatches.length > 0) {
extractedText = textMatches.map(textBlock => {
// Extract text objects (Tj, TJ) from the text block
const textObjects = textBlock.match(/\([^)]*\)\s*Tj|\[[^\]]*\]\s*TJ/g);
if (textObjects) {
return textObjects.map(obj => {
// Clean up text objects
return obj.replace(/\(([^)]*)\)\s*Tj|\[([^\]]*)\]\s*TJ/g,
(match, p1, p2) => p1 || p2 || '')
// Clean up PDF escape sequences
.replace(/\\(\d{3}|[()\\])/g, '')
.replace(/\\\\/g, '\\')
.replace(/\\\(/g, '(')
.replace(/\\\)/g, ')');
}).join(' ');
}
return '';
}).join('\n');
}
// If we couldn't extract text, provide a helpful message
if (!extractedText || extractedText.length < 20) {
extractedText = `This PDF document (version ${version}) contains ${pageCount || 'an unknown number of'} pages. The text could not be extracted properly.
For better results, please use a dedicated PDF reader or text extraction tool.`;
}
console.log('PDF Parser: Manual text extraction completed, found text length:', extractedText.length);
return {
content: extractedText,
metadata: {
pageCount: pageCount || 0,
info: {
manualExtraction: true,
version
},
version
}
};
}
} catch (error) {
console.error('PDF Parser error:', error);
throw new Error(`Failed to parse PDF file: ${(error as Error).message}`);
}
}
}
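// For reference, a hedged sketch of the pdf-parse result fields used above
// (values illustrative):
//   {
//     text: 'Extracted page text...',
//     numpages: 5,
//     info: { Title: 'Example Document', Author: 'Jane Doe' },
//     version: '1.7'
//   }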

View File

@@ -0,0 +1,481 @@
import { readFile } from 'fs/promises';
import { FileParseResult, FileParser } from './types';
import zlib from 'zlib';
import { promisify } from 'util';
// Promisify zlib functions
const inflateAsync = promisify(zlib.inflate);
const unzipAsync = promisify(zlib.unzip);
/**
* A simple PDF parser that extracts readable text from a PDF file.
* This is used as a fallback when the pdf-parse library fails.
*/
export class RawPdfParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
try {
console.log('RawPdfParser: Starting to parse file:', filePath);
if (!filePath) {
throw new Error('No file path provided');
}
// Read the file
console.log('RawPdfParser: Reading file...');
const dataBuffer = await readFile(filePath);
console.log('RawPdfParser: File read successfully, size:', dataBuffer.length);
// Instead of trying to parse the binary PDF data directly,
// we'll extract only the text sections that are readable
// First convert to string but only for pattern matching, not for display
const rawContent = dataBuffer.toString('utf-8');
// Extract basic PDF info
let version = 'Unknown';
let pageCount = 0;
// Try to extract PDF version
const versionMatch = rawContent.match(/%PDF-(\d+\.\d+)/);
if (versionMatch && versionMatch[1]) {
version = versionMatch[1];
}
// Count pages using multiple methods for redundancy
// Method 1: Count "/Type /Page" occurrences (most reliable)
const typePageMatches = rawContent.match(/\/Type\s*\/Page\b/gi);
if (typePageMatches) {
pageCount = typePageMatches.length;
console.log('RawPdfParser: Found page count using /Type /Page:', pageCount);
}
// Method 2: Look for "/Page" dictionary references
if (pageCount === 0) {
const pageMatches = rawContent.match(/\/Page\s*\//gi);
if (pageMatches) {
pageCount = pageMatches.length;
console.log('RawPdfParser: Found page count using /Page/ pattern:', pageCount);
}
}
// Method 3: Look for "/Pages" object references
if (pageCount === 0) {
const pagesObjMatches = rawContent.match(/\/Pages\s+\d+\s+\d+\s+R/gi);
if (pagesObjMatches && pagesObjMatches.length > 0) {
// Extract the object reference
const pagesObjRef = pagesObjMatches[0].match(/\/Pages\s+(\d+)\s+\d+\s+R/i);
if (pagesObjRef && pagesObjRef[1]) {
const objNum = pagesObjRef[1];
// Find the referenced object
const objRegex = new RegExp(`${objNum}\\s+0\\s+obj[\\s\\S]*?endobj`, 'i');
const objMatch = rawContent.match(objRegex);
if (objMatch) {
// Look for /Count within the Pages object
const countMatch = objMatch[0].match(/\/Count\s+(\d+)/i);
if (countMatch && countMatch[1]) {
pageCount = parseInt(countMatch[1], 10);
console.log('RawPdfParser: Found page count using /Count in Pages object:', pageCount);
}
}
}
}
}
// Method 4: Count trailer references to get an approximate count
if (pageCount === 0) {
const trailerMatches = rawContent.match(/trailer/gi);
if (trailerMatches) {
// This is just a rough estimate, not accurate
pageCount = Math.max(1, Math.ceil(trailerMatches.length / 2));
console.log('RawPdfParser: Estimated page count using trailer references:', pageCount);
}
}
// Default to at least 1 page if we couldn't find any
if (pageCount === 0) {
pageCount = 1;
console.log('RawPdfParser: Defaulting to 1 page as no count was found');
}
// Extract text content using text markers commonly found in PDFs
let extractedText = '';
// Method 1: Extract text between BT (Begin Text) and ET (End Text) markers
const textMatches = rawContent.match(/BT[\s\S]*?ET/g);
if (textMatches && textMatches.length > 0) {
console.log('RawPdfParser: Found', textMatches.length, 'text blocks');
extractedText = textMatches.map(textBlock => {
// Extract text objects (Tj, TJ) from the text block
const textObjects = textBlock.match(/(\([^)]*\)|\[[^\]]*\])\s*(Tj|TJ)/g);
if (textObjects && textObjects.length > 0) {
return textObjects.map(obj => {
// Clean up text objects
let text = '';
if (obj.includes('Tj')) {
// Handle Tj operator (simple string)
const match = obj.match(/\(([^)]*)\)\s*Tj/);
if (match && match[1]) {
text = match[1];
}
} else if (obj.includes('TJ')) {
// Handle TJ operator (array of strings and positioning)
const match = obj.match(/\[(.*)\]\s*TJ/);
if (match && match[1]) {
// Extract only the string parts from the array
const parts = match[1].match(/\([^)]*\)/g);
if (parts) {
text = parts.map(p => p.slice(1, -1)).join(' ');
}
}
}
// Clean up PDF escape sequences
return text
.replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
.replace(/\\\\/g, '\\')
.replace(/\\\(/g, '(')
.replace(/\\\)/g, ')');
}).join(' ');
}
return '';
}).join('\n').trim();
}
// Try to extract metadata from XML
let metadataText = '';
const xmlMatch = rawContent.match(/<x:xmpmeta[\s\S]*?<\/x:xmpmeta>/);
if (xmlMatch) {
const xmlContent = xmlMatch[0];
console.log('RawPdfParser: Found XML metadata');
// Extract document title
const titleMatch = xmlContent.match(/<dc:title>[\s\S]*?<rdf:li[^>]*>(.*?)<\/rdf:li>/i);
if (titleMatch && titleMatch[1]) {
const title = titleMatch[1].replace(/<[^>]+>/g, '').trim();
metadataText += `Document Title: ${title}\n\n`;
}
// Extract creator/author
const creatorMatch = xmlContent.match(/<dc:creator>[\s\S]*?<rdf:li[^>]*>(.*?)<\/rdf:li>/i);
if (creatorMatch && creatorMatch[1]) {
const creator = creatorMatch[1].replace(/<[^>]+>/g, '').trim();
metadataText += `Author: ${creator}\n`;
}
// Extract creation date
const dateMatch = xmlContent.match(/<xmp:CreateDate>(.*?)<\/xmp:CreateDate>/i);
if (dateMatch && dateMatch[1]) {
metadataText += `Created: ${dateMatch[1].trim()}\n`;
}
// Extract producer
const producerMatch = xmlContent.match(/<pdf:Producer>(.*?)<\/pdf:Producer>/i);
if (producerMatch && producerMatch[1]) {
metadataText += `Producer: ${producerMatch[1].trim()}\n`;
}
}
// Try to extract actual text content from content streams
if (!extractedText || extractedText.length < 100 || extractedText.includes('/Type /Page')) {
console.log('RawPdfParser: Trying advanced text extraction from content streams');
// Find content stream references
const contentRefs = rawContent.match(/\/Contents\s+\[?\s*(\d+)\s+\d+\s+R\s*\]?/g);
if (contentRefs && contentRefs.length > 0) {
console.log('RawPdfParser: Found', contentRefs.length, 'content stream references');
// Extract object numbers from content references
const objNumbers = contentRefs.map(ref => {
const match = ref.match(/\/Contents\s+\[?\s*(\d+)\s+\d+\s+R\s*\]?/);
return match ? match[1] : null;
}).filter(Boolean);
console.log('RawPdfParser: Content stream object numbers:', objNumbers);
// Try to find those objects in the content
if (objNumbers.length > 0) {
let textFromStreams = '';
for (const objNum of objNumbers) {
const objRegex = new RegExp(`${objNum}\\s+0\\s+obj[\\s\\S]*?endobj`, 'i');
const objMatch = rawContent.match(objRegex);
if (objMatch) {
// Look for stream content within the object
const streamMatch = objMatch[0].match(/stream\r?\n([\s\S]*?)\r?\nendstream/);
if (streamMatch && streamMatch[1]) {
const streamContent = streamMatch[1];
// Look for text operations in the stream (Tj, TJ, etc.)
const textFragments = streamContent.match(/\([^)]+\)\s*Tj|\[[^\]]*\]\s*TJ/g);
if (textFragments && textFragments.length > 0) {
const extractedFragments = textFragments.map(fragment => {
if (fragment.includes('Tj')) {
return fragment.replace(/\(([^)]*)\)\s*Tj/, '$1')
.replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
.replace(/\\\\/g, '\\')
.replace(/\\\(/g, '(')
.replace(/\\\)/g, ')');
} else if (fragment.includes('TJ')) {
const parts = fragment.match(/\([^)]*\)/g);
if (parts) {
return parts.map(p => p.slice(1, -1)
.replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
.replace(/\\\\/g, '\\')
.replace(/\\\(/g, '(')
.replace(/\\\)/g, ')')
).join(' ');
}
}
return '';
}).filter(Boolean).join(' ');
if (extractedFragments.trim().length > 0) {
textFromStreams += extractedFragments.trim() + '\n';
}
}
}
}
}
if (textFromStreams.trim().length > 0) {
console.log('RawPdfParser: Successfully extracted text from content streams');
extractedText = textFromStreams.trim();
}
}
}
}
// Try to decompress PDF streams
// This is especially helpful for PDFs with compressed content
if (!extractedText || extractedText.length < 100) {
console.log('RawPdfParser: Trying to decompress PDF streams');
// Find compressed streams (FlateDecode)
const compressedStreams = rawContent.match(/\/Filter\s*\/FlateDecode[\s\S]*?stream[\s\S]*?endstream/g);
if (compressedStreams && compressedStreams.length > 0) {
console.log('RawPdfParser: Found', compressedStreams.length, 'compressed streams');
// Process each stream
const decompressedContents = await Promise.all(
compressedStreams.map(async (stream) => {
try {
// Extract stream content between stream and endstream
const streamMatch = stream.match(/stream\r?\n([\s\S]*?)\r?\nendstream/);
if (!streamMatch || !streamMatch[1]) return '';
const compressedData = Buffer.from(streamMatch[1], 'binary');
// Try different decompression methods
try {
// Try inflate (most common)
const decompressed = await inflateAsync(compressedData);
const content = decompressed.toString('utf-8');
// Check if it contains readable text
const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
if (readable.length > 50 &&
readable.includes(' ') &&
(readable.includes('.') || readable.includes(',')) &&
!/[\x00-\x1F\x7F]/.test(readable)) {
return readable;
}
} catch (inflateErr) {
// Try unzip as fallback
try {
const decompressed = await unzipAsync(compressedData);
const content = decompressed.toString('utf-8');
// Check if it contains readable text
const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
if (readable.length > 50 &&
readable.includes(' ') &&
(readable.includes('.') || readable.includes(',')) &&
!/[\x00-\x1F\x7F]/.test(readable)) {
return readable;
}
} catch (unzipErr) {
// Both methods failed, continue to next stream
return '';
}
}
} catch (error) {
// Error processing this stream, skip it
return '';
}
return '';
})
);
// Filter out empty results and combine
const decompressedText = decompressedContents
.filter(text => text && text.length > 0)
.join('\n\n');
if (decompressedText && decompressedText.length > 0) {
console.log('RawPdfParser: Successfully decompressed text content, length:', decompressedText.length);
extractedText = decompressedText;
}
}
}
// Method 2: Look for text stream data
if (!extractedText || extractedText.length < 50) {
console.log('RawPdfParser: Trying alternative text extraction method with streams');
// Find text streams
const streamMatches = rawContent.match(/stream[\s\S]*?endstream/g);
if (streamMatches && streamMatches.length > 0) {
console.log('RawPdfParser: Found', streamMatches.length, 'streams');
// Process each stream to look for text content
const textContent = streamMatches
.map(stream => {
// Remove 'stream' and 'endstream' markers
let content = stream.replace(/^stream\r?\n|\r?\nendstream$/g, '');
// Look for readable ASCII text (more strict heuristic)
// Only keep ASCII printable characters
const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
// Only keep content that looks like real text (has spaces, periods, etc.)
if (readable.length > 20 &&
readable.includes(' ') &&
(readable.includes('.') || readable.includes(',')) &&
!/[\x00-\x1F\x7F]/.test(readable)) {
return readable;
}
return '';
})
.filter(text => text.length > 0 && text.split(' ').length > 5) // Must have at least 5 words
.join('\n\n');
if (textContent.length > 0) {
extractedText = textContent;
}
}
}
// Method 3: Look for object streams
if (!extractedText || extractedText.length < 50) {
console.log('RawPdfParser: Trying object streams for text');
// Find object stream content
const objMatches = rawContent.match(/\d+\s+\d+\s+obj[\s\S]*?endobj/g);
if (objMatches && objMatches.length > 0) {
console.log('RawPdfParser: Found', objMatches.length, 'objects');
// Process objects looking for text content
const textContent = objMatches
.map(obj => {
// Find readable text in the object - only keep ASCII printable characters
const readable = obj.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
// Only include if it looks like actual text (strict heuristic)
if (readable.length > 50 &&
readable.includes(' ') &&
!readable.includes('/Filter') &&
readable.split(' ').length > 10 &&
(readable.includes('.') || readable.includes(','))) {
return readable;
}
return '';
})
.filter(text => text.length > 0)
.join('\n\n');
if (textContent.length > 0) {
extractedText += (extractedText ? '\n\n' : '') + textContent;
}
}
}
// If what we extracted is just PDF structure information rather than readable text,
// provide a clearer message
if (extractedText && (
extractedText.includes('endobj') ||
extractedText.includes('/Type /Page') ||
extractedText.match(/\d+\s+\d+\s+obj/g)
) && metadataText) {
console.log('RawPdfParser: Extracted content appears to be PDF structure information, using metadata instead');
extractedText = metadataText;
} else if (metadataText && !extractedText.includes('Document Title:')) {
// Prepend metadata to extracted text if available
extractedText = metadataText + (extractedText ? '\n\n' + extractedText : '');
}
// Validate that the extracted text looks meaningful
// Count how many recognizable words/characters it contains
const validCharCount = (extractedText || '').replace(/[^\x20-\x7E\r\n]/g, '').length;
const totalCharCount = (extractedText || '').length;
const validRatio = validCharCount / (totalCharCount || 1);
// Check for common PDF artifacts that indicate binary corruption
const hasBinaryArtifacts = extractedText && (
extractedText.includes('\\u') ||
extractedText.includes('\\x') ||
extractedText.includes('\0') ||
/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]{10,}/g.test(extractedText) ||
validRatio < 0.7 // Less than 70% valid characters
);
// Check if the content looks like gibberish
const looksLikeGibberish = extractedText && (
// Too many special characters
extractedText.replace(/[a-zA-Z0-9\s.,;:'"()[\]{}]/g, '').length / extractedText.length > 0.3 ||
// Not enough spaces (real text has spaces between words)
extractedText.split(' ').length < extractedText.length / 20
);
// If no text was extracted, or if it's binary/gibberish,
// provide a helpful message instead
if (!extractedText || extractedText.length < 50 || hasBinaryArtifacts || looksLikeGibberish) {
console.log('RawPdfParser: Could not extract meaningful text, providing fallback message');
console.log('RawPdfParser: Valid character ratio:', validRatio);
console.log('RawPdfParser: Has binary artifacts:', hasBinaryArtifacts);
console.log('RawPdfParser: Looks like gibberish:', looksLikeGibberish);
// Start with metadata if available
if (metadataText) {
extractedText = metadataText + '\n';
} else {
extractedText = '';
}
// Add basic PDF info
extractedText += `This is a PDF document with ${pageCount} page(s) and version ${version}.\n\n`;
// Try to find a title in the PDF structure that we might have missed
const titleInStructure = rawContent.match(/title\s*:\s*([^\n]+)/i) ||
rawContent.match(/Microsoft Word -\s*([^\n]+)/i);
if (titleInStructure && titleInStructure[1] && !extractedText.includes('Document Title:')) {
const title = titleInStructure[1].trim();
extractedText = `Document Title: ${title}\n\n` + extractedText;
}
extractedText += `The text content could not be properly extracted due to encoding or compression issues.\nFile size: ${dataBuffer.length} bytes.\n\nTo view this PDF properly, please download the file and open it with a PDF reader.`;
}
console.log('RawPdfParser: PDF parsed with basic extraction, found text length:', extractedText.length);
return {
content: extractedText,
metadata: {
pageCount,
info: {
RawExtraction: true,
Version: version,
Size: dataBuffer.length
},
version
}
};
} catch (error) {
console.error('RawPdfParser error:', error);
throw new Error(`Failed to parse PDF file: ${(error as Error).message}`);
}
}
}
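// For context, a tiny illustration of the content-stream syntax the regexes
// above target (not a complete PDF):
//   BT
//     /F1 12 Tf
//     (Hello ) Tj
//     [(Wor) -30 (ld)] TJ
//   ET
// BT/ET bracket a text block; Tj draws a single string and TJ draws an array
// of strings interleaved with kerning adjustments.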

View File

@@ -0,0 +1,10 @@
export interface FileParseResult {
content: string;
metadata?: Record<string, any>;
}
export interface FileParser {
parseFile(filePath: string): Promise<FileParseResult>;
}
export type SupportedFileType = 'pdf' | 'csv' | 'docx';
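// A hedged sketch of implementing the interface above for a new format: a
// hypothetical plain-text parser. It is not part of this PR; 'txt' would also
// need to be added to SupportedFileType and registered in index.ts.
import { readFile } from 'fs/promises';

export class TxtParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    const content = await readFile(filePath, 'utf-8');
    return { content, metadata: { characterCount: content.length } };
  }
}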

View File

@@ -0,0 +1,302 @@
/**
* Unit tests for S3 client
*
* @vitest-environment node
*/
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import {
S3Client,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import {
uploadToS3,
getPresignedUrl,
downloadFromS3,
deleteFromS3,
s3Client,
FileInfo
} from './s3-client'
// Mock AWS SDK
vi.mock('@aws-sdk/client-s3', () => {
const mockSend = vi.fn()
const mockS3Client = vi.fn().mockImplementation(() => ({
send: mockSend
}))
return {
S3Client: mockS3Client,
PutObjectCommand: vi.fn(),
GetObjectCommand: vi.fn(),
DeleteObjectCommand: vi.fn()
}
})
vi.mock('@aws-sdk/s3-request-presigner', () => ({
getSignedUrl: vi.fn().mockResolvedValue('https://example.com/presigned-url')
}))
// Mock S3 configuration for predictable bucket/region values
vi.mock('./setup', () => ({
S3_CONFIG: {
bucket: 'test-bucket',
region: 'test-region',
baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com'
}
}))
// Mock logger
vi.mock('@/lib/logs/console-logger', () => ({
createLogger: vi.fn().mockReturnValue({
info: vi.fn(),
error: vi.fn(),
warn: vi.fn(),
debug: vi.fn()
})
}))
describe('S3 Client', () => {
let mockDate: Date
let originalDateNow: typeof Date.now
beforeEach(() => {
vi.clearAllMocks()
// Mock Date.now() for predictable timestamps
mockDate = new Date(2023, 0, 1, 12, 0, 0) // 2023-01-01 12:00:00
originalDateNow = Date.now
Date.now = vi.fn(() => mockDate.getTime())
})
afterEach(() => {
// Restore original Date.now
Date.now = originalDateNow
})
describe('uploadToS3', () => {
it('should upload a file to S3 and return file info', async () => {
// Mock S3 client send method to return an appropriate type
vi.mocked(s3Client.send).mockResolvedValueOnce({
$metadata: { httpStatusCode: 200 }
} as any)
const testFile = Buffer.from('test file content')
const fileName = 'test-file.txt'
const contentType = 'text/plain'
const fileSize = testFile.length
const result = await uploadToS3(testFile, fileName, contentType)
// Check that S3 client was called with correct parameters
expect(PutObjectCommand).toHaveBeenCalledWith({
Bucket: 'test-bucket',
Key: expect.stringContaining('test-file.txt'),
Body: testFile,
ContentType: contentType,
Metadata: {
originalName: fileName,
uploadedAt: expect.any(String)
}
})
expect(s3Client.send).toHaveBeenCalledTimes(1)
// Check return value
expect(result).toEqual({
path: expect.stringContaining('/api/files/serve/s3/'),
key: expect.stringContaining('test-file.txt'),
name: fileName,
size: fileSize,
type: contentType
})
})
it('should handle spaces in filenames', async () => {
vi.mocked(s3Client.send).mockResolvedValueOnce({
$metadata: { httpStatusCode: 200 }
} as any)
const testFile = Buffer.from('test file content')
const fileName = 'test file with spaces.txt'
const contentType = 'text/plain'
const result = await uploadToS3(testFile, fileName, contentType)
// Verify spaces were replaced with hyphens in the key but original name is preserved
expect(result.key).toContain('test-file-with-spaces.txt')
expect(result.name).toBe(fileName)
})
it('should use provided size if available', async () => {
vi.mocked(s3Client.send).mockResolvedValueOnce({
$metadata: { httpStatusCode: 200 }
} as any)
const testFile = Buffer.from('test file content')
const fileName = 'test-file.txt'
const contentType = 'text/plain'
const providedSize = 12345 // Different from actual buffer size
const result = await uploadToS3(testFile, fileName, contentType, providedSize)
expect(result.size).toBe(providedSize)
})
it('should handle upload errors', async () => {
const error = new Error('Upload failed')
vi.mocked(s3Client.send).mockRejectedValueOnce(error)
const testFile = Buffer.from('test file content')
const fileName = 'test-file.txt'
const contentType = 'text/plain'
await expect(uploadToS3(testFile, fileName, contentType)).rejects.toThrow('Upload failed')
})
})
describe('getPresignedUrl', () => {
it('should generate a presigned URL for a file', async () => {
const key = 'test-file.txt'
const expiresIn = 7200
const url = await getPresignedUrl(key, expiresIn)
expect(GetObjectCommand).toHaveBeenCalledWith({
Bucket: 'test-bucket',
Key: key
})
expect(getSignedUrl).toHaveBeenCalledWith(
s3Client,
expect.any(Object),
{ expiresIn }
)
expect(url).toBe('https://example.com/presigned-url')
})
it('should use default expiration if not provided', async () => {
const key = 'test-file.txt'
await getPresignedUrl(key)
expect(getSignedUrl).toHaveBeenCalledWith(
s3Client,
expect.any(Object),
{ expiresIn: 3600 } // Default is 3600 seconds (1 hour)
)
})
it('should handle errors when generating presigned URL', async () => {
const error = new Error('Presigned URL generation failed')
vi.mocked(getSignedUrl).mockRejectedValueOnce(error)
const key = 'test-file.txt'
await expect(getPresignedUrl(key)).rejects.toThrow('Presigned URL generation failed')
})
})
describe('downloadFromS3', () => {
it('should download a file from S3', async () => {
// Create mock stream with data events
const mockStream = {
on: vi.fn((event, callback) => {
if (event === 'data') {
callback(Buffer.from('chunk1'))
callback(Buffer.from('chunk2'))
}
if (event === 'end') {
callback()
}
return mockStream
})
}
vi.mocked(s3Client.send).mockResolvedValueOnce({
Body: mockStream,
$metadata: { httpStatusCode: 200 }
} as any)
const key = 'test-file.txt'
const result = await downloadFromS3(key)
expect(GetObjectCommand).toHaveBeenCalledWith({
Bucket: 'test-bucket',
Key: key
})
expect(s3Client.send).toHaveBeenCalledTimes(1)
expect(result).toBeInstanceOf(Buffer)
expect(result.toString()).toEqual(Buffer.concat([Buffer.from('chunk1'), Buffer.from('chunk2')]).toString())
})
it('should handle stream errors', async () => {
const mockStream = {
on: vi.fn((event, callback) => {
if (event === 'error') {
callback(new Error('Stream error'))
}
return mockStream
})
}
vi.mocked(s3Client.send).mockResolvedValueOnce({
Body: mockStream,
$metadata: { httpStatusCode: 200 }
} as any)
const key = 'test-file.txt'
await expect(downloadFromS3(key)).rejects.toThrow('Stream error')
})
it('should handle S3 client errors', async () => {
const error = new Error('Download failed')
vi.mocked(s3Client.send).mockRejectedValueOnce(error)
const key = 'test-file.txt'
await expect(downloadFromS3(key)).rejects.toThrow('Download failed')
})
})
describe('deleteFromS3', () => {
it('should delete a file from S3', async () => {
vi.mocked(s3Client.send).mockResolvedValueOnce({
$metadata: { httpStatusCode: 200 }
} as any)
const key = 'test-file.txt'
await deleteFromS3(key)
expect(DeleteObjectCommand).toHaveBeenCalledWith({
Bucket: 'test-bucket',
Key: key
})
expect(s3Client.send).toHaveBeenCalledTimes(1)
})
it('should handle delete errors', async () => {
const error = new Error('Delete failed')
vi.mocked(s3Client.send).mockRejectedValueOnce(error)
const key = 'test-file.txt'
await expect(deleteFromS3(key)).rejects.toThrow('Delete failed')
})
})
describe('s3Client initialization', () => {
it('should initialize with correct configuration', () => {
// The client is constructed at import time, so the constructor call itself
// can't be asserted on directly; verify the exported client and the mocked
// constructor are both defined instead
expect(s3Client).toBeDefined()
expect(S3Client).toBeDefined()
})
})
})

View File

@@ -0,0 +1,119 @@
import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { createLogger } from '@/lib/logs/console-logger'
import { S3_CONFIG } from './setup'
const logger = createLogger('S3Client')
// Create an S3 client
export const s3Client = new S3Client({
region: S3_CONFIG.region,
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID || '',
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || ''
}
})
/**
* File information structure
*/
export interface FileInfo {
path: string // Path to access the file
key: string // S3 key or local filename
name: string // Original filename
size: number // File size in bytes
type: string // MIME type
}
/**
* Upload a file to S3
* @param file Buffer containing file data
* @param fileName Original file name
* @param contentType MIME type of the file
* @param size File size in bytes (optional, will use buffer length if not provided)
* @returns Object with file information
*/
export async function uploadToS3(
file: Buffer,
fileName: string,
contentType: string,
size?: number
): Promise<FileInfo> {
// Prefix the key with a timestamp to prevent collisions; keys are flat, with no directory structure
const safeFileName = fileName.replace(/\s+/g, '-') // Replace spaces with hyphens
const uniqueKey = `${Date.now()}-${safeFileName}`
// Upload the file to S3
await s3Client.send(new PutObjectCommand({
Bucket: S3_CONFIG.bucket,
Key: uniqueKey,
Body: file,
ContentType: contentType,
// Add some useful metadata
Metadata: {
originalName: fileName,
uploadedAt: new Date().toISOString()
}
}))
// Create a path for API to serve the file
const servePath = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`
return {
path: servePath,
key: uniqueKey,
name: fileName,
size: size ?? file.length,
type: contentType
}
}
/**
* Generate a presigned URL for direct file access
* @param key S3 object key
* @param expiresIn Time in seconds until URL expires
* @returns Presigned URL
*/
export async function getPresignedUrl(key: string, expiresIn = 3600): Promise<string> {
const command = new GetObjectCommand({
Bucket: S3_CONFIG.bucket,
Key: key
})
return getSignedUrl(s3Client, command, { expiresIn })
}
/**
* Download a file from S3
* @param key S3 object key
* @returns File buffer
*/
export async function downloadFromS3(key: string): Promise<Buffer> {
const command = new GetObjectCommand({
Bucket: S3_CONFIG.bucket,
Key: key
})
const response = await s3Client.send(command)
const stream = response.Body as any
// Convert stream to buffer
return new Promise<Buffer>((resolve, reject) => {
const chunks: Buffer[] = []
stream.on('data', (chunk: Buffer) => chunks.push(chunk))
stream.on('end', () => resolve(Buffer.concat(chunks)))
stream.on('error', reject)
})
}
/**
* Delete a file from S3
* @param key S3 object key
*/
export async function deleteFromS3(key: string) {
await s3Client.send(new DeleteObjectCommand({
Bucket: S3_CONFIG.bucket,
Key: key
}))
}
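
A minimal usage sketch for these helpers; the buffer contents, filename, and expiry below are illustrative, not part of the module:

import { uploadToS3, getPresignedUrl, downloadFromS3, deleteFromS3 } from '@/lib/uploads/s3-client'

async function roundTrip() {
  // Upload a small text file; the returned key carries a timestamp prefix
  const info = await uploadToS3(Buffer.from('hello'), 'notes.txt', 'text/plain')
  // Hand out a short-lived direct link (default expiry is 3600 seconds)
  const url = await getPresignedUrl(info.key, 300)
  // Or pull the bytes back through the server
  const data = await downloadFromS3(info.key)
  // Clean up
  await deleteFromS3(info.key)
  return { url, text: data.toString() }
}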

View File

@@ -0,0 +1,35 @@
import { ensureUploadsDirectory, USE_S3_STORAGE, S3_CONFIG } from './setup'
import { createLogger } from '@/lib/logs/console-logger'
const logger = createLogger('UploadsSetup')
// Runs once at module load on the server
if (typeof process !== 'undefined') {
// Log storage mode
logger.info(`Storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'}`)
if (USE_S3_STORAGE) {
logger.info('Using S3 storage mode with configuration:')
logger.info(`- Bucket: ${S3_CONFIG.bucket}`)
logger.info(`- Region: ${S3_CONFIG.region}`)
// Verify AWS credentials
if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY) {
logger.warn('AWS credentials are not set in environment variables.')
logger.warn('Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for S3 storage.')
} else {
logger.info('AWS credentials found in environment variables')
}
} else {
// Local storage mode: ensure the uploads directory exists
ensureUploadsDirectory().then((success) => {
if (success) {
logger.info('Local uploads directory initialized')
} else {
logger.error('Failed to initialize local uploads directory')
}
})
}
}
export default ensureUploadsDirectory
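
This module exists purely for its side effects, so consumers would import it bare; where exactly it is imported is not shown here, but the shape would be:

// e.g. from a server-only entry point (the location is an assumption)
import '@/lib/uploads/setup.server'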

45
sim/lib/uploads/setup.ts Normal file
View File

@@ -0,0 +1,45 @@
import { existsSync } from 'fs'
import { mkdir } from 'fs/promises'
import path, { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'
const logger = createLogger('UploadsSetup')
// Resolve the project root from the process working directory
const PROJECT_ROOT = path.resolve(process.cwd())
// Define the upload directory path using project root
export const UPLOAD_DIR = join(PROJECT_ROOT, 'uploads')
export const USE_S3_STORAGE = process.env.NODE_ENV === 'production' || process.env.USE_S3 === 'true'
export const S3_CONFIG = {
bucket: process.env.S3_BUCKET_NAME || 'sim-studio-files',
region: process.env.AWS_REGION || 'us-east-1',
baseUrl: process.env.S3_BASE_URL || `https://${process.env.S3_BUCKET_NAME || 'sim-studio-files'}.s3.${process.env.AWS_REGION || 'us-east-1'}.amazonaws.com`
}
/**
* Ensures that the uploads directory exists (for local storage)
*/
export async function ensureUploadsDirectory() {
if (USE_S3_STORAGE) {
logger.info('Using S3 storage, skipping local uploads directory creation')
return true
}
try {
if (!existsSync(UPLOAD_DIR)) {
logger.info(`Creating uploads directory at ${UPLOAD_DIR}`)
await mkdir(UPLOAD_DIR, { recursive: true })
logger.info(`Created uploads directory at ${UPLOAD_DIR}`)
} else {
logger.info(`Uploads directory already exists at ${UPLOAD_DIR}`)
}
return true
} catch (error) {
logger.error('Failed to create uploads directory:', error)
return false
}
}
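
A sketch of how upload code elsewhere might branch on this flag; the persist() helper is hypothetical, and only the imported names come from these modules:

import { join } from 'path'
import { writeFile } from 'fs/promises'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
import { uploadToS3 } from '@/lib/uploads/s3-client'

// Hypothetical helper: persist a buffer with whichever backend is active,
// mirroring the timestamp-prefixed key scheme used by uploadToS3
async function persist(file: Buffer, name: string, contentType: string) {
  if (USE_S3_STORAGE) {
    return uploadToS3(file, name, contentType)
  }
  const key = `${Date.now()}-${name.replace(/\s+/g, '-')}`
  await writeFile(join(UPLOAD_DIR, key), file)
  return { key, name, size: file.length, type: contentType }
}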

300
sim/package-lock.json generated
View File

@@ -14,7 +14,8 @@
],
"dependencies": {
"@anthropic-ai/sdk": "^0.39.0",
"@aws-sdk/client-s3": "^3.758.0",
"@aws-sdk/client-s3": "^3.779.0",
"@aws-sdk/s3-request-presigner": "^3.779.0",
"@cerebras/cerebras_cloud_sdk": "^1.23.0",
"@hookform/resolvers": "^4.1.3",
"@radix-ui/react-alert-dialog": "^1.1.5",
@@ -42,6 +43,8 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "^5.6.0",
"csv-parser": "^3.2.0",
"date-fns": "^3.6.0",
"drizzle-orm": "^0.41.0",
"framer-motion": "^12.5.0",
@@ -50,9 +53,11 @@
"ioredis": "^5.6.0",
"jwt-decode": "^4.0.0",
"lucide-react": "^0.469.0",
"mammoth": "^1.9.0",
"next": "^15.2.4",
"next-themes": "^0.4.6",
"openai": "^4.89.0",
"pdf-parse": "^1.1.1",
"postgres": "^3.4.5",
"prismjs": "^1.30.0",
"react": "^18.2.0",
@@ -73,6 +78,7 @@
"@testing-library/react": "^16.2.0",
"@testing-library/user-event": "^14.6.1",
"@trivago/prettier-plugin-sort-imports": "^5.2.2",
"@types/lodash": "^4.17.16",
"@types/node": "^20",
"@types/prismjs": "^1.26.5",
"@types/react": "^19",
@@ -381,9 +387,9 @@
}
},
"node_modules/@aws-sdk/client-s3": {
"version": "3.777.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/client-s3/-/client-s3-3.777.0.tgz",
"integrity": "sha512-KVX2QD6lLczZxtzIRCpmztgNnGq+spiMIDYqkum/rCBjCX1YJoDHwMYXaMf2EtAH8tFkJmBiA/CiT/J36iN7Xg==",
"version": "3.779.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/client-s3/-/client-s3-3.779.0.tgz",
"integrity": "sha512-Lagz+ersQaLNYkpOU9V12PYspT//lGvhPXlKU3OXDj3whDchdqUdtRKY8rmV+jli4KXe+udx/hj2yqrFRfKGvQ==",
"license": "Apache-2.0",
"dependencies": {
"@aws-crypto/sha1-browser": "5.2.0",
@@ -893,6 +899,25 @@
"node": ">=18.0.0"
}
},
"node_modules/@aws-sdk/s3-request-presigner": {
"version": "3.779.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/s3-request-presigner/-/s3-request-presigner-3.779.0.tgz",
"integrity": "sha512-L3mGSh6/9gf3FBVrQziCkuLbaRJMeNbLr6tg9ZSymJcDRzRqAiCWnHrenAavTnAAnm+Lu62Fg/A4g3T+YT+gEg==",
"license": "Apache-2.0",
"dependencies": {
"@aws-sdk/signature-v4-multi-region": "3.775.0",
"@aws-sdk/types": "3.775.0",
"@aws-sdk/util-format-url": "3.775.0",
"@smithy/middleware-endpoint": "^4.1.0",
"@smithy/protocol-http": "^5.1.0",
"@smithy/smithy-client": "^4.2.0",
"@smithy/types": "^4.2.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@aws-sdk/signature-v4-multi-region": {
"version": "3.775.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.775.0.tgz",
@@ -967,6 +992,21 @@
"node": ">=18.0.0"
}
},
"node_modules/@aws-sdk/util-format-url": {
"version": "3.775.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/util-format-url/-/util-format-url-3.775.0.tgz",
"integrity": "sha512-Nw4nBeyCbWixoGh8NcVpa/i8McMA6RXJIjQFyloJLaPr7CPquz7ZbSl0MUWMFVwP/VHaJ7B+lNN3Qz1iFCEP/Q==",
"license": "Apache-2.0",
"dependencies": {
"@aws-sdk/types": "3.775.0",
"@smithy/querystring-builder": "^4.0.2",
"@smithy/types": "^4.2.0",
"tslib": "^2.6.2"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@aws-sdk/util-locate-window": {
"version": "3.723.0",
"resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.723.0.tgz",
@@ -6166,6 +6206,13 @@
"integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==",
"license": "MIT"
},
"node_modules/@types/lodash": {
"version": "4.17.16",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.16.tgz",
"integrity": "sha512-HX7Em5NYQAXKW+1T+FiuG27NGwzJfCX3s1GjOa7ujxZa52kjJLOr4FUxT+giF6Tgxv1e+/czV/iTtBw27WTU9g==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/node": {
"version": "20.17.28",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.28.tgz",
@@ -6398,6 +6445,15 @@
"integrity": "sha512-f6Oq3ohtSC5RYABhpN8aVOVHpcKvJ1fB1jjuvODTBU5u6BcroYEhphnrywdw8RO+2Vy5ekCdKe5D4dCMdMSrzA==",
"license": "MIT"
},
"node_modules/@xmldom/xmldom": {
"version": "0.8.10",
"resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz",
"integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -6511,6 +6567,15 @@
"integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==",
"license": "MIT"
},
"node_modules/argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
"license": "MIT",
"dependencies": {
"sprintf-js": "~1.0.2"
}
},
"node_modules/aria-hidden": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.4.tgz",
@@ -6573,7 +6638,6 @@
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"dev": true,
"funding": [
{
"type": "github",
@@ -6655,6 +6719,12 @@
"readable-stream": "^3.4.0"
}
},
"node_modules/bluebird": {
"version": "3.4.7",
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz",
"integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==",
"license": "MIT"
},
"node_modules/bowser": {
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/bowser/-/bowser-2.11.0.tgz",
@@ -7091,6 +7161,12 @@
"node": ">= 0.6"
}
},
"node_modules/core-util-is": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
"integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
"license": "MIT"
},
"node_modules/cors": {
"version": "2.8.5",
"resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
@@ -7230,6 +7306,24 @@
"devOptional": true,
"license": "MIT"
},
"node_modules/csv-parse": {
"version": "5.6.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.6.0.tgz",
"integrity": "sha512-l3nz3euub2QMg5ouu5U09Ew9Wf6/wQ8I++ch1loQ0ljmzhmfZYrH9fflS22i/PQEvsPvxCwxgz5q7UB8K1JO4Q==",
"license": "MIT"
},
"node_modules/csv-parser": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
"integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
"license": "MIT",
"bin": {
"csv-parser": "bin/csv-parser"
},
"engines": {
"node": ">= 10"
}
},
"node_modules/d3-color": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
@@ -7484,6 +7578,12 @@
"integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==",
"license": "Apache-2.0"
},
"node_modules/dingbat-to-unicode": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz",
"integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==",
"license": "BSD-2-Clause"
},
"node_modules/dlv": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
@@ -7704,6 +7804,15 @@
}
}
},
"node_modules/duck": {
"version": "0.1.12",
"resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz",
"integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==",
"license": "BSD",
"dependencies": {
"underscore": "^1.13.1"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -8645,6 +8754,12 @@
],
"license": "BSD-3-Clause"
},
"node_modules/immediate": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
"integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==",
"license": "MIT"
},
"node_modules/indent-string": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz",
@@ -8659,7 +8774,6 @@
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"dev": true,
"license": "ISC"
},
"node_modules/input-otp": {
@@ -8816,6 +8930,12 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
"license": "MIT"
},
"node_modules/isexe": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-3.1.1.tgz",
@@ -8997,6 +9117,54 @@
"node": ">=6"
}
},
"node_modules/jszip": {
"version": "3.10.1",
"resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
"integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==",
"license": "(MIT OR GPL-3.0-or-later)",
"dependencies": {
"lie": "~3.3.0",
"pako": "~1.0.2",
"readable-stream": "~2.3.6",
"setimmediate": "^1.0.5"
}
},
"node_modules/jszip/node_modules/pako": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
"integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
"license": "(MIT AND Zlib)"
},
"node_modules/jszip/node_modules/readable-stream": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
"license": "MIT",
"dependencies": {
"core-util-is": "~1.0.0",
"inherits": "~2.0.3",
"isarray": "~1.0.0",
"process-nextick-args": "~2.0.0",
"safe-buffer": "~5.1.1",
"string_decoder": "~1.1.1",
"util-deprecate": "~1.0.1"
}
},
"node_modules/jszip/node_modules/safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
"license": "MIT"
},
"node_modules/jszip/node_modules/string_decoder": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"license": "MIT",
"dependencies": {
"safe-buffer": "~5.1.0"
}
},
"node_modules/jwt-decode": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/jwt-decode/-/jwt-decode-4.0.0.tgz",
@@ -9024,6 +9192,15 @@
"url": "https://ko-fi.com/killymxi"
}
},
"node_modules/lie": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz",
"integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
"license": "MIT",
"dependencies": {
"immediate": "~3.0.5"
}
},
"node_modules/lilconfig": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
@@ -9234,6 +9411,17 @@
"loose-envify": "cli.js"
}
},
"node_modules/lop": {
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/lop/-/lop-0.4.2.tgz",
"integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==",
"license": "BSD-2-Clause",
"dependencies": {
"duck": "^0.1.12",
"option": "~0.2.1",
"underscore": "^1.13.1"
}
},
"node_modules/loupe": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/loupe/-/loupe-3.1.3.tgz",
@@ -9322,6 +9510,30 @@
"node": ">=10"
}
},
"node_modules/mammoth": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/mammoth/-/mammoth-1.9.0.tgz",
"integrity": "sha512-F+0NxzankQV9XSUAuVKvkdQK0GbtGGuqVnND9aVf9VSeUA82LQa29GjLqYU6Eez8LHqSJG3eGiDW3224OKdpZg==",
"license": "BSD-2-Clause",
"dependencies": {
"@xmldom/xmldom": "^0.8.6",
"argparse": "~1.0.3",
"base64-js": "^1.5.1",
"bluebird": "~3.4.0",
"dingbat-to-unicode": "^1.0.1",
"jszip": "^3.7.1",
"lop": "^0.4.2",
"path-is-absolute": "^1.0.0",
"underscore": "^1.13.1",
"xmlbuilder": "^10.0.0"
},
"bin": {
"mammoth": "bin/mammoth"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/marked": {
"version": "7.0.4",
"resolved": "https://registry.npmjs.org/marked/-/marked-7.0.4.tgz",
@@ -9651,6 +9863,12 @@
"node": ">=10.5.0"
}
},
"node_modules/node-ensure": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz",
"integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==",
"license": "MIT"
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
@@ -9824,6 +10042,12 @@
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/option": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz",
"integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==",
"license": "BSD-2-Clause"
},
"node_modules/ora": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz",
@@ -9969,6 +10193,15 @@
"url": "https://ko-fi.com/killymxi"
}
},
"node_modules/path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/path-key": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
@@ -10023,6 +10256,28 @@
"node": ">= 14.16"
}
},
"node_modules/pdf-parse": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz",
"integrity": "sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A==",
"license": "MIT",
"dependencies": {
"debug": "^3.1.0",
"node-ensure": "^0.0.0"
},
"engines": {
"node": ">=6.8.1"
}
},
"node_modules/pdf-parse/node_modules/debug": {
"version": "3.2.7",
"resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
"integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
"license": "MIT",
"dependencies": {
"ms": "^2.1.1"
}
},
"node_modules/peberminta": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/peberminta/-/peberminta-0.9.0.tgz",
@@ -10371,6 +10626,12 @@
"node": ">=6"
}
},
"node_modules/process-nextick-args": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
"license": "MIT"
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
@@ -11697,6 +11958,12 @@
"integrity": "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ==",
"license": "MIT"
},
"node_modules/setimmediate": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
"integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
"license": "MIT"
},
"node_modules/sharp": {
"version": "0.33.5",
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz",
@@ -11993,6 +12260,12 @@
"source-map": "^0.6.0"
}
},
"node_modules/sprintf-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
"integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==",
"license": "BSD-3-Clause"
},
"node_modules/stackback": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz",
@@ -12643,6 +12916,12 @@
"integrity": "sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==",
"license": "MIT"
},
"node_modules/underscore": {
"version": "1.13.7",
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz",
"integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==",
"license": "MIT"
},
"node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
@@ -13659,6 +13938,15 @@
"node": ">=18"
}
},
"node_modules/xmlbuilder": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz",
"integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==",
"license": "MIT",
"engines": {
"node": ">=4.0"
}
},
"node_modules/xmlchars": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",

View File

@@ -28,7 +28,8 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.39.0",
"@aws-sdk/client-s3": "^3.758.0",
"@aws-sdk/client-s3": "^3.779.0",
"@aws-sdk/s3-request-presigner": "^3.779.0",
"@cerebras/cerebras_cloud_sdk": "^1.23.0",
"@hookform/resolvers": "^4.1.3",
"@radix-ui/react-alert-dialog": "^1.1.5",
@@ -56,6 +57,8 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "^5.6.0",
"csv-parser": "^3.2.0",
"date-fns": "^3.6.0",
"drizzle-orm": "^0.41.0",
"framer-motion": "^12.5.0",
@@ -64,9 +67,11 @@
"ioredis": "^5.6.0",
"jwt-decode": "^4.0.0",
"lucide-react": "^0.469.0",
"mammoth": "^1.9.0",
"next": "^15.2.4",
"next-themes": "^0.4.6",
"openai": "^4.89.0",
"pdf-parse": "^1.1.1",
"postgres": "^3.4.5",
"prismjs": "^1.30.0",
"react": "^18.2.0",
@@ -87,6 +92,7 @@
"@testing-library/react": "^16.2.0",
"@testing-library/user-event": "^14.6.1",
"@trivago/prettier-plugin-sort-imports": "^5.2.2",
"@types/lodash": "^4.17.16",
"@types/node": "^20",
"@types/prismjs": "^1.26.5",
"@types/react": "^19",

3
sim/tools/file/index.ts Normal file
View File

@@ -0,0 +1,3 @@
import { fileParserTool } from './parser'
export const fileParseTool = fileParserTool

191
sim/tools/file/parser.ts Normal file
View File

@@ -0,0 +1,191 @@
import { ToolConfig, ToolResponse } from '../types'
export interface FileParserInput {
filePath: string | string[]
fileType?: string
}
export interface FileParseResult {
content: string
fileType: string
size: number
name: string
binary: boolean
metadata?: Record<string, any>
}
export interface FileParserOutputData {
files: FileParseResult[]
combinedContent: string
[key: string]: any
}
export interface FileParserOutput extends ToolResponse {
output: FileParserOutputData
}
export const fileParserTool: ToolConfig<FileParserInput, FileParserOutput> = {
id: 'file_parser',
name: 'File Parser',
description: 'Parse one or more uploaded files (text, PDF, CSV, images, etc.)',
version: '1.0.0',
params: {
filePath: {
type: 'string',
required: true,
description: 'Path to the uploaded file(s). Can be a single path or an array of paths.',
},
fileType: {
type: 'string',
required: false,
description: 'Type of file to parse (auto-detected if not specified)',
},
},
request: {
url: '/api/files/parse',
method: 'POST',
headers: () => ({
'Content-Type': 'application/json',
}),
body: (params: any) => {
console.log('[fileParserTool] Request parameters:', params)
// Check for valid input
if (!params) {
console.error('[fileParserTool] No parameters provided')
throw new Error('No parameters provided')
}
// Handle various input formats
let filePath: string | string[] | null = null
// Handle multiple files case from block output
if (params.files && Array.isArray(params.files)) {
console.log('[fileParserTool] Processing multiple files:', params.files.length)
filePath = params.files.map((file: any) => file.path)
}
// Handle the case where params is an object with file property
else if (params.file) {
if (Array.isArray(params.file)) {
console.log(
'[fileParserTool] Processing multiple files from file array:',
params.file.length
)
filePath = params.file.map((file: any) => file.path)
} else if (params.file.path) {
console.log('[fileParserTool] Extracted file path from file object:', params.file.path)
filePath = params.file.path
}
}
// Handle direct filePath parameter
else if (params.filePath) {
console.log('[fileParserTool] Using direct filePath parameter:', params.filePath)
filePath = params.filePath
}
if (!filePath) {
console.error('[fileParserTool] Missing required parameter: filePath')
throw new Error('Missing required parameter: filePath')
}
return {
filePath,
fileType: params.fileType,
}
},
isInternalRoute: true,
},
transformResponse: async (response: Response): Promise<FileParserOutput> => {
console.log('[fileParserTool] Received response status:', response.status)
try {
const result = await response.json()
console.log('[fileParserTool] Response parsed successfully')
if (!response.ok) {
const errorMsg = result.error || 'File parsing failed'
console.error('[fileParserTool] Error in response:', errorMsg)
throw new Error(errorMsg)
}
// Handle multiple files response
if (result.results) {
console.log('[fileParserTool] Processing multiple files response')
// Extract individual file results
const fileResults = result.results.map((fileResult: any) => {
if (!fileResult.success) {
console.warn(
`[fileParserTool] Error parsing file ${fileResult.filePath}: ${fileResult.error}`
)
return {
content: `Error parsing file: ${fileResult.error || 'Unknown error'}`,
fileType: 'text/plain',
size: 0,
name: fileResult.filePath.split('/').pop() || 'unknown',
binary: false,
}
}
return fileResult.output
})
// Combine all file contents with clear dividers
const combinedContent = fileResults
.map((file: FileParseResult, index: number) => {
const divider = `\n${'='.repeat(80)}\n`
return file.content + (index < fileResults.length - 1 ? divider : '')
})
.join('\n')
// Create the base output
const output: FileParserOutputData = {
files: fileResults,
combinedContent,
}
// Add named properties for each file for dropdown access
fileResults.forEach((file: FileParseResult, index: number) => {
output[`file${index + 1}`] = file
})
return {
success: true,
output,
}
}
// Handle single file response
if (result.success) {
console.log('[fileParserTool] Successfully parsed file:', result.output.name)
// For a single file, create the output with both array and named property
const output: FileParserOutputData = {
files: [result.output],
combinedContent: result.output.content,
file1: result.output,
}
return {
success: true,
output,
}
}
// Handle error response
throw new Error(result.error || 'File parsing failed')
} catch (error) {
console.error('[fileParserTool] Error processing response:', error)
throw error
}
},
transformError: (error: any) => {
console.error('[fileParserTool] Error occurred:', error)
return error.message || 'File parsing failed'
},
}
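
The body builder above accepts three input shapes. A sketch of each, with illustrative paths (the '@/tools/file/parser' alias and the non-null assertion are assumptions about the surrounding project setup):

import { fileParserTool } from '@/tools/file/parser'

const buildBody = fileParserTool.request.body!

// 1. Block output with multiple files
buildBody({ files: [{ path: '/api/files/serve/a.pdf' }, { path: '/api/files/serve/b.csv' }] })
// -> { filePath: ['/api/files/serve/a.pdf', '/api/files/serve/b.csv'], fileType: undefined }

// 2. A single file object (or an array under `file`)
buildBody({ file: { path: '/api/files/serve/a.pdf' } })
// -> { filePath: '/api/files/serve/a.pdf', fileType: undefined }

// 3. The direct filePath parameter
buildBody({ filePath: '/api/files/serve/a.pdf', fileType: 'pdf' })
// -> { filePath: '/api/files/serve/a.pdf', fileType: 'pdf' }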

View File

@@ -5,6 +5,7 @@ import { confluenceListTool, confluenceRetrieveTool, confluenceUpdateTool } from
import { docsCreateTool, docsReadTool, docsWriteTool } from './docs'
import { driveDownloadTool, driveListTool, driveUploadTool } from './drive'
import { exaAnswerTool, exaFindSimilarLinksTool, exaGetContentsTool, exaSearchTool } from './exa'
import { fileParseTool } from './file'
import { scrapeTool } from './firecrawl/scrape'
import { functionExecuteTool, webcontainerExecuteTool } from './function'
import {
@@ -55,6 +56,7 @@ export const tools: Record<string, ToolConfig> = {
function_execute: functionExecuteTool,
webcontainer_execute: webcontainerExecuteTool,
vision_tool: visionTool,
file_parser: fileParseTool,
firecrawl_scrape: scrapeTool,
jina_readurl: readUrlTool,
slack_message: slackMessageTool,
@@ -369,9 +371,7 @@ export async function executeTool(
},
}
} catch (error) {
logger.error(`Error in post-processing for tool ${toolId}:`, {
error,
})
logger.error(`Error in post-processing for tool ${toolId}:`, { error })
// Return original result if post-processing fails
// Still include timing data
const endTime = new Date()