Mirror of https://github.com/simstudioai/sim.git (synced 2026-01-06 21:54:01 -05:00)
feat(files): local upload, s3 upload for pdf/docx/csv parsing (#213)
* feat(file-parse): local upload; new block
* feat(file-parse): complete for local usage; need to integrate S3 for hosted version
* fix(file-parse): pdf parsing
* added support for multiple file upload & delete
* added s3 file upload/fetch for hosted platform
* swapped console for logger
* added unit tests for files
* add s3 client, resolved build error

---------

Co-authored-by: Emir Karabeg <emirkarabeg@berkeley.edu>
@@ -13,4 +13,19 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 64` to generate
# If left commented out, emails will be logged to console instead

# StackBlitz (Webcontainer) API Key (Optional, for handling sandboxed code execution for functions/custom-tools)
# WEBCONTAINER_CLIENT_ID= # Uncomment and add your key from https://stackblitz.com/docs/webcontainer-api#webcontainer-client-id

# S3 Storage Configuration (Optional)
# Set USE_S3=true to enable S3 storage in development
# USE_S3=true

# AWS Credentials (Required when USE_S3=true)
# AWS_ACCESS_KEY_ID=your-access-key-id
# AWS_SECRET_ACCESS_KEY=your-secret-access-key

# S3 Configuration (Required when USE_S3=true)
# S3_BUCKET_NAME=your-bucket-name
# AWS_REGION=us-east-1

# Optional: Custom S3 Base URL (for custom domains or non-AWS S3-compatible storage)
# S3_BASE_URL=https://your-custom-domain.com
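For context: the new routes and tests below import `UPLOAD_DIR`, `USE_S3_STORAGE`, `S3_CONFIG`, and `ensureUploadsDirectory` from `@/lib/uploads/setup`, a module this excerpt doesn't show. A minimal sketch of how that module could wire the variables above to those exports (an assumption for illustration, not the commit's actual code):

// Hypothetical sketch of lib/uploads/setup.ts; names mirror what the tests mock.
import { existsSync } from 'fs'
import { mkdir } from 'fs/promises'
import { join } from 'path'

export const UPLOAD_DIR = join(process.cwd(), 'uploads')

// S3 storage is opt-in via USE_S3 in development; the hosted build could force it.
export const USE_S3_STORAGE =
  process.env.USE_S3 === 'true' || process.env.NODE_ENV === 'production'

export const S3_CONFIG = {
  bucket: process.env.S3_BUCKET_NAME || '',
  region: process.env.AWS_REGION || 'us-east-1',
  // A custom base URL wins; otherwise derive the standard AWS endpoint.
  baseUrl:
    process.env.S3_BASE_URL ||
    `https://${process.env.S3_BUCKET_NAME}.s3.${process.env.AWS_REGION}.amazonaws.com`,
}

// Create the local uploads directory if it doesn't exist yet.
export async function ensureUploadsDirectory(): Promise<boolean> {
  if (!existsSync(UPLOAD_DIR)) {
    await mkdir(UPLOAD_DIR, { recursive: true })
  }
  return true
}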
sim/.gitignore (vendored, 4 lines changed)
@@ -52,3 +52,7 @@ next-env.d.ts

# cursorrules
.cursorrules

# file uploads
uploads/
uploads/*
sim/app/api/files/delete/route.test.ts (new file, 174 lines)
@@ -0,0 +1,174 @@
/**
 * Tests for file delete API route
 *
 * @vitest-environment node
 */
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { createMockRequest } from '@/app/api/__test-utils__/utils'

describe('File Delete API Route', () => {
  // Mock file system modules
  const mockUnlink = vi.fn().mockResolvedValue(undefined)
  const mockExistsSync = vi.fn().mockReturnValue(true)
  const mockDeleteFromS3 = vi.fn().mockResolvedValue(undefined)
  const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)

  beforeEach(() => {
    vi.resetModules()

    // Mock filesystem operations
    vi.doMock('fs', () => ({
      existsSync: mockExistsSync,
    }))

    vi.doMock('fs/promises', () => ({
      unlink: mockUnlink,
    }))

    // Mock the S3 client
    vi.doMock('@/lib/uploads/s3-client', () => ({
      deleteFromS3: mockDeleteFromS3,
    }))

    // Mock the logger
    vi.doMock('@/lib/logs/console-logger', () => ({
      createLogger: vi.fn().mockReturnValue({
        info: vi.fn(),
        error: vi.fn(),
        warn: vi.fn(),
        debug: vi.fn(),
      }),
    }))

    // Configure upload directory and S3 mode with all required exports
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: false,
      ensureUploadsDirectory: mockEnsureUploadsDirectory,
      S3_CONFIG: {
        bucket: 'test-bucket',
        region: 'test-region',
        baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
      },
    }))

    // Skip setup.server.ts side effects
    vi.doMock('@/lib/uploads/setup.server', () => ({}))
  })

  afterEach(() => {
    vi.clearAllMocks()
  })

  it('should handle local file deletion successfully', async () => {
    // Configure upload directory and S3 mode for this test
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: false,
    }))

    // Create request with file path
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/test-file.txt',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('message', 'File deleted successfully')

    // Verify unlink was called with correct path
    expect(mockUnlink).toHaveBeenCalledWith('/test/uploads/test-file.txt')
  })

  it('should handle file not found gracefully', async () => {
    // Mock file not existing
    mockExistsSync.mockReturnValueOnce(false)

    // Create request with file path
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/nonexistent.txt',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('message', "File not found, but that's okay")

    // Verify unlink was not called
    expect(mockUnlink).not.toHaveBeenCalled()
  })

  it('should handle S3 file deletion successfully', async () => {
    // Configure upload directory and S3 mode for this test
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Create request with S3 file path
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/s3/1234567890-test-file.txt',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('message', 'File deleted successfully from S3')

    // Verify deleteFromS3 was called with correct key
    expect(mockDeleteFromS3).toHaveBeenCalledWith('1234567890-test-file.txt')
  })

  it('should handle missing file path', async () => {
    // Create request with no file path
    const req = createMockRequest('POST', {})

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify error response
    expect(response.status).toBe(400)
    expect(data).toHaveProperty('error', 'InvalidRequestError')
    expect(data).toHaveProperty('message', 'No file path provided')
  })

  it('should handle CORS preflight requests', async () => {
    // Import the handler after mocks are set up
    const { OPTIONS } = await import('./route')

    // Call the handler
    const response = await OPTIONS()

    // Verify response
    expect(response.status).toBe(204)
    expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS')
    expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type')
  })
})
sim/app/api/files/delete/route.ts (new file, 115 lines)
@@ -0,0 +1,115 @@
import { NextRequest } from 'next/server'
import { existsSync } from 'fs'
import { unlink } from 'fs/promises'
import { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'
import { deleteFromS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import {
  createErrorResponse,
  createOptionsResponse,
  createSuccessResponse,
  extractFilename,
  extractS3Key,
  InvalidRequestError,
  isS3Path,
} from '../utils'

const logger = createLogger('FilesDeleteAPI')

/**
 * Main API route handler for file deletion
 */
export async function POST(request: NextRequest) {
  try {
    const requestData = await request.json()
    const { filePath } = requestData

    logger.info('File delete request received:', { filePath })

    if (!filePath) {
      throw new InvalidRequestError('No file path provided')
    }

    try {
      // Use appropriate handler based on path and environment
      const result =
        isS3Path(filePath) || USE_S3_STORAGE
          ? await handleS3FileDelete(filePath)
          : await handleLocalFileDelete(filePath)

      // Return success response
      return createSuccessResponse(result)
    } catch (error) {
      logger.error('Error deleting file:', error)
      return createErrorResponse(
        error instanceof Error ? error : new Error('Failed to delete file')
      )
    }
  } catch (error) {
    logger.error('Error parsing request:', error)
    return createErrorResponse(error instanceof Error ? error : new Error('Invalid request'))
  }
}

/**
 * Handle S3 file deletion
 */
async function handleS3FileDelete(filePath: string) {
  // Extract the S3 key from the path
  const s3Key = extractS3Key(filePath)
  logger.info(`Deleting file from S3: ${s3Key}`)

  try {
    // Delete from S3
    await deleteFromS3(s3Key)
    logger.info(`File successfully deleted from S3: ${s3Key}`)

    return {
      success: true as const,
      message: 'File deleted successfully from S3',
    }
  } catch (error) {
    logger.error('Error deleting file from S3:', error)
    throw error
  }
}

/**
 * Handle local file deletion
 */
async function handleLocalFileDelete(filePath: string) {
  // Extract the filename from the path
  const filename = extractFilename(filePath)
  logger.info('Extracted filename for deletion:', filename)

  const fullPath = join(UPLOAD_DIR, filename)
  logger.info('Full file path for deletion:', fullPath)

  // Check if file exists
  if (!existsSync(fullPath)) {
    logger.info(`File not found for deletion at path: ${fullPath}`)
    return {
      success: true as const,
      message: "File not found, but that's okay",
    }
  }

  // Delete the file
  await unlink(fullPath)
  logger.info(`File successfully deleted: ${fullPath}`)

  return {
    success: true as const,
    message: 'File deleted successfully',
  }
}

/**
 * Handle CORS preflight requests
 */
export async function OPTIONS() {
  return createOptionsResponse()
}
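For reference, an illustrative client call against the delete route (hypothetical usage, not part of the diff):

// Hypothetical client call: delete a previously uploaded file.
// The route accepts both local serve paths and /api/files/serve/s3/<key> paths.
const res = await fetch('/api/files/delete', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ filePath: '/api/files/serve/example.txt' }),
})
const result = await res.json()
// => { success: true, message: 'File deleted successfully' }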
sim/app/api/files/parse/route.test.ts (new file, 249 lines)
@@ -0,0 +1,249 @@
/**
 * Tests for file parse API route
 *
 * @vitest-environment node
 */
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { createMockRequest } from '@/app/api/__test-utils__/utils'

describe('File Parse API Route', () => {
  // Mock file system and parser modules
  const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test file content'))
  const mockWriteFile = vi.fn().mockResolvedValue(undefined)
  const mockUnlink = vi.fn().mockResolvedValue(undefined)
  const mockExistsSync = vi.fn().mockReturnValue(true)
  const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content'))
  const mockParseFile = vi.fn().mockResolvedValue({
    content: 'parsed content',
    metadata: { pageCount: 1 },
  })
  const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)

  beforeEach(() => {
    vi.resetModules()

    // Mock filesystem operations
    vi.doMock('fs', () => ({
      existsSync: mockExistsSync,
    }))

    vi.doMock('fs/promises', () => ({
      readFile: mockReadFile,
      writeFile: mockWriteFile,
      unlink: mockUnlink,
    }))

    // Mock the S3 client
    vi.doMock('@/lib/uploads/s3-client', () => ({
      downloadFromS3: mockDownloadFromS3,
    }))

    // Mock file parsers
    vi.doMock('@/lib/file-parsers', () => ({
      isSupportedFileType: vi.fn().mockReturnValue(true),
      parseFile: mockParseFile,
    }))

    // Mock the logger
    vi.doMock('@/lib/logs/console-logger', () => ({
      createLogger: vi.fn().mockReturnValue({
        info: vi.fn(),
        error: vi.fn(),
        warn: vi.fn(),
        debug: vi.fn(),
      }),
    }))

    // Configure upload directory and S3 mode with all required exports
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: false,
      ensureUploadsDirectory: mockEnsureUploadsDirectory,
      S3_CONFIG: {
        bucket: 'test-bucket',
        region: 'test-region',
        baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
      },
    }))

    // Skip setup.server.ts side effects
    vi.doMock('@/lib/uploads/setup.server', () => ({}))
  })

  afterEach(() => {
    vi.clearAllMocks()
  })

  it('should parse local file successfully', async () => {
    // Create request with file path
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/test-file.txt',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('output')
    expect(data.output).toHaveProperty('content', 'parsed content')
    expect(data.output).toHaveProperty('name', 'test-file.txt')

    // Verify readFile was called with correct path
    expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt')
  })

  it('should parse S3 file successfully', async () => {
    // Configure S3 storage mode
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Create request with S3 file path
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/s3/1234567890-test-file.pdf',
      fileType: 'application/pdf',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('output')
    expect(data.output).toHaveProperty('content', 'parsed content')
    expect(data.output).toHaveProperty('metadata')
    expect(data.output.metadata).toHaveProperty('pageCount', 1)

    // Verify S3 download was called with correct key
    expect(mockDownloadFromS3).toHaveBeenCalledWith('1234567890-test-file.pdf')

    // Verify temporary file was created and cleaned up
    expect(mockWriteFile).toHaveBeenCalled()
    expect(mockUnlink).toHaveBeenCalled()
  })

  it('should handle multiple files', async () => {
    // Create request with multiple file paths
    const req = createMockRequest('POST', {
      filePath: ['/api/files/serve/file1.txt', '/api/files/serve/file2.txt'],
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('results')
    expect(Array.isArray(data.results)).toBe(true)
    expect(data.results).toHaveLength(2)
    expect(data.results[0]).toHaveProperty('success', true)
    expect(data.results[1]).toHaveProperty('success', true)
  })

  it('should handle file not found', async () => {
    // Mock file not existing for this test
    mockExistsSync.mockReturnValueOnce(false)

    // Create request with nonexistent file
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/nonexistent.txt',
    })

    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    expect(response.status).toBe(200)
    if (data.success === true) {
      expect(data).toHaveProperty('output')
      expect(data.output).toHaveProperty('content')
    } else {
      expect(data).toHaveProperty('error')
      expect(data.error).toContain('File not found')
    }
  })

  it('should handle unsupported file types with generic parser', async () => {
    // Mock file not being a supported type
    vi.doMock('@/lib/file-parsers', () => ({
      isSupportedFileType: vi.fn().mockReturnValue(false),
      parseFile: mockParseFile,
    }))

    // Create request with unsupported file type
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/test-file.xyz',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response uses generic handling
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data).toHaveProperty('output')
    expect(data.output).toHaveProperty('binary', false)
  })

  it('should handle missing file path', async () => {
    // Create request with no file path
    const req = createMockRequest('POST', {})

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify error response
    expect(response.status).toBe(400)
    expect(data).toHaveProperty('error', 'No file path provided')
  })

  it('should handle parser errors gracefully', async () => {
    // Mock parser error
    mockParseFile.mockRejectedValueOnce(new Error('Parser failure'))

    // Create request with file that will fail parsing
    const req = createMockRequest('POST', {
      filePath: '/api/files/serve/error-file.txt',
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify error was handled
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('success', true)
    expect(data.output).toHaveProperty('content')
  })
})
sim/app/api/files/parse/route.ts (new file, 427 lines)
@@ -0,0 +1,427 @@
import { NextRequest, NextResponse } from 'next/server'
import { existsSync } from 'fs'
import { readFile, unlink, writeFile } from 'fs/promises'
import { join } from 'path'
import path from 'path'
import { isSupportedFileType, parseFile } from '@/lib/file-parsers'
import { createLogger } from '@/lib/logs/console-logger'
import { downloadFromS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
import '@/lib/uploads/setup.server'

const logger = createLogger('FilesParseAPI')

interface ParseSuccessResult {
  success: true
  output: {
    content: string
    fileType: string
    size: number
    name: string
    binary: boolean
    metadata?: Record<string, any>
  }
  filePath?: string
}

interface ParseErrorResult {
  success: false
  error: string
  filePath?: string
}

type ParseResult = ParseSuccessResult | ParseErrorResult

// MIME type mapping for various file extensions
const fileTypeMap: Record<string, string> = {
  // Text formats
  txt: 'text/plain',
  csv: 'text/csv',
  json: 'application/json',
  xml: 'application/xml',
  md: 'text/markdown',
  html: 'text/html',
  css: 'text/css',
  js: 'application/javascript',
  ts: 'application/typescript',
  // Document formats
  pdf: 'application/pdf',
  doc: 'application/msword',
  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  // Spreadsheet formats
  xls: 'application/vnd.ms-excel',
  xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  // Presentation formats
  ppt: 'application/vnd.ms-powerpoint',
  pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  // Image formats
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  gif: 'image/gif',
  // Archive formats
  zip: 'application/zip',
}

// Binary file extensions
const binaryExtensions = [
  'doc',
  'docx',
  'xls',
  'xlsx',
  'ppt',
  'pptx',
  'zip',
  'png',
  'jpg',
  'jpeg',
  'gif',
]

/**
 * Main API route handler
 */
export async function POST(request: NextRequest) {
  try {
    const requestData = await request.json()
    const { filePath, fileType } = requestData

    logger.info('File parse request received:', { filePath, fileType })

    if (!filePath) {
      return NextResponse.json({ error: 'No file path provided' }, { status: 400 })
    }

    // Handle both single file path and array of file paths
    const filePaths = Array.isArray(filePath) ? filePath : [filePath]

    // Parse each file
    const results = await Promise.all(
      filePaths.map(async (singleFilePath) => {
        try {
          return await parseFileSingle(singleFilePath, fileType)
        } catch (error) {
          logger.error(`Error parsing file ${singleFilePath}:`, error)
          return {
            success: false,
            error: (error as Error).message,
            filePath: singleFilePath,
          } as ParseErrorResult
        }
      })
    )

    // If it was a single file request, return a single result
    // Otherwise return an array of results
    if (!Array.isArray(filePath)) {
      // Single file was requested
      const result = results[0]
      if (!result.success) {
        return NextResponse.json({ error: result.error }, { status: 400 })
      }
      return NextResponse.json(result)
    }

    // Multiple files were requested
    return NextResponse.json({
      success: true,
      results,
    })
  } catch (error) {
    logger.error('Error parsing file(s):', error)
    return NextResponse.json(
      { error: 'Failed to parse file(s)', message: (error as Error).message },
      { status: 500 }
    )
  }
}

/**
 * Parse a single file and return its content
 */
async function parseFileSingle(filePath: string, fileType?: string): Promise<ParseResult> {
  logger.info('Parsing file:', filePath)

  // Check if this is an S3 path
  const isS3Path = filePath.includes('/api/files/serve/s3/')

  // Use S3 handler if it's an S3 path or we're in S3 mode
  if (isS3Path || USE_S3_STORAGE) {
    return handleS3File(filePath, fileType)
  }

  // Use local handler for local files
  return handleLocalFile(filePath, fileType)
}

/**
 * Handle file stored in S3
 */
async function handleS3File(filePath: string, fileType?: string): Promise<ParseResult> {
  try {
    // Extract the S3 key from the path
    const isS3Path = filePath.includes('/api/files/serve/s3/')
    const s3Key = isS3Path
      ? decodeURIComponent(filePath.split('/api/files/serve/s3/')[1])
      : filePath

    logger.info('Extracted S3 key:', s3Key)

    // Download the file from S3
    const fileBuffer = await downloadFromS3(s3Key)
    logger.info(`Downloaded file from S3: ${s3Key}, size: ${fileBuffer.length} bytes`)

    // Extract the filename from the S3 key
    const filename = s3Key.split('/').pop() || s3Key
    const extension = path.extname(filename).toLowerCase().substring(1)

    // Create a temporary file path
    const tempFilePath = join(UPLOAD_DIR, `temp-${Date.now()}-${filename}`)

    try {
      // Save to a temporary file so we can use existing parsers
      await writeFile(tempFilePath, fileBuffer)

      // Process the file based on its type
      const result = isSupportedFileType(extension)
        ? await processWithSpecializedParser(tempFilePath, filename, extension, fileType, filePath)
        : await handleGenericFile(tempFilePath, filename, extension, fileType)

      return result
    } finally {
      // Clean up the temporary file regardless of outcome
      if (existsSync(tempFilePath)) {
        await unlink(tempFilePath).catch((err) => logger.error('Error removing temp file:', err))
      }
    }
  } catch (error) {
    logger.error(`Error handling S3 file ${filePath}:`, error)
    return {
      success: false,
      error: `Error accessing file from S3: ${(error as Error).message}`,
      filePath,
    }
  }
}

/**
 * Handle file stored locally
 */
async function handleLocalFile(filePath: string, fileType?: string): Promise<ParseResult> {
  // Extract the filename from the path
  const filename = filePath.startsWith('/api/files/serve/')
    ? filePath.substring('/api/files/serve/'.length)
    : path.basename(filePath)

  logger.info('Processing local file:', filename)

  // Try several possible file paths
  const possiblePaths = [join(UPLOAD_DIR, filename), join(process.cwd(), 'uploads', filename)]

  // Find the actual file path
  let actualPath = ''
  for (const p of possiblePaths) {
    if (existsSync(p)) {
      actualPath = p
      logger.info(`Found file at: ${actualPath}`)
      break
    }
  }

  if (!actualPath) {
    return {
      success: false,
      error: `File not found: ${filename}`,
      filePath,
    }
  }

  const extension = path.extname(filename).toLowerCase().substring(1)

  // Process the file based on its type
  return isSupportedFileType(extension)
    ? await processWithSpecializedParser(actualPath, filename, extension, fileType, filePath)
    : await handleGenericFile(actualPath, filename, extension, fileType)
}

/**
 * Process a file with a specialized parser
 */
async function processWithSpecializedParser(
  filePath: string,
  filename: string,
  extension: string,
  fileType?: string,
  originalPath?: string
): Promise<ParseResult> {
  try {
    logger.info(`Parsing ${filename} with specialized parser for ${extension}`)
    const result = await parseFile(filePath)

    // Get file stats
    const fileBuffer = await readFile(filePath)
    const fileSize = fileBuffer.length

    // Handle PDF-specific validation
    if (
      extension === 'pdf' &&
      (result.content.includes('\u0000') ||
        result.content.match(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]{10,}/g))
    ) {
      result.content = createPdfFallbackMessage(result.metadata?.pageCount, fileSize, originalPath)
    }

    return {
      success: true,
      output: {
        content: result.content,
        fileType: fileType || getMimeType(extension),
        size: fileSize,
        name: filename,
        binary: false,
        metadata: result.metadata || {},
      },
      filePath: originalPath || filePath,
    }
  } catch (error) {
    logger.error(`Specialized parser failed for ${extension} file:`, error)

    // Special handling for PDFs
    if (extension === 'pdf') {
      const fileBuffer = await readFile(filePath)
      const fileSize = fileBuffer.length

      // Get page count using a simple regex pattern
      let pageCount = 0
      const pdfContent = fileBuffer.toString('utf-8')
      const pageMatches = pdfContent.match(/\/Type\s*\/Page\b/gi)
      if (pageMatches) {
        pageCount = pageMatches.length
      }

      const content = createPdfFailureMessage(
        pageCount,
        fileSize,
        originalPath || filePath,
        (error as Error).message
      )

      return {
        success: true,
        output: {
          content,
          fileType: fileType || getMimeType(extension),
          size: fileSize,
          name: filename,
          binary: false,
        },
        filePath: originalPath || filePath,
      }
    }

    // For other file types, fall back to generic handling
    return handleGenericFile(filePath, filename, extension, fileType)
  }
}

/**
 * Handle generic file types with basic parsing
 */
async function handleGenericFile(
  filePath: string,
  filename: string,
  extension: string,
  fileType?: string
): Promise<ParseResult> {
  try {
    // Read the file
    const fileBuffer = await readFile(filePath)
    const fileSize = fileBuffer.length

    // Determine if file should be treated as binary
    const isBinary = binaryExtensions.includes(extension)

    // Parse content based on binary status
    const fileContent = isBinary
      ? `[Binary ${extension.toUpperCase()} file - ${fileSize} bytes]`
      : await parseTextFile(fileBuffer)

    return {
      success: true,
      output: {
        content: fileContent,
        fileType: fileType || getMimeType(extension),
        size: fileSize,
        name: filename,
        binary: isBinary,
      },
    }
  } catch (error) {
    logger.error('Error handling generic file:', error)
    return {
      success: false,
      error: `Failed to parse file: ${(error as Error).message}`,
    }
  }
}

/**
 * Parse a text file buffer to string
 */
async function parseTextFile(fileBuffer: Buffer): Promise<string> {
  try {
    return fileBuffer.toString('utf-8')
  } catch (error) {
    return `[Unable to parse file as text: ${(error as Error).message}]`
  }
}

/**
 * Get MIME type from file extension
 */
function getMimeType(extension: string): string {
  return fileTypeMap[extension] || 'application/octet-stream'
}

/**
 * Create a fallback message for PDF files that couldn't be parsed properly
 */
function createPdfFallbackMessage(
  pageCount: number | undefined,
  fileSize: number,
  filePath?: string
): string {
  return `This PDF document could not be parsed for text content. It contains ${pageCount || 'an unknown number of'} pages. File size: ${fileSize} bytes.

To view this PDF properly, you can:
1. Download it directly using this URL: ${filePath}
2. Try a dedicated PDF text extraction service or tool
3. Open it with a PDF reader like Adobe Acrobat

PDF parsing failed because the document appears to use an encoding or compression method that our parser cannot handle.`
}

/**
 * Create an error message for PDF files that failed to parse
 */
function createPdfFailureMessage(
  pageCount: number,
  fileSize: number,
  filePath: string,
  errorMessage: string
): string {
  return `PDF parsing failed: ${errorMessage}

This PDF document contains ${pageCount || 'an unknown number of'} pages and is ${fileSize} bytes in size.

To view this PDF properly, you can:
1. Download it directly using this URL: ${filePath}
2. Try a dedicated PDF text extraction service or tool
3. Open it with a PDF reader like Adobe Acrobat

Common causes of PDF parsing failures:
- The PDF uses an unsupported compression algorithm
- The PDF is protected or encrypted
- The PDF content uses non-standard encodings
- The PDF was created with features our parser doesn't support`
}
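To make the single-vs-array contract concrete, an illustrative client call (hypothetical usage, not part of the diff):

// Hypothetical client calls: `filePath` may be a single path or an array.
const single = await fetch('/api/files/parse', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ filePath: '/api/files/serve/report.pdf', fileType: 'application/pdf' }),
}).then((r) => r.json())
// => { success: true, output: { content, fileType, size, name, binary, metadata } }

const batch = await fetch('/api/files/parse', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ filePath: ['/api/files/serve/a.csv', '/api/files/serve/b.docx'] }),
}).then((r) => r.json())
// => { success: true, results: [...] } with one ParseResult per file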
sim/app/api/files/serve/[...path]/route.test.ts (new file, 275 lines)
@@ -0,0 +1,275 @@
/**
 * Tests for file serve API route
 *
 * @vitest-environment node
 */
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

describe('File Serve API Route', () => {
  // Mock file system and S3 client modules
  const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test file content'))
  const mockExistsSync = vi.fn().mockReturnValue(true)
  const mockDownloadFromS3 = vi.fn().mockResolvedValue(Buffer.from('test s3 file content'))
  const mockGetPresignedUrl = vi.fn().mockResolvedValue('https://example-s3.com/presigned-url')
  const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)

  beforeEach(() => {
    vi.resetModules()

    // Mock filesystem operations
    vi.doMock('fs', () => ({
      existsSync: mockExistsSync,
    }))

    vi.doMock('fs/promises', () => ({
      readFile: mockReadFile,
    }))

    // Mock the S3 client
    vi.doMock('@/lib/uploads/s3-client', () => ({
      downloadFromS3: mockDownloadFromS3,
      getPresignedUrl: mockGetPresignedUrl,
    }))

    // Mock the logger
    vi.doMock('@/lib/logs/console-logger', () => ({
      createLogger: vi.fn().mockReturnValue({
        info: vi.fn(),
        error: vi.fn(),
        warn: vi.fn(),
        debug: vi.fn(),
      }),
    }))

    // Configure upload directory and S3 mode with all required exports
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: false,
      ensureUploadsDirectory: mockEnsureUploadsDirectory,
      S3_CONFIG: {
        bucket: 'test-bucket',
        region: 'test-region',
        baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
      },
    }))

    // Skip setup.server.ts side effects
    vi.doMock('@/lib/uploads/setup.server', () => ({}))
  })

  afterEach(() => {
    vi.clearAllMocks()
  })

  it('should serve local file successfully', async () => {
    // Create mock request
    const req = new NextRequest('http://localhost:3000/api/files/serve/test-file.txt')

    // Create params similar to what Next.js would provide
    const params = { path: ['test-file.txt'] }

    // Import the handler after mocks are set up
    const { GET } = await import('./route')

    // Call the handler
    const response = await GET(req, { params })

    // Verify response
    expect(response.status).toBe(200)
    expect(response.headers.get('Content-Type')).toBe('text/plain')
    expect(response.headers.get('Content-Disposition')).toBe('inline; filename="test-file.txt"')
    expect(response.headers.get('Cache-Control')).toBe('public, max-age=31536000')

    // Verify file was read from correct path
    expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/test-file.txt')

    // Verify response content
    const buffer = await response.arrayBuffer()
    const content = Buffer.from(buffer).toString()
    expect(content).toBe('test file content')
  })

  it('should handle nested paths correctly', async () => {
    // Create mock request
    const req = new NextRequest('http://localhost:3000/api/files/serve/nested/path/file.txt')

    // Create params similar to what Next.js would provide
    const params = { path: ['nested', 'path', 'file.txt'] }

    // Import the handler after mocks are set up
    const { GET } = await import('./route')

    // Call the handler
    const response = await GET(req, { params })

    // Verify file was read with correct path
    expect(mockReadFile).toHaveBeenCalledWith('/test/uploads/nested/path/file.txt')
  })

  it('should serve S3 file with presigned URL redirect', async () => {
    // Configure S3 storage mode
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Create mock request
    const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-file.pdf')

    // Create params similar to what Next.js would provide
    const params = { path: ['s3', '1234567890-file.pdf'] }

    // Import the handler after mocks are set up
    const { GET } = await import('./route')

    // Call the handler
    const response = await GET(req, { params })

    // Verify redirect to presigned URL
    expect(response.status).toBe(307) // Temporary redirect
    expect(response.headers.get('Location')).toBe('https://example-s3.com/presigned-url')

    // Verify presigned URL was generated for correct S3 key
    expect(mockGetPresignedUrl).toHaveBeenCalledWith('1234567890-file.pdf')
  })

  it('should handle S3 file download fallback if presigned URL fails', async () => {
    // Configure S3 storage mode
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Mock presigned URL to fail
    mockGetPresignedUrl.mockRejectedValueOnce(new Error('Presigned URL failed'))

    // Create mock request
    const req = new NextRequest('http://localhost:3000/api/files/serve/s3/1234567890-image.png')

    // Create params similar to what Next.js would provide
    const params = { path: ['s3', '1234567890-image.png'] }

    // Import the handler after mocks are set up
    const { GET } = await import('./route')

    // Call the handler
    const response = await GET(req, { params })

    // Verify response falls back to downloading and proxying the file
    expect(response.status).toBe(200)
    expect(response.headers.get('Content-Type')).toBe('image/png')
    expect(mockDownloadFromS3).toHaveBeenCalledWith('1234567890-image.png')
  })

  it('should return 404 when file not found', async () => {
    // Mock file not existing
    mockExistsSync.mockReturnValue(false)

    // Create mock request
    const req = new NextRequest('http://localhost:3000/api/files/serve/nonexistent.txt')

    // Create params similar to what Next.js would provide
    const params = { path: ['nonexistent.txt'] }

    // Import the handler after mocks are set up
    const { GET } = await import('./route')

    // Call the handler
    const response = await GET(req, { params })

    // Verify 404 response
    expect(response.status).toBe(404)

    const data = await response.json()
    // Updated to match actual error format
    expect(data).toHaveProperty('error', 'FileNotFoundError')
    expect(data).toHaveProperty('message')
    expect(data.message).toContain('File not found')
  })

  // Instead of testing all content types in one test, let's separate them
  describe('content type detection', () => {
    const contentTypeTests = [
      { ext: 'pdf', contentType: 'application/pdf' },
      { ext: 'json', contentType: 'application/json' },
      { ext: 'jpg', contentType: 'image/jpeg' },
      { ext: 'txt', contentType: 'text/plain' },
      { ext: 'unknown', contentType: 'application/octet-stream' },
    ]

    for (const test of contentTypeTests) {
      it(`should serve ${test.ext} file with correct content type`, async () => {
        // Reset modules for this test
        vi.resetModules()

        // Re-apply all mocks
        vi.doMock('fs', () => ({
          existsSync: mockExistsSync.mockReturnValue(true),
        }))

        vi.doMock('fs/promises', () => ({
          readFile: mockReadFile,
        }))

        vi.doMock('@/lib/uploads/s3-client', () => ({
          downloadFromS3: mockDownloadFromS3,
          getPresignedUrl: mockGetPresignedUrl,
        }))

        vi.doMock('@/lib/logs/console-logger', () => ({
          createLogger: vi.fn().mockReturnValue({
            info: vi.fn(),
            error: vi.fn(),
            warn: vi.fn(),
            debug: vi.fn(),
          }),
        }))

        vi.doMock('@/lib/uploads/setup', () => ({
          UPLOAD_DIR: '/test/uploads',
          USE_S3_STORAGE: false,
          ensureUploadsDirectory: mockEnsureUploadsDirectory,
          S3_CONFIG: {
            bucket: 'test-bucket',
            region: 'test-region',
            baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
          },
        }))

        vi.doMock('@/lib/uploads/setup.server', () => ({}))

        // Mock utils functions that determine content type
        vi.doMock('@/app/api/files/utils', () => ({
          getContentType: () => test.contentType,
          findLocalFile: () => '/test/uploads/file.' + test.ext,
          createFileResponse: (obj: { buffer: Buffer; contentType: string; filename: string }) =>
            new Response(obj.buffer, {
              status: 200,
              headers: {
                'Content-Type': obj.contentType,
                'Content-Disposition': `inline; filename="${obj.filename}"`,
                'Cache-Control': 'public, max-age=31536000',
              },
            }),
          createErrorResponse: () => new Response(null, { status: 404 }),
        }))

        // Create mock request with this extension
        const req = new NextRequest(`http://localhost:3000/api/files/serve/file.${test.ext}`)

        // Create params
        const params = { path: [`file.${test.ext}`] }

        // Import the handler after mocks are set up
        const { GET } = await import('./route')

        // Call the handler
        const response = await GET(req, { params })

        // Verify correct content type
        expect(response.headers.get('Content-Type')).toBe(test.contentType)
      })
    }
  })
})
sim/app/api/files/serve/[...path]/route.ts (new file, 141 lines)
@@ -0,0 +1,141 @@
import { NextRequest, NextResponse } from 'next/server'
import { readFile } from 'fs/promises'
import { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'
import { downloadFromS3, getPresignedUrl } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import {
  createErrorResponse,
  createFileResponse,
  FileNotFoundError,
  findLocalFile,
  getContentType,
} from '../../utils'

const logger = createLogger('FilesServeAPI')

/**
 * Main API route handler for serving files
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ path: string[] }> }
) {
  try {
    // Extract params
    const { path } = await params

    // Join the path segments to get the filename or S3 key
    const pathString = path.join('/')
    logger.info(`Serving file: ${pathString}`)

    // Check if this is an S3 file (path starts with 's3/')
    const isS3Path = path[0] === 's3'

    try {
      // Use S3 handler if in production or path explicitly specifies S3
      if (USE_S3_STORAGE || isS3Path) {
        return await handleS3File(path, isS3Path, pathString)
      }

      // Use local handler for local files
      return await handleLocalFile(path)
    } catch (error) {
      logger.error('Error serving file:', error)
      return createErrorResponse(error as Error)
    }
  } catch (error) {
    logger.error('Error serving file:', error)
    return createErrorResponse(error as Error)
  }
}

/**
 * Handle S3 file serving
 */
async function handleS3File(
  path: string[],
  isS3Path: boolean,
  pathString: string
): Promise<NextResponse> {
  // If path starts with s3/, remove that prefix to get the actual key
  const s3Key = isS3Path ? decodeURIComponent(path.slice(1).join('/')) : pathString
  logger.info(`Serving file from S3: ${s3Key}`)

  try {
    // First try direct access via presigned URL (most efficient)
    return await handleS3PresignedUrl(s3Key)
  } catch (error) {
    logger.info('Falling back to proxy method for S3 file')
    // Fall back to proxy method if presigned URL fails
    return await handleS3Proxy(s3Key)
  }
}

/**
 * Generate a presigned URL and redirect to it
 */
async function handleS3PresignedUrl(s3Key: string): Promise<NextResponse> {
  try {
    // Generate a presigned URL for direct S3 access
    const presignedUrl = await getPresignedUrl(s3Key)

    // Redirect to the presigned URL for direct S3 access
    return NextResponse.redirect(presignedUrl)
  } catch (error) {
    logger.error('Error generating presigned URL:', error)
    throw error
  }
}

/**
 * Proxy S3 file through our server
 */
async function handleS3Proxy(s3Key: string): Promise<NextResponse> {
  try {
    const fileBuffer = await downloadFromS3(s3Key)

    // Extract the original filename from the key (last part after last /)
    const originalFilename = s3Key.split('/').pop() || 'download'
    const contentType = getContentType(originalFilename)

    return createFileResponse({
      buffer: fileBuffer,
      contentType,
      filename: originalFilename,
    })
  } catch (error) {
    logger.error('Error downloading from S3:', error)
    throw error
  }
}

/**
 * Handle local file serving
 */
async function handleLocalFile(path: string[]): Promise<NextResponse> {
  // Join as a path for findLocalFile
  const pathString = path.join('/')
  const filePath = findLocalFile(pathString)

  // Handle file not found
  if (!filePath) {
    logger.error(`File not found in any checked paths for: ${pathString}`)
    throw new FileNotFoundError(`File not found: ${pathString}`)
  }

  // Read the file
  const fileBuffer = await readFile(filePath)

  // Get filename for content type detection and response
  const filename = path[path.length - 1]
  const contentType = getContentType(filename)

  return createFileResponse({
    buffer: fileBuffer,
    contentType,
    filename,
  })
}
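An illustrative consumer of the serve route (hypothetical usage, not part of the diff):

// Hypothetical client calls: local files are proxied directly, while S3-backed
// paths are answered with a 307 redirect to a presigned URL.
const local = await fetch('/api/files/serve/notes.txt')
// => 200 with the file body, Content-Type inferred from the extension

const hosted = await fetch('/api/files/serve/s3/1234567890-report.pdf')
// => fetch follows the 307 redirect by default, so this resolves to the
//    S3 response; pass { redirect: 'manual' } to observe the redirect itself.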
sim/app/api/files/upload/route.test.ts (new file, 261 lines)
@@ -0,0 +1,261 @@
/**
 * Tests for file upload API route
 *
 * @vitest-environment node
 */
import { NextRequest } from 'next/server'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

describe('File Upload API Route', () => {
  // Mock file system and S3 client modules
  const mockWriteFile = vi.fn().mockResolvedValue(undefined)
  const mockUploadToS3 = vi.fn().mockImplementation((buffer, fileName) => {
    return Promise.resolve({
      path: `/api/files/serve/s3/${Date.now()}-${fileName}`,
      key: `${Date.now()}-${fileName}`,
      name: fileName,
      size: buffer.length,
      type: 'text/plain',
    })
  })
  const mockEnsureUploadsDirectory = vi.fn().mockResolvedValue(true)

  // Mock form data
  const createMockFormData = (files: File[]): FormData => {
    const formData = new FormData()
    files.forEach((file) => {
      formData.append('file', file)
    })
    return formData
  }

  // Mock file
  const createMockFile = (
    name = 'test.txt',
    type = 'text/plain',
    content = 'test content'
  ): File => {
    return new File([content], name, { type })
  }

  beforeEach(() => {
    vi.resetModules()

    // Mock filesystem operations
    vi.doMock('fs/promises', () => ({
      writeFile: mockWriteFile,
    }))

    // Mock the S3 client
    vi.doMock('@/lib/uploads/s3-client', () => ({
      uploadToS3: mockUploadToS3,
    }))

    // Mock the logger
    vi.doMock('@/lib/logs/console-logger', () => ({
      createLogger: vi.fn().mockReturnValue({
        info: vi.fn(),
        error: vi.fn(),
        warn: vi.fn(),
        debug: vi.fn(),
      }),
    }))

    // Mock UUID generation
    vi.doMock('uuid', () => ({
      v4: vi.fn().mockReturnValue('mock-uuid'),
    }))

    // Configure upload directory and S3 mode with all required exports
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: false,
      ensureUploadsDirectory: mockEnsureUploadsDirectory,
      S3_CONFIG: {
        bucket: 'test-bucket',
        region: 'test-region',
        baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com',
      },
    }))

    // Skip setup.server.ts side effects
    vi.doMock('@/lib/uploads/setup.server', () => ({}))
  })

  afterEach(() => {
    vi.clearAllMocks()
  })

  it('should upload a file to local storage', async () => {
    // Create a mock request with file
    const mockFile = createMockFile()
    const formData = createMockFormData([mockFile])

    // Create mock request object
    const req = new NextRequest('http://localhost:3000/api/files/upload', {
      method: 'POST',
      body: formData,
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('path', '/api/files/serve/mock-uuid.txt')
    expect(data).toHaveProperty('name', 'test.txt')
    expect(data).toHaveProperty('size')
    expect(data).toHaveProperty('type', 'text/plain')

    // Verify file was written to local storage
    expect(mockWriteFile).toHaveBeenCalledWith('/test/uploads/mock-uuid.txt', expect.any(Buffer))
  })

  it('should upload a file to S3 when in S3 mode', async () => {
    // Configure S3 storage mode
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Create a mock request with file
    const mockFile = createMockFile('document.pdf', 'application/pdf')
    const formData = createMockFormData([mockFile])

    // Create mock request object
    const req = new NextRequest('http://localhost:3000/api/files/upload', {
      method: 'POST',
      body: formData,
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response
    expect(response.status).toBe(200)
    expect(data).toHaveProperty('path')
    expect(data.path).toContain('/api/files/serve/s3/')
    expect(data).toHaveProperty('key')
    expect(data).toHaveProperty('name', 'document.pdf')

    // Verify uploadToS3 was called with correct parameters
    expect(mockUploadToS3).toHaveBeenCalledWith(
      expect.any(Buffer),
      'document.pdf',
      'application/pdf',
      expect.any(Number)
    )

    // Verify local write was NOT called
    expect(mockWriteFile).not.toHaveBeenCalled()
  })

  it('should handle multiple file uploads', async () => {
    // Create multiple mock files
    const mockFiles = [
      createMockFile('file1.txt', 'text/plain'),
      createMockFile('file2.jpg', 'image/jpeg'),
    ]
    const formData = createMockFormData(mockFiles)

    // Create mock request object
    const req = new NextRequest('http://localhost:3000/api/files/upload', {
      method: 'POST',
      body: formData,
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify response has multiple results
    expect(response.status).toBe(200)
    expect(Array.isArray(data)).toBe(true)
    expect(data).toHaveLength(2)
    expect(data[0]).toHaveProperty('name', 'file1.txt')
    expect(data[1]).toHaveProperty('name', 'file2.jpg')

    // Verify files were written
    expect(mockWriteFile).toHaveBeenCalledTimes(2)
  })

  it('should handle missing files', async () => {
    // Create empty form data
    const formData = new FormData()

    // Create mock request object
    const req = new NextRequest('http://localhost:3000/api/files/upload', {
      method: 'POST',
      body: formData,
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify error response
    expect(response.status).toBe(400)
    expect(data).toHaveProperty('error', 'InvalidRequestError')
    expect(data).toHaveProperty('message', 'No files provided')
  })

  it('should handle S3 upload errors', async () => {
    // Configure S3 storage mode
    vi.doMock('@/lib/uploads/setup', () => ({
      UPLOAD_DIR: '/test/uploads',
      USE_S3_STORAGE: true,
    }))

    // Mock S3 upload failure
    mockUploadToS3.mockRejectedValueOnce(new Error('S3 upload failed'))

    // Create a mock request with file
    const mockFile = createMockFile()
    const formData = createMockFormData([mockFile])

    // Create mock request object
    const req = new NextRequest('http://localhost:3000/api/files/upload', {
      method: 'POST',
      body: formData,
    })

    // Import the handler after mocks are set up
    const { POST } = await import('./route')

    // Call the handler
    const response = await POST(req)
    const data = await response.json()

    // Verify error response
    expect(response.status).toBe(500)
    expect(data).toHaveProperty('error', 'Error')
    expect(data).toHaveProperty('message', 'S3 upload failed')
  })

  it('should handle CORS preflight requests', async () => {
    // Import the handler after mocks are set up
    const { OPTIONS } = await import('./route')

    // Call the handler
    const response = await OPTIONS()

    // Verify response
    expect(response.status).toBe(204)
    expect(response.headers.get('Access-Control-Allow-Methods')).toBe('GET, POST, DELETE, OPTIONS')
    expect(response.headers.get('Access-Control-Allow-Headers')).toBe('Content-Type')
  })
})
77
sim/app/api/files/upload/route.ts
Normal file
@@ -0,0 +1,77 @@
import { NextRequest, NextResponse } from 'next/server'
import { writeFile } from 'fs/promises'
import { join } from 'path'
import { v4 as uuidv4 } from 'uuid'
import { createLogger } from '@/lib/logs/console-logger'
import { uploadToS3 } from '@/lib/uploads/s3-client'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
// Import to ensure the uploads directory is created
import '@/lib/uploads/setup.server'
import { createErrorResponse, createOptionsResponse, InvalidRequestError } from '../utils'

const logger = createLogger('FilesUploadAPI')

export async function POST(request: NextRequest) {
  try {
    const formData = await request.formData()

    // Check if multiple files are being uploaded or a single file
    const files = formData.getAll('file') as File[]

    if (!files || files.length === 0) {
      throw new InvalidRequestError('No files provided')
    }

    // Log storage mode
    logger.info(`Using storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'} for file upload`)

    const uploadResults = []

    // Process each file
    for (const file of files) {
      const originalName = file.name
      const bytes = await file.arrayBuffer()
      const buffer = Buffer.from(bytes)

      if (USE_S3_STORAGE) {
        // Upload to S3 in production
        try {
          logger.info(`Uploading file to S3: ${originalName}`)
          const result = await uploadToS3(buffer, originalName, file.type, file.size)
          logger.info(`Successfully uploaded to S3: ${result.key}`)
          uploadResults.push(result)
        } catch (error) {
          logger.error('Error uploading to S3:', error)
          throw error
        }
      } else {
        // Upload to local file system in development
        const extension = originalName.split('.').pop() || ''
        const uniqueFilename = `${uuidv4()}.${extension}`
        const filePath = join(UPLOAD_DIR, uniqueFilename)

        logger.info(`Uploading file to local storage: ${filePath}`)
        await writeFile(filePath, buffer)
        logger.info(`Successfully wrote file to: ${filePath}`)

        uploadResults.push({
          path: `/api/files/serve/${uniqueFilename}`,
          name: originalName,
          size: file.size,
          type: file.type,
        })
      }
    }

    // Return all file information
    return NextResponse.json(files.length === 1 ? uploadResults[0] : uploadResults)
  } catch (error) {
    logger.error('Error uploading files:', error)
    return createErrorResponse(error instanceof Error ? error : new Error('Failed to upload files'))
  }
}

// Handle preflight requests
export async function OPTIONS() {
  return createOptionsResponse()
}
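
For orientation, a minimal client-side sketch of calling this endpoint (the function name and error handling here are illustrative, not part of the commit; the response shape follows the handler above: a single object for one file, an array for several):

// Hypothetical usage sketch for POST /api/files/upload
async function uploadFiles(files: File[]): Promise<unknown> {
  const formData = new FormData()
  for (const file of files) {
    // The route reads every entry named 'file' via formData.getAll('file')
    formData.append('file', file)
  }
  const res = await fetch('/api/files/upload', { method: 'POST', body: formData })
  if (!res.ok) throw new Error(`Upload failed with status ${res.status}`)
  return res.json() // single result object, or an array when several files were sent
}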
193
sim/app/api/files/utils.ts
Normal file
@@ -0,0 +1,193 @@
import { NextResponse } from 'next/server'
import { existsSync } from 'fs'
import { join } from 'path'
import { UPLOAD_DIR } from '@/lib/uploads/setup'

/**
 * Response type definitions
 */
export interface ApiSuccessResponse {
  success: true
  [key: string]: any
}

export interface ApiErrorResponse {
  error: string
  message?: string
}

export interface FileResponse {
  buffer: Buffer
  contentType: string
  filename: string
}

/**
 * Custom error types
 */
export class FileNotFoundError extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'FileNotFoundError'
  }
}

export class InvalidRequestError extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'InvalidRequestError'
  }
}

/**
 * Maps file extensions to MIME types
 */
export const contentTypeMap: Record<string, string> = {
  // Text formats
  txt: 'text/plain',
  csv: 'text/csv',
  json: 'application/json',
  xml: 'application/xml',
  md: 'text/markdown',
  html: 'text/html',
  css: 'text/css',
  js: 'application/javascript',
  ts: 'application/typescript',
  // Document formats
  pdf: 'application/pdf',
  doc: 'application/msword',
  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  // Spreadsheet formats
  xls: 'application/vnd.ms-excel',
  xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  // Presentation formats
  ppt: 'application/vnd.ms-powerpoint',
  pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  // Image formats
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  gif: 'image/gif',
  svg: 'image/svg+xml',
  // Archive formats
  zip: 'application/zip',
}

/**
 * List of binary file extensions
 */
export const binaryExtensions = [
  'doc',
  'docx',
  'xls',
  'xlsx',
  'ppt',
  'pptx',
  'zip',
  'png',
  'jpg',
  'jpeg',
  'gif',
  'pdf',
]

/**
 * Determine content type from file extension
 */
export function getContentType(filename: string): string {
  const extension = filename.split('.').pop()?.toLowerCase() || ''
  return contentTypeMap[extension] || 'application/octet-stream'
}

/**
 * Check if a path is an S3 path
 */
export function isS3Path(path: string): boolean {
  return path.includes('/api/files/serve/s3/')
}

/**
 * Extract S3 key from a path
 */
export function extractS3Key(path: string): string {
  if (isS3Path(path)) {
    return decodeURIComponent(path.split('/api/files/serve/s3/')[1])
  }
  return path
}

/**
 * Extract filename from a serve path
 */
export function extractFilename(path: string): string {
  if (path.startsWith('/api/files/serve/')) {
    return path.substring('/api/files/serve/'.length)
  }
  return path.split('/').pop() || path
}

/**
 * Find a file in possible local storage locations
 */
export function findLocalFile(filename: string): string | null {
  const possiblePaths = [join(UPLOAD_DIR, filename), join(process.cwd(), 'uploads', filename)]

  for (const path of possiblePaths) {
    if (existsSync(path)) {
      return path
    }
  }

  return null
}

/**
 * Create a file response with appropriate headers
 */
export function createFileResponse(file: FileResponse): NextResponse {
  return new NextResponse(file.buffer, {
    status: 200,
    headers: {
      'Content-Type': file.contentType,
      'Content-Disposition': `inline; filename="${file.filename}"`,
      'Cache-Control': 'public, max-age=31536000', // Cache for 1 year
    },
  })
}

/**
 * Create a standardized error response
 */
export function createErrorResponse(error: Error, status: number = 500): NextResponse {
  // Map error types to appropriate status codes
  const statusCode =
    error instanceof FileNotFoundError ? 404 : error instanceof InvalidRequestError ? 400 : status

  return NextResponse.json(
    {
      error: error.name,
      message: error.message,
    },
    { status: statusCode }
  )
}

/**
 * Create a standardized success response
 */
export function createSuccessResponse(data: ApiSuccessResponse): NextResponse {
  return NextResponse.json(data)
}

/**
 * Handle CORS preflight requests
 */
export function createOptionsResponse(): NextResponse {
  return new NextResponse(null, {
    status: 204,
    headers: {
      'Access-Control-Allow-Methods': 'GET, POST, DELETE, OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type',
    },
  })
}
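
To illustrate how these helpers compose (the values below are made up for illustration):

// Hypothetical inputs and the results the helpers above would produce
isS3Path('/api/files/serve/s3/1712-report.pdf')     // true
extractS3Key('/api/files/serve/s3/1712-report.pdf') // '1712-report.pdf'
extractFilename('/api/files/serve/abcd-1234.csv')   // 'abcd-1234.csv'
getContentType('notes.md')                          // 'text/markdown'
getContentType('unknown.bin')                       // 'application/octet-stream'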
@@ -0,0 +1,508 @@
'use client'

import { useRef, useState } from 'react'
import { Info, Upload, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
import { useNotificationStore } from '@/stores/notifications/store'
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
import { useWorkflowStore } from '@/stores/workflows/workflow/store'
import { useSubBlockValue } from '../hooks/use-sub-block-value'

interface FileUploadProps {
  blockId: string
  subBlockId: string
  maxSize?: number // in MB
  acceptedTypes?: string // comma separated MIME types
  multiple?: boolean // whether to allow multiple file uploads
}

interface UploadedFile {
  name: string
  path: string
  size: number
  type: string
}

export function FileUpload({
  blockId,
  subBlockId,
  maxSize = 10, // Default 10MB
  acceptedTypes = '*',
  multiple = false, // Default to single file for backward compatibility
}: FileUploadProps) {
  // State management - handle both single file and array of files
  const [value, setValue] = useSubBlockValue<UploadedFile | UploadedFile[] | null>(
    blockId,
    subBlockId,
    true
  )
  const [isUploading, setIsUploading] = useState(false)
  const [uploadProgress, setUploadProgress] = useState(0)

  // For file deletion status
  const [deletingFiles, setDeletingFiles] = useState<Record<string, boolean>>({})

  // Refs
  const fileInputRef = useRef<HTMLInputElement>(null)

  // Stores
  const { addNotification } = useNotificationStore()
  const { activeWorkflowId } = useWorkflowRegistry()

  /**
   * Opens file dialog
   * Prevents event propagation to avoid ReactFlow capturing the event
   */
  const handleOpenFileDialog = (e: React.MouseEvent) => {
    e.preventDefault()
    e.stopPropagation()

    if (fileInputRef.current) {
      fileInputRef.current.value = ''
      fileInputRef.current.click()
    }
  }

  /**
   * Formats file size for display in a human-readable format
   */
  const formatFileSize = (bytes: number): string => {
    if (bytes < 1024) return `${bytes} B`
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`
  }

  /**
   * Handles file upload when new file(s) are selected
   */
  const handleFileChange = async (e: React.ChangeEvent<HTMLInputElement>) => {
    e.stopPropagation()

    const files = e.target.files
    if (!files || files.length === 0) return

    // Get existing files and their total size
    const existingFiles = Array.isArray(value) ? value : value ? [value] : []
    const existingTotalSize = existingFiles.reduce((sum, file) => sum + file.size, 0)

    // Validate file sizes
    const maxSizeInBytes = maxSize * 1024 * 1024
    const validFiles: File[] = []
    let totalNewSize = 0

    for (let i = 0; i < files.length; i++) {
      const file = files[i]
      // Check if adding this file would exceed the total limit
      if (existingTotalSize + totalNewSize + file.size > maxSizeInBytes) {
        addNotification(
          'error',
          `Adding ${file.name} would exceed the maximum size limit of ${maxSize}MB`,
          activeWorkflowId
        )
      } else {
        validFiles.push(file)
        totalNewSize += file.size
      }
    }

    if (validFiles.length === 0) return

    setIsUploading(true)
    setUploadProgress(0)

    // Track progress simulation interval
    let progressInterval: NodeJS.Timeout | null = null

    try {
      // Simulate upload progress
      progressInterval = setInterval(() => {
        setUploadProgress((prev) => {
          const newProgress = prev + Math.random() * 10
          return newProgress > 90 ? 90 : newProgress
        })
      }, 200)

      const uploadedFiles: UploadedFile[] = []
      const uploadErrors: string[] = []

      // Upload each file separately
      for (const file of validFiles) {
        // Create FormData for upload
        const formData = new FormData()
        formData.append('file', file)

        // Upload the file
        const response = await fetch('/api/files/upload', {
          method: 'POST',
          body: formData,
        })

        // Handle error response
        if (!response.ok) {
          const errorData = await response.json().catch(() => ({ error: response.statusText }))
          const errorMessage = errorData.error || `Failed to upload file: ${response.status}`
          uploadErrors.push(`${file.name}: ${errorMessage}`)
          continue
        }

        // Process successful upload
        const data = await response.json()

        uploadedFiles.push({
          name: file.name,
          path: data.path,
          size: file.size,
          type: file.type,
        })
      }

      // Clear progress interval
      if (progressInterval) {
        clearInterval(progressInterval)
        progressInterval = null
      }

      setUploadProgress(100)

      // Send consolidated notification about uploaded files
      if (uploadedFiles.length > 0) {
        if (uploadedFiles.length === 1) {
          addNotification(
            'console',
            `${uploadedFiles[0].name} was uploaded successfully`,
            activeWorkflowId
          )
        } else {
          addNotification(
            'console',
            `Uploaded ${uploadedFiles.length} files successfully: ${uploadedFiles.map((f) => f.name).join(', ')}`,
            activeWorkflowId
          )
        }
      }

      // Send consolidated error notification if any
      if (uploadErrors.length > 0) {
        if (uploadErrors.length === 1) {
          addNotification('error', uploadErrors[0], activeWorkflowId)
        } else {
          addNotification(
            'error',
            `Failed to upload ${uploadErrors.length} files: ${uploadErrors.join('; ')}`,
            activeWorkflowId
          )
        }
      }

      // Update the file value in state based on multiple setting
      if (multiple) {
        // For multiple files: Append to existing files if any
        const existingFiles = Array.isArray(value) ? value : value ? [value] : []
        const newFiles = [...existingFiles, ...uploadedFiles]
        setValue(newFiles)

        // Make sure to update the subblock store value for the workflow execution
        useSubBlockStore.getState().setValue(blockId, subBlockId, newFiles)
        useWorkflowStore.getState().triggerUpdate()
      } else {
        // For single file: Replace with last uploaded file
        setValue(uploadedFiles[0] || null)

        // Make sure to update the subblock store value for the workflow execution
        useSubBlockStore.getState().setValue(blockId, subBlockId, uploadedFiles[0] || null)
        useWorkflowStore.getState().triggerUpdate()
      }
    } catch (error) {
      addNotification(
        'error',
        error instanceof Error ? error.message : 'Failed to upload file(s)',
        activeWorkflowId
      )
    } finally {
      // Clean up and reset upload state
      if (progressInterval) {
        clearInterval(progressInterval)
      }

      setTimeout(() => {
        setIsUploading(false)
        setUploadProgress(0)
      }, 500)
    }
  }

  /**
   * Handles deletion of a single file
   */
  const handleRemoveFile = async (file: UploadedFile, e?: React.MouseEvent) => {
    if (e) {
      e.preventDefault()
      e.stopPropagation()
    }

    // Mark this file as being deleted
    setDeletingFiles((prev) => ({ ...prev, [file.path]: true }))

    try {
      // Call API to delete the file from server
      const response = await fetch('/api/files/delete', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ filePath: file.path }),
      })

      if (!response.ok) {
        const errorData = await response.json().catch(() => ({ error: response.statusText }))
        const errorMessage = errorData.error || `Failed to delete file: ${response.status}`
        throw new Error(errorMessage)
      }

      // Update the UI state
      if (multiple) {
        // For multiple files: Remove the specific file
        const filesArray = Array.isArray(value) ? value : value ? [value] : []
        const updatedFiles = filesArray.filter((f) => f.path !== file.path)
        setValue(updatedFiles.length > 0 ? updatedFiles : null)

        // Make sure to update the subblock store value for the workflow execution
        useSubBlockStore
          .getState()
          .setValue(blockId, subBlockId, updatedFiles.length > 0 ? updatedFiles : null)
      } else {
        // For single file: Clear the value
        setValue(null)

        // Make sure to update the subblock store
        useSubBlockStore.getState().setValue(blockId, subBlockId, null)
      }

      addNotification('console', `${file.name} was deleted successfully`, activeWorkflowId)
      useWorkflowStore.getState().triggerUpdate()
    } catch (error) {
      addNotification(
        'error',
        error instanceof Error ? error.message : 'Failed to delete file from server',
        activeWorkflowId
      )
    } finally {
      // Remove file from the deleting state
      setDeletingFiles((prev) => {
        const updated = { ...prev }
        delete updated[file.path]
        return updated
      })
    }
  }

  /**
   * Handles deletion of all files (for multiple mode)
   */
  const handleRemoveAllFiles = async (e: React.MouseEvent) => {
    e.preventDefault()
    e.stopPropagation()

    if (!value) return

    const filesToDelete = Array.isArray(value) ? value : [value]
    const fileCount = filesToDelete.length

    // Mark all files as deleting
    const deletingStatus: Record<string, boolean> = {}
    filesToDelete.forEach((file) => {
      deletingStatus[file.path] = true
    })
    setDeletingFiles(deletingStatus)

    // Clear input state immediately for better UX
    setValue(null)
    useSubBlockStore.getState().setValue(blockId, subBlockId, null)
    useWorkflowStore.getState().triggerUpdate()

    if (fileInputRef.current) {
      fileInputRef.current.value = ''
    }

    // Track successful and failed deletions
    const deletionResults = {
      success: 0,
      failures: [] as string[],
    }

    // Delete each file
    for (const file of filesToDelete) {
      try {
        const response = await fetch('/api/files/delete', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({ filePath: file.path }),
        })

        if (response.ok) {
          deletionResults.success++
        } else {
          const errorData = await response.json().catch(() => ({ error: response.statusText }))
          const errorMessage = errorData.error || `Failed to delete file: ${response.status}`
          deletionResults.failures.push(`${file.name}: ${errorMessage}`)
        }
      } catch (error) {
        console.error(`Failed to delete file ${file.name}:`, error)
        deletionResults.failures.push(
          `${file.name}: ${error instanceof Error ? error.message : 'Unknown error'}`
        )
      }
    }

    // Show a single consolidated notification about the deletions
    if (deletionResults.success > 0) {
      if (fileCount === 1) {
        addNotification('console', `File was deleted successfully`, activeWorkflowId)
      } else {
        addNotification(
          'console',
          `${deletionResults.success} of ${fileCount} files were deleted successfully`,
          activeWorkflowId
        )
      }
    }

    // Show error notification if any deletions failed
    if (deletionResults.failures.length > 0) {
      if (deletionResults.failures.length === 1) {
        addNotification(
          'error',
          `Failed to delete file: ${deletionResults.failures[0]}`,
          activeWorkflowId
        )
      } else {
        addNotification(
          'error',
          `Failed to delete ${deletionResults.failures.length} files: ${deletionResults.failures.join('; ')}`,
          activeWorkflowId
        )
      }
    }

    setDeletingFiles({})
  }

  // Helper to render a single file item
  const renderFileItem = (file: UploadedFile) => {
    const isDeleting = deletingFiles[file.path]

    return (
      <div
        key={file.path}
        className="flex items-center justify-between p-2 rounded border border-border bg-secondary/30 mb-2"
      >
        <div className="flex-1 truncate pr-2">
          <div className="font-medium text-sm truncate">{file.name}</div>
          <div className="text-xs text-muted-foreground">{formatFileSize(file.size)}</div>
        </div>
        <Button
          type="button"
          variant="ghost"
          size="icon"
          className="h-8 w-8 shrink-0"
          onClick={(e) => handleRemoveFile(file, e)}
          disabled={isDeleting}
        >
          {isDeleting ? (
            <div className="h-4 w-4 animate-spin rounded-full border-2 border-current border-t-transparent" />
          ) : (
            <X className="h-4 w-4" />
          )}
        </Button>
      </div>
    )
  }

  // Get files array regardless of multiple setting
  const filesArray = Array.isArray(value) ? value : value ? [value] : []
  const hasFiles = filesArray.length > 0

  return (
    <div className="w-full" onClick={(e) => e.stopPropagation()}>
      <input
        type="file"
        ref={fileInputRef}
        onChange={handleFileChange}
        style={{ display: 'none' }}
        accept={acceptedTypes}
        multiple={multiple}
        data-testid="file-input-element"
      />

      {isUploading ? (
        <div className="w-full p-4 border border-border rounded-md">
          <Progress value={uploadProgress} className="w-full h-2 mb-2" />
          <div className="text-xs text-center text-muted-foreground">
            {uploadProgress < 100 ? 'Uploading...' : 'Upload complete!'}
          </div>
        </div>
      ) : (
        <>
          {hasFiles && (
            <div className="mb-3">
              {/* File list */}
              <div className="space-y-1">{filesArray.map(renderFileItem)}</div>

              {/* Action buttons */}
              <div className="flex space-x-2 mt-2">
                <Button
                  type="button"
                  variant="outline"
                  size="sm"
                  className="flex-1"
                  onClick={handleRemoveAllFiles}
                >
                  Remove All
                </Button>
                {multiple && (
                  <Button
                    type="button"
                    variant="outline"
                    size="sm"
                    className="flex-1"
                    onClick={handleOpenFileDialog}
                  >
                    Add More
                  </Button>
                )}
              </div>
            </div>
          )}

          {/* Show upload button if no files or if not in multiple mode */}
          {(!hasFiles || !multiple) && (
            <Button
              type="button"
              variant="outline"
              className="w-full justify-center text-center font-normal"
              onClick={handleOpenFileDialog}
            >
              <Upload className="mr-2 h-4 w-4" />
              {multiple ? 'Upload Files' : 'Upload File'}

              <Tooltip>
                <TooltipTrigger className="ml-1">
                  <Info className="h-4 w-4 text-muted-foreground" />
                </TooltipTrigger>
                <TooltipContent>
                  <p>Max file size: {maxSize}MB</p>
                  {multiple && <p>You can select multiple files at once</p>}
                </TooltipContent>
              </Tooltip>
            </Button>
          )}
        </>
      )}
    </div>
  )
}
@@ -1,7 +1,17 @@
-import { useCallback } from 'react'
+import { useCallback, useEffect, useRef } from 'react'
 import { useSubBlockStore } from '@/stores/workflows/subblock/store'
 import { useWorkflowStore } from '@/stores/workflows/workflow/store'
+import { isEqual } from 'lodash'
 
+/**
+ * Custom hook to get and set values for a sub-block in a workflow.
+ * Handles complex object values properly by using deep equality comparison.
+ *
+ * @param blockId The ID of the block containing the sub-block
+ * @param subBlockId The ID of the sub-block
+ * @param triggerWorkflowUpdate Whether to trigger a workflow update when the value changes
+ * @returns A tuple containing the current value and a setter function
+ */
 export function useSubBlockValue<T = any>(
   blockId: string,
   subBlockId: string,
@@ -15,23 +25,48 @@ export function useSubBlockValue<T = any>(
     )
   )
 
-  // Get value and setter from subblock store
-  const value = useSubBlockStore(
+  // Keep a ref to the latest value to prevent unnecessary re-renders
+  const valueRef = useRef<T | null>(null)
+
+  // Get value from subblock store
+  const storeValue = useSubBlockStore(
     useCallback(
-      (state) => state.getValue(blockId, subBlockId) ?? initialValue,
-      [blockId, subBlockId, initialValue]
+      (state) => state.getValue(blockId, subBlockId),
+      [blockId, subBlockId]
    )
  )
 
+  // Update the ref if the store value changes
+  // This ensures we're always working with the latest value
+  useEffect(() => {
+    // Use deep comparison for objects to prevent unnecessary updates
+    if (!isEqual(valueRef.current, storeValue)) {
+      valueRef.current = storeValue !== undefined ? storeValue : initialValue
+    }
+  }, [storeValue, initialValue])
+
+  // Set value function that handles deep equality for complex objects
   const setValue = useCallback(
     (newValue: T) => {
-      useSubBlockStore.getState().setValue(blockId, subBlockId, newValue)
-      if (triggerWorkflowUpdate) {
-        useWorkflowStore.getState().triggerUpdate()
+      // Use deep comparison to avoid unnecessary updates for complex objects
+      if (!isEqual(valueRef.current, newValue)) {
+        valueRef.current = newValue
+
+        // Ensure we're passing the actual value, not a reference that might change
+        const valueCopy = newValue === null
+          ? null
+          : (typeof newValue === 'object' ? JSON.parse(JSON.stringify(newValue)) : newValue)
+
+        useSubBlockStore.getState().setValue(blockId, subBlockId, valueCopy)
+
+        if (triggerWorkflowUpdate) {
+          useWorkflowStore.getState().triggerUpdate()
+        }
       }
     },
     [blockId, subBlockId, triggerWorkflowUpdate]
   )
 
-  return [value as T | null, setValue] as const
+  // Return the current value and setter
+  return [valueRef.current as T | null, setValue] as const
 }
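
As a rough sketch of how a component might consume this hook (the component and sub-block ID below are placeholders, not part of the commit):

// Hypothetical consumer of useSubBlockValue
function ExampleField({ blockId }: { blockId: string }) {
  const [value, setValue] = useSubBlockValue<string | null>(blockId, 'exampleSubBlock', true)
  // The deep-equality guard in setValue makes writing an identical value a no-op
  return <input value={value ?? ''} onChange={(e) => setValue(e.target.value)} />
}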
@@ -12,6 +12,7 @@ import { DateInput } from './components/date-input'
 import { Dropdown } from './components/dropdown'
 import { EvalInput } from './components/eval-input'
 import { FileSelectorInput } from './components/file-selector/file-selector-input'
+import { FileUpload } from './components/file-upload'
 import { FolderSelectorInput } from './components/folder-selector/components/folder-selector-input'
 import { LongInput } from './components/long-input'
 import { ScheduleConfig } from './components/schedule/schedule-config'
@@ -129,6 +130,15 @@ export function SubBlock({ blockId, config, isConnecting }: SubBlockProps) {
       return (
         <TimeInput blockId={blockId} subBlockId={config.id} placeholder={config.placeholder} />
       )
+    case 'file-upload':
+      return (
+        <FileUpload
+          blockId={blockId}
+          subBlockId={config.id}
+          acceptedTypes={config.acceptedTypes || '*'}
+          multiple={config.multiple === true}
+        />
+      )
     case 'webhook-config':
       return (
         <WebhookConfig blockId={blockId} subBlockId={config.id} isConnecting={isConnecting} />
66
sim/blocks/blocks/file.ts
Normal file
@@ -0,0 +1,66 @@
import { DocumentIcon } from '@/components/icons'
import { FileParserOutput } from '@/tools/file/parser'
import { BlockConfig } from '../types'

export const FileBlock: BlockConfig<FileParserOutput> = {
  type: 'file',
  name: 'File',
  description: 'Read and parse multiple files',
  longDescription:
    'Upload and extract contents from structured file formats including PDFs, CSV spreadsheets, and Word documents (DOCX). Specialized parsers extract text and metadata from each format. You can upload multiple files at once and access them individually or as a combined document.',
  category: 'tools',
  bgColor: '#40916C',
  icon: DocumentIcon,
  subBlocks: [
    {
      id: 'file',
      title: 'Upload Files',
      type: 'file-upload',
      layout: 'full',
      acceptedTypes: '.pdf,.csv,.docx',
      multiple: true,
    },
  ],
  tools: {
    access: ['file_parser'],
    config: {
      tool: () => 'file_parser',
      params: (params) => {
        console.log('File block params:', params)

        // Handle case where 'file' is an array (multiple files)
        if (params.file && Array.isArray(params.file) && params.file.length > 0) {
          // Process all files by sending array of paths
          const filePaths = params.file.map((file) => file.path)
          return {
            filePath: filePaths.length === 1 ? filePaths[0] : filePaths,
            fileType: params.fileType || 'auto',
          }
        }

        // Handle case where 'file' is a single file object
        if (params.file && params.file.path) {
          return {
            filePath: params.file.path,
            fileType: params.fileType || 'auto',
          }
        }

        // If no files, return empty params
        return { filePath: '', fileType: params.fileType || 'auto' }
      },
    },
  },
  inputs: {
    fileType: { type: 'string', required: false },
    file: { type: 'json', required: true },
  },
  outputs: {
    response: {
      type: {
        files: 'json',
        combinedContent: 'string',
      },
    },
  },
}
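
To make the params mapping concrete, here is roughly what it produces for each input shape (the paths are invented for illustration):

// params.file is an array of two uploads
// -> { filePath: ['/api/files/serve/a.pdf', '/api/files/serve/b.csv'], fileType: 'auto' }
// params.file is an array with a single upload
// -> { filePath: '/api/files/serve/a.pdf', fileType: 'auto' }
// params.file is a single file object
// -> { filePath: '/api/files/serve/a.pdf', fileType: 'auto' }
// params.file is absent
// -> { filePath: '', fileType: 'auto' }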
@@ -32,13 +32,13 @@ export const StarterBlock: BlockConfig<StarterBlockOutput> = {
       value: () => 'manual',
     },
     // Structured Input format - visible if manual run is selected
-    {
-      id: 'inputFormat',
-      title: 'Input Format (for API calls)',
-      type: 'input-format',
-      layout: 'full',
-      condition: { field: 'startWorkflow', value: 'manual' },
-    },
+    // {
+    //   id: 'inputFormat',
+    //   title: 'Input Format (for API calls)',
+    //   type: 'input-format',
+    //   layout: 'full',
+    //   condition: { field: 'startWorkflow', value: 'manual' },
+    // },
     // Webhook configuration
     {
       id: 'webhookProvider',
@@ -7,6 +7,7 @@ import { GoogleDocsBlock } from './blocks/docs'
 import { GoogleDriveBlock } from './blocks/drive'
 import { EvaluatorBlock } from './blocks/evaluator'
 import { ExaBlock } from './blocks/exa'
+import { FileBlock } from './blocks/file'
 import { FirecrawlBlock } from './blocks/firecrawl'
 import { FunctionBlock } from './blocks/function'
 import { GitHubBlock } from './blocks/github'
@@ -38,6 +39,7 @@ import { BlockConfig } from './types'
 export {
   AgentBlock,
   ApiBlock,
+  FileBlock,
   FunctionBlock,
   VisionBlock,
   FirecrawlBlock,
@@ -79,6 +81,7 @@ const blocks: Record<string, BlockConfig> = {
   confluence: ConfluenceBlock,
   evaluator: EvaluatorBlock,
   exa: ExaBlock,
+  file: FileBlock,
   firecrawl: FirecrawlBlock,
   function: FunctionBlock,
   github: GitHubBlock,
@@ -31,6 +31,7 @@ export type SubBlockType =
   | 'file-selector' // File selector for Google Drive, etc.
   | 'folder-selector' // Folder selector for Gmail, etc.
   | 'input-format' // Input structure format
+  | 'file-upload' // File uploader
 
 // Component width setting
 export type SubBlockLayout = 'full' | 'half'
@@ -111,6 +112,9 @@ export interface SubBlockConfig {
   requiredScopes?: string[]
   // File selector specific properties
   mimeType?: string
+  // File upload specific properties
+  acceptedTypes?: string
+  multiple?: boolean
 }
 
 // Main block definition
@@ -1771,3 +1771,25 @@ export function TypeformIcon(props: SVGProps<SVGSVGElement>) {
     </svg>
   )
 }
+
+export function DocumentIcon(props: SVGProps<SVGSVGElement>) {
+  return (
+    <svg
+      {...props}
+      width="20"
+      height="24"
+      viewBox="0 0 20 24"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+    >
+      <path
+        d="M18.5 8.2L11.5 1.2C11.4 1.1 11.3 1.05 11.2 1C11.1 0.95 11 0.92 10.9 0.9C10.85 0.88 10.82 0.85 10.8 0.85H3C1.9 0.85 1 1.75 1 2.85V21.15C1 22.25 1.9 23.15 3 23.15H17C18.1 23.15 19 22.25 19 21.15V8.5C19 8.4 18.95 8.3 18.5 8.2ZM11.5 3.5L16.5 8.5H11.5V3.5ZM3 21.15V2.85H9.5V8.5C9.5 9.05 9.95 9.5 10.5 9.5H17V21.15H3Z"
+        fill="currentColor"
+      />
+      <path
+        d="M5 12.5H14V13.5H5V12.5ZM5 17.3H14V18.3H5V17.3ZM5 7.5H7V8.5H5V7.5Z"
+        fill="currentColor"
+      />
+    </svg>
+  )
+}
68
sim/lib/file-parsers/csv-parser.ts
Normal file
@@ -0,0 +1,68 @@
import { createReadStream, existsSync } from 'fs';
import { FileParseResult, FileParser } from './types';
import csvParser from 'csv-parser';

export class CsvParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    return new Promise((resolve, reject) => {
      try {
        // Validate input
        if (!filePath) {
          return reject(new Error('No file path provided'));
        }

        // Check if file exists
        if (!existsSync(filePath)) {
          return reject(new Error(`File not found: ${filePath}`));
        }

        const results: Record<string, any>[] = [];
        const headers: string[] = [];

        createReadStream(filePath)
          .on('error', (error: Error) => {
            console.error('CSV stream error:', error);
            reject(new Error(`Failed to read CSV file: ${error.message}`));
          })
          .pipe(csvParser())
          .on('headers', (headerList: string[]) => {
            headers.push(...headerList);
          })
          .on('data', (data: Record<string, any>) => {
            results.push(data);
          })
          .on('end', () => {
            // Convert CSV data to a formatted string representation
            let content = '';

            // Add headers
            if (headers.length > 0) {
              content += headers.join(', ') + '\n';
            }

            // Add rows
            results.forEach(row => {
              const rowValues = Object.values(row).join(', ');
              content += rowValues + '\n';
            });

            resolve({
              content,
              metadata: {
                rowCount: results.length,
                headers: headers,
                rawData: results
              }
            });
          })
          .on('error', (error: Error) => {
            console.error('CSV parsing error:', error);
            reject(new Error(`Failed to parse CSV file: ${error.message}`));
          });
      } catch (error) {
        console.error('CSV general error:', error);
        reject(new Error(`Failed to process CSV file: ${(error as Error).message}`));
      }
    });
  }
}
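
A minimal sketch of invoking this parser directly (the import alias and file path are assumptions for illustration):

// Hypothetical direct use of CsvParser
import { CsvParser } from '@/lib/file-parsers/csv-parser'

async function previewCsv(filePath: string) {
  const parser = new CsvParser()
  const { content, metadata } = await parser.parseFile(filePath)
  console.log(metadata.rowCount, metadata.headers) // row count and header names
  return content // e.g. 'col1, col2\nval1, val2\n...'
}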
45
sim/lib/file-parsers/docx-parser.ts
Normal file
@@ -0,0 +1,45 @@
import { readFile } from 'fs/promises';
import mammoth from 'mammoth';
import { FileParseResult, FileParser } from './types';

// Define interface for mammoth result
interface MammothResult {
  value: string;
  messages: any[];
}

export class DocxParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      // Validate input
      if (!filePath) {
        throw new Error('No file path provided');
      }

      // Read the file
      const buffer = await readFile(filePath);

      // Extract text with mammoth
      const result = await mammoth.extractRawText({ buffer });

      // Extract HTML for metadata (optional - won't fail if this fails)
      let htmlResult: MammothResult = { value: '', messages: [] };
      try {
        htmlResult = await mammoth.convertToHtml({ buffer });
      } catch (htmlError) {
        console.warn('HTML conversion warning:', htmlError);
      }

      return {
        content: result.value,
        metadata: {
          messages: [...result.messages, ...htmlResult.messages],
          html: htmlResult.value
        }
      };
    } catch (error) {
      console.error('DOCX Parser error:', error);
      throw new Error(`Failed to parse DOCX file: ${(error as Error).message}`);
    }
  }
}
278
sim/lib/file-parsers/index.test.ts
Normal file
@@ -0,0 +1,278 @@
/**
 * Unit tests for file parsers
 *
 * @vitest-environment node
 */
import path from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import type { FileParser, FileParseResult } from './types'

// Mock file system modules
const mockExistsSync = vi.fn().mockReturnValue(true)
const mockReadFile = vi.fn().mockResolvedValue(Buffer.from('test content'))

// Mock parser functions
const mockPdfParseFile = vi.fn().mockResolvedValue({
  content: 'Parsed PDF content',
  metadata: {
    info: { Title: 'Test PDF' },
    pageCount: 5,
    version: '1.7',
  },
})

const mockCsvParseFile = vi.fn().mockResolvedValue({
  content: 'Parsed CSV content',
  metadata: {
    headers: ['column1', 'column2'],
    rowCount: 10,
  },
})

const mockDocxParseFile = vi.fn().mockResolvedValue({
  content: 'Parsed DOCX content',
  metadata: {
    pages: 3,
    author: 'Test Author',
  },
})

// Create mock module implementation
const createMockModule = () => {
  // Create mock parsers
  const mockParsers: Record<string, FileParser> = {
    pdf: { parseFile: mockPdfParseFile },
    csv: { parseFile: mockCsvParseFile },
    docx: { parseFile: mockDocxParseFile },
  }

  // Create the mock module implementation
  return {
    parseFile: async (filePath: string): Promise<FileParseResult> => {
      if (!filePath) {
        throw new Error('No file path provided')
      }

      if (!mockExistsSync(filePath)) {
        throw new Error(`File not found: ${filePath}`)
      }

      const extension = path.extname(filePath).toLowerCase().substring(1)

      if (!Object.keys(mockParsers).includes(extension)) {
        throw new Error(
          `Unsupported file type: ${extension}. Supported types are: ${Object.keys(mockParsers).join(', ')}`
        )
      }

      return mockParsers[extension].parseFile(filePath)
    },

    isSupportedFileType: (extension: string): boolean => {
      if (!extension) return false
      return Object.keys(mockParsers).includes(extension.toLowerCase())
    },
  }
}

describe('File Parsers', () => {
  // Setup required mocks before each test
  beforeEach(() => {
    vi.resetModules()

    // Mock file system modules
    vi.doMock('fs', () => ({
      existsSync: mockExistsSync,
    }))

    vi.doMock('fs/promises', () => ({
      readFile: mockReadFile,
    }))

    // Mock the file parser module with our implementation
    vi.doMock('./index', () => createMockModule())

    // Mock parser classes
    vi.doMock('./pdf-parser', () => ({
      PdfParser: vi.fn().mockImplementation(() => ({
        parseFile: mockPdfParseFile,
      })),
    }))

    vi.doMock('./csv-parser', () => ({
      CsvParser: vi.fn().mockImplementation(() => ({
        parseFile: mockCsvParseFile,
      })),
    }))

    vi.doMock('./docx-parser', () => ({
      DocxParser: vi.fn().mockImplementation(() => ({
        parseFile: mockDocxParseFile,
      })),
    }))

    vi.doMock('./raw-pdf-parser', () => ({
      RawPdfParser: vi.fn().mockImplementation(() => ({
        parseFile: vi.fn().mockResolvedValue({
          content: 'Raw parsed PDF content',
          metadata: {
            pageCount: 3,
          },
        }),
      })),
    }))

    // Silence console output during tests
    global.console = {
      ...console,
      log: vi.fn(),
      error: vi.fn(),
      warn: vi.fn(),
      debug: vi.fn(),
    }
  })

  afterEach(() => {
    vi.clearAllMocks()
    vi.resetAllMocks()
    vi.restoreAllMocks()
  })

  describe('parseFile', () => {
    it('should validate file existence', async () => {
      // Mock file not existing for this test only
      mockExistsSync.mockReturnValueOnce(false)

      // Dynamically import the module after mocks are set up
      const { parseFile } = await import('./index')

      const testFilePath = '/test/files/test.pdf'
      await expect(parseFile(testFilePath)).rejects.toThrow('File not found')
      expect(mockExistsSync).toHaveBeenCalledWith(testFilePath)
    })

    it('should throw error if file path is empty', async () => {
      const { parseFile } = await import('./index')
      await expect(parseFile('')).rejects.toThrow('No file path provided')
    })

    it('should parse PDF files successfully', async () => {
      const expectedResult = {
        content: 'Parsed PDF content',
        metadata: {
          info: { Title: 'Test PDF' },
          pageCount: 5,
          version: '1.7',
        },
      }

      mockPdfParseFile.mockResolvedValueOnce(expectedResult)
      mockExistsSync.mockReturnValue(true)

      const { parseFile } = await import('./index')
      const result = await parseFile('/test/files/document.pdf')

      expect(result).toEqual(expectedResult)
    })

    it('should parse CSV files successfully', async () => {
      const expectedResult = {
        content: 'Parsed CSV content',
        metadata: {
          headers: ['column1', 'column2'],
          rowCount: 10,
        },
      }

      mockCsvParseFile.mockResolvedValueOnce(expectedResult)
      mockExistsSync.mockReturnValue(true)

      const { parseFile } = await import('./index')
      const result = await parseFile('/test/files/data.csv')

      expect(result).toEqual(expectedResult)
    })

    it('should parse DOCX files successfully', async () => {
      const expectedResult = {
        content: 'Parsed DOCX content',
        metadata: {
          pages: 3,
          author: 'Test Author',
        },
      }

      mockDocxParseFile.mockResolvedValueOnce(expectedResult)
      mockExistsSync.mockReturnValue(true)

      const { parseFile } = await import('./index')
      const result = await parseFile('/test/files/document.docx')

      expect(result).toEqual(expectedResult)
    })

    it('should throw error for unsupported file types', async () => {
      // Make sure the file "exists" for this test
      mockExistsSync.mockReturnValue(true)

      const { parseFile } = await import('./index')
      const unsupportedFilePath = '/test/files/image.png'

      await expect(parseFile(unsupportedFilePath)).rejects.toThrow('Unsupported file type')
    })

    it('should handle errors during parsing', async () => {
      // Make sure the file "exists" for this test
      mockExistsSync.mockReturnValue(true)

      const parsingError = new Error('CSV parsing failed')
      mockCsvParseFile.mockRejectedValueOnce(parsingError)

      const { parseFile } = await import('./index')
      await expect(parseFile('/test/files/data.csv')).rejects.toThrow('CSV parsing failed')
    })
  })

  describe('isSupportedFileType', () => {
    it('should return true for supported file types', async () => {
      const { isSupportedFileType } = await import('./index')

      expect(isSupportedFileType('pdf')).toBe(true)
      expect(isSupportedFileType('csv')).toBe(true)
      expect(isSupportedFileType('docx')).toBe(true)
    })

    it('should return false for unsupported file types', async () => {
      const { isSupportedFileType } = await import('./index')

      expect(isSupportedFileType('png')).toBe(false)
      expect(isSupportedFileType('txt')).toBe(false)
      expect(isSupportedFileType('unknown')).toBe(false)
    })

    it('should handle uppercase extensions', async () => {
      const { isSupportedFileType } = await import('./index')

      expect(isSupportedFileType('PDF')).toBe(true)
      expect(isSupportedFileType('CSV')).toBe(true)
    })

    it('should handle errors gracefully', async () => {
      // Create a mock that throws an error when called
      const errorMockModule = {
        isSupportedFileType: () => {
          throw new Error('Failed to get parsers')
        },
      }

      // Mock the module with our error-throwing implementation
      vi.doMock('./index', () => errorMockModule)

      // Import and test
      const { isSupportedFileType } = await import('./index')

      // The mocked implementation throws, so the error should propagate to the caller
      expect(() => isSupportedFileType('pdf')).toThrow('Failed to get parsers')
    })
  })
})
125
sim/lib/file-parsers/index.ts
Normal file
@@ -0,0 +1,125 @@
import path from 'path';
import { FileParser, SupportedFileType, FileParseResult } from './types';
import { existsSync } from 'fs';
import { readFile } from 'fs/promises';
import { RawPdfParser } from './raw-pdf-parser';

// Lazy-loaded parsers to avoid initialization issues
let parserInstances: Record<string, FileParser> | null = null;

/**
 * Get parser instances with lazy initialization
 */
function getParserInstances(): Record<string, FileParser> {
  if (parserInstances === null) {
    parserInstances = {};

    try {
      // Import parsers only when needed - with try/catch for each one
      try {
        console.log('Attempting to load PDF parser...');
        try {
          // First try to use the pdf-parse library
          // Load the PdfParser lazily so pdf-parse does not touch its test files at module load
          const { PdfParser } = require('./pdf-parser');
          parserInstances['pdf'] = new PdfParser();
          console.log('PDF parser loaded successfully');
        } catch (pdfParseError) {
          // If that fails, fall back to our raw PDF parser
          console.error('Failed to load primary PDF parser:', pdfParseError);
          console.log('Falling back to raw PDF parser');
          parserInstances['pdf'] = new RawPdfParser();
          console.log('Raw PDF parser loaded successfully');
        }
      } catch (error) {
        console.error('Failed to load any PDF parser:', error);
        // Create a simple fallback that just returns the file size and a message
        parserInstances['pdf'] = {
          async parseFile(filePath: string): Promise<FileParseResult> {
            const buffer = await readFile(filePath);
            return {
              content: `PDF parsing is not available. File size: ${buffer.length} bytes`,
              metadata: {
                info: { Error: 'PDF parsing unavailable' },
                pageCount: 0,
                version: 'unknown'
              }
            };
          }
        };
      }

      try {
        const { CsvParser } = require('./csv-parser');
        parserInstances['csv'] = new CsvParser();
      } catch (error) {
        console.error('Failed to load CSV parser:', error);
      }

      try {
        const { DocxParser } = require('./docx-parser');
        parserInstances['docx'] = new DocxParser();
      } catch (error) {
        console.error('Failed to load DOCX parser:', error);
      }
    } catch (error) {
      console.error('Error loading file parsers:', error);
    }
  }

  console.log('Available parsers:', Object.keys(parserInstances));
  return parserInstances;
}

/**
 * Parse a file based on its extension
 * @param filePath Path to the file
 * @returns Parsed content and metadata
 */
export async function parseFile(filePath: string): Promise<FileParseResult> {
  try {
    // Validate input
    if (!filePath) {
      throw new Error('No file path provided');
    }

    // Check if file exists
    if (!existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`);
    }

    const extension = path.extname(filePath).toLowerCase().substring(1);
    console.log('Attempting to parse file with extension:', extension);

    const parsers = getParserInstances();

    if (!Object.keys(parsers).includes(extension)) {
      console.log('No parser found for extension:', extension);
      throw new Error(`Unsupported file type: ${extension}. Supported types are: ${Object.keys(parsers).join(', ')}`);
    }

    console.log('Using parser for extension:', extension);
    const parser = parsers[extension];
    return await parser.parseFile(filePath);
  } catch (error) {
    console.error('File parsing error:', error);
    throw error;
  }
}

/**
 * Check if a file type is supported
 * @param extension File extension without the dot
 * @returns true if supported, false otherwise
 */
export function isSupportedFileType(extension: string): extension is SupportedFileType {
  try {
    return Object.keys(getParserInstances()).includes(extension.toLowerCase());
  } catch (error) {
    console.error('Error checking supported file type:', error);
    return false;
  }
}

// Type exports
export type { FileParseResult, FileParser, SupportedFileType };
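
Putting the module together, a short sketch of the exported API (the import alias, helper name, and path are illustrative assumptions):

// Hypothetical use of the dispatcher exported above
import { parseFile, isSupportedFileType } from '@/lib/file-parsers'

async function extractText(filePath: string) {
  const extension = filePath.split('.').pop() ?? ''
  if (!isSupportedFileType(extension)) {
    throw new Error(`Cannot parse .${extension} files`)
  }
  // Dispatches to the PDF, CSV, or DOCX parser based on the extension
  const { content } = await parseFile(filePath)
  return content
}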
113
sim/lib/file-parsers/pdf-parser.ts
Normal file
@@ -0,0 +1,113 @@
|
import { readFile } from 'fs/promises';
// @ts-ignore
import * as pdfParseLib from 'pdf-parse/lib/pdf-parse.js';
import { FileParseResult, FileParser } from './types';

export class PdfParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      console.log('PDF Parser: Starting to parse file:', filePath);

      // Make sure we're only parsing the provided file path
      if (!filePath) {
        throw new Error('No file path provided');
      }

      // Read the file
      console.log('PDF Parser: Reading file...');
      const dataBuffer = await readFile(filePath);
      console.log('PDF Parser: File read successfully, size:', dataBuffer.length);

      // Try to parse with the pdf-parse library first
      try {
        console.log('PDF Parser: Attempting to parse with pdf-parse library...');

        // Parse the PDF with a direct function call to avoid the library's test-file access
        console.log('PDF Parser: Starting PDF parsing...');
        const data = await pdfParseLib.default(dataBuffer);
        console.log('PDF Parser: PDF parsed successfully with pdf-parse, pages:', data.numpages);

        return {
          content: data.text,
          metadata: {
            pageCount: data.numpages,
            info: data.info,
            version: data.version
          }
        };
      } catch (pdfParseError) {
        console.error('PDF-parse library failed:', pdfParseError);

        // Fall back to manual text extraction
        console.log('PDF Parser: Falling back to manual text extraction...');

        // Extract basic PDF info from the raw content
        const rawContent = dataBuffer.toString('utf-8', 0, Math.min(10000, dataBuffer.length));

        let version = 'Unknown';
        let pageCount = 0;

        // Try to extract the PDF version
        const versionMatch = rawContent.match(/%PDF-(\d+\.\d+)/);
        if (versionMatch && versionMatch[1]) {
          version = versionMatch[1];
        }

        // Try to get the page count
        const pageMatches = rawContent.match(/\/Type\s*\/Page\b/g);
        if (pageMatches) {
          pageCount = pageMatches.length;
        }

        // Try to extract text by looking for text-related operators in the PDF
        let extractedText = '';

        // Look for text in the PDF content using common patterns
        const textMatches = rawContent.match(/BT[\s\S]*?ET/g);
        if (textMatches && textMatches.length > 0) {
          extractedText = textMatches.map(textBlock => {
            // Extract text objects (Tj, TJ) from the text block
            const textObjects = textBlock.match(/\([^)]*\)\s*Tj|\[[^\]]*\]\s*TJ/g);
            if (textObjects) {
              return textObjects.map(obj => {
                // Strip the operator, keeping only the string operand
                return obj.replace(/\(([^)]*)\)\s*Tj|\[([^\]]*)\]\s*TJ/g,
                  (match, p1, p2) => p1 || p2 || '')
                  // Decode PDF escape sequences: octal codes first, then escaped delimiters
                  // (the original stripped them outright, which discarded the characters
                  // and made the subsequent replaces dead code)
                  .replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
                  .replace(/\\\\/g, '\\')
                  .replace(/\\\(/g, '(')
                  .replace(/\\\)/g, ')');
              }).join(' ');
            }
            return '';
          }).join('\n');
        }

        // If we couldn't extract text, provide a helpful message
        if (!extractedText || extractedText.length < 20) {
          extractedText = `This PDF document (version ${version}) contains ${pageCount || 'an unknown number of'} pages. The text could not be extracted properly.

For better results, please use a dedicated PDF reader or text extraction tool.`;
        }

        console.log('PDF Parser: Manual text extraction completed, found text length:', extractedText.length);

        return {
          content: extractedText,
          metadata: {
            pageCount: pageCount || 0,
            info: {
              manualExtraction: true,
              version
            },
            version
          }
        };
      }
    } catch (error) {
      console.error('PDF Parser error:', error);
      throw new Error(`Failed to parse PDF file: ${(error as Error).message}`);
    }
  }
}
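A minimal usage sketch for the parser above; the calling module and file path are illustrative, not part of this commit:

import { PdfParser } from './pdf-parser';

async function demo() {
  const parser = new PdfParser();
  const result = await parser.parseFile('/tmp/example.pdf'); // assumed local path
  console.log(result.metadata?.pageCount, 'pages');
  console.log(result.content.slice(0, 200)); // first 200 characters of extracted text
}

demo().catch(console.error);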
481
sim/lib/file-parsers/raw-pdf-parser.ts
Normal file
@@ -0,0 +1,481 @@
import { readFile } from 'fs/promises';
import { FileParseResult, FileParser } from './types';
import zlib from 'zlib';
import { promisify } from 'util';

// Promisify the zlib functions
const inflateAsync = promisify(zlib.inflate);
const unzipAsync = promisify(zlib.unzip);

/**
 * A simple PDF parser that extracts readable text from a PDF file.
 * This is used as a fallback when the pdf-parse library fails.
 */
export class RawPdfParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      console.log('RawPdfParser: Starting to parse file:', filePath);

      if (!filePath) {
        throw new Error('No file path provided');
      }

      // Read the file
      console.log('RawPdfParser: Reading file...');
      const dataBuffer = await readFile(filePath);
      console.log('RawPdfParser: File read successfully, size:', dataBuffer.length);

      // Instead of trying to parse the binary PDF data directly,
      // we'll extract only the text sections that are readable

      // Convert to a string for pattern matching only, not for display.
      // latin1 preserves a 1:1 byte mapping, so compressed stream bytes
      // survive the round-trip (the original utf-8 conversion corrupted
      // binary stream data before decompression).
      const rawContent = dataBuffer.toString('latin1');

      // Extract basic PDF info
      let version = 'Unknown';
      let pageCount = 0;

      // Try to extract the PDF version
      const versionMatch = rawContent.match(/%PDF-(\d+\.\d+)/);
      if (versionMatch && versionMatch[1]) {
        version = versionMatch[1];
      }

      // Count pages using multiple methods for redundancy
      // Method 1: Count "/Type /Page" occurrences (most reliable)
      const typePageMatches = rawContent.match(/\/Type\s*\/Page\b/gi);
      if (typePageMatches) {
        pageCount = typePageMatches.length;
        console.log('RawPdfParser: Found page count using /Type /Page:', pageCount);
      }

      // Method 2: Look for "/Page" dictionary references
      if (pageCount === 0) {
        const pageMatches = rawContent.match(/\/Page\s*\//gi);
        if (pageMatches) {
          pageCount = pageMatches.length;
          console.log('RawPdfParser: Found page count using /Page/ pattern:', pageCount);
        }
      }

      // Method 3: Look for "/Pages" object references
      if (pageCount === 0) {
        const pagesObjMatches = rawContent.match(/\/Pages\s+\d+\s+\d+\s+R/gi);
        if (pagesObjMatches && pagesObjMatches.length > 0) {
          // Extract the object reference
          const pagesObjRef = pagesObjMatches[0].match(/\/Pages\s+(\d+)\s+\d+\s+R/i);
          if (pagesObjRef && pagesObjRef[1]) {
            const objNum = pagesObjRef[1];
            // Find the referenced object
            const objRegex = new RegExp(`${objNum}\\s+0\\s+obj[\\s\\S]*?endobj`, 'i');
            const objMatch = rawContent.match(objRegex);
            if (objMatch) {
              // Look for /Count within the Pages object
              const countMatch = objMatch[0].match(/\/Count\s+(\d+)/i);
              if (countMatch && countMatch[1]) {
                pageCount = parseInt(countMatch[1], 10);
                console.log('RawPdfParser: Found page count using /Count in Pages object:', pageCount);
              }
            }
          }
        }
      }

      // Method 4: Count trailer references to get an approximate count
      if (pageCount === 0) {
        const trailerMatches = rawContent.match(/trailer/gi);
        if (trailerMatches) {
          // This is just a rough estimate, not accurate
          pageCount = Math.max(1, Math.ceil(trailerMatches.length / 2));
          console.log('RawPdfParser: Estimated page count using trailer references:', pageCount);
        }
      }

      // Default to at least 1 page if we couldn't find any
      if (pageCount === 0) {
        pageCount = 1;
        console.log('RawPdfParser: Defaulting to 1 page as no count was found');
      }

      // Extract text content using text markers commonly found in PDFs
      let extractedText = '';

      // Method 1: Extract text between BT (Begin Text) and ET (End Text) markers
      const textMatches = rawContent.match(/BT[\s\S]*?ET/g);
      if (textMatches && textMatches.length > 0) {
        console.log('RawPdfParser: Found', textMatches.length, 'text blocks');

        extractedText = textMatches.map(textBlock => {
          // Extract text objects (Tj, TJ) from the text block
          const textObjects = textBlock.match(/(\([^)]*\)|\[[^\]]*\])\s*(Tj|TJ)/g);
          if (textObjects && textObjects.length > 0) {
            return textObjects.map(obj => {
              // Clean up text objects
              let text = '';
              if (obj.includes('Tj')) {
                // Handle the Tj operator (simple string)
                const match = obj.match(/\(([^)]*)\)\s*Tj/);
                if (match && match[1]) {
                  text = match[1];
                }
              } else if (obj.includes('TJ')) {
                // Handle the TJ operator (array of strings and positioning)
                const match = obj.match(/\[(.*)\]\s*TJ/);
                if (match && match[1]) {
                  // Extract only the string parts from the array
                  const parts = match[1].match(/\([^)]*\)/g);
                  if (parts) {
                    text = parts.map(p => p.slice(1, -1)).join(' ');
                  }
                }
              }

              // Decode PDF escape sequences
              return text
                .replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
                .replace(/\\\\/g, '\\')
                .replace(/\\\(/g, '(')
                .replace(/\\\)/g, ')');
            }).join(' ');
          }
          return '';
        }).join('\n').trim();
      }

      // Try to extract metadata from the XMP XML packet
      let metadataText = '';
      const xmlMatch = rawContent.match(/<x:xmpmeta[\s\S]*?<\/x:xmpmeta>/);
      if (xmlMatch) {
        const xmlContent = xmlMatch[0];
        console.log('RawPdfParser: Found XML metadata');

        // Extract the document title
        const titleMatch = xmlContent.match(/<dc:title>[\s\S]*?<rdf:li[^>]*>(.*?)<\/rdf:li>/i);
        if (titleMatch && titleMatch[1]) {
          const title = titleMatch[1].replace(/<[^>]+>/g, '').trim();
          metadataText += `Document Title: ${title}\n\n`;
        }

        // Extract the creator/author
        const creatorMatch = xmlContent.match(/<dc:creator>[\s\S]*?<rdf:li[^>]*>(.*?)<\/rdf:li>/i);
        if (creatorMatch && creatorMatch[1]) {
          const creator = creatorMatch[1].replace(/<[^>]+>/g, '').trim();
          metadataText += `Author: ${creator}\n`;
        }

        // Extract the creation date
        const dateMatch = xmlContent.match(/<xmp:CreateDate>(.*?)<\/xmp:CreateDate>/i);
        if (dateMatch && dateMatch[1]) {
          metadataText += `Created: ${dateMatch[1].trim()}\n`;
        }

        // Extract the producer
        const producerMatch = xmlContent.match(/<pdf:Producer>(.*?)<\/pdf:Producer>/i);
        if (producerMatch && producerMatch[1]) {
          metadataText += `Producer: ${producerMatch[1].trim()}\n`;
        }
      }

      // Try to extract actual text content from content streams
      if (!extractedText || extractedText.length < 100 || extractedText.includes('/Type /Page')) {
        console.log('RawPdfParser: Trying advanced text extraction from content streams');

        // Find content stream references
        const contentRefs = rawContent.match(/\/Contents\s+\[?\s*(\d+)\s+\d+\s+R\s*\]?/g);
        if (contentRefs && contentRefs.length > 0) {
          console.log('RawPdfParser: Found', contentRefs.length, 'content stream references');

          // Extract object numbers from the content references
          const objNumbers = contentRefs.map(ref => {
            const match = ref.match(/\/Contents\s+\[?\s*(\d+)\s+\d+\s+R\s*\]?/);
            return match ? match[1] : null;
          }).filter(Boolean);

          console.log('RawPdfParser: Content stream object numbers:', objNumbers);

          // Try to find those objects in the content
          if (objNumbers.length > 0) {
            let textFromStreams = '';

            for (const objNum of objNumbers) {
              const objRegex = new RegExp(`${objNum}\\s+0\\s+obj[\\s\\S]*?endobj`, 'i');
              const objMatch = rawContent.match(objRegex);

              if (objMatch) {
                // Look for stream content within the object
                const streamMatch = objMatch[0].match(/stream\r?\n([\s\S]*?)\r?\nendstream/);
                if (streamMatch && streamMatch[1]) {
                  const streamContent = streamMatch[1];

                  // Look for text operations in the stream (Tj, TJ, etc.)
                  const textFragments = streamContent.match(/\([^)]+\)\s*Tj|\[[^\]]*\]\s*TJ/g);
                  if (textFragments && textFragments.length > 0) {
                    const extractedFragments = textFragments.map(fragment => {
                      if (fragment.includes('Tj')) {
                        return fragment.replace(/\(([^)]*)\)\s*Tj/, '$1')
                          .replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
                          .replace(/\\\\/g, '\\')
                          .replace(/\\\(/g, '(')
                          .replace(/\\\)/g, ')');
                      } else if (fragment.includes('TJ')) {
                        const parts = fragment.match(/\([^)]*\)/g);
                        if (parts) {
                          return parts.map(p => p.slice(1, -1)
                            .replace(/\\(\d{3})/g, (_, octal) => String.fromCharCode(parseInt(octal, 8)))
                            .replace(/\\\\/g, '\\')
                            .replace(/\\\(/g, '(')
                            .replace(/\\\)/g, ')')
                          ).join(' ');
                        }
                      }
                      return '';
                    }).filter(Boolean).join(' ');

                    if (extractedFragments.trim().length > 0) {
                      textFromStreams += extractedFragments.trim() + '\n';
                    }
                  }
                }
              }
            }

            if (textFromStreams.trim().length > 0) {
              console.log('RawPdfParser: Successfully extracted text from content streams');
              extractedText = textFromStreams.trim();
            }
          }
        }
      }

      // Try to decompress PDF streams.
      // This is especially helpful for PDFs with compressed content.
      if (!extractedText || extractedText.length < 100) {
        console.log('RawPdfParser: Trying to decompress PDF streams');

        // Find compressed streams (FlateDecode)
        const compressedStreams = rawContent.match(/\/Filter\s*\/FlateDecode[\s\S]*?stream[\s\S]*?endstream/g);
        if (compressedStreams && compressedStreams.length > 0) {
          console.log('RawPdfParser: Found', compressedStreams.length, 'compressed streams');

          // Process each stream
          const decompressedContents = await Promise.all(
            compressedStreams.map(async (stream) => {
              try {
                // Extract stream content between stream and endstream
                const streamMatch = stream.match(/stream\r?\n([\s\S]*?)\r?\nendstream/);
                if (!streamMatch || !streamMatch[1]) return '';

                const compressedData = Buffer.from(streamMatch[1], 'binary');

                // Try different decompression methods
                try {
                  // Try inflate (most common)
                  const decompressed = await inflateAsync(compressedData);
                  const content = decompressed.toString('utf-8');

                  // Check if it contains readable text
                  const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
                  if (readable.length > 50 &&
                      readable.includes(' ') &&
                      (readable.includes('.') || readable.includes(',')) &&
                      !/[\x00-\x1F\x7F]/.test(readable)) {
                    return readable;
                  }
                } catch (inflateErr) {
                  // Try unzip as a fallback
                  try {
                    const decompressed = await unzipAsync(compressedData);
                    const content = decompressed.toString('utf-8');

                    // Check if it contains readable text
                    const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();
                    if (readable.length > 50 &&
                        readable.includes(' ') &&
                        (readable.includes('.') || readable.includes(',')) &&
                        !/[\x00-\x1F\x7F]/.test(readable)) {
                      return readable;
                    }
                  } catch (unzipErr) {
                    // Both methods failed; continue to the next stream
                    return '';
                  }
                }
              } catch (error) {
                // Error processing this stream; skip it
                return '';
              }

              return '';
            })
          );

          // Filter out empty results and combine
          const decompressedText = decompressedContents
            .filter(text => text && text.length > 0)
            .join('\n\n');

          if (decompressedText && decompressedText.length > 0) {
            console.log('RawPdfParser: Successfully decompressed text content, length:', decompressedText.length);
            extractedText = decompressedText;
          }
        }
      }

      // Method 2: Look for text stream data
      if (!extractedText || extractedText.length < 50) {
        console.log('RawPdfParser: Trying alternative text extraction method with streams');

        // Find text streams
        const streamMatches = rawContent.match(/stream[\s\S]*?endstream/g);
        if (streamMatches && streamMatches.length > 0) {
          console.log('RawPdfParser: Found', streamMatches.length, 'streams');

          // Process each stream to look for text content
          const textContent = streamMatches
            .map(stream => {
              // Remove the 'stream' and 'endstream' markers
              const content = stream.replace(/^stream\r?\n|\r?\nendstream$/g, '');

              // Look for readable ASCII text (stricter heuristic):
              // only keep ASCII printable characters
              const readable = content.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();

              // Only keep content that looks like real text (has spaces, periods, etc.)
              if (readable.length > 20 &&
                  readable.includes(' ') &&
                  (readable.includes('.') || readable.includes(',')) &&
                  !/[\x00-\x1F\x7F]/.test(readable)) {
                return readable;
              }
              return '';
            })
            .filter(text => text.length > 0 && text.split(' ').length > 5) // Must have at least 5 words
            .join('\n\n');

          if (textContent.length > 0) {
            extractedText = textContent;
          }
        }
      }

      // Method 3: Look for object streams
      if (!extractedText || extractedText.length < 50) {
        console.log('RawPdfParser: Trying object streams for text');

        // Find object stream content
        const objMatches = rawContent.match(/\d+\s+\d+\s+obj[\s\S]*?endobj/g);
        if (objMatches && objMatches.length > 0) {
          console.log('RawPdfParser: Found', objMatches.length, 'objects');

          // Process objects looking for text content
          const textContent = objMatches
            .map(obj => {
              // Find readable text in the object; only keep ASCII printable characters
              const readable = obj.replace(/[^\x20-\x7E\r\n]/g, ' ').trim();

              // Only include it if it looks like actual text (strict heuristic)
              if (readable.length > 50 &&
                  readable.includes(' ') &&
                  !readable.includes('/Filter') &&
                  readable.split(' ').length > 10 &&
                  (readable.includes('.') || readable.includes(','))) {
                return readable;
              }
              return '';
            })
            .filter(text => text.length > 0)
            .join('\n\n');

          if (textContent.length > 0) {
            extractedText += (extractedText ? '\n\n' : '') + textContent;
          }
        }
      }

      // If what we extracted is just PDF structure information rather than readable text,
      // provide a clearer message
      if (extractedText && (
          extractedText.includes('endobj') ||
          extractedText.includes('/Type /Page') ||
          extractedText.match(/\d+\s+\d+\s+obj/g)
        ) && metadataText) {
        console.log('RawPdfParser: Extracted content appears to be PDF structure information, using metadata instead');
        extractedText = metadataText;
      } else if (metadataText && !extractedText.includes('Document Title:')) {
        // Prepend metadata to the extracted text if available
        extractedText = metadataText + (extractedText ? '\n\n' + extractedText : '');
      }

      // Validate that the extracted text looks meaningful
      // by counting how many recognizable characters it contains
      const validCharCount = (extractedText || '').replace(/[^\x20-\x7E\r\n]/g, '').length;
      const totalCharCount = (extractedText || '').length;
      const validRatio = validCharCount / (totalCharCount || 1);

      // Check for common PDF artifacts that indicate binary corruption
      const hasBinaryArtifacts = extractedText && (
        extractedText.includes('\\u') ||
        extractedText.includes('\\x') ||
        extractedText.includes('\0') ||
        /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\xFF]{10,}/g.test(extractedText) ||
        validRatio < 0.7 // Less than 70% valid characters
      );

      // Check if the content looks like gibberish
      const looksLikeGibberish = extractedText && (
        // Too many special characters
        extractedText.replace(/[a-zA-Z0-9\s.,;:'"()[\]{}]/g, '').length / extractedText.length > 0.3 ||
        // Not enough spaces (real text has spaces between words)
        extractedText.split(' ').length < extractedText.length / 20
      );

      // If no text was extracted, or if it's binary/gibberish,
      // provide a helpful message instead
      if (!extractedText || extractedText.length < 50 || hasBinaryArtifacts || looksLikeGibberish) {
        console.log('RawPdfParser: Could not extract meaningful text, providing fallback message');
        console.log('RawPdfParser: Valid character ratio:', validRatio);
        console.log('RawPdfParser: Has binary artifacts:', hasBinaryArtifacts);
        console.log('RawPdfParser: Looks like gibberish:', looksLikeGibberish);

        // Start with metadata if available
        if (metadataText) {
          extractedText = metadataText + '\n';
        } else {
          extractedText = '';
        }

        // Add basic PDF info
        extractedText += `This is a PDF document with ${pageCount} page(s) and version ${version}.\n\n`;

        // Try to find a title in the PDF structure that we might have missed
        const titleInStructure = rawContent.match(/title\s*:\s*([^\n]+)/i) ||
          rawContent.match(/Microsoft Word -\s*([^\n]+)/i);

        if (titleInStructure && titleInStructure[1] && !extractedText.includes('Document Title:')) {
          const title = titleInStructure[1].trim();
          extractedText = `Document Title: ${title}\n\n` + extractedText;
        }

        extractedText += `The text content could not be properly extracted due to encoding or compression issues.\nFile size: ${dataBuffer.length} bytes.\n\nTo view this PDF properly, please download the file and open it with a PDF reader.`;
      }

      console.log('RawPdfParser: PDF parsed with basic extraction, found text length:', extractedText.length);

      return {
        content: extractedText,
        metadata: {
          pageCount,
          info: {
            RawExtraction: true,
            Version: version,
            Size: dataBuffer.length
          },
          version
        }
      };
    } catch (error) {
      console.error('RawPdfParser error:', error);
      throw new Error(`Failed to parse PDF file: ${(error as Error).message}`);
    }
  }
}
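The wiring between the two parsers is not visible in this hunk; a plausible sketch of the fallback chain the doc comment above describes (the helper name is hypothetical):

import { PdfParser } from './pdf-parser';
import { RawPdfParser } from './raw-pdf-parser';
import { FileParseResult } from './types';

// Hypothetical helper: try the pdf-parse-backed parser first,
// fall back to raw heuristic extraction if it throws.
export async function parsePdfWithFallback(filePath: string): Promise<FileParseResult> {
  try {
    return await new PdfParser().parseFile(filePath);
  } catch {
    return new RawPdfParser().parseFile(filePath);
  }
}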
10
sim/lib/file-parsers/types.ts
Normal file
@@ -0,0 +1,10 @@
export interface FileParseResult {
  content: string;
  metadata?: Record<string, any>;
}

export interface FileParser {
  parseFile(filePath: string): Promise<FileParseResult>;
}

export type SupportedFileType = 'pdf' | 'csv' | 'docx';
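For reference, a sketch of what implementing FileParser looks like. This is illustrative only (the commit's actual CSV parser is not shown in this section), though it uses the csv-parse dependency this commit adds:

import { readFile } from 'fs/promises';
import { parse } from 'csv-parse/sync';
import { FileParseResult, FileParser } from './types';

// Illustrative implementation, not the commit's real CSV parser.
export class ExampleCsvParser implements FileParser {
  async parseFile(filePath: string): Promise<FileParseResult> {
    const raw = await readFile(filePath, 'utf-8');
    const records: string[][] = parse(raw); // rows as arrays of strings
    return {
      content: records.map((row) => row.join(', ')).join('\n'),
      metadata: { rowCount: records.length },
    };
  }
}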
302
sim/lib/uploads/s3-client.test.ts
Normal file
@@ -0,0 +1,302 @@
/**
 * Unit tests for the S3 client
 *
 * @vitest-environment node
 */
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import {
  S3Client,
  PutObjectCommand,
  GetObjectCommand,
  DeleteObjectCommand
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import {
  uploadToS3,
  getPresignedUrl,
  downloadFromS3,
  deleteFromS3,
  s3Client,
  FileInfo
} from './s3-client'

// Mock the AWS SDK
vi.mock('@aws-sdk/client-s3', () => {
  const mockSend = vi.fn()
  const mockS3Client = vi.fn().mockImplementation(() => ({
    send: mockSend
  }))

  return {
    S3Client: mockS3Client,
    PutObjectCommand: vi.fn(),
    GetObjectCommand: vi.fn(),
    DeleteObjectCommand: vi.fn()
  }
})

vi.mock('@aws-sdk/s3-request-presigner', () => ({
  getSignedUrl: vi.fn().mockResolvedValue('https://example.com/presigned-url')
}))

// Mock the S3 configuration used by the client
vi.mock('./setup', () => ({
  S3_CONFIG: {
    bucket: 'test-bucket',
    region: 'test-region',
    baseUrl: 'https://test-bucket.s3.test-region.amazonaws.com'
  }
}))

// Mock the logger
vi.mock('@/lib/logs/console-logger', () => ({
  createLogger: vi.fn().mockReturnValue({
    info: vi.fn(),
    error: vi.fn(),
    warn: vi.fn(),
    debug: vi.fn()
  })
}))

describe('S3 Client', () => {
  let mockDate: Date
  let originalDateNow: typeof Date.now

  beforeEach(() => {
    vi.clearAllMocks()

    // Mock Date.now() for predictable timestamps
    mockDate = new Date(2023, 0, 1, 12, 0, 0) // 2023-01-01 12:00:00
    originalDateNow = Date.now
    Date.now = vi.fn(() => mockDate.getTime())
  })

  afterEach(() => {
    // Restore the original Date.now
    Date.now = originalDateNow
  })

  describe('uploadToS3', () => {
    it('should upload a file to S3 and return file info', async () => {
      // Mock the S3 client send method to return an appropriate type
      vi.mocked(s3Client.send).mockResolvedValueOnce({
        $metadata: { httpStatusCode: 200 }
      } as any)

      const testFile = Buffer.from('test file content')
      const fileName = 'test-file.txt'
      const contentType = 'text/plain'
      const fileSize = testFile.length

      const result = await uploadToS3(testFile, fileName, contentType)

      // Check that the S3 client was called with the correct parameters
      expect(PutObjectCommand).toHaveBeenCalledWith({
        Bucket: 'test-bucket',
        Key: expect.stringContaining('test-file.txt'),
        Body: testFile,
        ContentType: contentType,
        Metadata: {
          originalName: fileName,
          uploadedAt: expect.any(String)
        }
      })

      expect(s3Client.send).toHaveBeenCalledTimes(1)

      // Check the return value
      expect(result).toEqual({
        path: expect.stringContaining('/api/files/serve/s3/'),
        key: expect.stringContaining('test-file.txt'),
        name: fileName,
        size: fileSize,
        type: contentType
      })
    })

    it('should handle spaces in filenames', async () => {
      vi.mocked(s3Client.send).mockResolvedValueOnce({
        $metadata: { httpStatusCode: 200 }
      } as any)

      const testFile = Buffer.from('test file content')
      const fileName = 'test file with spaces.txt'
      const contentType = 'text/plain'

      const result = await uploadToS3(testFile, fileName, contentType)

      // Verify spaces were replaced with hyphens in the key but the original name is preserved
      expect(result.key).toContain('test-file-with-spaces.txt')
      expect(result.name).toBe(fileName)
    })

    it('should use provided size if available', async () => {
      vi.mocked(s3Client.send).mockResolvedValueOnce({
        $metadata: { httpStatusCode: 200 }
      } as any)

      const testFile = Buffer.from('test file content')
      const fileName = 'test-file.txt'
      const contentType = 'text/plain'
      const providedSize = 12345 // Different from the actual buffer size

      const result = await uploadToS3(testFile, fileName, contentType, providedSize)

      expect(result.size).toBe(providedSize)
    })

    it('should handle upload errors', async () => {
      const error = new Error('Upload failed')
      vi.mocked(s3Client.send).mockRejectedValueOnce(error)

      const testFile = Buffer.from('test file content')
      const fileName = 'test-file.txt'
      const contentType = 'text/plain'

      await expect(uploadToS3(testFile, fileName, contentType)).rejects.toThrow('Upload failed')
    })
  })

  describe('getPresignedUrl', () => {
    it('should generate a presigned URL for a file', async () => {
      const key = 'test-file.txt'
      const expiresIn = 7200

      const url = await getPresignedUrl(key, expiresIn)

      expect(GetObjectCommand).toHaveBeenCalledWith({
        Bucket: 'test-bucket',
        Key: key
      })

      expect(getSignedUrl).toHaveBeenCalledWith(
        s3Client,
        expect.any(Object),
        { expiresIn }
      )

      expect(url).toBe('https://example.com/presigned-url')
    })

    it('should use default expiration if not provided', async () => {
      const key = 'test-file.txt'

      await getPresignedUrl(key)

      expect(getSignedUrl).toHaveBeenCalledWith(
        s3Client,
        expect.any(Object),
        { expiresIn: 3600 } // The default is 3600 seconds (1 hour)
      )
    })

    it('should handle errors when generating presigned URL', async () => {
      const error = new Error('Presigned URL generation failed')
      vi.mocked(getSignedUrl).mockRejectedValueOnce(error)

      const key = 'test-file.txt'

      await expect(getPresignedUrl(key)).rejects.toThrow('Presigned URL generation failed')
    })
  })

  describe('downloadFromS3', () => {
    it('should download a file from S3', async () => {
      // Create a mock stream with data events
      const mockStream = {
        on: vi.fn((event, callback) => {
          if (event === 'data') {
            callback(Buffer.from('chunk1'))
            callback(Buffer.from('chunk2'))
          }
          if (event === 'end') {
            callback()
          }
          return mockStream
        })
      }

      vi.mocked(s3Client.send).mockResolvedValueOnce({
        Body: mockStream,
        $metadata: { httpStatusCode: 200 }
      } as any)

      const key = 'test-file.txt'
      const result = await downloadFromS3(key)

      expect(GetObjectCommand).toHaveBeenCalledWith({
        Bucket: 'test-bucket',
        Key: key
      })

      expect(s3Client.send).toHaveBeenCalledTimes(1)
      expect(result).toBeInstanceOf(Buffer)
      expect(Buffer.concat([Buffer.from('chunk1'), Buffer.from('chunk2')]).toString()).toEqual(result.toString())
    })

    it('should handle stream errors', async () => {
      const mockStream = {
        on: vi.fn((event, callback) => {
          if (event === 'error') {
            callback(new Error('Stream error'))
          }
          return mockStream
        })
      }

      vi.mocked(s3Client.send).mockResolvedValueOnce({
        Body: mockStream,
        $metadata: { httpStatusCode: 200 }
      } as any)

      const key = 'test-file.txt'
      await expect(downloadFromS3(key)).rejects.toThrow('Stream error')
    })

    it('should handle S3 client errors', async () => {
      const error = new Error('Download failed')
      vi.mocked(s3Client.send).mockRejectedValueOnce(error)

      const key = 'test-file.txt'
      await expect(downloadFromS3(key)).rejects.toThrow('Download failed')
    })
  })

  describe('deleteFromS3', () => {
    it('should delete a file from S3', async () => {
      vi.mocked(s3Client.send).mockResolvedValueOnce({
        $metadata: { httpStatusCode: 200 }
      } as any)

      const key = 'test-file.txt'
      await deleteFromS3(key)

      expect(DeleteObjectCommand).toHaveBeenCalledWith({
        Bucket: 'test-bucket',
        Key: key
      })

      expect(s3Client.send).toHaveBeenCalledTimes(1)
    })

    it('should handle delete errors', async () => {
      const error = new Error('Delete failed')
      vi.mocked(s3Client.send).mockRejectedValueOnce(error)

      const key = 'test-file.txt'
      await expect(deleteFromS3(key)).rejects.toThrow('Delete failed')
    })
  })

  describe('s3Client initialization', () => {
    it('should initialize with correct configuration', () => {
      // We can't test the constructor call easily, since it happens at import time
      // and mocking S3Client doesn't affect the already-imported s3Client object.
      // Instead, verify that the client and the mocked constructor both exist.
      expect(s3Client).toBeDefined()
      expect(S3Client).toBeDefined()
    })
  })
})
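These tests run under Vitest in the Node environment (per the @vitest-environment pragma). A likely invocation from the sim/ directory, assuming the repo's standard Vitest setup:

npx vitest run lib/uploads/s3-client.test.ts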
119
sim/lib/uploads/s3-client.ts
Normal file
@@ -0,0 +1,119 @@
import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { createLogger } from '@/lib/logs/console-logger'
import { S3_CONFIG } from './setup'

const logger = createLogger('S3Client')

// Create an S3 client
export const s3Client = new S3Client({
  region: S3_CONFIG.region,
  credentials: {
    accessKeyId: process.env.AWS_ACCESS_KEY_ID || '',
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || ''
  }
})

/**
 * File information structure
 */
export interface FileInfo {
  path: string // Path to access the file
  key: string // S3 key or local filename
  name: string // Original filename
  size: number // File size in bytes
  type: string // MIME type
}

/**
 * Upload a file to S3
 * @param file Buffer containing the file data
 * @param fileName Original file name
 * @param contentType MIME type of the file
 * @param size File size in bytes (optional; the buffer length is used if not provided)
 * @returns Object with file information
 */
export async function uploadToS3(
  file: Buffer,
  fileName: string,
  contentType: string,
  size?: number
): Promise<FileInfo> {
  // Create a unique key with a timestamp prefix to prevent collisions,
  // without introducing any directory structure
  const safeFileName = fileName.replace(/\s+/g, '-') // Replace spaces with hyphens
  const uniqueKey = `${Date.now()}-${safeFileName}`

  // Upload the file to S3
  await s3Client.send(new PutObjectCommand({
    Bucket: S3_CONFIG.bucket,
    Key: uniqueKey,
    Body: file,
    ContentType: contentType,
    // Add some useful metadata
    Metadata: {
      originalName: fileName,
      uploadedAt: new Date().toISOString()
    }
  }))

  // Create a path for the API to serve the file
  const servePath = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`

  return {
    path: servePath,
    key: uniqueKey,
    name: fileName,
    size: size ?? file.length,
    type: contentType
  }
}

/**
 * Generate a presigned URL for direct file access
 * @param key S3 object key
 * @param expiresIn Time in seconds until the URL expires
 * @returns Presigned URL
 */
export async function getPresignedUrl(key: string, expiresIn = 3600) {
  const command = new GetObjectCommand({
    Bucket: S3_CONFIG.bucket,
    Key: key
  })

  return getSignedUrl(s3Client, command, { expiresIn })
}

/**
 * Download a file from S3
 * @param key S3 object key
 * @returns File buffer
 */
export async function downloadFromS3(key: string) {
  const command = new GetObjectCommand({
    Bucket: S3_CONFIG.bucket,
    Key: key
  })

  const response = await s3Client.send(command)
  const stream = response.Body as any

  // Convert the stream to a buffer
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = []
    stream.on('data', (chunk: Buffer) => chunks.push(chunk))
    stream.on('end', () => resolve(Buffer.concat(chunks)))
    stream.on('error', reject)
  })
}

/**
 * Delete a file from S3
 * @param key S3 object key
 */
export async function deleteFromS3(key: string) {
  await s3Client.send(new DeleteObjectCommand({
    Bucket: S3_CONFIG.bucket,
    Key: key
  }))
}
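A short usage sketch for these helpers; the file path is illustrative, and the bucket and credentials are assumed to come from the environment as configured above:

import { readFile } from 'fs/promises'
import { uploadToS3, getPresignedUrl, deleteFromS3 } from '@/lib/uploads/s3-client'

async function demo() {
  const buffer = await readFile('/tmp/report.pdf') // assumed local file
  const info = await uploadToS3(buffer, 'report.pdf', 'application/pdf')
  console.log('serve path:', info.path) // e.g. /api/files/serve/s3/<key>

  const url = await getPresignedUrl(info.key, 600) // direct S3 link, valid 10 minutes
  console.log('presigned url:', url)

  await deleteFromS3(info.key)
}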
35
sim/lib/uploads/setup.server.ts
Normal file
@@ -0,0 +1,35 @@
import { ensureUploadsDirectory, USE_S3_STORAGE, S3_CONFIG } from './setup'
import { createLogger } from '@/lib/logs/console-logger'

const logger = createLogger('UploadsSetup')

// Runs immediately on server startup
if (typeof process !== 'undefined') {
  // Log the storage mode
  logger.info(`Storage mode: ${USE_S3_STORAGE ? 'S3' : 'Local'}`)

  if (USE_S3_STORAGE) {
    logger.info('Using S3 storage mode with configuration:')
    logger.info(`- Bucket: ${S3_CONFIG.bucket}`)
    logger.info(`- Region: ${S3_CONFIG.region}`)

    // Verify the AWS credentials
    if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY) {
      logger.warn('AWS credentials are not set in environment variables.')
      logger.warn('Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for S3 storage.')
    } else {
      logger.info('AWS credentials found in environment variables')
    }
  } else {
    // Only initialize the local uploads directory when S3 storage is not in use
    ensureUploadsDirectory().then((success) => {
      if (success) {
        logger.info('Local uploads directory initialized')
      } else {
        logger.error('Failed to initialize local uploads directory')
      }
    })
  }
}

export default ensureUploadsDirectory
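This module works entirely by side effect, so importing it once from any server-only entry point is enough to trigger the setup. The actual import site is not shown in this hunk, but it would look like:

import '@/lib/uploads/setup.server'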
45
sim/lib/uploads/setup.ts
Normal file
@@ -0,0 +1,45 @@
import { existsSync } from 'fs'
import { mkdir } from 'fs/promises'
import path, { join } from 'path'
import { createLogger } from '@/lib/logs/console-logger'

const logger = createLogger('UploadsSetup')

// Define the project root; this works regardless of how the app is started
const PROJECT_ROOT = path.resolve(process.cwd())

// Define the upload directory path using the project root
export const UPLOAD_DIR = join(PROJECT_ROOT, 'uploads')

// S3 storage is used in production, or in development when USE_S3=true
export const USE_S3_STORAGE = process.env.NODE_ENV === 'production' || process.env.USE_S3 === 'true'

export const S3_CONFIG = {
  bucket: process.env.S3_BUCKET_NAME || 'sim-studio-files',
  region: process.env.AWS_REGION || 'us-east-1',
  baseUrl: process.env.S3_BASE_URL || `https://${process.env.S3_BUCKET_NAME || 'sim-studio-files'}.s3.${process.env.AWS_REGION || 'us-east-1'}.amazonaws.com`
}

/**
 * Ensures that the uploads directory exists (for local storage)
 */
export async function ensureUploadsDirectory() {
  if (USE_S3_STORAGE) {
    logger.info('Using S3 storage, skipping local uploads directory creation')
    return true
  }

  try {
    if (!existsSync(UPLOAD_DIR)) {
      logger.info(`Creating uploads directory at ${UPLOAD_DIR}`)
      await mkdir(UPLOAD_DIR, { recursive: true })
      logger.info(`Created uploads directory at ${UPLOAD_DIR}`)
    } else {
      logger.info(`Uploads directory already exists at ${UPLOAD_DIR}`)
    }
    return true
  } catch (error) {
    logger.error('Failed to create uploads directory:', error)
    return false
  }
}
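These exports let callers branch between the two storage backends. A condensed sketch of that pattern follows; this is not the commit's actual upload route, and storeFile and the local serve path are hypothetical names:

import { writeFile } from 'fs/promises'
import { join } from 'path'
import { UPLOAD_DIR, USE_S3_STORAGE } from '@/lib/uploads/setup'
import { uploadToS3 } from '@/lib/uploads/s3-client'

export async function storeFile(buffer: Buffer, name: string, mime: string) {
  if (USE_S3_STORAGE) {
    return uploadToS3(buffer, name, mime) // hosted: S3-backed storage
  }
  const key = `${Date.now()}-${name.replace(/\s+/g, '-')}`
  await writeFile(join(UPLOAD_DIR, key), buffer) // development: local uploads/ directory
  return { path: `/api/files/serve/${key}`, key, name, size: buffer.length, type: mime } // assumed local serve route
}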
300
sim/package-lock.json
generated
@@ -14,7 +14,8 @@
  ],
  "dependencies": {
    "@anthropic-ai/sdk": "^0.39.0",
    "@aws-sdk/client-s3": "^3.758.0",
    "@aws-sdk/client-s3": "^3.779.0",
    "@aws-sdk/s3-request-presigner": "^3.779.0",
    "@cerebras/cerebras_cloud_sdk": "^1.23.0",
    "@hookform/resolvers": "^4.1.3",
    "@radix-ui/react-alert-dialog": "^1.1.5",
@@ -42,6 +43,8 @@
    "clsx": "^2.1.1",
    "cmdk": "^1.0.0",
    "croner": "^9.0.0",
    "csv-parse": "^5.6.0",
    "csv-parser": "^3.2.0",
    "date-fns": "^3.6.0",
    "drizzle-orm": "^0.41.0",
    "framer-motion": "^12.5.0",
@@ -50,9 +53,11 @@
    "ioredis": "^5.6.0",
    "jwt-decode": "^4.0.0",
    "lucide-react": "^0.469.0",
    "mammoth": "^1.9.0",
    "next": "^15.2.4",
    "next-themes": "^0.4.6",
    "openai": "^4.89.0",
    "pdf-parse": "^1.1.1",
    "postgres": "^3.4.5",
    "prismjs": "^1.30.0",
    "react": "^18.2.0",
@@ -73,6 +78,7 @@
    "@testing-library/react": "^16.2.0",
    "@testing-library/user-event": "^14.6.1",
    "@trivago/prettier-plugin-sort-imports": "^5.2.2",
    "@types/lodash": "^4.17.16",
    "@types/node": "^20",
    "@types/prismjs": "^1.26.5",
    "@types/react": "^19",
@@ -381,9 +387,9 @@
      }
    },
    "node_modules/@aws-sdk/client-s3": {
      "version": "3.777.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/client-s3/-/client-s3-3.777.0.tgz",
      "integrity": "sha512-KVX2QD6lLczZxtzIRCpmztgNnGq+spiMIDYqkum/rCBjCX1YJoDHwMYXaMf2EtAH8tFkJmBiA/CiT/J36iN7Xg==",
      "version": "3.779.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/client-s3/-/client-s3-3.779.0.tgz",
      "integrity": "sha512-Lagz+ersQaLNYkpOU9V12PYspT//lGvhPXlKU3OXDj3whDchdqUdtRKY8rmV+jli4KXe+udx/hj2yqrFRfKGvQ==",
      "license": "Apache-2.0",
      "dependencies": {
        "@aws-crypto/sha1-browser": "5.2.0",
@@ -893,6 +899,25 @@
        "node": ">=18.0.0"
      }
    },
    "node_modules/@aws-sdk/s3-request-presigner": {
      "version": "3.779.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/s3-request-presigner/-/s3-request-presigner-3.779.0.tgz",
      "integrity": "sha512-L3mGSh6/9gf3FBVrQziCkuLbaRJMeNbLr6tg9ZSymJcDRzRqAiCWnHrenAavTnAAnm+Lu62Fg/A4g3T+YT+gEg==",
      "license": "Apache-2.0",
      "dependencies": {
        "@aws-sdk/signature-v4-multi-region": "3.775.0",
        "@aws-sdk/types": "3.775.0",
        "@aws-sdk/util-format-url": "3.775.0",
        "@smithy/middleware-endpoint": "^4.1.0",
        "@smithy/protocol-http": "^5.1.0",
        "@smithy/smithy-client": "^4.2.0",
        "@smithy/types": "^4.2.0",
        "tslib": "^2.6.2"
      },
      "engines": {
        "node": ">=18.0.0"
      }
    },
    "node_modules/@aws-sdk/signature-v4-multi-region": {
      "version": "3.775.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.775.0.tgz",
@@ -967,6 +992,21 @@
        "node": ">=18.0.0"
      }
    },
    "node_modules/@aws-sdk/util-format-url": {
      "version": "3.775.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/util-format-url/-/util-format-url-3.775.0.tgz",
      "integrity": "sha512-Nw4nBeyCbWixoGh8NcVpa/i8McMA6RXJIjQFyloJLaPr7CPquz7ZbSl0MUWMFVwP/VHaJ7B+lNN3Qz1iFCEP/Q==",
      "license": "Apache-2.0",
      "dependencies": {
        "@aws-sdk/types": "3.775.0",
        "@smithy/querystring-builder": "^4.0.2",
        "@smithy/types": "^4.2.0",
        "tslib": "^2.6.2"
      },
      "engines": {
        "node": ">=18.0.0"
      }
    },
    "node_modules/@aws-sdk/util-locate-window": {
      "version": "3.723.0",
      "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.723.0.tgz",
@@ -6166,6 +6206,13 @@
      "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==",
      "license": "MIT"
    },
    "node_modules/@types/lodash": {
      "version": "4.17.16",
      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.16.tgz",
      "integrity": "sha512-HX7Em5NYQAXKW+1T+FiuG27NGwzJfCX3s1GjOa7ujxZa52kjJLOr4FUxT+giF6Tgxv1e+/czV/iTtBw27WTU9g==",
      "dev": true,
      "license": "MIT"
    },
    "node_modules/@types/node": {
      "version": "20.17.28",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.28.tgz",
@@ -6398,6 +6445,15 @@
      "integrity": "sha512-f6Oq3ohtSC5RYABhpN8aVOVHpcKvJ1fB1jjuvODTBU5u6BcroYEhphnrywdw8RO+2Vy5ekCdKe5D4dCMdMSrzA==",
      "license": "MIT"
    },
    "node_modules/@xmldom/xmldom": {
      "version": "0.8.10",
      "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz",
      "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==",
      "license": "MIT",
      "engines": {
        "node": ">=10.0.0"
      }
    },
    "node_modules/abort-controller": {
      "version": "3.0.0",
      "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
@@ -6511,6 +6567,15 @@
      "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==",
      "license": "MIT"
    },
    "node_modules/argparse": {
      "version": "1.0.10",
      "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
      "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
      "license": "MIT",
      "dependencies": {
        "sprintf-js": "~1.0.2"
      }
    },
    "node_modules/aria-hidden": {
      "version": "1.2.4",
      "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.4.tgz",
@@ -6573,7 +6638,6 @@
      "version": "1.5.1",
      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
      "dev": true,
      "funding": [
        {
          "type": "github",
@@ -6655,6 +6719,12 @@
        "readable-stream": "^3.4.0"
      }
    },
    "node_modules/bluebird": {
      "version": "3.4.7",
      "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz",
      "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==",
      "license": "MIT"
    },
    "node_modules/bowser": {
      "version": "2.11.0",
      "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.11.0.tgz",
@@ -7091,6 +7161,12 @@
        "node": ">= 0.6"
      }
    },
    "node_modules/core-util-is": {
      "version": "1.0.3",
      "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
      "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
      "license": "MIT"
    },
    "node_modules/cors": {
      "version": "2.8.5",
      "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
@@ -7230,6 +7306,24 @@
      "devOptional": true,
      "license": "MIT"
    },
    "node_modules/csv-parse": {
      "version": "5.6.0",
      "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.6.0.tgz",
      "integrity": "sha512-l3nz3euub2QMg5ouu5U09Ew9Wf6/wQ8I++ch1loQ0ljmzhmfZYrH9fflS22i/PQEvsPvxCwxgz5q7UB8K1JO4Q==",
      "license": "MIT"
    },
    "node_modules/csv-parser": {
      "version": "3.2.0",
      "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
      "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
      "license": "MIT",
      "bin": {
        "csv-parser": "bin/csv-parser"
      },
      "engines": {
        "node": ">= 10"
      }
    },
    "node_modules/d3-color": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
@@ -7484,6 +7578,12 @@
      "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==",
      "license": "Apache-2.0"
    },
    "node_modules/dingbat-to-unicode": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz",
      "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==",
      "license": "BSD-2-Clause"
    },
    "node_modules/dlv": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
@@ -7704,6 +7804,15 @@
        }
      }
    },
    "node_modules/duck": {
      "version": "0.1.12",
      "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz",
      "integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==",
      "license": "BSD",
      "dependencies": {
        "underscore": "^1.13.1"
      }
    },
    "node_modules/dunder-proto": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -8645,6 +8754,12 @@
      ],
      "license": "BSD-3-Clause"
    },
    "node_modules/immediate": {
      "version": "3.0.6",
      "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
      "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==",
      "license": "MIT"
    },
    "node_modules/indent-string": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz",
@@ -8659,7 +8774,6 @@
      "version": "2.0.4",
      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
      "dev": true,
      "license": "ISC"
    },
    "node_modules/input-otp": {
@@ -8816,6 +8930,12 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
    "node_modules/isarray": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
      "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
      "license": "MIT"
    },
    "node_modules/isexe": {
      "version": "3.1.1",
      "resolved": "https://registry.npmjs.org/isexe/-/isexe-3.1.1.tgz",
@@ -8997,6 +9117,54 @@
        "node": ">=6"
      }
    },
    "node_modules/jszip": {
      "version": "3.10.1",
      "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
      "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==",
      "license": "(MIT OR GPL-3.0-or-later)",
      "dependencies": {
        "lie": "~3.3.0",
        "pako": "~1.0.2",
        "readable-stream": "~2.3.6",
        "setimmediate": "^1.0.5"
      }
    },
    "node_modules/jszip/node_modules/pako": {
      "version": "1.0.11",
      "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
      "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
      "license": "(MIT AND Zlib)"
    },
    "node_modules/jszip/node_modules/readable-stream": {
      "version": "2.3.8",
      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
      "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
      "license": "MIT",
      "dependencies": {
        "core-util-is": "~1.0.0",
        "inherits": "~2.0.3",
        "isarray": "~1.0.0",
        "process-nextick-args": "~2.0.0",
        "safe-buffer": "~5.1.1",
        "string_decoder": "~1.1.1",
        "util-deprecate": "~1.0.1"
      }
    },
    "node_modules/jszip/node_modules/safe-buffer": {
      "version": "5.1.2",
      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
      "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
      "license": "MIT"
    },
    "node_modules/jszip/node_modules/string_decoder": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
      "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
      "license": "MIT",
      "dependencies": {
        "safe-buffer": "~5.1.0"
      }
    },
    "node_modules/jwt-decode": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/jwt-decode/-/jwt-decode-4.0.0.tgz",
@@ -9024,6 +9192,15 @@
        "url": "https://ko-fi.com/killymxi"
      }
    },
    "node_modules/lie": {
      "version": "3.3.0",
      "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz",
      "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
      "license": "MIT",
      "dependencies": {
        "immediate": "~3.0.5"
      }
    },
    "node_modules/lilconfig": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz",
@@ -9234,6 +9411,17 @@
        "loose-envify": "cli.js"
      }
    },
    "node_modules/lop": {
      "version": "0.4.2",
      "resolved": "https://registry.npmjs.org/lop/-/lop-0.4.2.tgz",
      "integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==",
      "license": "BSD-2-Clause",
      "dependencies": {
        "duck": "^0.1.12",
        "option": "~0.2.1",
        "underscore": "^1.13.1"
      }
    },
    "node_modules/loupe": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.1.3.tgz",
@@ -9322,6 +9510,30 @@
        "node": ">=10"
      }
    },
    "node_modules/mammoth": {
      "version": "1.9.0",
      "resolved": "https://registry.npmjs.org/mammoth/-/mammoth-1.9.0.tgz",
      "integrity": "sha512-F+0NxzankQV9XSUAuVKvkdQK0GbtGGuqVnND9aVf9VSeUA82LQa29GjLqYU6Eez8LHqSJG3eGiDW3224OKdpZg==",
      "license": "BSD-2-Clause",
      "dependencies": {
        "@xmldom/xmldom": "^0.8.6",
        "argparse": "~1.0.3",
        "base64-js": "^1.5.1",
        "bluebird": "~3.4.0",
        "dingbat-to-unicode": "^1.0.1",
        "jszip": "^3.7.1",
        "lop": "^0.4.2",
        "path-is-absolute": "^1.0.0",
        "underscore": "^1.13.1",
        "xmlbuilder": "^10.0.0"
      },
      "bin": {
        "mammoth": "bin/mammoth"
      },
      "engines": {
        "node": ">=12.0.0"
      }
    },
    "node_modules/marked": {
      "version": "7.0.4",
      "resolved": "https://registry.npmjs.org/marked/-/marked-7.0.4.tgz",
@@ -9651,6 +9863,12 @@
        "node": ">=10.5.0"
      }
    },
    "node_modules/node-ensure": {
      "version": "0.0.0",
      "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz",
      "integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==",
      "license": "MIT"
    },
    "node_modules/node-fetch": {
      "version": "2.7.0",
      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
@@ -9824,6 +10042,12 @@
      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
      "license": "MIT"
    },
    "node_modules/option": {
      "version": "0.2.4",
      "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz",
      "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==",
      "license": "BSD-2-Clause"
    },
    "node_modules/ora": {
      "version": "5.4.1",
      "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz",
@@ -9969,6 +10193,15 @@
        "url": "https://ko-fi.com/killymxi"
      }
    },
    "node_modules/path-is-absolute": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
      "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
      "license": "MIT",
      "engines": {
        "node": ">=0.10.0"
      }
    },
    "node_modules/path-key": {
      "version": "3.1.1",
      "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
@@ -10023,6 +10256,28 @@
        "node": ">= 14.16"
      }
    },
    "node_modules/pdf-parse": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz",
      "integrity": "sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A==",
      "license": "MIT",
      "dependencies": {
        "debug": "^3.1.0",
        "node-ensure": "^0.0.0"
      },
      "engines": {
        "node": ">=6.8.1"
      }
    },
    "node_modules/pdf-parse/node_modules/debug": {
      "version": "3.2.7",
      "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz",
      "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==",
      "license": "MIT",
      "dependencies": {
        "ms": "^2.1.1"
      }
    },
    "node_modules/peberminta": {
      "version": "0.9.0",
      "resolved": "https://registry.npmjs.org/peberminta/-/peberminta-0.9.0.tgz",
@@ -10371,6 +10626,12 @@
        "node": ">=6"
      }
    },
    "node_modules/process-nextick-args": {
      "version": "2.0.1",
      "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
      "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
      "license": "MIT"
    },
    "node_modules/punycode": {
      "version": "2.3.1",
      "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
@@ -11697,6 +11958,12 @@
      "integrity": "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ==",
      "license": "MIT"
    },
    "node_modules/setimmediate": {
      "version": "1.0.5",
      "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
      "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
      "license": "MIT"
    },
    "node_modules/sharp": {
      "version": "0.33.5",
      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz",
@@ -11993,6 +12260,12 @@
        "source-map": "^0.6.0"
      }
    },
    "node_modules/sprintf-js": {
      "version": "1.0.3",
      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
      "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==",
      "license": "BSD-3-Clause"
    },
    "node_modules/stackback": {
      "version": "0.0.2",
      "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz",
@@ -12643,6 +12916,12 @@
      "integrity": "sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==",
      "license": "MIT"
    },
    "node_modules/underscore": {
      "version": "1.13.7",
      "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz",
      "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==",
      "license": "MIT"
    },
    "node_modules/undici-types": {
      "version": "6.19.8",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
@@ -13659,6 +13938,15 @@
        "node": ">=18"
      }
    },
    "node_modules/xmlbuilder": {
      "version": "10.1.1",
      "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz",
      "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==",
      "license": "MIT",
      "engines": {
        "node": ">=4.0"
      }
    },
    "node_modules/xmlchars": {
      "version": "2.2.0",
      "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
|
||||
@@ -28,7 +28,8 @@
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.39.0",
    "@aws-sdk/client-s3": "^3.758.0",
    "@aws-sdk/client-s3": "^3.779.0",
    "@aws-sdk/s3-request-presigner": "^3.779.0",
    "@cerebras/cerebras_cloud_sdk": "^1.23.0",
    "@hookform/resolvers": "^4.1.3",
    "@radix-ui/react-alert-dialog": "^1.1.5",
@@ -56,6 +57,8 @@
    "clsx": "^2.1.1",
    "cmdk": "^1.0.0",
    "croner": "^9.0.0",
    "csv-parse": "^5.6.0",
    "csv-parser": "^3.2.0",
    "date-fns": "^3.6.0",
    "drizzle-orm": "^0.41.0",
    "framer-motion": "^12.5.0",
@@ -64,9 +67,11 @@
    "ioredis": "^5.6.0",
    "jwt-decode": "^4.0.0",
    "lucide-react": "^0.469.0",
    "mammoth": "^1.9.0",
    "next": "^15.2.4",
    "next-themes": "^0.4.6",
    "openai": "^4.89.0",
    "pdf-parse": "^1.1.1",
    "postgres": "^3.4.5",
    "prismjs": "^1.30.0",
    "react": "^18.2.0",
@@ -87,6 +92,7 @@
    "@testing-library/react": "^16.2.0",
    "@testing-library/user-event": "^14.6.1",
    "@trivago/prettier-plugin-sort-imports": "^5.2.2",
    "@types/lodash": "^4.17.16",
    "@types/node": "^20",
    "@types/prismjs": "^1.26.5",
    "@types/react": "^19",
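
The dependency changes above add pdf-parse, mammoth, and csv-parse alongside the upgraded S3 client packages. As a rough sketch of how these three parsers can sit behind a single entry point — the parseBuffer name and the MIME-type switch are illustrative assumptions, not the actual /api/files/parse implementation:

import mammoth from 'mammoth'
import pdf from 'pdf-parse'
import { parse as parseCsv } from 'csv-parse/sync'

// Illustrative helper: dispatch a raw upload buffer to the parser that
// matches its MIME type. The name and structure are assumptions.
export async function parseBuffer(buffer: Buffer, mimeType: string): Promise<string> {
  switch (mimeType) {
    case 'application/pdf': {
      // pdf-parse resolves with the extracted text of the whole document
      const { text } = await pdf(buffer)
      return text
    }
    case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': {
      // mammoth extracts the raw text of a .docx, dropping styling
      const { value } = await mammoth.extractRawText({ buffer })
      return value
    }
    case 'text/csv': {
      // csv-parse/sync returns one object per row when columns: true
      const rows = parseCsv(buffer, { columns: true }) as Record<string, string>[]
      return rows.map((row) => JSON.stringify(row)).join('\n')
    }
    default:
      // Fall back to treating the payload as UTF-8 text
      return buffer.toString('utf-8')
  }
}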
3
sim/tools/file/index.ts
Normal file
@@ -0,0 +1,3 @@
import { fileParserTool } from './parser'

export const fileParseTool = fileParserTool
191
sim/tools/file/parser.ts
Normal file
@@ -0,0 +1,191 @@
import { ToolConfig, ToolResponse } from '../types'

export interface FileParserInput {
  filePath: string | string[]
  fileType?: string
}

export interface FileParseResult {
  content: string
  fileType: string
  size: number
  name: string
  binary: boolean
  metadata?: Record<string, any>
}

export interface FileParserOutputData {
  files: FileParseResult[]
  combinedContent: string
  [key: string]: any
}

export interface FileParserOutput extends ToolResponse {
  output: FileParserOutputData
}

export const fileParserTool: ToolConfig<FileParserInput, FileParserOutput> = {
  id: 'file_parser',
  name: 'File Parser',
  description: 'Parse one or more uploaded files (text, PDF, CSV, images, etc.)',
  version: '1.0.0',

  params: {
    filePath: {
      type: 'string',
      required: true,
      description: 'Path to the uploaded file(s). Can be a single path or an array of paths.',
    },
    fileType: {
      type: 'string',
      required: false,
      description: 'Type of file to parse (auto-detected if not specified)',
    },
  },

  request: {
    url: '/api/files/parse',
    method: 'POST',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    body: (params: any) => {
      console.log('[fileParserTool] Request parameters:', params)

      // Check for valid input
      if (!params) {
        console.error('[fileParserTool] No parameters provided')
        throw new Error('No parameters provided')
      }

      // Handle various input formats
      let filePath = null

      // Handle multiple files case from block output
      if (params.files && Array.isArray(params.files)) {
        console.log('[fileParserTool] Processing multiple files:', params.files.length)
        filePath = params.files.map((file: any) => file.path)
      }
      // Handle the case where params is an object with file property
      else if (params.file) {
        if (Array.isArray(params.file)) {
          console.log(
            '[fileParserTool] Processing multiple files from file array:',
            params.file.length
          )
          filePath = params.file.map((file: any) => file.path)
        } else if (params.file.path) {
          console.log('[fileParserTool] Extracted file path from file object:', params.file.path)
          filePath = params.file.path
        }
      }
      // Handle direct filePath parameter
      else if (params.filePath) {
        console.log('[fileParserTool] Using direct filePath parameter:', params.filePath)
        filePath = params.filePath
      }

      if (!filePath) {
        console.error('[fileParserTool] Missing required parameter: filePath')
        throw new Error('Missing required parameter: filePath')
      }

      return {
        filePath,
        fileType: params.fileType,
      }
    },
    isInternalRoute: true,
  },

  transformResponse: async (response: Response): Promise<FileParserOutput> => {
    console.log('[fileParserTool] Received response status:', response.status)

    try {
      const result = await response.json()
      console.log('[fileParserTool] Response parsed successfully')

      if (!response.ok) {
        const errorMsg = result.error || 'File parsing failed'
        console.error('[fileParserTool] Error in response:', errorMsg)
        throw new Error(errorMsg)
      }

      // Handle multiple files response
      if (result.results) {
        console.log('[fileParserTool] Processing multiple files response')

        // Extract individual file results
        const fileResults = result.results.map((fileResult: any) => {
          if (!fileResult.success) {
            console.warn(
              `[fileParserTool] Error parsing file ${fileResult.filePath}: ${fileResult.error}`
            )
            return {
              content: `Error parsing file: ${fileResult.error || 'Unknown error'}`,
              fileType: 'text/plain',
              size: 0,
              name: fileResult.filePath.split('/').pop() || 'unknown',
              binary: false,
            }
          }

          return fileResult.output
        })

        // Combine all file contents with clear dividers
        const combinedContent = fileResults
          .map((file: FileParseResult, index: number) => {
            const divider = `\n${'='.repeat(80)}\n`

            return file.content + (index < fileResults.length - 1 ? divider : '')
          })
          .join('\n')

        // Create the base output
        const output: FileParserOutputData = {
          files: fileResults,
          combinedContent,
        }

        // Add named properties for each file for dropdown access
        fileResults.forEach((file: FileParseResult, index: number) => {
          output[`file${index + 1}`] = file
        })

        return {
          success: true,
          output,
        }
      }

      // Handle single file response
      if (result.success) {
        console.log('[fileParserTool] Successfully parsed file:', result.output.name)

        // For a single file, create the output with both array and named property
        const output: FileParserOutputData = {
          files: [result.output],
          combinedContent: result.output.content,
          file1: result.output,
        }

        return {
          success: true,
          output,
        }
      }

      // Handle error response
      throw new Error(result.error || 'File parsing failed')
    } catch (error) {
      console.error('[fileParserTool] Error processing response:', error)
      throw error
    }
  },

  transformError: (error: any) => {
    console.error('[fileParserTool] Error occurred:', error)
    return error.message || 'File parsing failed'
  },
}
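
Because isInternalRoute is true, the executor posts the body built above to the app's own /api/files/parse endpoint. A minimal sketch of exercising that route directly — the localhost origin and the upload paths are assumptions for local development, and the response branches mirror transformResponse above:

// Sketch: send the same payload shape that fileParserTool's request.body()
// produces. Origin and file paths are hypothetical.
async function demoParse() {
  const response = await fetch('http://localhost:3000/api/files/parse', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      filePath: ['/uploads/report.pdf', '/uploads/data.csv'], // hypothetical paths
      fileType: undefined, // omit to let the route auto-detect
    }),
  })
  const result = await response.json()
  // Multiple files come back as { results: [...] }; a single file as
  // { success, output } — the two branches transformResponse handles.
  console.log(result)
}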
@@ -5,6 +5,7 @@ import { confluenceListTool, confluenceRetrieveTool, confluenceUpdateTool } from
import { docsCreateTool, docsReadTool, docsWriteTool } from './docs'
import { driveDownloadTool, driveListTool, driveUploadTool } from './drive'
import { exaAnswerTool, exaFindSimilarLinksTool, exaGetContentsTool, exaSearchTool } from './exa'
import { fileParseTool } from './file'
import { scrapeTool } from './firecrawl/scrape'
import { functionExecuteTool, webcontainerExecuteTool } from './function'
import {
@@ -55,6 +56,7 @@ export const tools: Record<string, ToolConfig> = {
  function_execute: functionExecuteTool,
  webcontainer_execute: webcontainerExecuteTool,
  vision_tool: visionTool,
  file_parser: fileParseTool,
  firecrawl_scrape: scrapeTool,
  jina_readurl: readUrlTool,
  slack_message: slackMessageTool,
@@ -369,9 +371,7 @@ export async function executeTool(
      },
    }
  } catch (error) {
    logger.error(`Error in post-processing for tool ${toolId}:`, {
      error,
    })
    logger.error(`Error in post-processing for tool ${toolId}:`, { error })
    // Return original result if post-processing fails
    // Still include timing data
    const endTime = new Date()
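
With file_parser registered, the tool can be dispatched by id. A sketch of both the registry lookup and a dispatch call — the '@/tools' import alias and executeTool's parameter list beyond the tool id are assumptions, since this hunk truncates the signature:

import { executeTool, tools } from '@/tools' // import alias assumed

async function runParser() {
  // Registry lookup added by the diff above
  const config = tools['file_parser']
  console.log(config.description) // 'Parse one or more uploaded files (text, PDF, CSV, images, etc.)'

  // Dispatch by id; passing params as the second argument is an assumed
  // part of executeTool's signature
  const result = await executeTool('file_parser', {
    filePath: '/uploads/notes.txt', // hypothetical upload path
  })
  console.log(result)
}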