mirror of
https://github.com/simstudioai/sim.git
synced 2026-02-14 00:15:09 -05:00
* improvement(collab): do not refetch active workflow id * progress on files * more integrations * separate server and client logic * consolidate more code * fix integrations * fix types * consolidate more code * fix tests * fix more bugbot comments * fix type check * fix circular import * address more bugbot comments * fix ocr integrations * fix typing * remove leftover type * address bugbot comment * fix file block adv mode * fix * normalize file input * fix v2 blocks for ocr * fix for v2 versions * fix more v2 blocks * update single file blocks * make interface simpler * cleanup fireflies * remove file only annotation * accept all types * added wand to ssh block * user files should be passed through * improve docs * fix slack to include successful execs * fix dropbox upload file * fix sendgrid * fix dropbox * fix * fix * update skills * fix uploaded file --------- Co-authored-by: waleed <walif6@gmail.com>
275 lines
9.2 KiB
TypeScript
275 lines
9.2 KiB
TypeScript
import { createLogger } from '@sim/logger'
|
|
import { inferContextFromKey } from '@/lib/uploads/utils/file-utils'
|
|
import type { UserFile } from '@/executor/types'
|
|
import type {
|
|
FileParseApiMultiResponse,
|
|
FileParseApiResponse,
|
|
FileParseResult,
|
|
FileParserInput,
|
|
FileParserOutput,
|
|
FileParserOutputData,
|
|
FileParserV3Output,
|
|
FileParserV3OutputData,
|
|
FileUploadInput,
|
|
} from '@/tools/file/types'
|
|
import type { ToolConfig } from '@/tools/types'
|
|
|
|
// Logger created under the name 'FileParserTool'; used by the request body builder
// and the response parser defined in this file.
const logger = createLogger('FileParserTool')
|
|
|
|
/**
 * Parameters accepted by the request body builder of the file parser tools.
 *
 * Every field of `FileParserInput` is optional here, and two extra fields are
 * recognised: a legacy `files` array and an `_context` object with execution ids.
 */
interface ToolBodyParams extends Partial<FileParserInput> {
  /** Legacy multi-file input (older blocks); each entry is resolved to a path or URL. */
  files?: FileUploadInput[]

  /** Identifiers forwarded in the request body alongside the file path(s). */
  _context?: {
    workspaceId?: string
    workflowId?: string
    executionId?: string
  }
}
|
|
|
|
/**
 * Converts the HTTP response of the file parse endpoint into the tool output.
 *
 * The JSON body is either a multi-file payload (it has a `results` property) or a
 * single-file payload. In both shapes an entry may wrap the parsed file in an
 * `output` property or be the parsed file itself.
 *
 * @param response - Response returned by the parse endpoint; its body is read as JSON.
 * @returns An object with `success: true` and an output holding the parsed files,
 *   their combined text content and, when at least one result carries a `file`,
 *   the collected `processedFiles`.
 */
const parseFileParserResponse = async (response: Response): Promise<FileParserOutput> => {
  logger.info('Received response status:', response.status)

  const result = (await response.json()) as FileParseApiResponse | FileParseApiMultiResponse
  logger.info('Response parsed successfully')

  // Handle multiple files response
  if ('results' in result) {
    logger.info('Processing multiple files response')

    // Unwrap each entry: prefer its `output`, otherwise treat the entry itself as the result.
    const fileResults: FileParseResult[] = result.results.map(
      (fileResult) => fileResult.output || (fileResult as unknown as FileParseResult)
    )

    // Collect UserFile objects from the results that carry one.
    const processedFiles: UserFile[] = fileResults
      .filter((file): file is FileParseResult & { file: UserFile } => Boolean(file.file))
      .map((file) => file.file)

    // Combine all file contents with a clear divider after every file except the last.
    // A result without content (for example an entry that has no `output`) contributes
    // an empty string instead of the literal text "undefined".
    const divider = `\n${'='.repeat(80)}\n`
    const lastIndex = fileResults.length - 1
    const combinedContent = fileResults
      .map((file, index) => (file.content ?? '') + (index < lastIndex ? divider : ''))
      .join('\n')

    const output: FileParserOutputData = {
      files: fileResults,
      combinedContent,
      // `processedFiles` is only present when at least one file object was found.
      ...(processedFiles.length > 0 && { processedFiles }),
    }

    return {
      success: true,
      output,
    }
  }

  // Handle single file response
  logger.info('Successfully parsed file:', result.output?.name || 'unknown')

  const fileOutput: FileParseResult = result.output || (result as unknown as FileParseResult)

  // A single file is still reported in array form so both shapes produce the same output.
  const output: FileParserOutputData = {
    files: [fileOutput],
    combinedContent: fileOutput?.content || result.content || '',
    ...(fileOutput?.file && { processedFiles: [fileOutput.file] }),
  }

  return {
    success: true,
    output,
  }
}
|
|
|
|
/**
 * Derives a path or URL string from a single uploaded-file value.
 *
 * Checked in order: a string `path`, a string `url`, then a string `key`, which is
 * turned into a `/api/files/serve/...` URL. The `context` query value comes from the
 * object's own string `context` when present, otherwise from `inferContextFromKey`.
 *
 * @param fileInput - Any value; non-objects yield `null`.
 * @returns The resolved string, or `null` when none of the properties is a string.
 */
const resolveUploadedFilePath = (fileInput: unknown): string | null => {
  if (!fileInput || typeof fileInput !== 'object') return null

  const record = fileInput as Record<string, unknown>

  if (typeof record.path === 'string') return record.path
  if (typeof record.url === 'string') return record.url

  if (typeof record.key === 'string') {
    const context =
      typeof record.context === 'string' ? record.context : inferContextFromKey(record.key)
    // Both dynamic URL parts are encoded so reserved characters cannot alter the query string.
    return `/api/files/serve/${encodeURIComponent(record.key)}?context=${encodeURIComponent(context)}`
  }

  return null
}

/**
 * Resolves every entry of a file array to a path or URL.
 *
 * @param files - Uploaded-file values to resolve.
 * @returns One non-empty string per input entry, in input order.
 * @throws Error when any entry cannot be resolved to a non-empty string.
 */
const resolveUploadedFilePaths = (files: readonly unknown[]): string[] => {
  const paths = files
    .map((file) => resolveUploadedFilePath(file))
    .filter((path): path is string => Boolean(path))

  if (paths.length !== files.length) {
    throw new Error('Invalid file input: One or more files are missing path or URL')
  }

  return paths
}

/**
 * File parser tool (version 1.0.0).
 *
 * Sends a POST request to `/api/files/parse` with the file path(s) derived from the
 * tool parameters and converts the response with `parseFileParserResponse`.
 */
export const fileParserTool: ToolConfig<FileParserInput, FileParserOutput> = {
  id: 'file_parser',
  name: 'File Parser',
  description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)',
  version: '1.0.0',

  params: {
    filePath: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description: 'Path to the file(s). Can be a single path, URL, or an array of paths.',
    },
    file: {
      type: 'file',
      required: false,
      visibility: 'user-only',
      description: 'Uploaded file(s) to parse',
    },
    fileType: {
      type: 'string',
      required: false,
      visibility: 'hidden',
      description: 'Type of file to parse (auto-detected if not specified)',
    },
  },

  request: {
    url: '/api/files/parse',
    method: 'POST',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    /**
     * Builds the JSON body for the parse endpoint.
     *
     * Precedence: direct filePath > file array > single file object > legacy files array.
     *
     * @throws Error when no parameters are given, when a file entry cannot be resolved
     *   to a path or URL, or when no file path can be determined at all.
     */
    body: (params: ToolBodyParams) => {
      logger.info('Request parameters received by tool body:', params)

      if (!params) {
        logger.error('Tool body received no parameters')
        throw new Error('No parameters provided to tool body')
      }

      const { filePath, file, files } = params
      let determinedFilePath: string | string[] | null = null

      // 1. Direct filePath (URL, single path, or array of paths).
      //    An empty array is truthy but carries no path, so it is treated as absent
      //    and does not shadow uploaded files.
      if (filePath && (!Array.isArray(filePath) || filePath.length > 0)) {
        logger.info('Tool body found direct filePath:', filePath)
        determinedFilePath = filePath
      }
      // 2. File upload (array)
      else if (Array.isArray(file) && file.length > 0) {
        logger.info('Tool body processing file array upload')
        determinedFilePath = resolveUploadedFilePaths(file)
      }
      // 3. File upload (single object)
      else if (file && !Array.isArray(file)) {
        logger.info('Tool body processing single file object upload')
        const resolvedPath = resolveUploadedFilePath(file)
        if (!resolvedPath) {
          throw new Error('Invalid file input: Missing path or URL')
        }
        determinedFilePath = resolvedPath
      }
      // 4. Deprecated multiple files case (from older blocks?)
      else if (Array.isArray(files)) {
        logger.info('Tool body processing legacy files array:', files.length)
        if (files.length > 0) {
          determinedFilePath = resolveUploadedFilePaths(files)
        } else {
          logger.warn('Legacy files array provided but is empty')
        }
      }

      // Final check if filePath was determined
      if (!determinedFilePath) {
        logger.error('Tool body could not determine filePath from parameters:', params)
        throw new Error('Missing required parameter: filePath')
      }

      logger.info('Tool body determined filePath:', determinedFilePath)
      return {
        filePath: determinedFilePath,
        fileType: params.fileType,
        // An explicit workspaceId parameter wins over the one from the execution context.
        workspaceId: params.workspaceId || params._context?.workspaceId,
        workflowId: params._context?.workflowId,
        executionId: params._context?.executionId,
      }
    },
  },

  transformResponse: parseFileParserResponse,

  outputs: {
    files: { type: 'array', description: 'Array of parsed files with content and metadata' },
    combinedContent: { type: 'string', description: 'Combined content of all parsed files' },
    processedFiles: { type: 'file[]', description: 'Array of UserFile objects for downstream use' },
  },
}
|
|
|
|
/**
 * File parser tool (version 2.0.0).
 *
 * Reuses the params, request configuration and response parser of `fileParserTool`;
 * it differs in id, version and the outputs it declares (`files` and `combinedContent`).
 */
export const fileParserV2Tool: ToolConfig<FileParserInput, FileParserOutput> = {
  id: 'file_parser_v2',
  name: 'File Parser',
  description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)',
  version: '2.0.0',

  // Same inputs and request as fileParserTool.
  params: fileParserTool.params,
  request: fileParserTool.request,
  transformResponse: parseFileParserResponse,

  outputs: {
    files: {
      type: 'array',
      description: 'Array of parsed files with content, metadata, and file properties',
    },
    combinedContent: {
      type: 'string',
      description: 'All file contents merged into a single text string',
    },
  },
}
|
|
|
|
/**
 * File parser tool (version 3.0.0).
 *
 * Reuses the params and request configuration of `fileParserTool`, but reshapes the
 * parsed response so that `files` holds the `processedFiles` entries (UserFile objects)
 * and only `files` and `combinedContent` are returned.
 */
export const fileParserV3Tool: ToolConfig<FileParserInput, FileParserV3Output> = {
  id: 'file_parser_v3',
  name: 'File Parser',
  description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)',
  version: '3.0.0',
  params: fileParserTool.params,
  request: fileParserTool.request,

  /**
   * Parses the response with `parseFileParserResponse`, then keeps only the
   * processed file objects and the combined content.
   */
  transformResponse: async (response: Response): Promise<FileParserV3Output> => {
    const { output: rawOutput } = await parseFileParserResponse(response)
    const { processedFiles, combinedContent } = rawOutput as FileParserOutputData

    // Fall back to an empty list when no processed files were reported.
    let files: UserFile[] = []
    if (Array.isArray(processedFiles) && processedFiles.length > 0) {
      files = processedFiles
    }

    const cleanedOutput: FileParserV3OutputData = { files, combinedContent }

    return { success: true, output: cleanedOutput }
  },

  outputs: {
    files: { type: 'file[]', description: 'Parsed files as UserFile objects' },
    combinedContent: { type: 'string', description: 'Combined content of all parsed files' },
  },
}
|