diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts index c80cceb56..6a956a45e 100644 --- a/apps/docs/components/ui/icon-mapping.ts +++ b/apps/docs/components/ui/icon-mapping.ts @@ -154,7 +154,7 @@ export const blockTypeToIconMap: Record = { elasticsearch: ElasticsearchIcon, elevenlabs: ElevenLabsIcon, exa: ExaAIIcon, - file: DocumentIcon, + file_v2: DocumentIcon, firecrawl: FirecrawlIcon, fireflies: FirefliesIcon, github_v2: GithubIcon, @@ -196,7 +196,7 @@ export const blockTypeToIconMap: Record = { microsoft_excel_v2: MicrosoftExcelIcon, microsoft_planner: MicrosoftPlannerIcon, microsoft_teams: MicrosoftTeamsIcon, - mistral_parse: MistralIcon, + mistral_parse_v2: MistralIcon, mongodb: MongoDBIcon, mysql: MySQLIcon, neo4j: Neo4jIcon, diff --git a/apps/docs/content/docs/en/tools/file.mdx b/apps/docs/content/docs/en/tools/file.mdx index b90c163bd..2a0cc1b87 100644 --- a/apps/docs/content/docs/en/tools/file.mdx +++ b/apps/docs/content/docs/en/tools/file.mdx @@ -6,7 +6,7 @@ description: Read and parse multiple files import { BlockInfoCard } from "@/components/ui/block-info-card" @@ -48,7 +48,7 @@ Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc | Parameter | Type | Description | | --------- | ---- | ----------- | -| `files` | array | Array of parsed files | -| `combinedContent` | string | Combined content of all parsed files | +| `files` | array | Array of parsed files with content, metadata, and file properties | +| `combinedContent` | string | All file contents merged into a single text string | diff --git a/apps/docs/content/docs/en/tools/mistral_parse.mdx b/apps/docs/content/docs/en/tools/mistral_parse.mdx index b919be56a..ac0b2150c 100644 --- a/apps/docs/content/docs/en/tools/mistral_parse.mdx +++ b/apps/docs/content/docs/en/tools/mistral_parse.mdx @@ -6,7 +6,7 @@ description: Extract text from PDF documents import { BlockInfoCard } from "@/components/ui/block-info-card" @@ -57,15 +57,5 @@ Parse PDF documents using Mistral OCR API | `success` | boolean | Whether the PDF was parsed successfully | | `content` | string | Extracted content in the requested format \(markdown, text, or JSON\) | | `metadata` | object | Processing metadata including jobId, fileType, pageCount, and usage info | -| ↳ `jobId` | string | Unique job identifier | -| ↳ `fileType` | string | File type \(e.g., pdf\) | -| ↳ `fileName` | string | Original file name | -| ↳ `source` | string | Source type \(url\) | -| ↳ `pageCount` | number | Number of pages processed | -| ↳ `model` | string | Mistral model used | -| ↳ `resultType` | string | Output format \(markdown, text, json\) | -| ↳ `processedAt` | string | Processing timestamp | -| ↳ `sourceUrl` | string | Source URL if applicable | -| ↳ `usageInfo` | object | Usage statistics from OCR processing | diff --git a/apps/sim/app/api/tools/textract/parse/route.ts b/apps/sim/app/api/tools/textract/parse/route.ts index 8e2696e48..96bc1121a 100644 --- a/apps/sim/app/api/tools/textract/parse/route.ts +++ b/apps/sim/app/api/tools/textract/parse/route.ts @@ -423,7 +423,12 @@ export async function POST(request: NextRequest) { let fileUrl = validatedData.filePath - if (validatedData.filePath?.includes('/api/files/serve/')) { + const isInternalFilePath = + validatedData.filePath?.startsWith('/api/files/serve/') || + (validatedData.filePath?.startsWith('/') && + validatedData.filePath?.includes('/api/files/serve/')) + + if (isInternalFilePath) { try { const storageKey = extractStorageKey(validatedData.filePath) const context = inferContextFromKey(storageKey) diff --git a/apps/sim/blocks/blocks/file.ts b/apps/sim/blocks/blocks/file.ts index 46bf0f138..bfc0b903c 100644 --- a/apps/sim/blocks/blocks/file.ts +++ b/apps/sim/blocks/blocks/file.ts @@ -1,13 +1,14 @@ import { createLogger } from '@sim/logger' import { DocumentIcon } from '@/components/icons' import type { BlockConfig, SubBlockType } from '@/blocks/types' +import { createVersionedToolSelector } from '@/blocks/utils' import type { FileParserOutput } from '@/tools/file/types' const logger = createLogger('FileBlock') export const FileBlock: BlockConfig = { type: 'file', - name: 'File', + name: 'File (Legacy)', description: 'Read and parse multiple files', longDescription: `Integrate File into the workflow. Can upload a file manually or insert a file url.`, bestPractices: ` @@ -17,6 +18,7 @@ export const FileBlock: BlockConfig = { category: 'tools', bgColor: '#40916C', icon: DocumentIcon, + hideFromToolbar: true, subBlocks: [ { id: 'inputMethod', @@ -123,3 +125,92 @@ export const FileBlock: BlockConfig = { }, }, } + +export const FileV2Block: BlockConfig = { + ...FileBlock, + type: 'file_v2', + name: 'File', + description: 'Read and parse multiple files', + hideFromToolbar: false, + subBlocks: [ + { + id: 'file', + title: 'Files', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'fileInput', + acceptedTypes: + '.pdf,.csv,.doc,.docx,.txt,.md,.xlsx,.xls,.html,.htm,.pptx,.ppt,.json,.xml,.rtf', + placeholder: 'Upload files to process', + multiple: true, + mode: 'basic', + maxSize: 100, + }, + { + id: 'filePath', + title: 'Files', + type: 'short-input' as SubBlockType, + canonicalParamId: 'fileInput', + placeholder: 'File URL or reference from previous block', + mode: 'advanced', + }, + ], + tools: { + access: ['file_parser_v2'], + config: { + tool: createVersionedToolSelector({ + baseToolSelector: () => 'file_parser', + suffix: '_v2', + fallbackToolId: 'file_parser_v2', + }), + params: (params) => { + const fileInput = params.file || params.filePath || params.fileInput + if (!fileInput) { + logger.error('No file input provided') + throw new Error('File is required') + } + + if (typeof fileInput === 'string') { + return { + filePath: fileInput.trim(), + fileType: params.fileType || 'auto', + workspaceId: params._context?.workspaceId, + } + } + + if (Array.isArray(fileInput) && fileInput.length > 0) { + const filePaths = fileInput.map((file) => file.path) + return { + filePath: filePaths.length === 1 ? filePaths[0] : filePaths, + fileType: params.fileType || 'auto', + } + } + + if (fileInput?.path) { + return { + filePath: fileInput.path, + fileType: params.fileType || 'auto', + } + } + + logger.error('Invalid file input format') + throw new Error('Invalid file input') + }, + }, + }, + inputs: { + fileInput: { type: 'json', description: 'File input (upload or URL reference)' }, + filePath: { type: 'string', description: 'File URL (advanced mode)' }, + file: { type: 'json', description: 'Uploaded file data (basic mode)' }, + fileType: { type: 'string', description: 'File type' }, + }, + outputs: { + files: { + type: 'json', + description: 'Array of parsed file objects with content, metadata, and file properties', + }, + combinedContent: { + type: 'string', + description: 'All file contents merged into a single text string', + }, + }, +} diff --git a/apps/sim/blocks/blocks/mistral_parse.ts b/apps/sim/blocks/blocks/mistral_parse.ts index c551f00ce..2cba8700a 100644 --- a/apps/sim/blocks/blocks/mistral_parse.ts +++ b/apps/sim/blocks/blocks/mistral_parse.ts @@ -1,11 +1,13 @@ import { MistralIcon } from '@/components/icons' import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types' +import { createVersionedToolSelector } from '@/blocks/utils' import type { MistralParserOutput } from '@/tools/mistral/types' export const MistralParseBlock: BlockConfig = { type: 'mistral_parse', - name: 'Mistral Parser', + name: 'Mistral Parser (Legacy)', description: 'Extract text from PDF documents', + hideFromToolbar: true, authMode: AuthMode.ApiKey, longDescription: `Integrate Mistral Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`, docsLink: 'https://docs.sim.ai/tools/mistral_parse', @@ -13,7 +15,6 @@ export const MistralParseBlock: BlockConfig = { bgColor: '#000000', icon: MistralIcon, subBlocks: [ - // Show input method selection { id: 'inputMethod', title: 'Select Input Method', @@ -23,8 +24,6 @@ export const MistralParseBlock: BlockConfig = { { id: 'upload', label: 'Upload PDF Document' }, ], }, - - // URL input - conditional on inputMethod { id: 'filePath', title: 'PDF Document URL', @@ -35,8 +34,6 @@ export const MistralParseBlock: BlockConfig = { value: 'url', }, }, - - // File upload option { id: 'fileUpload', title: 'Upload PDF', @@ -46,9 +43,8 @@ export const MistralParseBlock: BlockConfig = { field: 'inputMethod', value: 'upload', }, - maxSize: 50, // 50MB max via direct upload + maxSize: 50, }, - { id: 'resultType', title: 'Output Format', @@ -65,28 +61,6 @@ export const MistralParseBlock: BlockConfig = { type: 'short-input', placeholder: 'e.g. 0,1,2 (leave empty for all pages)', }, - /* - * Image-related parameters - temporarily disabled - * Uncomment if PDF image extraction is needed - * - { - id: 'includeImageBase64', - title: 'Include PDF Images', - type: 'switch', - }, - { - id: 'imageLimit', - title: 'Max Images', - type: 'short-input', - placeholder: 'Maximum number of images to extract', - }, - { - id: 'imageMinSize', - title: 'Min Image Size (px)', - type: 'short-input', - placeholder: 'Min width/height in pixels', - }, - */ { id: 'apiKey', title: 'API Key', @@ -101,18 +75,15 @@ export const MistralParseBlock: BlockConfig = { config: { tool: () => 'mistral_parser', params: (params) => { - // Basic validation if (!params || !params.apiKey || params.apiKey.trim() === '') { throw new Error('Mistral API key is required') } - // Build parameters object - file processing is now handled at the tool level - const parameters: any = { + const parameters: Record = { apiKey: params.apiKey.trim(), resultType: params.resultType || 'markdown', } - // Set filePath or fileUpload based on input method const inputMethod = params.inputMethod || 'url' if (inputMethod === 'url') { if (!params.filePath || params.filePath.trim() === '') { @@ -123,11 +94,9 @@ export const MistralParseBlock: BlockConfig = { if (!params.fileUpload) { throw new Error('Please upload a PDF document') } - // Pass the entire fileUpload object to the tool parameters.fileUpload = params.fileUpload } - // Convert pages input from string to array of numbers if provided let pagesArray: number[] | undefined if (params.pages && params.pages.trim() !== '') { try { @@ -146,12 +115,12 @@ export const MistralParseBlock: BlockConfig = { if (pagesArray && pagesArray.length === 0) { pagesArray = undefined } - } catch (error: any) { - throw new Error(`Page number format error: ${error.message}`) + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error) + throw new Error(`Page number format error: ${errorMessage}`) } } - // Add optional parameters if (pagesArray && pagesArray.length > 0) { parameters.pages = pagesArray } @@ -173,3 +142,127 @@ export const MistralParseBlock: BlockConfig = { metadata: { type: 'json', description: 'Processing metadata' }, }, } + +export const MistralParseV2Block: BlockConfig = { + ...MistralParseBlock, + type: 'mistral_parse_v2', + name: 'Mistral Parser', + description: 'Extract text from PDF documents', + hideFromToolbar: false, + subBlocks: [ + { + id: 'fileUpload', + title: 'PDF Document', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'document', + acceptedTypes: 'application/pdf', + placeholder: 'Upload a PDF document', + mode: 'basic', + maxSize: 50, + }, + { + id: 'filePath', + title: 'PDF Document', + type: 'short-input' as SubBlockType, + canonicalParamId: 'document', + placeholder: 'Document URL or reference from previous block', + mode: 'advanced', + }, + { + id: 'resultType', + title: 'Output Format', + type: 'dropdown', + options: [ + { id: 'markdown', label: 'Markdown (Formatted)' }, + { id: 'text', label: 'Plain Text' }, + { id: 'json', label: 'JSON (Raw)' }, + ], + }, + { + id: 'pages', + title: 'Specific Pages', + type: 'short-input', + placeholder: 'e.g. 0,1,2 (leave empty for all pages)', + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input' as SubBlockType, + placeholder: 'Enter your Mistral API key', + password: true, + required: true, + }, + ], + tools: { + access: ['mistral_parser_v2'], + config: { + tool: createVersionedToolSelector({ + baseToolSelector: () => 'mistral_parser', + suffix: '_v2', + fallbackToolId: 'mistral_parser_v2', + }), + params: (params) => { + if (!params || !params.apiKey || params.apiKey.trim() === '') { + throw new Error('Mistral API key is required') + } + + const parameters: Record = { + apiKey: params.apiKey.trim(), + resultType: params.resultType || 'markdown', + } + + const documentInput = params.fileUpload || params.filePath || params.document + if (!documentInput) { + throw new Error('PDF document is required') + } + if (typeof documentInput === 'object') { + parameters.fileUpload = documentInput + } else if (typeof documentInput === 'string') { + parameters.filePath = documentInput.trim() + } + + let pagesArray: number[] | undefined + if (params.pages && params.pages.trim() !== '') { + try { + pagesArray = params.pages + .split(',') + .map((p: string) => p.trim()) + .filter((p: string) => p.length > 0) + .map((p: string) => { + const num = Number.parseInt(p, 10) + if (Number.isNaN(num) || num < 0) { + throw new Error(`Invalid page number: ${p}`) + } + return num + }) + + if (pagesArray && pagesArray.length === 0) { + pagesArray = undefined + } + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error) + throw new Error(`Page number format error: ${errorMessage}`) + } + } + + if (pagesArray && pagesArray.length > 0) { + parameters.pages = pagesArray + } + + return parameters + }, + }, + }, + inputs: { + document: { type: 'json', description: 'Document input (file upload or URL reference)' }, + filePath: { type: 'string', description: 'PDF document URL (advanced mode)' }, + fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' }, + apiKey: { type: 'string', description: 'Mistral API key' }, + resultType: { type: 'string', description: 'Output format type' }, + pages: { type: 'string', description: 'Page selection' }, + }, + outputs: { + content: { type: 'string', description: 'Extracted content' }, + metadata: { type: 'json', description: 'Processing metadata' }, + }, +} diff --git a/apps/sim/blocks/blocks/pulse.ts b/apps/sim/blocks/blocks/pulse.ts index 212f325d7..7f36b87ed 100644 --- a/apps/sim/blocks/blocks/pulse.ts +++ b/apps/sim/blocks/blocks/pulse.ts @@ -15,34 +15,22 @@ export const PulseBlock: BlockConfig = { icon: PulseIcon, subBlocks: [ { - id: 'inputMethod', - title: 'Select Input Method', - type: 'dropdown' as SubBlockType, - options: [ - { id: 'url', label: 'Document URL' }, - { id: 'upload', label: 'Upload Document' }, - ], + id: 'fileUpload', + title: 'Document', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'document', + acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx', + placeholder: 'Upload a document', + mode: 'basic', + maxSize: 50, }, { id: 'filePath', - title: 'Document URL', + title: 'Document', type: 'short-input' as SubBlockType, - placeholder: 'Enter full URL to a document (https://example.com/document.pdf)', - condition: { - field: 'inputMethod', - value: 'url', - }, - }, - { - id: 'fileUpload', - title: 'Upload Document', - type: 'file-upload' as SubBlockType, - acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx', - condition: { - field: 'inputMethod', - value: 'upload', - }, - maxSize: 50, + canonicalParamId: 'document', + placeholder: 'Document URL or reference from previous block', + mode: 'advanced', }, { id: 'pages', @@ -84,17 +72,14 @@ export const PulseBlock: BlockConfig = { apiKey: params.apiKey.trim(), } - const inputMethod = params.inputMethod || 'url' - if (inputMethod === 'url') { - if (!params.filePath || params.filePath.trim() === '') { - throw new Error('Document URL is required') - } - parameters.filePath = params.filePath.trim() - } else if (inputMethod === 'upload') { - if (!params.fileUpload) { - throw new Error('Please upload a document') - } - parameters.fileUpload = params.fileUpload + const documentInput = params.fileUpload || params.filePath || params.document + if (!documentInput) { + throw new Error('Document is required') + } + if (typeof documentInput === 'object') { + parameters.fileUpload = documentInput + } else if (typeof documentInput === 'string') { + parameters.filePath = documentInput.trim() } if (params.pages && params.pages.trim() !== '') { @@ -117,9 +102,9 @@ export const PulseBlock: BlockConfig = { }, }, inputs: { - inputMethod: { type: 'string', description: 'Input method selection' }, - filePath: { type: 'string', description: 'Document URL' }, - fileUpload: { type: 'json', description: 'Uploaded document file' }, + document: { type: 'json', description: 'Document input (file upload or URL reference)' }, + filePath: { type: 'string', description: 'Document URL (advanced mode)' }, + fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' }, apiKey: { type: 'string', description: 'Pulse API key' }, pages: { type: 'string', description: 'Page range selection' }, chunking: { diff --git a/apps/sim/blocks/blocks/reducto.ts b/apps/sim/blocks/blocks/reducto.ts index 5dd33dcb6..a7c7a28c2 100644 --- a/apps/sim/blocks/blocks/reducto.ts +++ b/apps/sim/blocks/blocks/reducto.ts @@ -14,34 +14,22 @@ export const ReductoBlock: BlockConfig = { icon: ReductoIcon, subBlocks: [ { - id: 'inputMethod', - title: 'Select Input Method', - type: 'dropdown' as SubBlockType, - options: [ - { id: 'url', label: 'PDF Document URL' }, - { id: 'upload', label: 'Upload PDF Document' }, - ], + id: 'fileUpload', + title: 'PDF Document', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'document', + acceptedTypes: 'application/pdf', + placeholder: 'Upload a PDF document', + mode: 'basic', + maxSize: 50, }, { id: 'filePath', - title: 'PDF Document URL', + title: 'PDF Document', type: 'short-input' as SubBlockType, - placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)', - condition: { - field: 'inputMethod', - value: 'url', - }, - }, - { - id: 'fileUpload', - title: 'Upload PDF', - type: 'file-upload' as SubBlockType, - acceptedTypes: 'application/pdf', - condition: { - field: 'inputMethod', - value: 'upload', - }, - maxSize: 50, + canonicalParamId: 'document', + placeholder: 'Document URL or reference from previous block', + mode: 'advanced', }, { id: 'pages', @@ -80,17 +68,15 @@ export const ReductoBlock: BlockConfig = { apiKey: params.apiKey.trim(), } - const inputMethod = params.inputMethod || 'url' - if (inputMethod === 'url') { - if (!params.filePath || params.filePath.trim() === '') { - throw new Error('PDF Document URL is required') - } - parameters.filePath = params.filePath.trim() - } else if (inputMethod === 'upload') { - if (!params.fileUpload) { - throw new Error('Please upload a PDF document') - } - parameters.fileUpload = params.fileUpload + const documentInput = params.fileUpload || params.filePath || params.document + if (!documentInput) { + throw new Error('PDF document is required') + } + + if (typeof documentInput === 'object') { + parameters.fileUpload = documentInput + } else if (typeof documentInput === 'string') { + parameters.filePath = documentInput.trim() } let pagesArray: number[] | undefined @@ -130,9 +116,9 @@ export const ReductoBlock: BlockConfig = { }, }, inputs: { - inputMethod: { type: 'string', description: 'Input method selection' }, - filePath: { type: 'string', description: 'PDF document URL' }, - fileUpload: { type: 'json', description: 'Uploaded PDF file' }, + document: { type: 'json', description: 'Document input (file upload or URL reference)' }, + filePath: { type: 'string', description: 'PDF document URL (advanced mode)' }, + fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' }, apiKey: { type: 'string', description: 'Reducto API key' }, pages: { type: 'string', description: 'Page selection' }, tableOutputFormat: { type: 'string', description: 'Table output format' }, diff --git a/apps/sim/blocks/blocks/textract.ts b/apps/sim/blocks/blocks/textract.ts index ee76504e4..1acbb71a6 100644 --- a/apps/sim/blocks/blocks/textract.ts +++ b/apps/sim/blocks/blocks/textract.ts @@ -23,33 +23,32 @@ export const TextractBlock: BlockConfig = { ], }, { - id: 'inputMethod', - title: 'Select Input Method', - type: 'dropdown' as SubBlockType, - options: [ - { id: 'url', label: 'Document URL' }, - { id: 'upload', label: 'Upload Document' }, - ], + id: 'fileUpload', + title: 'Document', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'document', + acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff', + placeholder: 'Upload a document', condition: { field: 'processingMode', value: 'async', not: true, }, + mode: 'basic', + maxSize: 10, }, { id: 'filePath', - title: 'Document URL', + title: 'Document', type: 'short-input' as SubBlockType, - placeholder: 'Enter full URL to a document (JPEG, PNG, or single-page PDF)', + canonicalParamId: 'document', + placeholder: 'Document URL or reference from previous block', condition: { - field: 'inputMethod', - value: 'url', - and: { - field: 'processingMode', - value: 'async', - not: true, - }, + field: 'processingMode', + value: 'async', + not: true, }, + mode: 'advanced', }, { id: 's3Uri', @@ -61,22 +60,6 @@ export const TextractBlock: BlockConfig = { value: 'async', }, }, - { - id: 'fileUpload', - title: 'Upload Document', - type: 'file-upload' as SubBlockType, - acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff', - condition: { - field: 'inputMethod', - value: 'upload', - and: { - field: 'processingMode', - value: 'async', - not: true, - }, - }, - maxSize: 10, - }, { id: 'region', title: 'AWS Region', @@ -150,17 +133,14 @@ export const TextractBlock: BlockConfig = { } parameters.s3Uri = params.s3Uri.trim() } else { - const inputMethod = params.inputMethod || 'url' - if (inputMethod === 'url') { - if (!params.filePath || params.filePath.trim() === '') { - throw new Error('Document URL is required') - } - parameters.filePath = params.filePath.trim() - } else if (inputMethod === 'upload') { - if (!params.fileUpload) { - throw new Error('Please upload a document') - } - parameters.fileUpload = params.fileUpload + const documentInput = params.fileUpload || params.filePath || params.document + if (!documentInput) { + throw new Error('Document is required') + } + if (typeof documentInput === 'object') { + parameters.fileUpload = documentInput + } else if (typeof documentInput === 'string') { + parameters.filePath = documentInput.trim() } } @@ -180,10 +160,10 @@ export const TextractBlock: BlockConfig = { }, inputs: { processingMode: { type: 'string', description: 'Document type: single-page or multi-page' }, - inputMethod: { type: 'string', description: 'Input method selection for single-page mode' }, - filePath: { type: 'string', description: 'Document URL' }, + document: { type: 'json', description: 'Document input (file upload or URL reference)' }, + filePath: { type: 'string', description: 'Document URL (advanced mode)' }, + fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' }, s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' }, - fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' }, extractTables: { type: 'boolean', description: 'Extract tables from document' }, extractForms: { type: 'boolean', description: 'Extract form key-value pairs' }, detectSignatures: { type: 'boolean', description: 'Detect signatures' }, diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 1a37b2a14..80efc620e 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -25,7 +25,7 @@ import { ElasticsearchBlock } from '@/blocks/blocks/elasticsearch' import { ElevenLabsBlock } from '@/blocks/blocks/elevenlabs' import { EvaluatorBlock } from '@/blocks/blocks/evaluator' import { ExaBlock } from '@/blocks/blocks/exa' -import { FileBlock } from '@/blocks/blocks/file' +import { FileBlock, FileV2Block } from '@/blocks/blocks/file' import { FirecrawlBlock } from '@/blocks/blocks/firecrawl' import { FirefliesBlock } from '@/blocks/blocks/fireflies' import { FunctionBlock } from '@/blocks/blocks/function' @@ -74,7 +74,7 @@ import { MemoryBlock } from '@/blocks/blocks/memory' import { MicrosoftExcelBlock, MicrosoftExcelV2Block } from '@/blocks/blocks/microsoft_excel' import { MicrosoftPlannerBlock } from '@/blocks/blocks/microsoft_planner' import { MicrosoftTeamsBlock } from '@/blocks/blocks/microsoft_teams' -import { MistralParseBlock } from '@/blocks/blocks/mistral_parse' +import { MistralParseBlock, MistralParseV2Block } from '@/blocks/blocks/mistral_parse' import { MongoDBBlock } from '@/blocks/blocks/mongodb' import { MySQLBlock } from '@/blocks/blocks/mysql' import { Neo4jBlock } from '@/blocks/blocks/neo4j' @@ -182,6 +182,7 @@ export const registry: Record = { evaluator: EvaluatorBlock, exa: ExaBlock, file: FileBlock, + file_v2: FileV2Block, firecrawl: FirecrawlBlock, fireflies: FirefliesBlock, function: FunctionBlock, @@ -237,6 +238,7 @@ export const registry: Record = { microsoft_planner: MicrosoftPlannerBlock, microsoft_teams: MicrosoftTeamsBlock, mistral_parse: MistralParseBlock, + mistral_parse_v2: MistralParseV2Block, mongodb: MongoDBBlock, mysql: MySQLBlock, neo4j: Neo4jBlock, diff --git a/apps/sim/tools/file/index.ts b/apps/sim/tools/file/index.ts index d6b6372e0..236461d1a 100644 --- a/apps/sim/tools/file/index.ts +++ b/apps/sim/tools/file/index.ts @@ -1,3 +1,4 @@ -import { fileParserTool } from '@/tools/file/parser' +import { fileParserTool, fileParserV2Tool } from '@/tools/file/parser' export const fileParseTool = fileParserTool +export { fileParserV2Tool } diff --git a/apps/sim/tools/file/parser.ts b/apps/sim/tools/file/parser.ts index 6076e4248..e3f381403 100644 --- a/apps/sim/tools/file/parser.ts +++ b/apps/sim/tools/file/parser.ts @@ -146,3 +146,25 @@ export const fileParserTool: ToolConfig = { combinedContent: { type: 'string', description: 'Combined content of all parsed files' }, }, } + +export const fileParserV2Tool: ToolConfig = { + id: 'file_parser_v2', + name: 'File Parser', + description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)', + version: '2.0.0', + + params: fileParserTool.params, + request: fileParserTool.request, + transformResponse: fileParserTool.transformResponse, + + outputs: { + files: { + type: 'array', + description: 'Array of parsed files with content, metadata, and file properties', + }, + combinedContent: { + type: 'string', + description: 'All file contents merged into a single text string', + }, + }, +} diff --git a/apps/sim/tools/mistral/index.ts b/apps/sim/tools/mistral/index.ts index 53103913b..566b90f41 100644 --- a/apps/sim/tools/mistral/index.ts +++ b/apps/sim/tools/mistral/index.ts @@ -1,3 +1,3 @@ -import { mistralParserTool } from '@/tools/mistral/parser' +import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral/parser' -export { mistralParserTool } +export { mistralParserTool, mistralParserV2Tool } diff --git a/apps/sim/tools/mistral/parser.ts b/apps/sim/tools/mistral/parser.ts index 3f964dd18..a47f22ba4 100644 --- a/apps/sim/tools/mistral/parser.ts +++ b/apps/sim/tools/mistral/parser.ts @@ -415,3 +415,26 @@ export const mistralParserTool: ToolConfig = { + id: 'mistral_parser_v2', + name: 'Mistral PDF Parser', + description: 'Parse PDF documents using Mistral OCR API', + version: '2.0.0', + + params: mistralParserTool.params, + request: mistralParserTool.request, + transformResponse: mistralParserTool.transformResponse, + + outputs: { + success: { type: 'boolean', description: 'Whether the PDF was parsed successfully' }, + content: { + type: 'string', + description: 'Extracted content in the requested format (markdown, text, or JSON)', + }, + metadata: { + type: 'object', + description: 'Processing metadata including jobId, fileType, pageCount, and usage info', + }, + }, +} diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index d15f72b5f..c8a51881e 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -204,7 +204,7 @@ import { exaResearchTool, exaSearchTool, } from '@/tools/exa' -import { fileParseTool } from '@/tools/file' +import { fileParserV2Tool, fileParseTool } from '@/tools/file' import { firecrawlAgentTool, firecrawlCrawlTool, @@ -979,7 +979,7 @@ import { microsoftTeamsWriteChannelTool, microsoftTeamsWriteChatTool, } from '@/tools/microsoft_teams' -import { mistralParserTool } from '@/tools/mistral' +import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral' import { mongodbDeleteTool, mongodbExecuteTool, @@ -1683,6 +1683,7 @@ export const tools: Record = { function_execute: functionExecuteTool, vision_tool: visionTool, file_parser: fileParseTool, + file_parser_v2: fileParserV2Tool, firecrawl_scrape: firecrawlScrapeTool, firecrawl_search: firecrawlSearchTool, firecrawl_crawl: firecrawlCrawlTool, @@ -2456,6 +2457,7 @@ export const tools: Record = { apollo_task_search: apolloTaskSearchTool, apollo_email_accounts: apolloEmailAccountsTool, mistral_parser: mistralParserTool, + mistral_parser_v2: mistralParserV2Tool, reducto_parser: reductoParserTool, textract_parser: textractParserTool, thinking_tool: thinkingTool, diff --git a/apps/sim/tools/textract/parser.ts b/apps/sim/tools/textract/parser.ts index 75bc5fb54..a7b95564c 100644 --- a/apps/sim/tools/textract/parser.ts +++ b/apps/sim/tools/textract/parser.ts @@ -141,8 +141,8 @@ export const textractParserTool: ToolConfig