added mistral v2, files v2, and finalized textract

This commit is contained in:
waleed
2026-01-20 15:02:27 -08:00
parent dcaae1df7c
commit 59578dd140
16 changed files with 368 additions and 188 deletions

View File

@@ -154,7 +154,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
elasticsearch: ElasticsearchIcon,
elevenlabs: ElevenLabsIcon,
exa: ExaAIIcon,
file: DocumentIcon,
file_v2: DocumentIcon,
firecrawl: FirecrawlIcon,
fireflies: FirefliesIcon,
github_v2: GithubIcon,
@@ -196,7 +196,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
microsoft_excel_v2: MicrosoftExcelIcon,
microsoft_planner: MicrosoftPlannerIcon,
microsoft_teams: MicrosoftTeamsIcon,
mistral_parse: MistralIcon,
mistral_parse_v2: MistralIcon,
mongodb: MongoDBIcon,
mysql: MySQLIcon,
neo4j: Neo4jIcon,

View File

@@ -6,7 +6,7 @@ description: Read and parse multiple files
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="file"
type="file_v2"
color="#40916C"
/>
@@ -48,7 +48,7 @@ Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `files` | array | Array of parsed files |
| `combinedContent` | string | Combined content of all parsed files |
| `files` | array | Array of parsed files with content, metadata, and file properties |
| `combinedContent` | string | All file contents merged into a single text string |

View File

@@ -6,7 +6,7 @@ description: Extract text from PDF documents
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="mistral_parse"
type="mistral_parse_v2"
color="#000000"
/>
@@ -57,15 +57,5 @@ Parse PDF documents using Mistral OCR API
| `success` | boolean | Whether the PDF was parsed successfully |
| `content` | string | Extracted content in the requested format \(markdown, text, or JSON\) |
| `metadata` | object | Processing metadata including jobId, fileType, pageCount, and usage info |
| ↳ `jobId` | string | Unique job identifier |
| ↳ `fileType` | string | File type \(e.g., pdf\) |
| ↳ `fileName` | string | Original file name |
| ↳ `source` | string | Source type \(url\) |
| ↳ `pageCount` | number | Number of pages processed |
| ↳ `model` | string | Mistral model used |
| ↳ `resultType` | string | Output format \(markdown, text, json\) |
| ↳ `processedAt` | string | Processing timestamp |
| ↳ `sourceUrl` | string | Source URL if applicable |
| ↳ `usageInfo` | object | Usage statistics from OCR processing |

View File

@@ -423,7 +423,12 @@ export async function POST(request: NextRequest) {
let fileUrl = validatedData.filePath
if (validatedData.filePath?.includes('/api/files/serve/')) {
const isInternalFilePath =
validatedData.filePath?.startsWith('/api/files/serve/') ||
(validatedData.filePath?.startsWith('/') &&
validatedData.filePath?.includes('/api/files/serve/'))
if (isInternalFilePath) {
try {
const storageKey = extractStorageKey(validatedData.filePath)
const context = inferContextFromKey(storageKey)

View File

@@ -1,13 +1,14 @@
import { createLogger } from '@sim/logger'
import { DocumentIcon } from '@/components/icons'
import type { BlockConfig, SubBlockType } from '@/blocks/types'
import { createVersionedToolSelector } from '@/blocks/utils'
import type { FileParserOutput } from '@/tools/file/types'
const logger = createLogger('FileBlock')
export const FileBlock: BlockConfig<FileParserOutput> = {
type: 'file',
name: 'File',
name: 'File (Legacy)',
description: 'Read and parse multiple files',
longDescription: `Integrate File into the workflow. Can upload a file manually or insert a file url.`,
bestPractices: `
@@ -17,6 +18,7 @@ export const FileBlock: BlockConfig<FileParserOutput> = {
category: 'tools',
bgColor: '#40916C',
icon: DocumentIcon,
hideFromToolbar: true,
subBlocks: [
{
id: 'inputMethod',
@@ -123,3 +125,92 @@ export const FileBlock: BlockConfig<FileParserOutput> = {
},
},
}
/**
 * Version 2 of the File block.
 *
 * Starts from the legacy `FileBlock` config (via spread) and overrides the
 * type, display name, toolbar visibility, sub-blocks, tool wiring, inputs and
 * outputs. Instead of an input-method selector, the block declares two
 * sub-blocks that share the canonical parameter id `fileInput`: a file upload
 * shown in `basic` mode and a free-text URL/reference field shown in
 * `advanced` mode.
 */
export const FileV2Block: BlockConfig<FileParserOutput> = {
  ...FileBlock,
  type: 'file_v2',
  name: 'File',
  description: 'Read and parse multiple files',
  hideFromToolbar: false,
  subBlocks: [
    {
      id: 'file',
      title: 'Files',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'fileInput',
      acceptedTypes:
        '.pdf,.csv,.doc,.docx,.txt,.md,.xlsx,.xls,.html,.htm,.pptx,.ppt,.json,.xml,.rtf',
      placeholder: 'Upload files to process',
      multiple: true,
      mode: 'basic',
      maxSize: 100,
    },
    {
      id: 'filePath',
      title: 'Files',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'fileInput',
      placeholder: 'File URL or reference from previous block',
      mode: 'advanced',
    },
  ],
  tools: {
    access: ['file_parser_v2'],
    config: {
      // NOTE(review): presumably resolves to the base tool id plus `suffix`,
      // falling back to `fallbackToolId` - confirm against createVersionedToolSelector.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'file_parser',
        suffix: '_v2',
        fallbackToolId: 'file_parser_v2',
      }),
      /**
       * Normalizes the block inputs into the parameters passed to the tool.
       *
       * Accepted shapes for the file input (first truthy of `file`,
       * `filePath`, `fileInput`):
       * - a string: trimmed and forwarded as `filePath`, together with the
       *   workspace id from `params._context`;
       * - a non-empty array of objects with a `path`: forwarded as a single
       *   path (one entry) or an array of paths (several entries);
       * - a single object with a `path`.
       *
       * @throws Error 'File is required' when no input (or only whitespace) is given.
       * @throws Error 'Invalid file input' when the input has an unsupported shape
       *   or an array entry carries no `path`.
       */
      params: (params) => {
        const fileInput = params.file || params.filePath || params.fileInput
        if (!fileInput) {
          logger.error('No file input provided')
          throw new Error('File is required')
        }

        const fileType = params.fileType || 'auto'

        if (typeof fileInput === 'string') {
          const filePath = fileInput.trim()
          // A whitespace-only string is truthy and would otherwise be
          // forwarded to the tool as an empty path.
          if (filePath === '') {
            logger.error('No file input provided')
            throw new Error('File is required')
          }
          return {
            filePath,
            fileType,
            workspaceId: params._context?.workspaceId,
          }
        }

        if (Array.isArray(fileInput) && fileInput.length > 0) {
          // Optional chaining keeps a null/undefined entry from raising a raw
          // TypeError; such entries are rejected explicitly just below.
          const filePaths = fileInput.map((file) => file?.path)
          if (filePaths.some((path) => !path)) {
            logger.error('Invalid file input format')
            throw new Error('Invalid file input')
          }
          return {
            filePath: filePaths.length === 1 ? filePaths[0] : filePaths,
            fileType,
          }
        }

        if (fileInput?.path) {
          return {
            filePath: fileInput.path,
            fileType,
          }
        }

        logger.error('Invalid file input format')
        throw new Error('Invalid file input')
      },
    },
  },
  inputs: {
    fileInput: { type: 'json', description: 'File input (upload or URL reference)' },
    filePath: { type: 'string', description: 'File URL (advanced mode)' },
    file: { type: 'json', description: 'Uploaded file data (basic mode)' },
    fileType: { type: 'string', description: 'File type' },
  },
  outputs: {
    files: {
      type: 'json',
      description: 'Array of parsed file objects with content, metadata, and file properties',
    },
    combinedContent: {
      type: 'string',
      description: 'All file contents merged into a single text string',
    },
  },
}

View File

@@ -1,11 +1,13 @@
import { MistralIcon } from '@/components/icons'
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
import { createVersionedToolSelector } from '@/blocks/utils'
import type { MistralParserOutput } from '@/tools/mistral/types'
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
type: 'mistral_parse',
name: 'Mistral Parser',
name: 'Mistral Parser (Legacy)',
description: 'Extract text from PDF documents',
hideFromToolbar: true,
authMode: AuthMode.ApiKey,
longDescription: `Integrate Mistral Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`,
docsLink: 'https://docs.sim.ai/tools/mistral_parse',
@@ -13,7 +15,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
bgColor: '#000000',
icon: MistralIcon,
subBlocks: [
// Show input method selection
{
id: 'inputMethod',
title: 'Select Input Method',
@@ -23,8 +24,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
{ id: 'upload', label: 'Upload PDF Document' },
],
},
// URL input - conditional on inputMethod
{
id: 'filePath',
title: 'PDF Document URL',
@@ -35,8 +34,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
value: 'url',
},
},
// File upload option
{
id: 'fileUpload',
title: 'Upload PDF',
@@ -46,9 +43,8 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
field: 'inputMethod',
value: 'upload',
},
maxSize: 50, // 50MB max via direct upload
maxSize: 50,
},
{
id: 'resultType',
title: 'Output Format',
@@ -65,28 +61,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
type: 'short-input',
placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
},
/*
* Image-related parameters - temporarily disabled
* Uncomment if PDF image extraction is needed
*
{
id: 'includeImageBase64',
title: 'Include PDF Images',
type: 'switch',
},
{
id: 'imageLimit',
title: 'Max Images',
type: 'short-input',
placeholder: 'Maximum number of images to extract',
},
{
id: 'imageMinSize',
title: 'Min Image Size (px)',
type: 'short-input',
placeholder: 'Min width/height in pixels',
},
*/
{
id: 'apiKey',
title: 'API Key',
@@ -101,18 +75,15 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
config: {
tool: () => 'mistral_parser',
params: (params) => {
// Basic validation
if (!params || !params.apiKey || params.apiKey.trim() === '') {
throw new Error('Mistral API key is required')
}
// Build parameters object - file processing is now handled at the tool level
const parameters: any = {
const parameters: Record<string, unknown> = {
apiKey: params.apiKey.trim(),
resultType: params.resultType || 'markdown',
}
// Set filePath or fileUpload based on input method
const inputMethod = params.inputMethod || 'url'
if (inputMethod === 'url') {
if (!params.filePath || params.filePath.trim() === '') {
@@ -123,11 +94,9 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
if (!params.fileUpload) {
throw new Error('Please upload a PDF document')
}
// Pass the entire fileUpload object to the tool
parameters.fileUpload = params.fileUpload
}
// Convert pages input from string to array of numbers if provided
let pagesArray: number[] | undefined
if (params.pages && params.pages.trim() !== '') {
try {
@@ -146,12 +115,12 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
if (pagesArray && pagesArray.length === 0) {
pagesArray = undefined
}
} catch (error: any) {
throw new Error(`Page number format error: ${error.message}`)
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error)
throw new Error(`Page number format error: ${errorMessage}`)
}
}
// Add optional parameters
if (pagesArray && pagesArray.length > 0) {
parameters.pages = pagesArray
}
@@ -173,3 +142,127 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
metadata: { type: 'json', description: 'Processing metadata' },
},
}
/**
 * Version 2 of the Mistral Parser block.
 *
 * Starts from the legacy `MistralParseBlock` config (via spread) and overrides
 * the type, display name, toolbar visibility, sub-blocks, tool wiring, inputs
 * and outputs. The document is supplied through two sub-blocks that share the
 * canonical parameter id `document`: a PDF upload shown in `basic` mode and a
 * URL/reference text field shown in `advanced` mode.
 */
export const MistralParseV2Block: BlockConfig<MistralParserOutput> = {
  ...MistralParseBlock,
  type: 'mistral_parse_v2',
  name: 'Mistral Parser',
  description: 'Extract text from PDF documents',
  hideFromToolbar: false,
  subBlocks: [
    {
      id: 'fileUpload',
      title: 'PDF Document',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'document',
      acceptedTypes: 'application/pdf',
      placeholder: 'Upload a PDF document',
      mode: 'basic',
      maxSize: 50,
    },
    {
      id: 'filePath',
      title: 'PDF Document',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'document',
      placeholder: 'Document URL or reference from previous block',
      mode: 'advanced',
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown (Formatted)' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON (Raw)' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser_v2'],
    config: {
      // NOTE(review): presumably resolves to the base tool id plus `suffix`,
      // falling back to `fallbackToolId` - confirm against createVersionedToolSelector.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'mistral_parser',
        suffix: '_v2',
        fallbackToolId: 'mistral_parser_v2',
      }),
      /**
       * Validates the block inputs and builds the tool parameters.
       *
       * - `apiKey` is required and trimmed.
       * - `resultType` defaults to 'markdown'.
       * - The document is the first truthy of `fileUpload`, `filePath`,
       *   `document`; an object is forwarded as `fileUpload`, a string is
       *   trimmed and forwarded as `filePath`.
       * - `pages` is an optional comma-separated list of non-negative
       *   integers; when it yields at least one number it is forwarded as a
       *   number array.
       *
       * @throws Error when the API key is missing, the document is missing,
       *   blank or of an unsupported type, or a page entry is not a
       *   non-negative integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        const documentInput = params.fileUpload || params.filePath || params.document
        if (!documentInput) {
          throw new Error('PDF document is required')
        }

        if (typeof documentInput === 'string') {
          const filePath = documentInput.trim()
          // A whitespace-only string is truthy and would otherwise reach the
          // tool as an empty path.
          if (filePath === '') {
            throw new Error('PDF document is required')
          }
          parameters.filePath = filePath
        } else if (typeof documentInput === 'object') {
          parameters.fileUpload = documentInput
        } else {
          // Any other type (number, boolean, ...) used to be dropped silently,
          // leaving the tool call without a document.
          throw new Error('PDF document must be an uploaded file or a URL')
        }

        if (params.pages && params.pages.trim() !== '') {
          try {
            const pageNumbers: number[] = params.pages
              .split(',')
              .map((entry: string) => entry.trim())
              .filter((entry: string) => entry.length > 0)
              .map((entry: string) => {
                // parseInt alone stops at the first non-digit, so "2-5" or
                // "1abc" would be read as 2 / 1; require digits only.
                if (!/^\d+$/.test(entry)) {
                  throw new Error(`Invalid page number: ${entry}`)
                }
                return Number.parseInt(entry, 10)
              })

            // Input made only of separators (e.g. ",,") yields no pages and
            // means "all pages", so nothing is forwarded.
            if (pageNumbers.length > 0) {
              parameters.pages = pageNumbers
            }
          } catch (error: unknown) {
            const errorMessage = error instanceof Error ? error.message : String(error)
            throw new Error(`Page number format error: ${errorMessage}`)
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    document: { type: 'json', description: 'Document input (file upload or URL reference)' },
    filePath: { type: 'string', description: 'PDF document URL (advanced mode)' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    content: { type: 'string', description: 'Extracted content' },
    metadata: { type: 'json', description: 'Processing metadata' },
  },
}

View File

@@ -15,34 +15,22 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
icon: PulseIcon,
subBlocks: [
{
id: 'inputMethod',
title: 'Select Input Method',
type: 'dropdown' as SubBlockType,
options: [
{ id: 'url', label: 'Document URL' },
{ id: 'upload', label: 'Upload Document' },
],
id: 'fileUpload',
title: 'Document',
type: 'file-upload' as SubBlockType,
canonicalParamId: 'document',
acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx',
placeholder: 'Upload a document',
mode: 'basic',
maxSize: 50,
},
{
id: 'filePath',
title: 'Document URL',
title: 'Document',
type: 'short-input' as SubBlockType,
placeholder: 'Enter full URL to a document (https://example.com/document.pdf)',
condition: {
field: 'inputMethod',
value: 'url',
},
},
{
id: 'fileUpload',
title: 'Upload Document',
type: 'file-upload' as SubBlockType,
acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx',
condition: {
field: 'inputMethod',
value: 'upload',
},
maxSize: 50,
canonicalParamId: 'document',
placeholder: 'Document URL or reference from previous block',
mode: 'advanced',
},
{
id: 'pages',
@@ -84,17 +72,14 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
apiKey: params.apiKey.trim(),
}
const inputMethod = params.inputMethod || 'url'
if (inputMethod === 'url') {
if (!params.filePath || params.filePath.trim() === '') {
throw new Error('Document URL is required')
}
parameters.filePath = params.filePath.trim()
} else if (inputMethod === 'upload') {
if (!params.fileUpload) {
throw new Error('Please upload a document')
}
parameters.fileUpload = params.fileUpload
const documentInput = params.fileUpload || params.filePath || params.document
if (!documentInput) {
throw new Error('Document is required')
}
if (typeof documentInput === 'object') {
parameters.fileUpload = documentInput
} else if (typeof documentInput === 'string') {
parameters.filePath = documentInput.trim()
}
if (params.pages && params.pages.trim() !== '') {
@@ -117,9 +102,9 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
},
},
inputs: {
inputMethod: { type: 'string', description: 'Input method selection' },
filePath: { type: 'string', description: 'Document URL' },
fileUpload: { type: 'json', description: 'Uploaded document file' },
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
filePath: { type: 'string', description: 'Document URL (advanced mode)' },
fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' },
apiKey: { type: 'string', description: 'Pulse API key' },
pages: { type: 'string', description: 'Page range selection' },
chunking: {

View File

@@ -14,34 +14,22 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
icon: ReductoIcon,
subBlocks: [
{
id: 'inputMethod',
title: 'Select Input Method',
type: 'dropdown' as SubBlockType,
options: [
{ id: 'url', label: 'PDF Document URL' },
{ id: 'upload', label: 'Upload PDF Document' },
],
id: 'fileUpload',
title: 'PDF Document',
type: 'file-upload' as SubBlockType,
canonicalParamId: 'document',
acceptedTypes: 'application/pdf',
placeholder: 'Upload a PDF document',
mode: 'basic',
maxSize: 50,
},
{
id: 'filePath',
title: 'PDF Document URL',
title: 'PDF Document',
type: 'short-input' as SubBlockType,
placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
condition: {
field: 'inputMethod',
value: 'url',
},
},
{
id: 'fileUpload',
title: 'Upload PDF',
type: 'file-upload' as SubBlockType,
acceptedTypes: 'application/pdf',
condition: {
field: 'inputMethod',
value: 'upload',
},
maxSize: 50,
canonicalParamId: 'document',
placeholder: 'Document URL or reference from previous block',
mode: 'advanced',
},
{
id: 'pages',
@@ -80,17 +68,15 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
apiKey: params.apiKey.trim(),
}
const inputMethod = params.inputMethod || 'url'
if (inputMethod === 'url') {
if (!params.filePath || params.filePath.trim() === '') {
throw new Error('PDF Document URL is required')
}
parameters.filePath = params.filePath.trim()
} else if (inputMethod === 'upload') {
if (!params.fileUpload) {
throw new Error('Please upload a PDF document')
}
parameters.fileUpload = params.fileUpload
const documentInput = params.fileUpload || params.filePath || params.document
if (!documentInput) {
throw new Error('PDF document is required')
}
if (typeof documentInput === 'object') {
parameters.fileUpload = documentInput
} else if (typeof documentInput === 'string') {
parameters.filePath = documentInput.trim()
}
let pagesArray: number[] | undefined
@@ -130,9 +116,9 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
},
},
inputs: {
inputMethod: { type: 'string', description: 'Input method selection' },
filePath: { type: 'string', description: 'PDF document URL' },
fileUpload: { type: 'json', description: 'Uploaded PDF file' },
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
filePath: { type: 'string', description: 'PDF document URL (advanced mode)' },
fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
apiKey: { type: 'string', description: 'Reducto API key' },
pages: { type: 'string', description: 'Page selection' },
tableOutputFormat: { type: 'string', description: 'Table output format' },

View File

@@ -23,33 +23,32 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
],
},
{
id: 'inputMethod',
title: 'Select Input Method',
type: 'dropdown' as SubBlockType,
options: [
{ id: 'url', label: 'Document URL' },
{ id: 'upload', label: 'Upload Document' },
],
id: 'fileUpload',
title: 'Document',
type: 'file-upload' as SubBlockType,
canonicalParamId: 'document',
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
placeholder: 'Upload a document',
condition: {
field: 'processingMode',
value: 'async',
not: true,
},
mode: 'basic',
maxSize: 10,
},
{
id: 'filePath',
title: 'Document URL',
title: 'Document',
type: 'short-input' as SubBlockType,
placeholder: 'Enter full URL to a document (JPEG, PNG, or single-page PDF)',
canonicalParamId: 'document',
placeholder: 'Document URL or reference from previous block',
condition: {
field: 'inputMethod',
value: 'url',
and: {
field: 'processingMode',
value: 'async',
not: true,
},
field: 'processingMode',
value: 'async',
not: true,
},
mode: 'advanced',
},
{
id: 's3Uri',
@@ -61,22 +60,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
value: 'async',
},
},
{
id: 'fileUpload',
title: 'Upload Document',
type: 'file-upload' as SubBlockType,
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
condition: {
field: 'inputMethod',
value: 'upload',
and: {
field: 'processingMode',
value: 'async',
not: true,
},
},
maxSize: 10,
},
{
id: 'region',
title: 'AWS Region',
@@ -150,17 +133,14 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
}
parameters.s3Uri = params.s3Uri.trim()
} else {
const inputMethod = params.inputMethod || 'url'
if (inputMethod === 'url') {
if (!params.filePath || params.filePath.trim() === '') {
throw new Error('Document URL is required')
}
parameters.filePath = params.filePath.trim()
} else if (inputMethod === 'upload') {
if (!params.fileUpload) {
throw new Error('Please upload a document')
}
parameters.fileUpload = params.fileUpload
const documentInput = params.fileUpload || params.filePath || params.document
if (!documentInput) {
throw new Error('Document is required')
}
if (typeof documentInput === 'object') {
parameters.fileUpload = documentInput
} else if (typeof documentInput === 'string') {
parameters.filePath = documentInput.trim()
}
}
@@ -180,10 +160,10 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
},
inputs: {
processingMode: { type: 'string', description: 'Document type: single-page or multi-page' },
inputMethod: { type: 'string', description: 'Input method selection for single-page mode' },
filePath: { type: 'string', description: 'Document URL' },
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
filePath: { type: 'string', description: 'Document URL (advanced mode)' },
fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' },
s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' },
fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' },
extractTables: { type: 'boolean', description: 'Extract tables from document' },
extractForms: { type: 'boolean', description: 'Extract form key-value pairs' },
detectSignatures: { type: 'boolean', description: 'Detect signatures' },

View File

@@ -25,7 +25,7 @@ import { ElasticsearchBlock } from '@/blocks/blocks/elasticsearch'
import { ElevenLabsBlock } from '@/blocks/blocks/elevenlabs'
import { EvaluatorBlock } from '@/blocks/blocks/evaluator'
import { ExaBlock } from '@/blocks/blocks/exa'
import { FileBlock } from '@/blocks/blocks/file'
import { FileBlock, FileV2Block } from '@/blocks/blocks/file'
import { FirecrawlBlock } from '@/blocks/blocks/firecrawl'
import { FirefliesBlock } from '@/blocks/blocks/fireflies'
import { FunctionBlock } from '@/blocks/blocks/function'
@@ -74,7 +74,7 @@ import { MemoryBlock } from '@/blocks/blocks/memory'
import { MicrosoftExcelBlock, MicrosoftExcelV2Block } from '@/blocks/blocks/microsoft_excel'
import { MicrosoftPlannerBlock } from '@/blocks/blocks/microsoft_planner'
import { MicrosoftTeamsBlock } from '@/blocks/blocks/microsoft_teams'
import { MistralParseBlock } from '@/blocks/blocks/mistral_parse'
import { MistralParseBlock, MistralParseV2Block } from '@/blocks/blocks/mistral_parse'
import { MongoDBBlock } from '@/blocks/blocks/mongodb'
import { MySQLBlock } from '@/blocks/blocks/mysql'
import { Neo4jBlock } from '@/blocks/blocks/neo4j'
@@ -182,6 +182,7 @@ export const registry: Record<string, BlockConfig> = {
evaluator: EvaluatorBlock,
exa: ExaBlock,
file: FileBlock,
file_v2: FileV2Block,
firecrawl: FirecrawlBlock,
fireflies: FirefliesBlock,
function: FunctionBlock,
@@ -237,6 +238,7 @@ export const registry: Record<string, BlockConfig> = {
microsoft_planner: MicrosoftPlannerBlock,
microsoft_teams: MicrosoftTeamsBlock,
mistral_parse: MistralParseBlock,
mistral_parse_v2: MistralParseV2Block,
mongodb: MongoDBBlock,
mysql: MySQLBlock,
neo4j: Neo4jBlock,

View File

@@ -1,3 +1,4 @@
import { fileParserTool } from '@/tools/file/parser'
import { fileParserTool, fileParserV2Tool } from '@/tools/file/parser'
export const fileParseTool = fileParserTool
export { fileParserV2Tool }

View File

@@ -146,3 +146,25 @@ export const fileParserTool: ToolConfig<FileParserInput, FileParserOutput> = {
combinedContent: { type: 'string', description: 'Combined content of all parsed files' },
},
}
/**
 * Version 2 of the file parser tool.
 *
 * Shares its `params`, `request` and `transformResponse` with
 * `fileParserTool`; only the id, version and output descriptions differ.
 */
export const fileParserV2Tool: ToolConfig<FileParserInput, FileParserOutput> = {
  id: 'file_parser_v2',
  name: 'File Parser',
  description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)',
  version: '2.0.0',

  // Behaviour is taken as-is from the v1 tool definition.
  params: fileParserTool.params,
  request: fileParserTool.request,
  transformResponse: fileParserTool.transformResponse,

  outputs: {
    files: {
      type: 'array',
      description: 'Array of parsed files with content, metadata, and file properties',
    },
    combinedContent: {
      type: 'string',
      description: 'All file contents merged into a single text string',
    },
  },
}

View File

@@ -1,3 +1,3 @@
import { mistralParserTool } from '@/tools/mistral/parser'
import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral/parser'
export { mistralParserTool }
export { mistralParserTool, mistralParserV2Tool }

View File

@@ -415,3 +415,26 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
},
},
}
/**
 * Version 2 of the Mistral PDF parser tool.
 *
 * Shares its `params`, `request` and `transformResponse` with
 * `mistralParserTool`; the id, version and `outputs` declaration are
 * specific to v2.
 */
export const mistralParserV2Tool: ToolConfig<MistralParserInput, MistralParserOutput> = {
  id: 'mistral_parser_v2',
  name: 'Mistral PDF Parser',
  description: 'Parse PDF documents using Mistral OCR API',
  version: '2.0.0',

  // Behaviour is taken as-is from the v1 tool definition.
  params: mistralParserTool.params,
  request: mistralParserTool.request,
  transformResponse: mistralParserTool.transformResponse,

  outputs: {
    success: {
      type: 'boolean',
      description: 'Whether the PDF was parsed successfully',
    },
    content: {
      type: 'string',
      description: 'Extracted content in the requested format (markdown, text, or JSON)',
    },
    metadata: {
      type: 'object',
      description: 'Processing metadata including jobId, fileType, pageCount, and usage info',
    },
  },
}

View File

@@ -204,7 +204,7 @@ import {
exaResearchTool,
exaSearchTool,
} from '@/tools/exa'
import { fileParseTool } from '@/tools/file'
import { fileParserV2Tool, fileParseTool } from '@/tools/file'
import {
firecrawlAgentTool,
firecrawlCrawlTool,
@@ -979,7 +979,7 @@ import {
microsoftTeamsWriteChannelTool,
microsoftTeamsWriteChatTool,
} from '@/tools/microsoft_teams'
import { mistralParserTool } from '@/tools/mistral'
import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral'
import {
mongodbDeleteTool,
mongodbExecuteTool,
@@ -1683,6 +1683,7 @@ export const tools: Record<string, ToolConfig> = {
function_execute: functionExecuteTool,
vision_tool: visionTool,
file_parser: fileParseTool,
file_parser_v2: fileParserV2Tool,
firecrawl_scrape: firecrawlScrapeTool,
firecrawl_search: firecrawlSearchTool,
firecrawl_crawl: firecrawlCrawlTool,
@@ -2456,6 +2457,7 @@ export const tools: Record<string, ToolConfig> = {
apollo_task_search: apolloTaskSearchTool,
apollo_email_accounts: apolloEmailAccountsTool,
mistral_parser: mistralParserTool,
mistral_parser_v2: mistralParserV2Tool,
reducto_parser: reductoParserTool,
textract_parser: textractParserTool,
thinking_tool: thinkingTool,

View File

@@ -141,8 +141,8 @@ export const textractParserTool: ToolConfig<TextractParserInput, TextractParserO
throw new Error('Invalid response format from Textract API')
}
if (!apiResult.success && apiResult.error) {
throw new Error(apiResult.error)
if (!apiResult.success) {
throw new Error(apiResult.error || 'Request failed')
}
const textractData = apiResult.output ?? apiResult