mirror of
https://github.com/simstudioai/sim.git
synced 2026-01-22 05:18:08 -05:00
added mistral v2, files v2, and finalized textract
This commit is contained in:
@@ -154,7 +154,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
|
||||
elasticsearch: ElasticsearchIcon,
|
||||
elevenlabs: ElevenLabsIcon,
|
||||
exa: ExaAIIcon,
|
||||
file: DocumentIcon,
|
||||
file_v2: DocumentIcon,
|
||||
firecrawl: FirecrawlIcon,
|
||||
fireflies: FirefliesIcon,
|
||||
github_v2: GithubIcon,
|
||||
@@ -196,7 +196,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
|
||||
microsoft_excel_v2: MicrosoftExcelIcon,
|
||||
microsoft_planner: MicrosoftPlannerIcon,
|
||||
microsoft_teams: MicrosoftTeamsIcon,
|
||||
mistral_parse: MistralIcon,
|
||||
mistral_parse_v2: MistralIcon,
|
||||
mongodb: MongoDBIcon,
|
||||
mysql: MySQLIcon,
|
||||
neo4j: Neo4jIcon,
|
||||
|
||||
@@ -6,7 +6,7 @@ description: Read and parse multiple files
|
||||
import { BlockInfoCard } from "@/components/ui/block-info-card"
|
||||
|
||||
<BlockInfoCard
|
||||
type="file"
|
||||
type="file_v2"
|
||||
color="#40916C"
|
||||
/>
|
||||
|
||||
@@ -48,7 +48,7 @@ Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `files` | array | Array of parsed files |
|
||||
| `combinedContent` | string | Combined content of all parsed files |
|
||||
| `files` | array | Array of parsed files with content, metadata, and file properties |
|
||||
| `combinedContent` | string | All file contents merged into a single text string |
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ description: Extract text from PDF documents
|
||||
import { BlockInfoCard } from "@/components/ui/block-info-card"
|
||||
|
||||
<BlockInfoCard
|
||||
type="mistral_parse"
|
||||
type="mistral_parse_v2"
|
||||
color="#000000"
|
||||
/>
|
||||
|
||||
@@ -57,15 +57,5 @@ Parse PDF documents using Mistral OCR API
|
||||
| `success` | boolean | Whether the PDF was parsed successfully |
|
||||
| `content` | string | Extracted content in the requested format \(markdown, text, or JSON\) |
|
||||
| `metadata` | object | Processing metadata including jobId, fileType, pageCount, and usage info |
|
||||
| ↳ `jobId` | string | Unique job identifier |
|
||||
| ↳ `fileType` | string | File type \(e.g., pdf\) |
|
||||
| ↳ `fileName` | string | Original file name |
|
||||
| ↳ `source` | string | Source type \(url\) |
|
||||
| ↳ `pageCount` | number | Number of pages processed |
|
||||
| ↳ `model` | string | Mistral model used |
|
||||
| ↳ `resultType` | string | Output format \(markdown, text, json\) |
|
||||
| ↳ `processedAt` | string | Processing timestamp |
|
||||
| ↳ `sourceUrl` | string | Source URL if applicable |
|
||||
| ↳ `usageInfo` | object | Usage statistics from OCR processing |
|
||||
|
||||
|
||||
|
||||
@@ -423,7 +423,12 @@ export async function POST(request: NextRequest) {
|
||||
|
||||
let fileUrl = validatedData.filePath
|
||||
|
||||
if (validatedData.filePath?.includes('/api/files/serve/')) {
|
||||
const isInternalFilePath =
|
||||
validatedData.filePath?.startsWith('/api/files/serve/') ||
|
||||
(validatedData.filePath?.startsWith('/') &&
|
||||
validatedData.filePath?.includes('/api/files/serve/'))
|
||||
|
||||
if (isInternalFilePath) {
|
||||
try {
|
||||
const storageKey = extractStorageKey(validatedData.filePath)
|
||||
const context = inferContextFromKey(storageKey)
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { DocumentIcon } from '@/components/icons'
|
||||
import type { BlockConfig, SubBlockType } from '@/blocks/types'
|
||||
import { createVersionedToolSelector } from '@/blocks/utils'
|
||||
import type { FileParserOutput } from '@/tools/file/types'
|
||||
|
||||
const logger = createLogger('FileBlock')
|
||||
|
||||
export const FileBlock: BlockConfig<FileParserOutput> = {
|
||||
type: 'file',
|
||||
name: 'File',
|
||||
name: 'File (Legacy)',
|
||||
description: 'Read and parse multiple files',
|
||||
longDescription: `Integrate File into the workflow. Can upload a file manually or insert a file url.`,
|
||||
bestPractices: `
|
||||
@@ -17,6 +18,7 @@ export const FileBlock: BlockConfig<FileParserOutput> = {
|
||||
category: 'tools',
|
||||
bgColor: '#40916C',
|
||||
icon: DocumentIcon,
|
||||
hideFromToolbar: true,
|
||||
subBlocks: [
|
||||
{
|
||||
id: 'inputMethod',
|
||||
@@ -123,3 +125,92 @@ export const FileBlock: BlockConfig<FileParserOutput> = {
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * Version 2 of the File block.
 *
 * Starts from every property of `FileBlock` and overrides the type, display
 * name, description, toolbar visibility, sub-blocks, tool wiring, inputs and
 * outputs. Where the legacy block is hidden from the toolbar, this one is shown.
 *
 * Two sub-blocks are declared: a multi-file upload (mode `basic`) and a
 * free-text field (mode `advanced`). Both carry the canonical parameter id
 * `fileInput`.
 */
export const FileV2Block: BlockConfig<FileParserOutput> = {
  ...FileBlock,
  type: 'file_v2',
  name: 'File',
  description: 'Read and parse multiple files',
  hideFromToolbar: false,
  subBlocks: [
    // Basic mode: upload one or more files (size limit value: 100).
    {
      id: 'file',
      title: 'Files',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'fileInput',
      acceptedTypes:
        '.pdf,.csv,.doc,.docx,.txt,.md,.xlsx,.xls,.html,.htm,.pptx,.ppt,.json,.xml,.rtf',
      placeholder: 'Upload files to process',
      multiple: true,
      mode: 'basic',
      maxSize: 100,
    },
    // Advanced mode: a URL or a reference to a previous block's output.
    {
      id: 'filePath',
      title: 'Files',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'fileInput',
      placeholder: 'File URL or reference from previous block',
      mode: 'advanced',
    },
  ],
  tools: {
    access: ['file_parser_v2'],
    config: {
      // NOTE(review): presumably resolves to 'file_parser' + '_v2', falling back to
      // 'file_parser_v2' - confirm against createVersionedToolSelector.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'file_parser',
        suffix: '_v2',
        fallbackToolId: 'file_parser_v2',
      }),
      /**
       * Normalizes whichever file input was supplied into the parser tool's
       * `filePath` / `fileType` parameters.
       *
       * Candidate sources are checked in the order `file`, `filePath`,
       * `fileInput`; the first truthy one is used.
       *
       * @throws Error 'File is required' when no source is truthy.
       * @throws Error 'Invalid file input' when the source is neither a string,
       *   a non-empty array, nor an object with a truthy `path`.
       */
      params: (params) => {
        const source = params.file || params.filePath || params.fileInput
        if (!source) {
          logger.error('No file input provided')
          throw new Error('File is required')
        }

        // Every successful branch uses the same file-type default.
        const fileType = params.fileType || 'auto'

        // Plain string: treated as a path/URL.
        // NOTE(review): this is the only branch that forwards workspaceId -
        // confirm the upload branches do not need it.
        if (typeof source === 'string') {
          const workspaceId = params._context?.workspaceId
          return { filePath: source.trim(), fileType, workspaceId }
        }

        // Non-empty list of file objects: a lone entry is passed as a single
        // path, several entries as an array of paths.
        const isNonEmptyList = Array.isArray(source) && source.length > 0
        if (isNonEmptyList) {
          const paths = source.map((entry) => entry.path)
          if (paths.length === 1) {
            return { filePath: paths[0], fileType }
          }
          return { filePath: paths, fileType }
        }

        // Single file object carrying its own path.
        if (source?.path) {
          return { filePath: source.path, fileType }
        }

        logger.error('Invalid file input format')
        throw new Error('Invalid file input')
      },
    },
  },
  inputs: {
    fileInput: { type: 'json', description: 'File input (upload or URL reference)' },
    filePath: { type: 'string', description: 'File URL (advanced mode)' },
    file: { type: 'json', description: 'Uploaded file data (basic mode)' },
    fileType: { type: 'string', description: 'File type' },
  },
  outputs: {
    files: {
      type: 'json',
      description: 'Array of parsed file objects with content, metadata, and file properties',
    },
    combinedContent: {
      type: 'string',
      description: 'All file contents merged into a single text string',
    },
  },
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import { MistralIcon } from '@/components/icons'
|
||||
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
|
||||
import { createVersionedToolSelector } from '@/blocks/utils'
|
||||
import type { MistralParserOutput } from '@/tools/mistral/types'
|
||||
|
||||
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
type: 'mistral_parse',
|
||||
name: 'Mistral Parser',
|
||||
name: 'Mistral Parser (Legacy)',
|
||||
description: 'Extract text from PDF documents',
|
||||
hideFromToolbar: true,
|
||||
authMode: AuthMode.ApiKey,
|
||||
longDescription: `Integrate Mistral Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`,
|
||||
docsLink: 'https://docs.sim.ai/tools/mistral_parse',
|
||||
@@ -13,7 +15,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
bgColor: '#000000',
|
||||
icon: MistralIcon,
|
||||
subBlocks: [
|
||||
// Show input method selection
|
||||
{
|
||||
id: 'inputMethod',
|
||||
title: 'Select Input Method',
|
||||
@@ -23,8 +24,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
{ id: 'upload', label: 'Upload PDF Document' },
|
||||
],
|
||||
},
|
||||
|
||||
// URL input - conditional on inputMethod
|
||||
{
|
||||
id: 'filePath',
|
||||
title: 'PDF Document URL',
|
||||
@@ -35,8 +34,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
value: 'url',
|
||||
},
|
||||
},
|
||||
|
||||
// File upload option
|
||||
{
|
||||
id: 'fileUpload',
|
||||
title: 'Upload PDF',
|
||||
@@ -46,9 +43,8 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
field: 'inputMethod',
|
||||
value: 'upload',
|
||||
},
|
||||
maxSize: 50, // 50MB max via direct upload
|
||||
maxSize: 50,
|
||||
},
|
||||
|
||||
{
|
||||
id: 'resultType',
|
||||
title: 'Output Format',
|
||||
@@ -65,28 +61,6 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
|
||||
},
|
||||
/*
|
||||
* Image-related parameters - temporarily disabled
|
||||
* Uncomment if PDF image extraction is needed
|
||||
*
|
||||
{
|
||||
id: 'includeImageBase64',
|
||||
title: 'Include PDF Images',
|
||||
type: 'switch',
|
||||
},
|
||||
{
|
||||
id: 'imageLimit',
|
||||
title: 'Max Images',
|
||||
type: 'short-input',
|
||||
placeholder: 'Maximum number of images to extract',
|
||||
},
|
||||
{
|
||||
id: 'imageMinSize',
|
||||
title: 'Min Image Size (px)',
|
||||
type: 'short-input',
|
||||
placeholder: 'Min width/height in pixels',
|
||||
},
|
||||
*/
|
||||
{
|
||||
id: 'apiKey',
|
||||
title: 'API Key',
|
||||
@@ -101,18 +75,15 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
config: {
|
||||
tool: () => 'mistral_parser',
|
||||
params: (params) => {
|
||||
// Basic validation
|
||||
if (!params || !params.apiKey || params.apiKey.trim() === '') {
|
||||
throw new Error('Mistral API key is required')
|
||||
}
|
||||
|
||||
// Build parameters object - file processing is now handled at the tool level
|
||||
const parameters: any = {
|
||||
const parameters: Record<string, unknown> = {
|
||||
apiKey: params.apiKey.trim(),
|
||||
resultType: params.resultType || 'markdown',
|
||||
}
|
||||
|
||||
// Set filePath or fileUpload based on input method
|
||||
const inputMethod = params.inputMethod || 'url'
|
||||
if (inputMethod === 'url') {
|
||||
if (!params.filePath || params.filePath.trim() === '') {
|
||||
@@ -123,11 +94,9 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
if (!params.fileUpload) {
|
||||
throw new Error('Please upload a PDF document')
|
||||
}
|
||||
// Pass the entire fileUpload object to the tool
|
||||
parameters.fileUpload = params.fileUpload
|
||||
}
|
||||
|
||||
// Convert pages input from string to array of numbers if provided
|
||||
let pagesArray: number[] | undefined
|
||||
if (params.pages && params.pages.trim() !== '') {
|
||||
try {
|
||||
@@ -146,12 +115,12 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
if (pagesArray && pagesArray.length === 0) {
|
||||
pagesArray = undefined
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(`Page number format error: ${error.message}`)
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
throw new Error(`Page number format error: ${errorMessage}`)
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional parameters
|
||||
if (pagesArray && pagesArray.length > 0) {
|
||||
parameters.pages = pagesArray
|
||||
}
|
||||
@@ -173,3 +142,127 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
metadata: { type: 'json', description: 'Processing metadata' },
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * Version 2 of the Mistral Parser block.
 *
 * Starts from every property of `MistralParseBlock` and overrides the type,
 * display name, description, toolbar visibility, sub-blocks, tool wiring,
 * inputs and outputs. The document can be supplied through a file upload
 * (mode `basic`) or a free-text field (mode `advanced`); both sub-blocks carry
 * the canonical parameter id `document`.
 */
export const MistralParseV2Block: BlockConfig<MistralParserOutput> = {
  ...MistralParseBlock,
  type: 'mistral_parse_v2',
  name: 'Mistral Parser',
  description: 'Extract text from PDF documents',
  hideFromToolbar: false,
  subBlocks: [
    // Basic mode: upload a PDF (size limit value: 50).
    {
      id: 'fileUpload',
      title: 'PDF Document',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'document',
      acceptedTypes: 'application/pdf',
      placeholder: 'Upload a PDF document',
      mode: 'basic',
      maxSize: 50,
    },
    // Advanced mode: a URL or a reference to a previous block's output.
    {
      id: 'filePath',
      title: 'PDF Document',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'document',
      placeholder: 'Document URL or reference from previous block',
      mode: 'advanced',
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown (Formatted)' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON (Raw)' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser_v2'],
    config: {
      // NOTE(review): presumably resolves to 'mistral_parser' + '_v2', falling back to
      // 'mistral_parser_v2' - confirm against createVersionedToolSelector.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'mistral_parser',
        suffix: '_v2',
        fallbackToolId: 'mistral_parser_v2',
      }),
      /**
       * Builds the parser tool parameters from the block's inputs.
       *
       * - `apiKey` is required and trimmed.
       * - `resultType` defaults to 'markdown'.
       * - The document is taken from `fileUpload`, `filePath` or `document`
       *   (first truthy wins). Objects are forwarded as `fileUpload`, strings
       *   are trimmed and forwarded as `filePath`.
       * - `pages` is an optional comma-separated list of non-negative integers;
       *   empty tokens are ignored.
       *
       * @throws Error 'Mistral API key is required' when the key is missing or blank.
       * @throws Error 'PDF document is required' when no document is supplied or
       *   the supplied string is blank.
       * @throws Error 'Page number format error: ...' when a page token is not a
       *   non-negative integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        const documentInput = params.fileUpload || params.filePath || params.document
        if (!documentInput) {
          throw new Error('PDF document is required')
        }
        if (typeof documentInput === 'object') {
          parameters.fileUpload = documentInput
        } else if (typeof documentInput === 'string') {
          // A whitespace-only string is truthy and would otherwise be forwarded
          // as an empty path; reject it like the legacy block's URL check does.
          const trimmedPath = documentInput.trim()
          if (trimmedPath === '') {
            throw new Error('PDF document is required')
          }
          parameters.filePath = trimmedPath
        }

        let pagesArray: number[] | undefined
        if (params.pages && params.pages.trim() !== '') {
          // parseInt alone accepts any token with a numeric prefix ("1-3" -> 1,
          // "2x" -> 2), silently parsing a different page than the user typed.
          // Require digits only before converting.
          const digitsOnly = /^\d+$/
          try {
            pagesArray = params.pages
              .split(',')
              .map((p: string) => p.trim())
              .filter((p: string) => p.length > 0)
              .map((p: string) => {
                if (!digitsOnly.test(p)) {
                  throw new Error(`Invalid page number: ${p}`)
                }
                return Number.parseInt(p, 10)
              })
          } catch (error: unknown) {
            const errorMessage = error instanceof Error ? error.message : String(error)
            throw new Error(`Page number format error: ${errorMessage}`)
          }
        }

        // Input made only of separators (e.g. ",,") yields no pages; omit the
        // parameter so all pages are requested.
        if (pagesArray && pagesArray.length > 0) {
          parameters.pages = pagesArray
        }

        return parameters
      },
    },
  },
  inputs: {
    document: { type: 'json', description: 'Document input (file upload or URL reference)' },
    filePath: { type: 'string', description: 'PDF document URL (advanced mode)' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    content: { type: 'string', description: 'Extracted content' },
    metadata: { type: 'json', description: 'Processing metadata' },
  },
}
|
||||
|
||||
@@ -15,34 +15,22 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
|
||||
icon: PulseIcon,
|
||||
subBlocks: [
|
||||
{
|
||||
id: 'inputMethod',
|
||||
title: 'Select Input Method',
|
||||
type: 'dropdown' as SubBlockType,
|
||||
options: [
|
||||
{ id: 'url', label: 'Document URL' },
|
||||
{ id: 'upload', label: 'Upload Document' },
|
||||
],
|
||||
id: 'fileUpload',
|
||||
title: 'Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
canonicalParamId: 'document',
|
||||
acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx',
|
||||
placeholder: 'Upload a document',
|
||||
mode: 'basic',
|
||||
maxSize: 50,
|
||||
},
|
||||
{
|
||||
id: 'filePath',
|
||||
title: 'Document URL',
|
||||
title: 'Document',
|
||||
type: 'short-input' as SubBlockType,
|
||||
placeholder: 'Enter full URL to a document (https://example.com/document.pdf)',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'url',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'fileUpload',
|
||||
title: 'Upload Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'upload',
|
||||
},
|
||||
maxSize: 50,
|
||||
canonicalParamId: 'document',
|
||||
placeholder: 'Document URL or reference from previous block',
|
||||
mode: 'advanced',
|
||||
},
|
||||
{
|
||||
id: 'pages',
|
||||
@@ -84,17 +72,14 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
|
||||
apiKey: params.apiKey.trim(),
|
||||
}
|
||||
|
||||
const inputMethod = params.inputMethod || 'url'
|
||||
if (inputMethod === 'url') {
|
||||
if (!params.filePath || params.filePath.trim() === '') {
|
||||
throw new Error('Document URL is required')
|
||||
}
|
||||
parameters.filePath = params.filePath.trim()
|
||||
} else if (inputMethod === 'upload') {
|
||||
if (!params.fileUpload) {
|
||||
throw new Error('Please upload a document')
|
||||
}
|
||||
parameters.fileUpload = params.fileUpload
|
||||
const documentInput = params.fileUpload || params.filePath || params.document
|
||||
if (!documentInput) {
|
||||
throw new Error('Document is required')
|
||||
}
|
||||
if (typeof documentInput === 'object') {
|
||||
parameters.fileUpload = documentInput
|
||||
} else if (typeof documentInput === 'string') {
|
||||
parameters.filePath = documentInput.trim()
|
||||
}
|
||||
|
||||
if (params.pages && params.pages.trim() !== '') {
|
||||
@@ -117,9 +102,9 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
|
||||
},
|
||||
},
|
||||
inputs: {
|
||||
inputMethod: { type: 'string', description: 'Input method selection' },
|
||||
filePath: { type: 'string', description: 'Document URL' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded document file' },
|
||||
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
|
||||
filePath: { type: 'string', description: 'Document URL (advanced mode)' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' },
|
||||
apiKey: { type: 'string', description: 'Pulse API key' },
|
||||
pages: { type: 'string', description: 'Page range selection' },
|
||||
chunking: {
|
||||
|
||||
@@ -14,34 +14,22 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
|
||||
icon: ReductoIcon,
|
||||
subBlocks: [
|
||||
{
|
||||
id: 'inputMethod',
|
||||
title: 'Select Input Method',
|
||||
type: 'dropdown' as SubBlockType,
|
||||
options: [
|
||||
{ id: 'url', label: 'PDF Document URL' },
|
||||
{ id: 'upload', label: 'Upload PDF Document' },
|
||||
],
|
||||
id: 'fileUpload',
|
||||
title: 'PDF Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
canonicalParamId: 'document',
|
||||
acceptedTypes: 'application/pdf',
|
||||
placeholder: 'Upload a PDF document',
|
||||
mode: 'basic',
|
||||
maxSize: 50,
|
||||
},
|
||||
{
|
||||
id: 'filePath',
|
||||
title: 'PDF Document URL',
|
||||
title: 'PDF Document',
|
||||
type: 'short-input' as SubBlockType,
|
||||
placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'url',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'fileUpload',
|
||||
title: 'Upload PDF',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
acceptedTypes: 'application/pdf',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'upload',
|
||||
},
|
||||
maxSize: 50,
|
||||
canonicalParamId: 'document',
|
||||
placeholder: 'Document URL or reference from previous block',
|
||||
mode: 'advanced',
|
||||
},
|
||||
{
|
||||
id: 'pages',
|
||||
@@ -80,17 +68,15 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
|
||||
apiKey: params.apiKey.trim(),
|
||||
}
|
||||
|
||||
const inputMethod = params.inputMethod || 'url'
|
||||
if (inputMethod === 'url') {
|
||||
if (!params.filePath || params.filePath.trim() === '') {
|
||||
throw new Error('PDF Document URL is required')
|
||||
}
|
||||
parameters.filePath = params.filePath.trim()
|
||||
} else if (inputMethod === 'upload') {
|
||||
if (!params.fileUpload) {
|
||||
throw new Error('Please upload a PDF document')
|
||||
}
|
||||
parameters.fileUpload = params.fileUpload
|
||||
const documentInput = params.fileUpload || params.filePath || params.document
|
||||
if (!documentInput) {
|
||||
throw new Error('PDF document is required')
|
||||
}
|
||||
|
||||
if (typeof documentInput === 'object') {
|
||||
parameters.fileUpload = documentInput
|
||||
} else if (typeof documentInput === 'string') {
|
||||
parameters.filePath = documentInput.trim()
|
||||
}
|
||||
|
||||
let pagesArray: number[] | undefined
|
||||
@@ -130,9 +116,9 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
|
||||
},
|
||||
},
|
||||
inputs: {
|
||||
inputMethod: { type: 'string', description: 'Input method selection' },
|
||||
filePath: { type: 'string', description: 'PDF document URL' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded PDF file' },
|
||||
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
|
||||
filePath: { type: 'string', description: 'PDF document URL (advanced mode)' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
|
||||
apiKey: { type: 'string', description: 'Reducto API key' },
|
||||
pages: { type: 'string', description: 'Page selection' },
|
||||
tableOutputFormat: { type: 'string', description: 'Table output format' },
|
||||
|
||||
@@ -23,33 +23,32 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'inputMethod',
|
||||
title: 'Select Input Method',
|
||||
type: 'dropdown' as SubBlockType,
|
||||
options: [
|
||||
{ id: 'url', label: 'Document URL' },
|
||||
{ id: 'upload', label: 'Upload Document' },
|
||||
],
|
||||
id: 'fileUpload',
|
||||
title: 'Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
canonicalParamId: 'document',
|
||||
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
|
||||
placeholder: 'Upload a document',
|
||||
condition: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
not: true,
|
||||
},
|
||||
mode: 'basic',
|
||||
maxSize: 10,
|
||||
},
|
||||
{
|
||||
id: 'filePath',
|
||||
title: 'Document URL',
|
||||
title: 'Document',
|
||||
type: 'short-input' as SubBlockType,
|
||||
placeholder: 'Enter full URL to a document (JPEG, PNG, or single-page PDF)',
|
||||
canonicalParamId: 'document',
|
||||
placeholder: 'Document URL or reference from previous block',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'url',
|
||||
and: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
not: true,
|
||||
},
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
not: true,
|
||||
},
|
||||
mode: 'advanced',
|
||||
},
|
||||
{
|
||||
id: 's3Uri',
|
||||
@@ -61,22 +60,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
value: 'async',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'fileUpload',
|
||||
title: 'Upload Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
|
||||
condition: {
|
||||
field: 'inputMethod',
|
||||
value: 'upload',
|
||||
and: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
not: true,
|
||||
},
|
||||
},
|
||||
maxSize: 10,
|
||||
},
|
||||
{
|
||||
id: 'region',
|
||||
title: 'AWS Region',
|
||||
@@ -150,17 +133,14 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
}
|
||||
parameters.s3Uri = params.s3Uri.trim()
|
||||
} else {
|
||||
const inputMethod = params.inputMethod || 'url'
|
||||
if (inputMethod === 'url') {
|
||||
if (!params.filePath || params.filePath.trim() === '') {
|
||||
throw new Error('Document URL is required')
|
||||
}
|
||||
parameters.filePath = params.filePath.trim()
|
||||
} else if (inputMethod === 'upload') {
|
||||
if (!params.fileUpload) {
|
||||
throw new Error('Please upload a document')
|
||||
}
|
||||
parameters.fileUpload = params.fileUpload
|
||||
const documentInput = params.fileUpload || params.filePath || params.document
|
||||
if (!documentInput) {
|
||||
throw new Error('Document is required')
|
||||
}
|
||||
if (typeof documentInput === 'object') {
|
||||
parameters.fileUpload = documentInput
|
||||
} else if (typeof documentInput === 'string') {
|
||||
parameters.filePath = documentInput.trim()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,10 +160,10 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
},
|
||||
inputs: {
|
||||
processingMode: { type: 'string', description: 'Document type: single-page or multi-page' },
|
||||
inputMethod: { type: 'string', description: 'Input method selection for single-page mode' },
|
||||
filePath: { type: 'string', description: 'Document URL' },
|
||||
document: { type: 'json', description: 'Document input (file upload or URL reference)' },
|
||||
filePath: { type: 'string', description: 'Document URL (advanced mode)' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded document file (basic mode)' },
|
||||
s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' },
|
||||
extractTables: { type: 'boolean', description: 'Extract tables from document' },
|
||||
extractForms: { type: 'boolean', description: 'Extract form key-value pairs' },
|
||||
detectSignatures: { type: 'boolean', description: 'Detect signatures' },
|
||||
|
||||
@@ -25,7 +25,7 @@ import { ElasticsearchBlock } from '@/blocks/blocks/elasticsearch'
|
||||
import { ElevenLabsBlock } from '@/blocks/blocks/elevenlabs'
|
||||
import { EvaluatorBlock } from '@/blocks/blocks/evaluator'
|
||||
import { ExaBlock } from '@/blocks/blocks/exa'
|
||||
import { FileBlock } from '@/blocks/blocks/file'
|
||||
import { FileBlock, FileV2Block } from '@/blocks/blocks/file'
|
||||
import { FirecrawlBlock } from '@/blocks/blocks/firecrawl'
|
||||
import { FirefliesBlock } from '@/blocks/blocks/fireflies'
|
||||
import { FunctionBlock } from '@/blocks/blocks/function'
|
||||
@@ -74,7 +74,7 @@ import { MemoryBlock } from '@/blocks/blocks/memory'
|
||||
import { MicrosoftExcelBlock, MicrosoftExcelV2Block } from '@/blocks/blocks/microsoft_excel'
|
||||
import { MicrosoftPlannerBlock } from '@/blocks/blocks/microsoft_planner'
|
||||
import { MicrosoftTeamsBlock } from '@/blocks/blocks/microsoft_teams'
|
||||
import { MistralParseBlock } from '@/blocks/blocks/mistral_parse'
|
||||
import { MistralParseBlock, MistralParseV2Block } from '@/blocks/blocks/mistral_parse'
|
||||
import { MongoDBBlock } from '@/blocks/blocks/mongodb'
|
||||
import { MySQLBlock } from '@/blocks/blocks/mysql'
|
||||
import { Neo4jBlock } from '@/blocks/blocks/neo4j'
|
||||
@@ -182,6 +182,7 @@ export const registry: Record<string, BlockConfig> = {
|
||||
evaluator: EvaluatorBlock,
|
||||
exa: ExaBlock,
|
||||
file: FileBlock,
|
||||
file_v2: FileV2Block,
|
||||
firecrawl: FirecrawlBlock,
|
||||
fireflies: FirefliesBlock,
|
||||
function: FunctionBlock,
|
||||
@@ -237,6 +238,7 @@ export const registry: Record<string, BlockConfig> = {
|
||||
microsoft_planner: MicrosoftPlannerBlock,
|
||||
microsoft_teams: MicrosoftTeamsBlock,
|
||||
mistral_parse: MistralParseBlock,
|
||||
mistral_parse_v2: MistralParseV2Block,
|
||||
mongodb: MongoDBBlock,
|
||||
mysql: MySQLBlock,
|
||||
neo4j: Neo4jBlock,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { fileParserTool } from '@/tools/file/parser'
|
||||
import { fileParserTool, fileParserV2Tool } from '@/tools/file/parser'
|
||||
|
||||
export const fileParseTool = fileParserTool
|
||||
export { fileParserV2Tool }
|
||||
|
||||
@@ -146,3 +146,25 @@ export const fileParserTool: ToolConfig<FileParserInput, FileParserOutput> = {
|
||||
combinedContent: { type: 'string', description: 'Combined content of all parsed files' },
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * Version 2 of the file parser tool, registered under the id `file_parser_v2`.
 *
 * Parameter schema, request builder and response transformer are the same
 * objects used by `fileParserTool`; only the id, version and output
 * descriptions are declared here.
 */
export const fileParserV2Tool: ToolConfig<FileParserInput, FileParserOutput> = {
  id: 'file_parser_v2',
  name: 'File Parser',
  description: 'Parse one or more uploaded files or files from URLs (text, PDF, CSV, images, etc.)',
  version: '2.0.0',

  // Shared with the v1 tool by reference.
  params: fileParserTool.params,
  request: fileParserTool.request,
  transformResponse: fileParserTool.transformResponse,

  outputs: {
    files: {
      description: 'Array of parsed files with content, metadata, and file properties',
      type: 'array',
    },
    combinedContent: {
      description: 'All file contents merged into a single text string',
      type: 'string',
    },
  },
}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
import { mistralParserTool } from '@/tools/mistral/parser'
|
||||
import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral/parser'
|
||||
|
||||
export { mistralParserTool }
|
||||
export { mistralParserTool, mistralParserV2Tool }
|
||||
|
||||
@@ -415,3 +415,26 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * Version 2 of the Mistral parser tool, registered under the id
 * `mistral_parser_v2`.
 *
 * Parameter schema, request builder and response transformer are the same
 * objects used by `mistralParserTool`; only the id, version and output
 * descriptions are declared here.
 */
export const mistralParserV2Tool: ToolConfig<MistralParserInput, MistralParserOutput> = {
  id: 'mistral_parser_v2',
  name: 'Mistral PDF Parser',
  description: 'Parse PDF documents using Mistral OCR API',
  version: '2.0.0',

  // Shared with the v1 tool by reference.
  params: mistralParserTool.params,
  request: mistralParserTool.request,
  transformResponse: mistralParserTool.transformResponse,

  outputs: {
    success: {
      description: 'Whether the PDF was parsed successfully',
      type: 'boolean',
    },
    content: {
      description: 'Extracted content in the requested format (markdown, text, or JSON)',
      type: 'string',
    },
    metadata: {
      description: 'Processing metadata including jobId, fileType, pageCount, and usage info',
      type: 'object',
    },
  },
}
|
||||
|
||||
@@ -204,7 +204,7 @@ import {
|
||||
exaResearchTool,
|
||||
exaSearchTool,
|
||||
} from '@/tools/exa'
|
||||
import { fileParseTool } from '@/tools/file'
|
||||
import { fileParserV2Tool, fileParseTool } from '@/tools/file'
|
||||
import {
|
||||
firecrawlAgentTool,
|
||||
firecrawlCrawlTool,
|
||||
@@ -979,7 +979,7 @@ import {
|
||||
microsoftTeamsWriteChannelTool,
|
||||
microsoftTeamsWriteChatTool,
|
||||
} from '@/tools/microsoft_teams'
|
||||
import { mistralParserTool } from '@/tools/mistral'
|
||||
import { mistralParserTool, mistralParserV2Tool } from '@/tools/mistral'
|
||||
import {
|
||||
mongodbDeleteTool,
|
||||
mongodbExecuteTool,
|
||||
@@ -1683,6 +1683,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
function_execute: functionExecuteTool,
|
||||
vision_tool: visionTool,
|
||||
file_parser: fileParseTool,
|
||||
file_parser_v2: fileParserV2Tool,
|
||||
firecrawl_scrape: firecrawlScrapeTool,
|
||||
firecrawl_search: firecrawlSearchTool,
|
||||
firecrawl_crawl: firecrawlCrawlTool,
|
||||
@@ -2456,6 +2457,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
apollo_task_search: apolloTaskSearchTool,
|
||||
apollo_email_accounts: apolloEmailAccountsTool,
|
||||
mistral_parser: mistralParserTool,
|
||||
mistral_parser_v2: mistralParserV2Tool,
|
||||
reducto_parser: reductoParserTool,
|
||||
textract_parser: textractParserTool,
|
||||
thinking_tool: thinkingTool,
|
||||
|
||||
@@ -141,8 +141,8 @@ export const textractParserTool: ToolConfig<TextractParserInput, TextractParserO
|
||||
throw new Error('Invalid response format from Textract API')
|
||||
}
|
||||
|
||||
if (!apiResult.success && apiResult.error) {
|
||||
throw new Error(apiResult.error)
|
||||
if (!apiResult.success) {
|
||||
throw new Error(apiResult.error || 'Request failed')
|
||||
}
|
||||
|
||||
const textractData = apiResult.output ?? apiResult
|
||||
|
||||
Reference in New Issue
Block a user