Files
sim/apps/sim/blocks/blocks/mistral_parse.ts
Vikhyath Mondreti 5b0c2156e0 improvement(files): pass user file objects around consistently (#3119)
* improvement(collab): do not refetch active workflow id

* progress on files

* more integrations

* separate server and client logic

* consolidate more code

* fix integrations

* fix types

* consolidate more code

* fix tests

* fix more bugbot comments

* fix type check

* fix circular impport

* address more bugbot comments

* fix ocr integrations

* fix typing

* remove leftover type

* address bugbot comment

* fix file block adv mode

* fix

* normalize file input

* fix v2 blocmks for ocr

* fix for v2 versions

* fix more v2 blocks

* update single file blocks

* make interface simpler

* cleanup fireflies

* remove file only annotation

* accept all types

* added wand to ssh block

* user files should be passed through

* improve docs

* fix slack to include successful execs

* fix dropbox upload file

* fix sendgrid

* fix dropbox

* fix

* fix

* update skills

* fix uploaded file

---------

Co-authored-by: waleed <walif6@gmail.com>
2026-02-03 19:50:23 -08:00

272 lines
8.6 KiB
TypeScript

import { MistralIcon } from '@/components/icons'
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
import { createVersionedToolSelector, normalizeFileInput } from '@/blocks/utils'
import type { MistralParserOutput } from '@/tools/mistral/types'
/**
 * Legacy Mistral Parser block (`mistral_parse`).
 *
 * Hidden from the toolbar (`hideFromToolbar: true`). Lets the user choose
 * between a PDF URL and an uploaded PDF, then forwards the selection, the
 * output format, an optional page list and the API key to the
 * `mistral_parser` tool.
 */
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
  type: 'mistral_parse',
  name: 'Mistral Parser (Legacy)',
  description: 'Extract text from PDF documents',
  hideFromToolbar: true,
  authMode: AuthMode.ApiKey,
  longDescription: `Integrate Mistral Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`,
  docsLink: 'https://docs.sim.ai/tools/mistral_parse',
  category: 'tools',
  bgColor: '#000000',
  icon: MistralIcon,
  subBlocks: [
    // Selector that drives which of the two document inputs below is shown.
    {
      id: 'inputMethod',
      title: 'Select Input Method',
      type: 'dropdown' as SubBlockType,
      options: [
        { id: 'url', label: 'PDF Document URL' },
        { id: 'upload', label: 'Upload PDF Document' },
      ],
    },
    // Shown only when inputMethod === 'url'.
    {
      id: 'filePath',
      title: 'PDF Document URL',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
      condition: {
        field: 'inputMethod',
        value: 'url',
      },
    },
    // Shown only when inputMethod === 'upload'.
    {
      id: 'fileUpload',
      title: 'Upload PDF',
      type: 'file-upload' as SubBlockType,
      acceptedTypes: 'application/pdf',
      condition: {
        field: 'inputMethod',
        value: 'upload',
      },
      maxSize: 50,
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown (Formatted)' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON (Raw)' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser'],
    config: {
      tool: () => 'mistral_parser',
      /**
       * Builds the tool parameters from the block's sub-block values.
       *
       * @param params - Raw sub-block values keyed by sub-block id.
       * @returns Object with `apiKey`, `resultType`, either `filePath` or `file`,
       *   and `pages` (number[]) when a non-empty page list was supplied.
       * @throws Error when the API key is missing, the selected document input is
       *   empty, or the page list contains a token that is not a non-negative integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        // An unset input method is treated as 'url'.
        const inputMethod = params.inputMethod || 'url'
        if (inputMethod === 'url') {
          if (!params.filePath || params.filePath.trim() === '') {
            throw new Error('PDF Document URL is required')
          }
          parameters.filePath = params.filePath.trim()
        } else if (inputMethod === 'upload') {
          if (!params.fileUpload) {
            throw new Error('Please upload a PDF document')
          }
          parameters.file = params.fileUpload
        }

        if (params.pages && params.pages.trim() !== '') {
          const pagesArray: number[] = []
          for (const token of params.pages.split(',')) {
            const page = token.trim()
            // Tolerate stray separators such as "1,,2" or a trailing comma.
            if (page.length === 0) continue

            // Number.parseInt stops at the first non-digit, so "1-3", "2.5" or "4x"
            // would be silently truncated to 1, 2 and 4. Require the whole token to
            // be digits so malformed entries are rejected instead of misread.
            const num = /^\d+$/.test(page) ? Number.parseInt(page, 10) : Number.NaN
            if (!Number.isSafeInteger(num)) {
              throw new Error(`Page number format error: Invalid page number: ${page}`)
            }
            pagesArray.push(num)
          }

          // Omit `pages` entirely when only separators were entered.
          if (pagesArray.length > 0) {
            parameters.pages = pagesArray
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    inputMethod: { type: 'string', description: 'Input method selection' },
    filePath: { type: 'string', description: 'PDF document URL' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    content: { type: 'string', description: 'Extracted content' },
    metadata: { type: 'json', description: 'Processing metadata' },
  },
}
/**
 * Mistral Parser block, version 2 (`mistral_parse_v2`).
 *
 * Starts from `MistralParseBlock` via spread and overrides `type`, `name`,
 * `description`, `hideFromToolbar`, `subBlocks`, `tools`, `inputs` and
 * `outputs`; every other field (icon, colours, docs link, auth mode, ...) is
 * inherited unchanged. Unlike the legacy block it is visible in the toolbar
 * and takes the document as a file object (upload or reference) rather than
 * a URL.
 */
export const MistralParseV2Block: BlockConfig<MistralParserOutput> = {
  ...MistralParseBlock,
  type: 'mistral_parse_v2',
  name: 'Mistral Parser',
  description: 'Extract text from PDF documents',
  hideFromToolbar: false,
  subBlocks: [
    // Basic-mode and advanced-mode document inputs; both declare the same
    // canonicalParamId ('document').
    {
      id: 'fileUpload',
      title: 'PDF Document',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'document',
      acceptedTypes: 'application/pdf',
      placeholder: 'Upload a PDF document',
      mode: 'basic',
      maxSize: 50,
      required: true,
    },
    {
      id: 'fileReference',
      title: 'File Reference',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'document',
      placeholder: 'File reference from previous block',
      mode: 'advanced',
      required: true,
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser_v2'],
    config: {
      // NOTE(review): presumably resolves to 'mistral_parser' + '_v2'; confirm
      // against createVersionedToolSelector's implementation.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'mistral_parser',
        suffix: '_v2',
        fallbackToolId: 'mistral_parser_v2',
      }),
      /**
       * Builds the tool parameters from the block's sub-block values.
       *
       * @param params - Raw sub-block values keyed by sub-block id.
       * @returns Object with `apiKey`, `resultType`, `file`, and `pages`
       *   (number[]) when a non-empty page list was supplied.
       * @throws Error when the API key is missing, no document could be
       *   resolved, or the page list contains a token that is not a
       *   non-negative integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        // The first truthy value among the three possible document sources wins;
        // a falsy result from normalizeFileInput is treated as "no document".
        const documentInput = normalizeFileInput(
          params.fileUpload || params.fileReference || params.document,
          { single: true }
        )
        if (!documentInput) {
          throw new Error('PDF document is required')
        }
        parameters.file = documentInput

        if (params.pages && params.pages.trim() !== '') {
          const pagesArray: number[] = []
          for (const token of params.pages.split(',')) {
            const page = token.trim()
            // Tolerate stray separators such as "1,,2" or a trailing comma.
            if (page.length === 0) continue

            // Number.parseInt stops at the first non-digit, so "1-3", "2.5" or "4x"
            // would be silently truncated to 1, 2 and 4. Require the whole token to
            // be digits so malformed entries are rejected instead of misread.
            const num = /^\d+$/.test(page) ? Number.parseInt(page, 10) : Number.NaN
            if (!Number.isSafeInteger(num)) {
              throw new Error(`Page number format error: Invalid page number: ${page}`)
            }
            pagesArray.push(num)
          }

          // Omit `pages` entirely when only separators were entered.
          if (pagesArray.length > 0) {
            parameters.pages = pagesArray
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    document: { type: 'json', description: 'Document input (file upload or file reference)' },
    fileReference: { type: 'json', description: 'File reference (advanced mode)' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    pages: { type: 'array', description: 'Array of page objects from Mistral OCR' },
    model: { type: 'string', description: 'Mistral OCR model identifier' },
    usage_info: { type: 'json', description: 'Usage statistics from the API' },
    document_annotation: { type: 'string', description: 'Structured annotation data' },
  },
}