mirror of
https://github.com/simstudioai/sim.git
synced 2026-02-07 21:25:38 -05:00
* improvement(collab): do not refetch active workflow id * progress on files * more integrations * separate server and client logic * consolidate more code * fix integrations * fix types * consolidate more code * fix tests * fix more bugbot comments * fix type check * fix circular impport * address more bugbot comments * fix ocr integrations * fix typing * remove leftover type * address bugbot comment * fix file block adv mode * fix * normalize file input * fix v2 blocmks for ocr * fix for v2 versions * fix more v2 blocks * update single file blocks * make interface simpler * cleanup fireflies * remove file only annotation * accept all types * added wand to ssh block * user files should be passed through * improve docs * fix slack to include successful execs * fix dropbox upload file * fix sendgrid * fix dropbox * fix * fix * update skills * fix uploaded file --------- Co-authored-by: waleed <walif6@gmail.com>
272 lines
8.6 KiB
TypeScript
272 lines
8.6 KiB
TypeScript
import { MistralIcon } from '@/components/icons'
|
|
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
|
|
import { createVersionedToolSelector, normalizeFileInput } from '@/blocks/utils'
|
|
import type { MistralParserOutput } from '@/tools/mistral/types'
|
|
|
|
/**
 * Legacy Mistral parser block (`mistral_parse`).
 *
 * Hidden from the toolbar. The user supplies a PDF either as a URL or as an
 * upload, picks an output format, and may restrict parsing to specific pages.
 * Every request is routed to the `mistral_parser` tool.
 */
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
  type: 'mistral_parse',
  name: 'Mistral Parser (Legacy)',
  description: 'Extract text from PDF documents',
  hideFromToolbar: true,
  authMode: AuthMode.ApiKey,
  longDescription: `Integrate Mistral Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`,
  docsLink: 'https://docs.sim.ai/tools/mistral_parse',
  category: 'tools',
  bgColor: '#000000',
  icon: MistralIcon,
  subBlocks: [
    // Chooses which of the two document inputs below is shown.
    {
      id: 'inputMethod',
      title: 'Select Input Method',
      type: 'dropdown' as SubBlockType,
      options: [
        { id: 'url', label: 'PDF Document URL' },
        { id: 'upload', label: 'Upload PDF Document' },
      ],
    },
    // Shown only when inputMethod === 'url'.
    {
      id: 'filePath',
      title: 'PDF Document URL',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
      condition: {
        field: 'inputMethod',
        value: 'url',
      },
    },
    // Shown only when inputMethod === 'upload'.
    {
      id: 'fileUpload',
      title: 'Upload PDF',
      type: 'file-upload' as SubBlockType,
      acceptedTypes: 'application/pdf',
      condition: {
        field: 'inputMethod',
        value: 'upload',
      },
      maxSize: 50,
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown (Formatted)' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON (Raw)' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser'],
    config: {
      tool: () => 'mistral_parser',
      /**
       * Translates the block's sub-block values into the tool parameters.
       *
       * @param params - Raw sub-block values keyed by sub-block id.
       * @returns An object with `apiKey`, `resultType` (defaults to
       *   `'markdown'`), either `filePath` (URL mode) or `file` (upload mode),
       *   and `pages` as an array of non-negative integers when any were given.
       * @throws Error when the API key is missing or blank, when the selected
       *   input method has no document, or when a page entry is not a plain
       *   non-negative integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        // An unset input method is treated as URL mode.
        // NOTE(review): any other value falls through with no document attached;
        // confirm whether that case should be rejected here.
        const inputMethod = params.inputMethod || 'url'
        if (inputMethod === 'url') {
          if (!params.filePath || params.filePath.trim() === '') {
            throw new Error('PDF Document URL is required')
          }
          parameters.filePath = params.filePath.trim()
        } else if (inputMethod === 'upload') {
          if (!params.fileUpload) {
            throw new Error('Please upload a PDF document')
          }
          parameters.file = params.fileUpload
        }

        // Coerce to text so a truthy non-string value (e.g. an array of numbers
        // handed over by another block) is parsed instead of crashing on `.trim()`.
        const pagesText = params.pages ? String(params.pages).trim() : ''
        if (pagesText !== '') {
          // `parseInt` would silently truncate entries such as "2-5", "1.5" or
          // "3abc"; require every entry to consist of digits only.
          const digitsOnly = /^\d+$/
          const pageNumbers: number[] = []

          for (const token of pagesText.split(',')) {
            const entry = token.trim()
            if (entry.length === 0) {
              // Tolerate stray commas such as "1,,2" or a trailing ",".
              continue
            }

            const page = Number(entry)
            if (!digitsOnly.test(entry) || !Number.isSafeInteger(page)) {
              throw new Error(`Page number format error: Invalid page number: ${entry}`)
            }
            pageNumbers.push(page)
          }

          // Omit `pages` entirely when nothing usable was supplied.
          if (pageNumbers.length > 0) {
            parameters.pages = pageNumbers
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    inputMethod: { type: 'string', description: 'Input method selection' },
    filePath: { type: 'string', description: 'PDF document URL' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    content: { type: 'string', description: 'Extracted content' },
    metadata: { type: 'json', description: 'Processing metadata' },
  },
}
|
|
|
|
/**
 * Current Mistral parser block (`mistral_parse_v2`), shown in the toolbar.
 *
 * Starts from the legacy `MistralParseBlock` via spread and overrides `type`,
 * `name`, `description`, `hideFromToolbar`, `subBlocks`, `tools`, `inputs` and
 * `outputs`; everything else (icon, colours, auth mode, docs link, ...) is
 * inherited unchanged. The document is supplied either as an upload (basic
 * mode) or as a file reference (advanced mode); both sub-blocks share the
 * canonical parameter id `document`.
 */
export const MistralParseV2Block: BlockConfig<MistralParserOutput> = {
  ...MistralParseBlock,
  type: 'mistral_parse_v2',
  name: 'Mistral Parser',
  description: 'Extract text from PDF documents',
  hideFromToolbar: false,
  subBlocks: [
    // Basic mode: direct upload.
    {
      id: 'fileUpload',
      title: 'PDF Document',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'document',
      acceptedTypes: 'application/pdf',
      placeholder: 'Upload a PDF document',
      mode: 'basic',
      maxSize: 50,
      required: true,
    },
    // Advanced mode: reference to a file produced elsewhere in the workflow.
    {
      id: 'fileReference',
      title: 'File Reference',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'document',
      placeholder: 'File reference from previous block',
      mode: 'advanced',
      required: true,
    },
    {
      id: 'resultType',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown' },
        { id: 'text', label: 'Plain Text' },
        { id: 'json', label: 'JSON' },
      ],
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 0,1,2 (leave empty for all pages)',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Mistral API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['mistral_parser_v2'],
    config: {
      // Tool id resolution is delegated to the shared helper, configured with
      // the legacy base id, the `_v2` suffix and an explicit fallback id.
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'mistral_parser',
        suffix: '_v2',
        fallbackToolId: 'mistral_parser_v2',
      }),
      /**
       * Translates the block's sub-block values into the tool parameters.
       *
       * @param params - Raw sub-block values keyed by sub-block id (or by the
       *   canonical `document` id).
       * @returns An object with `apiKey`, `resultType` (defaults to
       *   `'markdown'`), `file`, and `pages` as an array of non-negative
       *   integers when any were given.
       * @throws Error when the API key is missing or blank, when no document
       *   could be resolved, or when a page entry is not a plain non-negative
       *   integer.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Mistral API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
          resultType: params.resultType || 'markdown',
        }

        // First non-empty source wins: upload, then reference, then canonical id.
        // NOTE(review): `single: true` presumably collapses the input to one
        // file object; confirm against `normalizeFileInput`.
        const documentInput = normalizeFileInput(
          params.fileUpload || params.fileReference || params.document,
          { single: true }
        )
        if (!documentInput) {
          throw new Error('PDF document is required')
        }
        parameters.file = documentInput

        // Coerce to text so a truthy non-string value (e.g. an array of numbers
        // handed over by another block) is parsed instead of crashing on `.trim()`.
        const pagesText = params.pages ? String(params.pages).trim() : ''
        if (pagesText !== '') {
          // `parseInt` would silently truncate entries such as "2-5", "1.5" or
          // "3abc"; require every entry to consist of digits only.
          const digitsOnly = /^\d+$/
          const pageNumbers: number[] = []

          for (const token of pagesText.split(',')) {
            const entry = token.trim()
            if (entry.length === 0) {
              // Tolerate stray commas such as "1,,2" or a trailing ",".
              continue
            }

            const page = Number(entry)
            if (!digitsOnly.test(entry) || !Number.isSafeInteger(page)) {
              throw new Error(`Page number format error: Invalid page number: ${entry}`)
            }
            pageNumbers.push(page)
          }

          // Omit `pages` entirely when nothing usable was supplied.
          if (pageNumbers.length > 0) {
            parameters.pages = pageNumbers
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    document: { type: 'json', description: 'Document input (file upload or file reference)' },
    fileReference: { type: 'json', description: 'File reference (advanced mode)' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file (basic mode)' },
    apiKey: { type: 'string', description: 'Mistral API key' },
    resultType: { type: 'string', description: 'Output format type' },
    pages: { type: 'string', description: 'Page selection' },
  },
  outputs: {
    pages: { type: 'array', description: 'Array of page objects from Mistral OCR' },
    model: { type: 'string', description: 'Mistral OCR model identifier' },
    usage_info: { type: 'json', description: 'Usage statistics from the API' },
    document_annotation: { type: 'string', description: 'Structured annotation data' },
  },
}
|