feat(mistral-ocr): added file upload to mistral ocr tool in production (#218)

* added file selector for mistral OCR tool * updated twilio icon
2026-01-14 09:27:58 -05:00 · 2025-04-02 13:52:56 -07:00
parent 79f8f34bf1
commit 40a917c0ea
5 changed files with 463 additions and 393 deletions
--- a/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx
+++ b/sim/app/w/[id]/components/workflow-block/components/sub-block/components/file-upload.tsx
@@ -542,17 +542,19 @@ export function FileUpload({

      {/* Show upload button if no files and not uploading */}
      {!hasFiles && !isUploading && (
-        <Button
-          type="button"
-          variant="outline"
-          className="w-full justify-center text-center font-normal"
-          onClick={handleOpenFileDialog}
-        >
-          <Upload className="mr-2 h-4 w-4" />
-          {multiple ? 'Upload Files' : 'Upload File'}
+        <div className="flex items-center">
+          <Button
+            type="button"
+            variant="outline"
+            className="w-full justify-center text-center font-normal"
+            onClick={handleOpenFileDialog}
+          >
+            <Upload className="mr-2 h-4 w-4" />
+            {multiple ? 'Upload Files' : 'Upload File'}
+          </Button>

          <Tooltip>
-            <TooltipTrigger className="ml-1">
+            <TooltipTrigger className="ml-2">
              <Info className="h-4 w-4 text-muted-foreground" />
            </TooltipTrigger>
            <TooltipContent>
@@ -560,7 +562,7 @@ export function FileUpload({
              {multiple && <p>You can select multiple files at once</p>}
            </TooltipContent>
          </Tooltip>
-        </Button>
+        </div>
      )}
    </div>
  )
--- a/sim/blocks/blocks/mistral-parse.ts
+++ b/sim/blocks/blocks/mistral-parse.ts
@@ -1,24 +1,73 @@
-import { MistralParserOutput } from '@/tools/mistral/parser'
-import { BlockConfig } from '../types'
 import { MistralIcon } from '@/components/icons'
+import { MistralParserOutput } from '@/tools/mistral/types'
+import { BlockConfig, SubBlockConfig, SubBlockLayout, SubBlockType } from '../types'
+
+const isProduction = process.env.NODE_ENV === 'production'
+const isS3Enabled = process.env.USE_S3 === 'true'
+const shouldEnableFileUpload = isProduction || isS3Enabled
+
+// Input method selector (URL vs. upload); only added to subBlocks when file upload is enabled
+const inputMethodBlock: SubBlockConfig = {
+  id: 'inputMethod',
+  title: 'Select Input Method',
+  type: 'dropdown' as SubBlockType,
+  layout: 'full' as SubBlockLayout,
+  options: [
+    { id: 'url', label: 'PDF Document URL' },
+    { id: 'upload', label: 'Upload PDF Document' },
+  ],
+}
+
+// File upload sub-block; only added to subBlocks when file upload is enabled, shown when inputMethod is 'upload'
+const fileUploadBlock: SubBlockConfig = {
+  id: 'fileUpload',
+  title: 'Upload PDF',
+  type: 'file-upload' as SubBlockType,
+  layout: 'full' as SubBlockLayout,
+  acceptedTypes: 'application/pdf',
+  condition: {
+    field: 'inputMethod',
+    value: 'upload',
+  },
+}

 export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
  type: 'mistral_parse',
  name: 'Mistral Parser',
  description: 'Extract text from PDF documents',
  longDescription:
-    'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document (.pdf extension required), configure processing options, and get the content in your preferred format. The URL must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
+    "Extract text and structure from PDF documents using Mistral's OCR API." +
+    (shouldEnableFileUpload
+      ? ' Either enter a URL to a PDF document or upload a PDF file directly.'
+      : ' Enter a URL to a PDF document (.pdf extension required).') +
+    ' Configure processing options and get the content in your preferred format. For URLs, they must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
  category: 'tools',
  bgColor: '#000000',
  icon: MistralIcon,
  subBlocks: [
+    // Show input method selection only if file upload is available
+    ...(shouldEnableFileUpload ? [inputMethodBlock] : []),
+
+    // URL input - always included; conditional on inputMethod === 'url' when file upload is enabled
    {
      id: 'filePath',
      title: 'PDF Document URL',
-      type: 'short-input',
-      layout: 'full',
+      type: 'short-input' as SubBlockType,
+      layout: 'full' as SubBlockLayout,
      placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
+      ...(shouldEnableFileUpload
+        ? {
+            condition: {
+              field: 'inputMethod',
+              value: 'url',
+            },
+          }
+        : {}),
    },
+
+    // File upload option - only included when file upload is enabled (production or USE_S3 === 'true')
+    ...(shouldEnableFileUpload ? [fileUploadBlock] : []),
+
    {
      id: 'resultType',
      title: 'Output Format',
@@ -27,7 +76,7 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
      options: [
        { id: 'markdown', label: 'Markdown (Formatted)' },
        { id: 'text', label: 'Plain Text' },
-        { id: 'json', label: 'JSON (Raw)' }
+        { id: 'json', label: 'JSON (Raw)' },
      ],
    },
    {
@@ -65,8 +114,8 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
    {
      id: 'apiKey',
      title: 'API Key',
-      type: 'short-input',
-      layout: 'full',
+      type: 'short-input' as SubBlockType,
+      layout: 'full' as SubBlockLayout,
      placeholder: 'Enter your Mistral API key',
      password: true,
    },
@@ -78,48 +127,40 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
      params: (params) => {
        // Basic validation
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
-          throw new Error('Mistral API key is required');
+          throw new Error('Mistral API key is required')
        }
-        
-        if (!params || !params.filePath || params.filePath.trim() === '') {
-          throw new Error('PDF Document URL is required');
+
+        // Build parameters object - file processing is now handled at the tool level
+        const parameters: any = {
+          apiKey: params.apiKey.trim(),
+          resultType: params.resultType || 'markdown',
        }
-        
-        // Validate URL format
-        let validatedUrl;
-        try {
-          // Try to create a URL object to validate format
-          validatedUrl = new URL(params.filePath.trim());
-          
-          // Ensure URL is using HTTP or HTTPS protocol
-          if (!['http:', 'https:'].includes(validatedUrl.protocol)) {
-            throw new Error(`URL must use HTTP or HTTPS protocol. Found: ${validatedUrl.protocol}`);
-          }
-          
-          // Check for PDF extension and provide specific guidance
-          const pathname = validatedUrl.pathname.toLowerCase();
-          if (!pathname.endsWith('.pdf')) {
-            if (!pathname.includes('pdf')) {
-              throw new Error(
-                'The URL does not appear to point to a PDF document. ' +
-                'Please provide a URL that ends with .pdf extension. ' +
-                'If your document is not a PDF, please convert it to PDF format first.'
-              );
-            } else {
-              // PDF is in the name but not at the end, so give a warning but proceed
-              console.warn(
-                'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
-                'This might still work if the server returns a valid PDF document.'
-              );
+
+        // Set filePath or fileUpload based on input method (or directly use filePath if no method selector)
+        if (shouldEnableFileUpload) {
+          const inputMethod = params.inputMethod || 'url'
+          if (inputMethod === 'url') {
+            if (!params.filePath || params.filePath.trim() === '') {
+              throw new Error('PDF Document URL is required')
            }
+            parameters.filePath = params.filePath.trim()
+          } else if (inputMethod === 'upload') {
+            if (!params.fileUpload) {
+              throw new Error('Please upload a PDF document')
+            }
+            // Pass the entire fileUpload object to the tool
+            parameters.fileUpload = params.fileUpload
          }
-        } catch (error) {
-          const errorMessage = error instanceof Error ? error.message : String(error);
-          throw new Error(`Invalid URL format: ${errorMessage}`);
+        } else {
+          // File upload disabled (non-production and USE_S3 not 'true'): only URL input is available
+          if (!params.filePath || params.filePath.trim() === '') {
+            throw new Error('PDF Document URL is required')
+          }
+          parameters.filePath = params.filePath.trim()
        }
-        
-        // Process pages input (convert from comma-separated string to array of numbers)
-        let pagesArray: number[] | undefined = undefined;
+
+        // Convert pages input from string to array of numbers if provided
+        let pagesArray: number[] | undefined = undefined
        if (params.pages && params.pages.trim() !== '') {
          try {
            pagesArray = params.pages
@@ -127,77 +168,34 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
              .map((p: string) => p.trim())
              .filter((p: string) => p.length > 0)
              .map((p: string) => {
-                const num = parseInt(p, 10);
+                const num = parseInt(p, 10)
                if (isNaN(num) || num < 0) {
-                  throw new Error(`Invalid page number: ${p}`);
+                  throw new Error(`Invalid page number: ${p}`)
                }
-                return num;
-              });
-            
+                return num
+              })
+
            if (pagesArray && pagesArray.length === 0) {
-              pagesArray = undefined;
+              pagesArray = undefined
            }
          } catch (error: any) {
-            throw new Error(`Page number format error: ${error.message}`);
+            throw new Error(`Page number format error: ${error.message}`)
          }
        }
-        
-        // Process numeric inputs
-        let imageLimit: number | undefined = undefined;
-        if (params.imageLimit && params.imageLimit.trim() !== '') {
-          const limit = parseInt(params.imageLimit, 10);
-          if (!isNaN(limit) && limit > 0) {
-            imageLimit = limit;
-          } else {
-            throw new Error('Image limit must be a positive number');
-          }
-        }
-        
-        let imageMinSize: number | undefined = undefined;
-        if (params.imageMinSize && params.imageMinSize.trim() !== '') {
-          const size = parseInt(params.imageMinSize, 10);
-          if (!isNaN(size) && size > 0) {
-            imageMinSize = size;
-          } else {
-            throw new Error('Minimum image size must be a positive number');
-          }
-        }
-        
-        // Return structured parameters for the tool
-        const parameters: any = {
-          filePath: validatedUrl.toString(),
-          apiKey: params.apiKey.trim(),
-          resultType: params.resultType || 'markdown',
-        };
-        
-        // Add optional parameters if they're defined
+
+        // Add optional parameters
        if (pagesArray && pagesArray.length > 0) {
-          parameters.pages = pagesArray;
+          parameters.pages = pagesArray
        }
-        
-        /* 
-         * Image-related parameters - temporarily disabled
-         * Uncomment if PDF image extraction is needed
-         *
-        if (typeof params.includeImageBase64 === 'boolean') {
-          parameters.includeImageBase64 = params.includeImageBase64;
-        }
-        
-        if (imageLimit !== undefined) {
-          parameters.imageLimit = imageLimit;
-        }
-        
-        if (imageMinSize !== undefined) {
-          parameters.imageMinSize = imageMinSize;
-        }
-        */
-        
-        return parameters;
+
+        return parameters
      },
    },
  },
  inputs: {
-    filePath: { type: 'string', required: true },
+    inputMethod: { type: 'string', required: false },
+    filePath: { type: 'string', required: !shouldEnableFileUpload },
+    fileUpload: { type: 'json', required: false },
    apiKey: { type: 'string', required: true },
    resultType: { type: 'string', required: false },
    pages: { type: 'string', required: false },
@@ -214,4 +212,4 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
      },
    },
  },
-} 
+}
--- a/sim/components/icons.tsx
+++ b/sim/components/icons.tsx
@@ -1736,20 +1736,11 @@ export function ConfluenceIcon(props: SVGProps<SVGSVGElement>) {

 export function TwilioIcon(props: SVGProps<SVGSVGElement>) {
  return (
-    <svg
-      {...props}
-      xmlns="http://www.w3.org/2000/svg"
-      width="24"
-      height="24"
-      viewBox="0 0 256 256"
-      fill="none"
-      aria-hidden="true"
-    >
-      <circle cx="128" cy="128" r="128" fill="none" stroke="white" strokeWidth="21" />
-      <circle cx="85" cy="85" r="21" fill="white" />
-      <circle cx="171" cy="85" r="21" fill="white" />
-      <circle cx="85" cy="171" r="21" fill="white" />
-      <circle cx="171" cy="171" r="21" fill="white" />
+    <svg {...props} xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 256">
+      <path
+        fill="currentColor"
+        d="M128 0c70.656 0 128 57.344 128 128s-57.344 128-128 128S0 198.656 0 128 57.344 0 128 0zm0 33.792c-52.224 0-94.208 41.984-94.208 94.208S75.776 222.208 128 222.208s94.208-41.984 94.208-94.208S180.224 33.792 128 33.792zm31.744 99.328c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm-63.488 0c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm63.488-63.488c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624zm-63.488 0c14.704 0 26.624 11.92 26.624 26.624 0 14.704-11.92 26.624-26.624 26.624-14.704 0-26.624-11.92-26.624-26.624 0-14.704 11.92-26.624 26.624-26.624z"
+      />
    </svg>
  )
 }
--- a/sim/tools/mistral/parser.ts
+++ b/sim/tools/mistral/parser.ts
@@ -1,95 +1,5 @@
-import { ToolConfig, ToolResponse } from '../types'
-
-/**
- * Input parameters for the Mistral OCR parser tool
- */
-export interface MistralParserInput {
-  /** URL to a PDF document to be processed */
-  filePath: string;
-  
-  /** Mistral API key for authentication */
-  apiKey: string;
-  
-  /** Output format for the extracted content (default: 'markdown') */
-  resultType?: 'markdown' | 'text' | 'json';
-  
-  /** Whether to include base64-encoded images in the response */
-  includeImageBase64?: boolean;
-  
-  /** Specific pages to process (zero-indexed) */
-  pages?: number[];
-  
-  /** Maximum number of images to extract from the PDF */
-  imageLimit?: number;
-  
-  /** Minimum height and width (in pixels) for images to extract */
-  imageMinSize?: number;
-}
-
-/**
- * Usage information returned by the Mistral OCR API
- */
-export interface MistralOcrUsageInfo {
-  /** Number of pages processed in the document */
-  pagesProcessed: number;
-  
-  /** Size of the document in bytes */
-  docSizeBytes: number;
-}
-
-/**
- * Metadata about the processed document
- */
-export interface MistralParserMetadata {
-  /** Unique identifier for this OCR job */
-  jobId: string;
-  
-  /** File type of the document (typically 'pdf') */
-  fileType: string;
-  
-  /** Filename extracted from the document URL */
-  fileName: string;
-  
-  /** Source type (always 'url' for now) */
-  source: 'url';
-  
-  /** Original URL to the document */
-  sourceUrl: string;
-  
-  /** Total number of pages in the document */
-  pageCount: number;
-  
-  /** Usage statistics from the OCR processing */
-  usageInfo?: MistralOcrUsageInfo;
-  
-  /** The Mistral OCR model used for processing */
-  model: string;
-  
-  /** The output format that was requested */
-  resultType?: 'markdown' | 'text' | 'json';
-  
-  /** ISO timestamp when the document was processed */
-  processedAt: string;
-}
-
-/**
- * Output data structure from the Mistral OCR parser
- */
-export interface MistralParserOutputData {
-  /** Extracted content in the requested format */
-  content: string;
-  
-  /** Metadata about the parsed document and processing */
-  metadata: MistralParserMetadata;
-}
-
-/**
- * Complete response from the Mistral OCR parser tool
- */
-export interface MistralParserOutput extends ToolResponse {
-  /** The output data containing content and metadata */
-  output: MistralParserOutputData;
-}
+import { ToolConfig } from '../types'
+import { MistralParserInput, MistralParserOutput } from './types'

 export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutput> = {
  id: 'mistral_parser',
@@ -103,6 +13,11 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
      required: true,
      description: 'URL to a PDF document to be processed',
    },
+    fileUpload: {
+      type: 'object',
+      required: false,
+      description: 'File upload data from file-upload component',
+    },
    resultType: {
      type: 'string',
      required: false,
@@ -142,133 +57,169 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
    url: 'https://api.mistral.ai/v1/ocr',
    method: 'POST',
    headers: (params) => {
-      console.log('Setting up headers with API key:', params.apiKey ? `${params.apiKey.substring(0, 5)}...` : 'Missing');
+      console.log(
+        'Setting up headers with API key:',
+        params.apiKey ? `${params.apiKey.substring(0, 5)}...` : 'Missing'
+      )
      return {
        'Content-Type': 'application/json',
-        'Accept': 'application/json',
-        'Authorization': `Bearer ${params.apiKey}`,
-      };
+        Accept: 'application/json',
+        Authorization: `Bearer ${params.apiKey}`,
+      }
    },
    body: (params) => {
      if (!params || typeof params !== 'object') {
-        throw new Error('Invalid parameters: Parameters must be provided as an object');
+        throw new Error('Invalid parameters: Parameters must be provided as an object')
      }
-      
+
      // Validate required parameters
      if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
-        throw new Error('Missing or invalid API key: A valid Mistral API key is required');
+        throw new Error('Missing or invalid API key: A valid Mistral API key is required')
      }
-      
-      if (!params.filePath || typeof params.filePath !== 'string' || params.filePath.trim() === '') {
-        throw new Error('Missing or invalid file path: Please provide a URL to a PDF document');
+
+      // Check if we have a file upload instead of direct URL
+      if (
+        params.fileUpload &&
+        (!params.filePath || params.filePath === 'null' || params.filePath === '')
+      ) {
+        // Try to extract file path from upload data
+        if (
+          typeof params.fileUpload === 'object' &&
+          params.fileUpload !== null &&
+          params.fileUpload.path
+        ) {
+          // Get the full URL to the file
+          let uploadedFilePath = params.fileUpload.path
+
+          // Make sure the file path is an absolute URL
+          if (uploadedFilePath.startsWith('/')) {
+            // If it's a relative path starting with /, convert to absolute URL
+            const baseUrl = process.env.NEXT_PUBLIC_APP_URL || 'http://localhost:3000'
+            uploadedFilePath = `${baseUrl}${uploadedFilePath}`
+          }
+
+          // Set the filePath parameter
+          params.filePath = uploadedFilePath
+          console.log('Using uploaded file:', uploadedFilePath)
+        } else {
+          throw new Error('Invalid file upload: Upload data is missing or invalid')
+        }
      }
-      
+
+      if (
+        !params.filePath ||
+        typeof params.filePath !== 'string' ||
+        params.filePath.trim() === ''
+      ) {
+        throw new Error('Missing or invalid file path: Please provide a URL to a PDF document')
+      }
+
      // Validate and normalize URL
-      let url;
+      let url
      try {
-        url = new URL(params.filePath.trim());
-        
+        url = new URL(params.filePath.trim())
+
        // Validate protocol
        if (!['http:', 'https:'].includes(url.protocol)) {
-          throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`);
+          throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`)
        }
-        
+
        // Validate against known unsupported services
        if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
          throw new Error(
            'Google Drive links are not supported by the Mistral OCR API. ' +
-            'Please upload your PDF to a public web server or provide a direct download link ' +
-            'that ends with .pdf extension.'
-          );
+              'Please upload your PDF to a public web server or provide a direct download link ' +
+              'that ends with .pdf extension.'
+          )
        }
-        
+
        // Validate file appears to be a PDF (stricter check with informative warning)
-        const pathname = url.pathname.toLowerCase();
+        const pathname = url.pathname.toLowerCase()
        if (!pathname.endsWith('.pdf')) {
          // Check if PDF is included in the path at all
          if (!pathname.includes('pdf')) {
            console.warn(
              'Warning: URL does not appear to point to a PDF document. ' +
-              'The Mistral OCR API is designed to work with PDF files. ' +
-              'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
-            );
+                'The Mistral OCR API is designed to work with PDF files. ' +
+                'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
+            )
          } else {
            // If "pdf" is in the URL but not at the end, give a different warning
            console.warn(
              'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
-              'This might still work if the server returns a valid PDF document despite the missing extension.'
-            );
+                'This might still work if the server returns a valid PDF document despite the missing extension.'
+            )
          }
        }
      } catch (error) {
-        const errorMessage = error instanceof Error ? error.message : String(error);
+        const errorMessage = error instanceof Error ? error.message : String(error)
        throw new Error(
          `Invalid URL format: ${errorMessage}. ` +
-          'Please provide a valid HTTP or HTTPS URL to a PDF document (e.g., https://example.com/document.pdf)'
-        );
+            'Please provide a valid HTTP or HTTPS URL to a PDF document (e.g., https://example.com/document.pdf)'
+        )
      }
-      
+
      // Create the request body with required parameters
      const requestBody: Record<string, any> = {
-        model: "mistral-ocr-latest",
+        model: 'mistral-ocr-latest',
        document: {
-          type: "document_url",
-          document_url: url.toString()
-        }
-      };
+          type: 'document_url',
+          document_url: url.toString(),
+        },
+      }

      // Add optional parameters with proper validation
      // Include images (base64)
      if (params.includeImageBase64 !== undefined) {
        if (typeof params.includeImageBase64 !== 'boolean') {
-          console.warn('includeImageBase64 parameter should be a boolean, using default (false)');
+          console.warn('includeImageBase64 parameter should be a boolean, using default (false)')
        } else {
-          requestBody.include_image_base64 = params.includeImageBase64;
+          requestBody.include_image_base64 = params.includeImageBase64
        }
      }

-      // Page selection
-      if (params.pages !== undefined) {
+      // Page selection - safely handle null and undefined
+      if (params.pages !== undefined && params.pages !== null) {
        if (Array.isArray(params.pages) && params.pages.length > 0) {
          // Validate all page numbers are non-negative integers
          const validPages = params.pages.filter(
            (page) => typeof page === 'number' && Number.isInteger(page) && page >= 0
-          );
-          
+          )
+
          if (validPages.length > 0) {
-            requestBody.pages = validPages;
-            
+            requestBody.pages = validPages
+
            if (validPages.length !== params.pages.length) {
              console.warn(
                `Some invalid page numbers were removed. ` +
-                `Using ${validPages.length} valid pages: ${validPages.join(', ')}`
-              );
+                  `Using ${validPages.length} valid pages: ${validPages.join(', ')}`
+              )
            }
          } else {
-            console.warn('No valid page numbers provided, processing all pages');
+            console.warn('No valid page numbers provided, processing all pages')
          }
-        } else if (params.pages.length === 0) {
-          console.warn('Empty pages array provided, processing all pages');
+        } else if (Array.isArray(params.pages) && params.pages.length === 0) {
+          console.warn('Empty pages array provided, processing all pages')
        }
      }

-      // Image limit
-      if (params.imageLimit !== undefined) {
-        const imageLimit = Number(params.imageLimit);
+      // Image limit - safely handle null and undefined
+      if (params.imageLimit !== undefined && params.imageLimit !== null) {
+        const imageLimit = Number(params.imageLimit)
        if (Number.isInteger(imageLimit) && imageLimit > 0) {
-          requestBody.image_limit = imageLimit;
+          requestBody.image_limit = imageLimit
        } else {
-          console.warn('imageLimit must be a positive integer, ignoring this parameter');
+          console.warn('imageLimit must be a positive integer, ignoring this parameter')
        }
      }

-      // Minimum image size
-      if (params.imageMinSize !== undefined) {
-        const imageMinSize = Number(params.imageMinSize);
+      // Minimum image size - safely handle null and undefined
+      if (params.imageMinSize !== undefined && params.imageMinSize !== null) {
+        const imageMinSize = Number(params.imageMinSize)
        if (Number.isInteger(imageMinSize) && imageMinSize > 0) {
-          requestBody.image_min_size = imageMinSize;
+          requestBody.image_min_size = imageMinSize
        } else {
-          console.warn('imageMinSize must be a positive integer, ignoring this parameter');
+          console.warn('imageMinSize must be a positive integer, ignoring this parameter')
        }
      }

@@ -282,10 +233,10 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
          pages: requestBody.pages ?? 'all pages',
          imageLimit: requestBody.image_limit ?? 'no limit',
          imageMinSize: requestBody.image_min_size ?? 'no minimum',
-        }
-      });
-      
-      return requestBody;
+        },
+      })
+
+      return requestBody
    },
  },

@@ -293,105 +244,117 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
    try {
      // Verify response status
      if (!response.ok) {
-        const errorText = await response.text();
-        throw new Error(`Mistral OCR API error: ${response.status} ${response.statusText}${errorText ? ` - ${errorText}` : ''}`);
+        const errorText = await response.text()
+        throw new Error(
+          `Mistral OCR API error: ${response.status} ${response.statusText}${errorText ? ` - ${errorText}` : ''}`
+        )
      }
-      
+
      // Parse response data with proper error handling
-      let ocrResult;
+      let ocrResult
      try {
-        ocrResult = await response.json();
+        ocrResult = await response.json()
      } catch (jsonError) {
-        throw new Error(`Failed to parse Mistral OCR response: ${jsonError instanceof Error ? jsonError.message : String(jsonError)}`);
+        throw new Error(
+          `Failed to parse Mistral OCR response: ${jsonError instanceof Error ? jsonError.message : String(jsonError)}`
+        )
      }
-      
+
      if (!ocrResult || typeof ocrResult !== 'object') {
-        throw new Error('Invalid response format from Mistral OCR API');
+        throw new Error('Invalid response format from Mistral OCR API')
      }
-      
+
      // Set default values and extract from params if available
-      let resultType: 'markdown' | 'text' | 'json' = 'markdown';
-      let sourceUrl = '';
-      
+      let resultType: 'markdown' | 'text' | 'json' = 'markdown'
+      let sourceUrl = ''
+
      if (params && typeof params === 'object') {
        if (params.filePath && typeof params.filePath === 'string') {
-          sourceUrl = params.filePath.trim();
+          sourceUrl = params.filePath.trim()
        }
-        
+
        if (params.resultType && ['markdown', 'text', 'json'].includes(params.resultType)) {
-          resultType = params.resultType as 'markdown' | 'text' | 'json';
+          resultType = params.resultType as 'markdown' | 'text' | 'json'
        }
-      } else if (ocrResult.document && typeof ocrResult.document === 'object' && 
-                 ocrResult.document.document_url && typeof ocrResult.document.document_url === 'string') {
-        sourceUrl = ocrResult.document.document_url;
+      } else if (
+        ocrResult.document &&
+        typeof ocrResult.document === 'object' &&
+        ocrResult.document.document_url &&
+        typeof ocrResult.document.document_url === 'string'
+      ) {
+        sourceUrl = ocrResult.document.document_url
      }
-      
+
      // Process content from pages
-      let content = '';
-      const pageCount = ocrResult.pages && Array.isArray(ocrResult.pages) ? ocrResult.pages.length : 0;
-      
+      let content = ''
+      const pageCount =
+        ocrResult.pages && Array.isArray(ocrResult.pages) ? ocrResult.pages.length : 0
+
      if (pageCount > 0) {
        content = ocrResult.pages
-          .map((page: any) => (page && typeof page.markdown === 'string') ? page.markdown : '')
+          .map((page: any) => (page && typeof page.markdown === 'string' ? page.markdown : ''))
          .filter(Boolean)
-          .join('\n\n');
+          .join('\n\n')
      } else {
-        console.warn('No pages found in OCR result, returning raw response');
-        content = JSON.stringify(ocrResult, null, 2);
+        console.warn('No pages found in OCR result, returning raw response')
+        content = JSON.stringify(ocrResult, null, 2)
      }
-      
+
      // Process based on requested result type
      if (resultType === 'text') {
        // Strip markdown formatting
        content = content
-          .replace(/\#\#*\s/g, '')     // Remove markdown headers
-          .replace(/\*\*/g, '')        // Remove bold markers
-          .replace(/\*/g, '')          // Remove italic markers
-          .replace(/\n{3,}/g, '\n\n'); // Normalize newlines
+          .replace(/\#\#*\s/g, '') // Remove markdown headers
+          .replace(/\*\*/g, '') // Remove bold markers
+          .replace(/\*/g, '') // Remove italic markers
+          .replace(/\n{3,}/g, '\n\n') // Normalize newlines
      } else if (resultType === 'json') {
        // Return the structured data as JSON string
-        content = JSON.stringify(ocrResult, null, 2);
+        content = JSON.stringify(ocrResult, null, 2)
      }
-      
+
      // Extract file information with proper validation
-      let fileName = 'document.pdf';
-      let fileType = 'pdf';
-      
+      let fileName = 'document.pdf'
+      let fileType = 'pdf'
+
      if (sourceUrl) {
        try {
-          const url = new URL(sourceUrl);
-          const pathSegments = url.pathname.split('/');
-          const lastSegment = pathSegments[pathSegments.length - 1];
-          
+          const url = new URL(sourceUrl)
+          const pathSegments = url.pathname.split('/')
+          const lastSegment = pathSegments[pathSegments.length - 1]
+
          if (lastSegment && lastSegment.length > 0) {
-            fileName = lastSegment;
-            const fileExtParts = fileName.split('.');
+            fileName = lastSegment
+            const fileExtParts = fileName.split('.')
            if (fileExtParts.length > 1) {
-              fileType = fileExtParts[fileExtParts.length - 1].toLowerCase();
+              fileType = fileExtParts[fileExtParts.length - 1].toLowerCase()
            }
          }
        } catch (urlError) {
-          console.warn('Failed to parse document URL:', urlError);
+          console.warn('Failed to parse document URL:', urlError)
        }
      }
-      
+
      // Generate a tracking ID with timestamp and random component for uniqueness
-      const timestamp = Date.now();
-      const randomId = Math.random().toString(36).substring(2, 10);
-      const jobId = `mistral-ocr-${timestamp}-${randomId}`;
-      
+      const timestamp = Date.now()
+      const randomId = Math.random().toString(36).substring(2, 10)
+      const jobId = `mistral-ocr-${timestamp}-${randomId}`
+
      // Map API response fields to our schema with proper type checking
-      const usageInfo = ocrResult.usage_info && typeof ocrResult.usage_info === 'object' 
-        ? {
-            pagesProcessed: typeof ocrResult.usage_info.pages_processed === 'number' 
-              ? ocrResult.usage_info.pages_processed 
-              : Number(ocrResult.usage_info.pages_processed),
-            docSizeBytes: typeof ocrResult.usage_info.doc_size_bytes === 'number' 
-              ? ocrResult.usage_info.doc_size_bytes 
-              : Number(ocrResult.usage_info.doc_size_bytes)
-          } 
-        : undefined;
-      
+      const usageInfo =
+        ocrResult.usage_info && typeof ocrResult.usage_info === 'object'
+          ? {
+              pagesProcessed:
+                typeof ocrResult.usage_info.pages_processed === 'number'
+                  ? ocrResult.usage_info.pages_processed
+                  : Number(ocrResult.usage_info.pages_processed),
+              docSizeBytes:
+                typeof ocrResult.usage_info.doc_size_bytes === 'number'
+                  ? ocrResult.usage_info.doc_size_bytes
+                  : Number(ocrResult.usage_info.doc_size_bytes),
+            }
+          : undefined
+
      // Return properly structured response
      const parserResponse: MistralParserOutput = {
        success: true,
@@ -410,95 +373,116 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
            processedAt: new Date().toISOString(),
          },
        },
-      };
-      
-      return parserResponse;
+      }
+
+      return parserResponse
    } catch (error) {
-      console.error('Error processing OCR result:', error);
-      throw error;
+      console.error('Error processing OCR result:', error)
+      throw error
    }
  },

  transformError: (error) => {
-    console.error('Mistral OCR processing error:', error);
-    
+    console.error('Mistral OCR processing error:', error)
+
    // Helper function to extract message from various error types
    const getErrorMessage = (err: any): string => {
-      if (typeof err === 'string') return err;
-      if (err instanceof Error) return err.message;
+      if (typeof err === 'string') return err
+      if (err instanceof Error) return err.message
      if (err && typeof err === 'object') {
-        if (err.message) return String(err.message);
-        if (err.error) return typeof err.error === 'string' ? err.error : JSON.stringify(err.error);
+        if (err.message) return String(err.message)
+        if (err.error) return typeof err.error === 'string' ? err.error : JSON.stringify(err.error)
      }
-      return 'Unknown error';
-    };
-    
-    // Get base error message
-    const errorMsg = getErrorMessage(error);
-    
-    // Handle null reference errors which often occur with invalid PDF URLs
-    if (errorMsg.includes('Cannot read properties of null') || 
-        (errorMsg.includes('null') && errorMsg.includes('length'))) {
-      return 'Mistral OCR Error: Invalid PDF document URL. The URL provided either does not point to a valid PDF file or the PDF cannot be accessed. Please ensure you provide a direct link to a publicly accessible PDF file with .pdf extension.';
+      return 'Unknown error'
    }
-    
+
+    // Extract a plain-text message; the substring checks below match against it
+    const errorMsg = getErrorMessage(error)
+
+    // Handle null-reference errors, which often occur with invalid PDF URLs
+    if (
+      errorMsg.includes('Cannot read properties of null') ||
+      (errorMsg.includes('null') && errorMsg.includes('length'))
+    ) {
+      return 'Mistral OCR Error: Invalid PDF document URL. The URL provided either does not point to a valid PDF file or the PDF cannot be accessed. Please ensure you provide a direct link to a publicly accessible PDF file with .pdf extension.'
+    }
+
    // Handle common API error status codes
    if (typeof error === 'object' && error !== null) {
-      const status = error.status || (error.response && error.response.status);
-      
+      const status = error.status || (error.response && error.response.status)
+
      if (status) {
        switch (status) {
          case 400:
-            return 'Mistral OCR Error: The request was invalid. Please check your PDF URL and parameters.';
+            return 'Mistral OCR Error: The request was invalid. Please check your PDF URL and parameters.'
          case 401:
-            return 'Mistral OCR Error: Invalid API key. Please check your Mistral API key.';
+            return 'Mistral OCR Error: Invalid API key. Please check your Mistral API key.'
          case 403:
-            return 'Mistral OCR Error: Access forbidden. Your API key may not have permission to use the OCR service.';
+            return 'Mistral OCR Error: Access forbidden. Your API key may not have permission to use the OCR service.'
          case 404:
-            return 'Mistral OCR Error: The PDF document could not be found. Please check that the URL is accessible.';
+            return 'Mistral OCR Error: The PDF document could not be found. Please check that the URL is accessible.'
          case 413:
-            return 'Mistral OCR Error: The PDF document is too large for processing.';
+            return 'Mistral OCR Error: The PDF document is too large for processing.'
          case 415:
-            return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document with a .pdf extension.';
+            return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document with a .pdf extension.'
          case 429:
-            return 'Mistral OCR Error: Rate limit exceeded. Please try again later.';
+            return 'Mistral OCR Error: Rate limit exceeded. Please try again later.'
          case 500:
          case 502:
          case 503:
          case 504:
-            return 'Mistral OCR Error: Service temporarily unavailable. Please try again later.';
+            return 'Mistral OCR Error: Service temporarily unavailable. Please try again later.'
        }
      }
    }
-    
+
    // Handle common network and URL errors
    if (errorMsg.includes('URL') || errorMsg.includes('protocol') || errorMsg.includes('http')) {
-      return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document (e.g., https://example.com/document.pdf).';
+      return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document (e.g., https://example.com/document.pdf).'
    }
-    
-    if (errorMsg.includes('ETIMEDOUT') || errorMsg.includes('timeout') || errorMsg.includes('ECONNABORTED')) {
-      return 'Mistral OCR Error: The request timed out. The PDF document may be too large or the server is unresponsive.';
+
+    if (
+      errorMsg.includes('ETIMEDOUT') ||
+      errorMsg.includes('timeout') ||
+      errorMsg.includes('ECONNABORTED')
+    ) {
+      return 'Mistral OCR Error: The request timed out. The PDF document may be too large or the server is unresponsive.'
    }
-    
-    if (errorMsg.includes('ENOTFOUND') || errorMsg.includes('ECONNREFUSED') || errorMsg.includes('ECONNRESET')) {
-      return 'Mistral OCR Error: Could not connect to the document URL. Please verify the document is accessible.';
+
+    if (
+      errorMsg.includes('ENOTFOUND') ||
+      errorMsg.includes('ECONNREFUSED') ||
+      errorMsg.includes('ECONNRESET')
+    ) {
+      return 'Mistral OCR Error: Could not connect to the document URL. Please verify the document is accessible.'
    }
-    
-    if (errorMsg.includes('JSON') || errorMsg.includes('Unexpected token') || errorMsg.includes('parse')) {
-      return 'Mistral OCR Error: Failed to parse the response from the OCR service.';
+
+    if (
+      errorMsg.includes('JSON') ||
+      errorMsg.includes('Unexpected token') ||
+      errorMsg.includes('parse')
+    ) {
+      return 'Mistral OCR Error: Failed to parse the response from the OCR service.'
    }
-    
+
    // PDF-specific error handling
    if (errorMsg.toLowerCase().includes('pdf')) {
-      if (errorMsg.toLowerCase().includes('invalid') || errorMsg.toLowerCase().includes('corrupted')) {
-        return 'Mistral OCR Error: The document appears to be an invalid or corrupted PDF. Please check that the URL points to a valid, properly formatted PDF document.';
+      if (
+        errorMsg.toLowerCase().includes('invalid') ||
+        errorMsg.toLowerCase().includes('corrupted')
+      ) {
+        return 'Mistral OCR Error: The document appears to be an invalid or corrupted PDF. Please check that the URL points to a valid, properly formatted PDF document.'
      }
-      if (errorMsg.toLowerCase().includes('password') || errorMsg.toLowerCase().includes('protected') || errorMsg.toLowerCase().includes('encrypted')) {
-        return 'Mistral OCR Error: The PDF document appears to be password-protected or encrypted. The OCR service cannot process protected documents.';
+      if (
+        errorMsg.toLowerCase().includes('password') ||
+        errorMsg.toLowerCase().includes('protected') ||
+        errorMsg.toLowerCase().includes('encrypted')
+      ) {
+        return 'Mistral OCR Error: The PDF document appears to be password-protected or encrypted. The OCR service cannot process protected documents.'
      }
    }
-    
+
    // Default error message with the original error for context
-    return `Mistral OCR Error: Invalid PDF document or URL. Please ensure you provide a direct link to a valid PDF file. Technical details: ${errorMsg}`;
+    return `Mistral OCR Error: Invalid PDF document or URL. Please ensure you provide a direct link to a valid PDF file. Technical details: ${errorMsg}`
  },
-} 
+}
--- a/sim/tools/mistral/types.ts
+++ b/sim/tools/mistral/types.ts
@@ -0,0 +1,95 @@
+import { ToolResponse } from '../types'
+
+/**
+ * Input parameters for the Mistral OCR parser tool
+ */
+export interface MistralParserInput {
+  /** URL to a PDF document to be processed */
+  filePath: string
+
+  /** File upload data (from file-upload component) */
+  fileUpload?: any
+
+  /** Mistral API key for authentication */
+  apiKey: string
+
+  /** Output format for the extracted content (default: 'markdown') */
+  resultType?: 'markdown' | 'text' | 'json'
+
+  /** Whether to include base64-encoded images in the response */
+  includeImageBase64?: boolean
+
+  /** Specific pages to process (zero-indexed) */
+  pages?: number[]
+
+  /** Maximum number of images to extract from the PDF */
+  imageLimit?: number
+
+  /** Minimum height and width (in pixels) for images to extract */
+  imageMinSize?: number
+}
+
+/**
+ * Usage information returned by the Mistral OCR API
+ */
+export interface MistralOcrUsageInfo {
+  /** Number of pages processed in the document */
+  pagesProcessed: number
+
+  /** Size of the document in bytes */
+  docSizeBytes: number
+}
+
+/**
+ * Metadata about the processed document
+ */
+export interface MistralParserMetadata {
+  /** Unique identifier for this OCR job */
+  jobId: string
+
+  /** File type of the document (typically 'pdf') */
+  fileType: string
+
+  /** Filename extracted from the document URL */
+  fileName: string
+
+  /** Source type (always 'url' for now) */
+  source: 'url'
+
+  /** Original URL to the document */
+  sourceUrl: string
+
+  /** Total number of pages in the document */
+  pageCount: number
+
+  /** Usage statistics from the OCR processing */
+  usageInfo?: MistralOcrUsageInfo
+
+  /** The Mistral OCR model used for processing */
+  model: string
+
+  /** The output format that was requested */
+  resultType?: 'markdown' | 'text' | 'json'
+
+  /** ISO timestamp when the document was processed */
+  processedAt: string
+}
+
+/**
+ * Output data structure from the Mistral OCR parser
+ */
+export interface MistralParserOutputData {
+  /** Extracted content in the requested format */
+  content: string
+
+  /** Metadata about the parsed document and processing */
+  metadata: MistralParserMetadata
+}
+
+/**
+ * Complete response from the Mistral OCR parser tool
+ */
+export interface MistralParserOutput extends ToolResponse {
+  /** The output data containing content and metadata */
+  output: MistralParserOutputData
+}