mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
improvement(mistral-OCR): error handling
This commit is contained in:
@@ -4,10 +4,10 @@ import { MistralIcon } from '@/components/icons'
|
||||
|
||||
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
type: 'mistral_parse',
|
||||
name: 'Mistral PDF Parser',
|
||||
name: 'Mistral Parser',
|
||||
description: 'Extract text from PDF documents',
|
||||
longDescription:
|
||||
'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document, configure processing options, and get the content in your preferred format.',
|
||||
'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document (.pdf extension required), configure processing options, and get the content in your preferred format. The URL must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
|
||||
category: 'tools',
|
||||
bgColor: '#000000',
|
||||
icon: MistralIcon,
|
||||
@@ -95,6 +95,24 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
|
||||
if (!['http:', 'https:'].includes(validatedUrl.protocol)) {
|
||||
throw new Error(`URL must use HTTP or HTTPS protocol. Found: ${validatedUrl.protocol}`);
|
||||
}
|
||||
|
||||
// Check for PDF extension and provide specific guidance
|
||||
const pathname = validatedUrl.pathname.toLowerCase();
|
||||
if (!pathname.endsWith('.pdf')) {
|
||||
if (!pathname.includes('pdf')) {
|
||||
throw new Error(
|
||||
'The URL does not appear to point to a PDF document. ' +
|
||||
'Please provide a URL that ends with .pdf extension. ' +
|
||||
'If your document is not a PDF, please convert it to PDF format first.'
|
||||
);
|
||||
} else {
|
||||
// "pdf" appears in the URL path but the path does not end with .pdf, so warn and proceed
|
||||
console.warn(
|
||||
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
|
||||
'This might still work if the server returns a valid PDF document.'
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(`Invalid URL format: ${errorMessage}`);
|
||||
|
||||
@@ -173,14 +173,33 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`);
|
||||
}
|
||||
|
||||
// Validate file appears to be a PDF (loose check)
|
||||
const pathname = url.pathname.toLowerCase();
|
||||
if (!pathname.endsWith('.pdf') && !pathname.includes('pdf')) {
|
||||
console.warn(
|
||||
'Warning: URL does not appear to be a PDF document. ' +
|
||||
'If this is incorrect, the document may still be processed if it is a valid PDF.'
|
||||
// Validate against known unsupported services
|
||||
if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
|
||||
throw new Error(
|
||||
'Google Drive links are not supported by the Mistral OCR API. ' +
|
||||
'Please upload your PDF to a public web server or provide a direct download link ' +
|
||||
'that ends with .pdf extension.'
|
||||
);
|
||||
}
|
||||
|
||||
// Validate file appears to be a PDF (stricter check with informative warning)
|
||||
const pathname = url.pathname.toLowerCase();
|
||||
if (!pathname.endsWith('.pdf')) {
|
||||
// Check if PDF is included in the path at all
|
||||
if (!pathname.includes('pdf')) {
|
||||
console.warn(
|
||||
'Warning: URL does not appear to point to a PDF document. ' +
|
||||
'The Mistral OCR API is designed to work with PDF files. ' +
|
||||
'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
|
||||
);
|
||||
} else {
|
||||
// If "pdf" appears in the URL path but the path does not end with .pdf, give a different warning
|
||||
console.warn(
|
||||
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
|
||||
'This might still work if the server returns a valid PDF document despite the missing extension.'
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(
|
||||
@@ -417,6 +436,12 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
// Get base error message
|
||||
const errorMsg = getErrorMessage(error);
|
||||
|
||||
// Handle null-reference errors, which often occur with invalid PDF URLs
|
||||
if (errorMsg.includes('Cannot read properties of null') ||
|
||||
(errorMsg.includes('null') && errorMsg.includes('length'))) {
|
||||
return 'Mistral OCR Error: Invalid PDF document URL. The URL provided either does not point to a valid PDF file or the PDF cannot be accessed. Please ensure you provide a direct link to a publicly accessible PDF file with .pdf extension.';
|
||||
}
|
||||
|
||||
// Handle common API error status codes
|
||||
if (typeof error === 'object' && error !== null) {
|
||||
const status = error.status || (error.response && error.response.status);
|
||||
@@ -434,7 +459,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
case 413:
|
||||
return 'Mistral OCR Error: The PDF document is too large for processing.';
|
||||
case 415:
|
||||
return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document.';
|
||||
return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document with a .pdf extension.';
|
||||
case 429:
|
||||
return 'Mistral OCR Error: Rate limit exceeded. Please try again later.';
|
||||
case 500:
|
||||
@@ -448,7 +473,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
|
||||
// Handle common network and URL errors
|
||||
if (errorMsg.includes('URL') || errorMsg.includes('protocol') || errorMsg.includes('http')) {
|
||||
return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document.';
|
||||
return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document (e.g., https://example.com/document.pdf).';
|
||||
}
|
||||
|
||||
if (errorMsg.includes('ETIMEDOUT') || errorMsg.includes('timeout') || errorMsg.includes('ECONNABORTED')) {
|
||||
@@ -463,7 +488,17 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
|
||||
return 'Mistral OCR Error: Failed to parse the response from the OCR service.';
|
||||
}
|
||||
|
||||
// PDF-specific error handling
|
||||
if (errorMsg.toLowerCase().includes('pdf')) {
|
||||
if (errorMsg.toLowerCase().includes('invalid') || errorMsg.toLowerCase().includes('corrupted')) {
|
||||
return 'Mistral OCR Error: The document appears to be an invalid or corrupted PDF. Please check that the URL points to a valid, properly formatted PDF document.';
|
||||
}
|
||||
if (errorMsg.toLowerCase().includes('password') || errorMsg.toLowerCase().includes('protected') || errorMsg.toLowerCase().includes('encrypted')) {
|
||||
return 'Mistral OCR Error: The PDF document appears to be password-protected or encrypted. The OCR service cannot process protected documents.';
|
||||
}
|
||||
}
|
||||
|
||||
// Default error message with the original error for context
|
||||
return `Mistral OCR Error: ${errorMsg}`;
|
||||
return `Mistral OCR Error: Invalid PDF document or URL. Please ensure you provide a direct link to a valid PDF file. Technical details: ${errorMsg}`;
|
||||
},
|
||||
}
|
||||
Reference in New Issue
Block a user