Files
sim/apps/sim/tools/reducto/parser.ts
Vikhyath Mondreti 5b0c2156e0 improvement(files): pass user file objects around consistently (#3119)
* improvement(collab): do not refetch active workflow id

* progress on files

* more integrations

* separate server and client logic

* consolidate more code

* fix integrations

* fix types

* consolidate more code

* fix tests

* fix more bugbot comments

* fix type check

* fix circular impport

* address more bugbot comments

* fix ocr integrations

* fix typing

* remove leftover type

* address bugbot comment

* fix file block adv mode

* fix

* normalize file input

* fix v2 blocmks for ocr

* fix for v2 versions

* fix more v2 blocks

* update single file blocks

* make interface simpler

* cleanup fireflies

* remove file only annotation

* accept all types

* added wand to ssh block

* user files should be passed through

* improve docs

* fix slack to include successful execs

* fix dropbox upload file

* fix sendgrid

* fix dropbox

* fix

* fix

* update skills

* fix uploaded file

---------

Co-authored-by: waleed <walif6@gmail.com>
2026-02-03 19:50:23 -08:00

252 lines
7.6 KiB
TypeScript

import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
import type {
ReductoParserInput,
ReductoParserOutput,
ReductoParserV2Input,
} from '@/tools/reducto/types'
import type { ToolConfig } from '@/tools/types'
export const reductoParserTool: ToolConfig<ReductoParserInput, ReductoParserOutput> = {
id: 'reducto_parser',
name: 'Reducto PDF Parser',
description: 'Parse PDF documents using Reducto OCR API',
version: '1.0.0',
params: {
filePath: {
type: 'string',
required: false,
visibility: 'user-only',
description: 'URL to a PDF document to be processed',
},
file: {
type: 'file',
required: false,
visibility: 'hidden',
description: 'Document file to be processed',
},
fileUpload: {
type: 'object',
required: false,
visibility: 'hidden',
description: 'File upload data from file-upload component',
},
pages: {
type: 'array',
required: false,
visibility: 'user-only',
description: 'Specific pages to process (1-indexed page numbers)',
},
tableOutputFormat: {
type: 'string',
required: false,
visibility: 'user-or-llm',
description: 'Table output format (html or markdown). Defaults to markdown.',
},
apiKey: {
type: 'string',
required: true,
visibility: 'user-only',
description: 'Reducto API key (REDUCTO_API_KEY)',
},
},
request: {
url: '/api/tools/reducto/parse',
method: 'POST',
headers: (params) => {
return {
'Content-Type': 'application/json',
Accept: 'application/json',
Authorization: `Bearer ${params.apiKey}`,
}
},
body: (params) => {
if (!params || typeof params !== 'object') {
throw new Error('Invalid parameters: Parameters must be provided as an object')
}
if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
throw new Error('Missing or invalid API key: A valid Reducto API key is required')
}
const requestBody: Record<string, unknown> = {
apiKey: params.apiKey,
}
const fileInput =
params.file && typeof params.file === 'object' ? params.file : params.fileUpload
const hasFileUpload = fileInput && typeof fileInput === 'object'
const hasFilePath =
typeof params.filePath === 'string' &&
params.filePath !== 'null' &&
params.filePath.trim() !== ''
if (hasFilePath) {
const filePathToValidate = params.filePath!.trim()
if (filePathToValidate.startsWith('/')) {
if (!isInternalFileUrl(filePathToValidate)) {
throw new Error(
'Invalid file path. Only uploaded files are supported for internal paths.'
)
}
requestBody.filePath = filePathToValidate
} else {
let url
try {
url = new URL(filePathToValidate)
if (!['http:', 'https:'].includes(url.protocol)) {
throw new Error(
`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`
)
}
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error)
throw new Error(
`Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a PDF document.`
)
}
requestBody.filePath = url.toString()
}
} else if (hasFileUpload) {
requestBody.file = fileInput
} else {
throw new Error('Missing file input: Please provide a PDF URL or upload a file')
}
if (params.tableOutputFormat && ['html', 'md'].includes(params.tableOutputFormat)) {
requestBody.tableOutputFormat = params.tableOutputFormat
}
if (params.pages !== undefined && params.pages !== null) {
if (Array.isArray(params.pages) && params.pages.length > 0) {
const validPages = params.pages.filter(
(page) => typeof page === 'number' && Number.isInteger(page) && page >= 0
)
if (validPages.length > 0) {
requestBody.pages = validPages
}
}
}
return requestBody
},
},
transformResponse: async (response) => {
const data = await response.json()
if (!data || typeof data !== 'object') {
throw new Error('Invalid response format from Reducto API')
}
const reductoData = data.output ?? data
return {
success: true,
output: {
job_id: reductoData.job_id,
duration: reductoData.duration,
usage: reductoData.usage,
result: reductoData.result,
pdf_url: reductoData.pdf_url ?? null,
studio_link: reductoData.studio_link ?? null,
},
}
},
outputs: {
job_id: { type: 'string', description: 'Unique identifier for the processing job' },
duration: { type: 'number', description: 'Processing time in seconds' },
usage: {
type: 'json',
description: 'Resource consumption data',
},
result: {
type: 'json',
description: 'Parsed document content with chunks and blocks',
},
pdf_url: {
type: 'string',
description: 'Storage URL of converted PDF',
optional: true,
},
studio_link: {
type: 'string',
description: 'Link to Reducto studio interface',
optional: true,
},
},
}
export const reductoParserV2Tool: ToolConfig<ReductoParserV2Input, ReductoParserOutput> = {
...reductoParserTool,
id: 'reducto_parser_v2',
name: 'Reducto PDF Parser',
postProcess: undefined,
directExecution: undefined,
transformResponse: reductoParserTool.transformResponse
? (response: Response, params?: ReductoParserV2Input) =>
reductoParserTool.transformResponse!(response, params as unknown as ReductoParserInput)
: undefined,
params: {
file: {
type: 'file',
required: true,
visibility: 'hidden',
description: 'PDF document to be processed',
},
pages: reductoParserTool.params.pages,
tableOutputFormat: reductoParserTool.params.tableOutputFormat,
apiKey: reductoParserTool.params.apiKey,
},
request: {
url: '/api/tools/reducto/parse',
method: 'POST',
headers: (params) => ({
'Content-Type': 'application/json',
Accept: 'application/json',
Authorization: `Bearer ${params.apiKey}`,
}),
body: (params: ReductoParserV2Input) => {
if (!params || typeof params !== 'object') {
throw new Error('Invalid parameters: Parameters must be provided as an object')
}
if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
throw new Error('Missing or invalid API key: A valid Reducto API key is required')
}
if (!params.file || typeof params.file !== 'object') {
throw new Error('Missing or invalid file: Please provide a file object')
}
const requestBody: Record<string, unknown> = {
apiKey: params.apiKey,
file: params.file,
}
if (params.tableOutputFormat && ['html', 'md'].includes(params.tableOutputFormat)) {
requestBody.tableOutputFormat = params.tableOutputFormat
}
if (params.pages !== undefined && params.pages !== null) {
if (Array.isArray(params.pages) && params.pages.length > 0) {
const validPages = params.pages.filter(
(page) => typeof page === 'number' && Number.isInteger(page) && page >= 0
)
if (validPages.length > 0) {
requestBody.pages = validPages
}
}
}
return requestBody
},
},
}