mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
feat(ocr): added reducto and pulse for OCR (#2843)
* feat(ocr): added reducto and pulse for OCR * ack comments
This commit is contained in:
2
apps/sim/tools/pulse/index.ts
Normal file
2
apps/sim/tools/pulse/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
// Barrel file for the Pulse OCR tool: re-exports the tool config and its types.
export { pulseParserTool } from '@/tools/pulse/parser'
export * from './types'
|
||||
283
apps/sim/tools/pulse/parser.ts
Normal file
283
apps/sim/tools/pulse/parser.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import type { PulseParserInput, PulseParserOutput } from '@/tools/pulse/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
const logger = createLogger('PulseParserTool')
|
||||
|
||||
export const pulseParserTool: ToolConfig<PulseParserInput, PulseParserOutput> = {
|
||||
id: 'pulse_parser',
|
||||
name: 'Pulse Document Parser',
|
||||
description: 'Parse documents (PDF, images, Office docs) using Pulse OCR API',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
filePath: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'URL to a document to be processed',
|
||||
},
|
||||
fileUpload: {
|
||||
type: 'object',
|
||||
required: false,
|
||||
visibility: 'hidden',
|
||||
description: 'File upload data from file-upload component',
|
||||
},
|
||||
pages: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-only',
|
||||
description: 'Page range to process (1-indexed, e.g., "1-2,5")',
|
||||
},
|
||||
extractFigure: {
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
visibility: 'hidden',
|
||||
description: 'Enable figure extraction from the document',
|
||||
},
|
||||
figureDescription: {
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
visibility: 'hidden',
|
||||
description: 'Generate descriptions/captions for extracted figures',
|
||||
},
|
||||
returnHtml: {
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
visibility: 'hidden',
|
||||
description: 'Include HTML in the response',
|
||||
},
|
||||
chunking: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-only',
|
||||
description: 'Chunking strategies (comma-separated: semantic, header, page, recursive)',
|
||||
},
|
||||
chunkSize: {
|
||||
type: 'number',
|
||||
required: false,
|
||||
visibility: 'user-only',
|
||||
description: 'Maximum characters per chunk when chunking is enabled',
|
||||
},
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Pulse API key',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
url: '/api/tools/pulse/parse',
|
||||
method: 'POST',
|
||||
headers: () => {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
Accept: 'application/json',
|
||||
}
|
||||
},
|
||||
body: (params) => {
|
||||
if (!params || typeof params !== 'object') {
|
||||
throw new Error('Invalid parameters: Parameters must be provided as an object')
|
||||
}
|
||||
|
||||
if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
|
||||
throw new Error('Missing or invalid API key: A valid Pulse API key is required')
|
||||
}
|
||||
|
||||
// Check if we have a file upload instead of direct URL
|
||||
if (
|
||||
params.fileUpload &&
|
||||
(!params.filePath || params.filePath === 'null' || params.filePath === '')
|
||||
) {
|
||||
if (
|
||||
typeof params.fileUpload === 'object' &&
|
||||
params.fileUpload !== null &&
|
||||
(params.fileUpload.url || params.fileUpload.path)
|
||||
) {
|
||||
let uploadedFilePath: string = params.fileUpload.url ?? params.fileUpload.path ?? ''
|
||||
|
||||
if (!uploadedFilePath) {
|
||||
throw new Error('Invalid file upload: Upload data is missing or invalid')
|
||||
}
|
||||
|
||||
if (uploadedFilePath.startsWith('/')) {
|
||||
const baseUrl = getBaseUrl()
|
||||
if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
|
||||
uploadedFilePath = `${baseUrl}${uploadedFilePath}`
|
||||
}
|
||||
|
||||
params.filePath = uploadedFilePath
|
||||
logger.info('Using uploaded file:', uploadedFilePath)
|
||||
} else {
|
||||
throw new Error('Invalid file upload: Upload data is missing or invalid')
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!params.filePath ||
|
||||
typeof params.filePath !== 'string' ||
|
||||
params.filePath.trim() === ''
|
||||
) {
|
||||
throw new Error('Missing or invalid file path: Please provide a URL to a document')
|
||||
}
|
||||
|
||||
let filePathToValidate = params.filePath.trim()
|
||||
if (filePathToValidate.startsWith('/')) {
|
||||
const baseUrl = getBaseUrl()
|
||||
if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
|
||||
filePathToValidate = `${baseUrl}${filePathToValidate}`
|
||||
}
|
||||
|
||||
let url
|
||||
try {
|
||||
url = new URL(filePathToValidate)
|
||||
|
||||
if (!['http:', 'https:'].includes(url.protocol)) {
|
||||
throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`)
|
||||
}
|
||||
|
||||
if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
|
||||
throw new Error(
|
||||
'Google Drive links are not supported. ' +
|
||||
'Please upload your document or provide a direct download link.'
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
throw new Error(
|
||||
`Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a document`
|
||||
)
|
||||
}
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
apiKey: params.apiKey.trim(),
|
||||
filePath: url.toString(),
|
||||
}
|
||||
|
||||
// Check if this is an internal workspace file path
|
||||
if (params.fileUpload?.path?.startsWith('/api/files/serve/')) {
|
||||
requestBody.filePath = params.fileUpload.path
|
||||
}
|
||||
|
||||
// Add optional parameters
|
||||
if (params.pages && typeof params.pages === 'string' && params.pages.trim() !== '') {
|
||||
requestBody.pages = params.pages.trim()
|
||||
}
|
||||
|
||||
if (params.extractFigure !== undefined) {
|
||||
requestBody.extractFigure = params.extractFigure
|
||||
}
|
||||
|
||||
if (params.figureDescription !== undefined) {
|
||||
requestBody.figureDescription = params.figureDescription
|
||||
}
|
||||
|
||||
if (params.returnHtml !== undefined) {
|
||||
requestBody.returnHtml = params.returnHtml
|
||||
}
|
||||
|
||||
if (params.chunking && typeof params.chunking === 'string' && params.chunking.trim() !== '') {
|
||||
requestBody.chunking = params.chunking.trim()
|
||||
}
|
||||
|
||||
if (params.chunkSize !== undefined && params.chunkSize > 0) {
|
||||
requestBody.chunkSize = params.chunkSize
|
||||
}
|
||||
|
||||
return requestBody
|
||||
},
|
||||
},
|
||||
|
||||
transformResponse: async (response) => {
|
||||
let parseResult
|
||||
try {
|
||||
parseResult = await response.json()
|
||||
} catch (jsonError) {
|
||||
throw new Error(
|
||||
`Failed to parse Pulse response: ${jsonError instanceof Error ? jsonError.message : String(jsonError)}`
|
||||
)
|
||||
}
|
||||
|
||||
if (!parseResult || typeof parseResult !== 'object') {
|
||||
throw new Error('Invalid response format from Pulse API')
|
||||
}
|
||||
|
||||
// Pass through the native Pulse API response
|
||||
const pulseData =
|
||||
parseResult.output && typeof parseResult.output === 'object'
|
||||
? parseResult.output
|
||||
: parseResult
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
markdown: pulseData.markdown ?? '',
|
||||
page_count: pulseData.page_count ?? 0,
|
||||
job_id: pulseData.job_id ?? '',
|
||||
'plan-info': pulseData['plan-info'] ?? { pages_used: 0, tier: 'unknown' },
|
||||
bounding_boxes: pulseData.bounding_boxes ?? null,
|
||||
extraction_url: pulseData.extraction_url ?? null,
|
||||
html: pulseData.html ?? null,
|
||||
structured_output: pulseData.structured_output ?? null,
|
||||
chunks: pulseData.chunks ?? null,
|
||||
figures: pulseData.figures ?? null,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
markdown: {
|
||||
type: 'string',
|
||||
description: 'Extracted content in markdown format',
|
||||
},
|
||||
page_count: {
|
||||
type: 'number',
|
||||
description: 'Number of pages in the document',
|
||||
},
|
||||
job_id: {
|
||||
type: 'string',
|
||||
description: 'Unique job identifier',
|
||||
},
|
||||
'plan-info': {
|
||||
type: 'object',
|
||||
description: 'Plan usage information',
|
||||
properties: {
|
||||
pages_used: { type: 'number', description: 'Number of pages used' },
|
||||
tier: { type: 'string', description: 'Plan tier' },
|
||||
note: { type: 'string', description: 'Optional note', optional: true },
|
||||
},
|
||||
},
|
||||
bounding_boxes: {
|
||||
type: 'json',
|
||||
description: 'Bounding box layout information',
|
||||
optional: true,
|
||||
},
|
||||
extraction_url: {
|
||||
type: 'string',
|
||||
description: 'URL for extraction results (for large documents)',
|
||||
optional: true,
|
||||
},
|
||||
html: {
|
||||
type: 'string',
|
||||
description: 'HTML content if requested',
|
||||
optional: true,
|
||||
},
|
||||
structured_output: {
|
||||
type: 'json',
|
||||
description: 'Structured output if schema was provided',
|
||||
optional: true,
|
||||
},
|
||||
chunks: {
|
||||
type: 'json',
|
||||
description: 'Chunked content if chunking was enabled',
|
||||
optional: true,
|
||||
},
|
||||
figures: {
|
||||
type: 'json',
|
||||
description: 'Extracted figures if figure extraction was enabled',
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
93
apps/sim/tools/pulse/types.ts
Normal file
93
apps/sim/tools/pulse/types.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import type { ToolResponse } from '@/tools/types'
|
||||
|
||||
/**
 * Input parameters for the Pulse parser tool.
 *
 * Either `filePath` or `fileUpload` must identify the document; when
 * `filePath` is empty the tool falls back to `fileUpload`.
 */
export interface PulseParserInput {
  /** URL to a document to be processed */
  filePath: string

  /** File upload data (from file-upload component) */
  fileUpload?: {
    /** URL of the uploaded file */
    url?: string
    /** Server-relative path of the uploaded file (e.g. /api/files/serve/...) */
    path?: string
  }

  /** Pulse API key for authentication */
  apiKey: string

  /** Page range to process (1-indexed, e.g., "1-2,5") */
  pages?: string

  /** Whether to extract figures from the document */
  extractFigure?: boolean

  /** Whether to generate figure descriptions/captions */
  figureDescription?: boolean

  /** Whether to include HTML in the response */
  returnHtml?: boolean

  /** Chunking strategies (comma-separated: semantic, header, page, recursive) */
  chunking?: string

  /** Maximum characters per chunk when chunking is enabled */
  chunkSize?: number
}
|
||||
|
||||
/**
 * Plan info returned by the Pulse API (surfaced under the `plan-info` key).
 */
export interface PulsePlanInfo {
  /** Number of pages used */
  pages_used: number

  /** Plan tier (the parser substitutes 'unknown' when the API omits it) */
  tier: string

  /** Optional note from the API */
  note?: string
}
|
||||
|
||||
/**
 * Native output structure from the Pulse API.
 *
 * The parser tool's transformResponse defaults missing optional fields
 * (to ''/0/null), so consumers always receive every key.
 */
export interface PulseParserOutputData {
  /** Extracted content in markdown format */
  markdown: string

  /** Number of pages in the document */
  page_count: number

  /** Unique job identifier */
  job_id: string

  /** Plan usage information */
  'plan-info': PulsePlanInfo

  /** Bounding box layout information */
  bounding_boxes?: Record<string, unknown>

  /** URL for extraction results (for large documents) */
  extraction_url?: string

  /** HTML content if requested via `returnHtml` */
  html?: string

  /** Structured output if schema was provided */
  structured_output?: Record<string, unknown>

  /** Chunked content if chunking was enabled */
  chunks?: unknown[]

  /** Extracted figures if figure extraction was enabled */
  figures?: unknown[]
}
|
||||
|
||||
/**
 * Complete response from the Pulse parser tool; extends the standard
 * ToolResponse envelope with the native Pulse payload.
 */
export interface PulseParserOutput extends ToolResponse {
  /** The native Pulse API output */
  output: PulseParserOutputData
}
|
||||
3
apps/sim/tools/reducto/index.ts
Normal file
3
apps/sim/tools/reducto/index.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
import { reductoParserTool } from '@/tools/reducto/parser'
|
||||
|
||||
export { reductoParserTool }
|
||||
203
apps/sim/tools/reducto/parser.ts
Normal file
203
apps/sim/tools/reducto/parser.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import type { ReductoParserInput, ReductoParserOutput } from '@/tools/reducto/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
const logger = createLogger('ReductoParserTool')
|
||||
|
||||
export const reductoParserTool: ToolConfig<ReductoParserInput, ReductoParserOutput> = {
|
||||
id: 'reducto_parser',
|
||||
name: 'Reducto PDF Parser',
|
||||
description: 'Parse PDF documents using Reducto OCR API',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
filePath: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'URL to a PDF document to be processed',
|
||||
},
|
||||
fileUpload: {
|
||||
type: 'object',
|
||||
required: false,
|
||||
visibility: 'hidden',
|
||||
description: 'File upload data from file-upload component',
|
||||
},
|
||||
pages: {
|
||||
type: 'array',
|
||||
required: false,
|
||||
visibility: 'user-only',
|
||||
description: 'Specific pages to process (1-indexed page numbers)',
|
||||
},
|
||||
tableOutputFormat: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Table output format (html or markdown). Defaults to markdown.',
|
||||
},
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Reducto API key (REDUCTO_API_KEY)',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
url: '/api/tools/reducto/parse',
|
||||
method: 'POST',
|
||||
headers: (params) => {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
Accept: 'application/json',
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}
|
||||
},
|
||||
body: (params) => {
|
||||
if (!params || typeof params !== 'object') {
|
||||
throw new Error('Invalid parameters: Parameters must be provided as an object')
|
||||
}
|
||||
|
||||
if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
|
||||
throw new Error('Missing or invalid API key: A valid Reducto API key is required')
|
||||
}
|
||||
|
||||
// Check if we have a file upload instead of direct URL
|
||||
if (
|
||||
params.fileUpload &&
|
||||
(!params.filePath || params.filePath === 'null' || params.filePath === '')
|
||||
) {
|
||||
if (
|
||||
typeof params.fileUpload === 'object' &&
|
||||
params.fileUpload !== null &&
|
||||
(params.fileUpload.url || params.fileUpload.path)
|
||||
) {
|
||||
let uploadedFilePath = (params.fileUpload.url || params.fileUpload.path) as string
|
||||
|
||||
if (uploadedFilePath.startsWith('/')) {
|
||||
const baseUrl = getBaseUrl()
|
||||
if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
|
||||
uploadedFilePath = `${baseUrl}${uploadedFilePath}`
|
||||
}
|
||||
|
||||
params.filePath = uploadedFilePath as string
|
||||
logger.info('Using uploaded file:', uploadedFilePath)
|
||||
} else {
|
||||
throw new Error('Invalid file upload: Upload data is missing or invalid')
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!params.filePath ||
|
||||
typeof params.filePath !== 'string' ||
|
||||
params.filePath.trim() === ''
|
||||
) {
|
||||
throw new Error('Missing or invalid file path: Please provide a URL to a PDF document')
|
||||
}
|
||||
|
||||
let filePathToValidate = params.filePath.trim()
|
||||
if (filePathToValidate.startsWith('/')) {
|
||||
const baseUrl = getBaseUrl()
|
||||
if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
|
||||
filePathToValidate = `${baseUrl}${filePathToValidate}`
|
||||
}
|
||||
|
||||
let url
|
||||
try {
|
||||
url = new URL(filePathToValidate)
|
||||
|
||||
if (!['http:', 'https:'].includes(url.protocol)) {
|
||||
throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`)
|
||||
}
|
||||
|
||||
if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
|
||||
throw new Error(
|
||||
'Google Drive links are not supported by the Reducto API. ' +
|
||||
'Please upload your PDF to a public web server or provide a direct download link.'
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
throw new Error(
|
||||
`Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a PDF document.`
|
||||
)
|
||||
}
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
apiKey: params.apiKey,
|
||||
filePath: url.toString(),
|
||||
}
|
||||
|
||||
// Check if this is an internal workspace file path
|
||||
if (params.fileUpload?.path?.startsWith('/api/files/serve/')) {
|
||||
requestBody.filePath = params.fileUpload.path
|
||||
}
|
||||
|
||||
if (params.tableOutputFormat && ['html', 'md'].includes(params.tableOutputFormat)) {
|
||||
requestBody.tableOutputFormat = params.tableOutputFormat
|
||||
}
|
||||
|
||||
// Page selection
|
||||
if (params.pages !== undefined && params.pages !== null) {
|
||||
if (Array.isArray(params.pages) && params.pages.length > 0) {
|
||||
const validPages = params.pages.filter(
|
||||
(page) => typeof page === 'number' && Number.isInteger(page) && page >= 0
|
||||
)
|
||||
|
||||
if (validPages.length > 0) {
|
||||
requestBody.pages = validPages
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return requestBody
|
||||
},
|
||||
},
|
||||
|
||||
transformResponse: async (response) => {
|
||||
const data = await response.json()
|
||||
|
||||
if (!data || typeof data !== 'object') {
|
||||
throw new Error('Invalid response format from Reducto API')
|
||||
}
|
||||
|
||||
// Pass through the native Reducto response
|
||||
const reductoData = data.output ?? data
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
job_id: reductoData.job_id,
|
||||
duration: reductoData.duration,
|
||||
usage: reductoData.usage,
|
||||
result: reductoData.result,
|
||||
pdf_url: reductoData.pdf_url ?? null,
|
||||
studio_link: reductoData.studio_link ?? null,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
job_id: { type: 'string', description: 'Unique identifier for the processing job' },
|
||||
duration: { type: 'number', description: 'Processing time in seconds' },
|
||||
usage: {
|
||||
type: 'json',
|
||||
description: 'Resource consumption data',
|
||||
},
|
||||
result: {
|
||||
type: 'json',
|
||||
description: 'Parsed document content with chunks and blocks',
|
||||
},
|
||||
pdf_url: {
|
||||
type: 'string',
|
||||
description: 'Storage URL of converted PDF',
|
||||
optional: true,
|
||||
},
|
||||
studio_link: {
|
||||
type: 'string',
|
||||
description: 'Link to Reducto studio interface',
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
160
apps/sim/tools/reducto/types.ts
Normal file
160
apps/sim/tools/reducto/types.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import type { ToolResponse } from '@/tools/types'
|
||||
|
||||
/**
 * Input parameters for the Reducto parser tool.
 *
 * Either `filePath` or `fileUpload` must identify the document; when
 * `filePath` is empty the tool falls back to `fileUpload`.
 */
export interface ReductoParserInput {
  /** URL to a document to be processed */
  filePath: string

  /** File upload data (from file-upload component) */
  fileUpload?: {
    /** URL of the uploaded file */
    url?: string
    /** Server-relative path of the uploaded file (e.g. /api/files/serve/...) */
    path?: string
  }

  /** Reducto API key for authentication */
  apiKey: string

  /** Specific pages to process (1-indexed) */
  pages?: number[]

  /** Table output format: 'html' or 'md' (markdown) */
  tableOutputFormat?: 'html' | 'md'
}
|
||||
|
||||
/**
 * Bounding box for spatial location data.
 * Coordinate units/origin are as reported by the Reducto API — not
 * specified here; confirm against the Reducto docs before relying on them.
 */
export interface ReductoBoundingBox {
  left: number
  top: number
  width: number
  height: number
  /** Page the box appears on */
  page: number
}

/**
 * Granular confidence scores for individual analysis stages.
 */
export interface ReductoGranularConfidence {
  /** OCR (text recognition) confidence, or null when unavailable */
  ocr: string | null
  /** Layout detection confidence, or null when unavailable */
  layout: string | null
  /** Reading-order confidence, or null when unavailable */
  order: string | null
}

/**
 * Block type classification as labeled by the Reducto API.
 */
export type ReductoBlockType =
  | 'Header'
  | 'Footer'
  | 'Title'
  | 'SectionHeader'
  | 'Text'
  | 'ListItem'
  | 'Table'
  | 'Figure'
  | 'Caption'
  | 'Equation'
  | 'Code'
  | 'PageNumber'
  | 'Watermark'
  | 'Handwriting'
  | 'Other'
|
||||
|
||||
/**
 * Parse block - a structured content element located on a page.
 */
export interface ReductoParseBlock {
  /** Classified element type */
  type: ReductoBlockType
  /** Spatial location of the element */
  bbox: ReductoBoundingBox
  /** Extracted textual content */
  content: string
  /** Image URL for the block, or null — presumably set for figures; confirm */
  image_url: string | null
  /** Chart data strings, or null — TODO confirm shape against Reducto docs */
  chart_data: string[] | null
  /** Overall confidence, or null when unavailable */
  confidence: string | null
  /** Per-stage confidence breakdown, or null */
  granular_confidence: ReductoGranularConfidence | null
  /** Additional provider-specific metadata, or null */
  extra: Record<string, unknown> | null
}

/**
 * Parse chunk - a document segment composed of blocks.
 */
export interface ReductoParseChunk {
  /** Chunk text content */
  content: string
  /** Embedding-oriented text variant — TODO confirm against Reducto docs */
  embed: string
  /** Enriched content, or null when enrichment was not run */
  enriched: string | null
  /** Structured blocks making up this chunk */
  blocks: ReductoParseBlock[]
  /** Whether enrichment completed successfully */
  enrichment_success: boolean
}
|
||||
|
||||
/**
 * OCR word data: a single recognized word with its location.
 */
export interface ReductoOcrWord {
  /** Recognized text */
  text: string
  /** Location of the word */
  bbox: ReductoBoundingBox
  /** Recognition confidence score */
  confidence: number
}

/**
 * OCR line data: a recognized line and its constituent words.
 */
export interface ReductoOcrLine {
  /** Recognized line text */
  text: string
  /** Location of the line */
  bbox: ReductoBoundingBox
  /** Words making up the line */
  words: ReductoOcrWord[]
}

/**
 * OCR result data for the document.
 */
export interface ReductoOcrResult {
  /** Recognized lines */
  lines: ReductoOcrLine[]
  /** Recognized words (flat list) */
  words: ReductoOcrWord[]
}
|
||||
|
||||
/**
 * Full result - returned when the parsed content fits in the response payload.
 */
export interface ReductoFullResult {
  /** Discriminant: result is inlined */
  type: 'full'
  /** Parsed document segments */
  chunks: ReductoParseChunk[]
  /** Raw OCR data, or null when unavailable */
  ocr: ReductoOcrResult | null
  /** Provider-specific extra payload — shape not specified here */
  custom: unknown
}

/**
 * URL result - returned when the response exceeds size limits; the full
 * content must be fetched from `url`.
 */
export interface ReductoUrlResult {
  /** Discriminant: result is stored externally */
  type: 'url'
  /** Location of the full result payload */
  url: string
}

/**
 * Usage information returned by Reducto API.
 */
export interface ReductoUsage {
  /** Number of pages processed */
  num_pages: number
  /** Credits consumed, or null when not reported */
  credits: number | null
}
|
||||
|
||||
/**
 * Native Reducto API response structure.
 */
export interface ReductoParserOutputData {
  /** Unique identifier for the processing job */
  job_id: string
  /** Processing time in seconds */
  duration: number
  /** Resource consumption data */
  usage: ReductoUsage
  /** Parsed content: inline ('full') or fetched by URL ('url'), by size */
  result: ReductoFullResult | ReductoUrlResult
  /** Storage URL of converted PDF, or null */
  pdf_url: string | null
  /** Link to Reducto studio interface, or null */
  studio_link: string | null
}

/**
 * Complete response from the Reducto parser tool; extends the standard
 * ToolResponse envelope with the native Reducto payload.
 */
export interface ReductoParserOutput extends ToolResponse {
  /** The native Reducto API output */
  output: ReductoParserOutputData
}
|
||||
@@ -1032,6 +1032,7 @@ import {
|
||||
posthogUpdatePropertyDefinitionTool,
|
||||
posthogUpdateSurveyTool,
|
||||
} from '@/tools/posthog'
|
||||
import { pulseParserTool } from '@/tools/pulse'
|
||||
import { qdrantFetchTool, qdrantSearchTool, qdrantUpsertTool } from '@/tools/qdrant'
|
||||
import {
|
||||
rdsDeleteTool,
|
||||
@@ -1056,6 +1057,7 @@ import {
|
||||
redditUnsaveTool,
|
||||
redditVoteTool,
|
||||
} from '@/tools/reddit'
|
||||
import { reductoParserTool } from '@/tools/reducto'
|
||||
import { mailSendTool } from '@/tools/resend'
|
||||
import {
|
||||
s3CopyObjectTool,
|
||||
@@ -2126,6 +2128,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
google_slides_add_image: googleSlidesAddImageTool,
|
||||
perplexity_chat: perplexityChatTool,
|
||||
perplexity_search: perplexitySearchTool,
|
||||
pulse_parser: pulseParserTool,
|
||||
posthog_capture_event: posthogCaptureEventTool,
|
||||
posthog_batch_events: posthogBatchEventsTool,
|
||||
posthog_list_persons: posthogListPersonsTool,
|
||||
@@ -2248,6 +2251,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
apollo_task_search: apolloTaskSearchTool,
|
||||
apollo_email_accounts: apolloEmailAccountsTool,
|
||||
mistral_parser: mistralParserTool,
|
||||
reducto_parser: reductoParserTool,
|
||||
thinking_tool: thinkingTool,
|
||||
tinybird_events: tinybirdEventsTool,
|
||||
tinybird_query: tinybirdQueryTool,
|
||||
|
||||
Reference in New Issue
Block a user