mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
feat(ocr): added reducto and pulse for OCR (#2843)
* feat(ocr): added reducto and pulse for OCR * ack comments
This commit is contained in:
169
apps/sim/app/api/tools/pulse/parse/route.ts
Normal file
169
apps/sim/app/api/tools/pulse/parse/route.ts
Normal file
@@ -0,0 +1,169 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { checkHybridAuth } from '@/lib/auth/hybrid'
|
||||
import { generateRequestId } from '@/lib/core/utils/request'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { StorageService } from '@/lib/uploads'
|
||||
import { extractStorageKey, inferContextFromKey } from '@/lib/uploads/utils/file-utils'
|
||||
import { verifyFileAccess } from '@/app/api/files/authorization'
|
||||
|
||||
// Next.js route segment option: opt this handler out of static optimisation.
export const dynamic = 'force-dynamic'

const logger = createLogger('PulseParseAPI')

/** Fields every Pulse parse request must carry; both must be non-empty strings. */
const pulseRequiredFields = {
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().min(1, 'File path is required'),
}

/** Optional settings that are forwarded to Pulse only when the caller supplies them. */
const pulseOptionalFields = {
  pages: z.string().optional(),
  extractFigure: z.boolean().optional(),
  figureDescription: z.boolean().optional(),
  returnHtml: z.boolean().optional(),
  chunking: z.string().optional(),
  chunkSize: z.number().optional(),
}

/** Validation schema for the JSON body accepted by the Pulse parse route. */
const PulseParseSchema = z.object({
  ...pulseRequiredFields,
  ...pulseOptionalFields,
})
|
||||
|
||||
/**
 * Builds the multipart form sent to the Pulse extract endpoint.
 *
 * `file_url` is always included. String options are forwarded only when they
 * are non-empty; boolean and numeric options are forwarded whenever they were
 * supplied, so an explicit `false` or `0` still reaches Pulse.
 */
function buildPulseFormData(
  fileUrl: string,
  options: z.infer<typeof PulseParseSchema>
): FormData {
  const formData = new FormData()
  formData.append('file_url', fileUrl)

  if (options.pages) {
    formData.append('pages', options.pages)
  }
  if (options.extractFigure !== undefined) {
    formData.append('extract_figure', String(options.extractFigure))
  }
  if (options.figureDescription !== undefined) {
    formData.append('figure_description', String(options.figureDescription))
  }
  if (options.returnHtml !== undefined) {
    formData.append('return_html', String(options.returnHtml))
  }
  if (options.chunking) {
    formData.append('chunking', options.chunking)
  }
  if (options.chunkSize !== undefined) {
    formData.append('chunk_size', String(options.chunkSize))
  }

  return formData
}

/**
 * Proxies a document-parse request to the Pulse API.
 *
 * Steps: authenticate the caller, validate the JSON body against
 * `PulseParseSchema`, turn `filePath` into a URL Pulse can fetch, forward the
 * request, and wrap Pulse's JSON reply as `{ success: true, output }`.
 *
 * Failure responses:
 * - 401 when authentication fails or yields no user id
 * - 400 when the body is not valid JSON or fails schema validation
 * - 404 when the caller has no access to the referenced stored file
 * - 500 when a download URL cannot be generated or an unexpected error occurs
 * - the upstream status code when Pulse itself rejects the request
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkHybridAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Pulse parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId

    // A body that is not valid JSON is a client mistake. Without this guard the
    // parser exception would fall through to the generic handler and be
    // reported as a 500 carrying the raw parser message.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      logger.warn(`[${requestId}] Request body is not valid JSON`)
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid JSON in request body',
        },
        { status: 400 }
      )
    }

    const validatedData = PulseParseSchema.parse(body)
    const isWorkspaceFile = validatedData.filePath.includes('/api/files/serve/')

    logger.info(`[${requestId}] Pulse parse request`, {
      filePath: validatedData.filePath,
      isWorkspaceFile,
      userId,
    })

    let fileUrl = validatedData.filePath

    if (isWorkspaceFile) {
      // Files served by this app are exchanged for a presigned download URL,
      // but only after the caller's access to the underlying key is verified.
      try {
        const storageKey = extractStorageKey(validatedData.filePath)
        const context = inferContextFromKey(storageKey)

        const hasAccess = await verifyFileAccess(storageKey, userId, undefined, context, false)

        if (!hasAccess) {
          logger.warn(`[${requestId}] Unauthorized presigned URL generation attempt`, {
            userId,
            key: storageKey,
            context,
          })
          // Denied access is reported as "not found".
          return NextResponse.json(
            {
              success: false,
              error: 'File not found',
            },
            { status: 404 }
          )
        }

        // NOTE(review): the third argument is presumably the expiry in seconds
        // (five minutes) — confirm against StorageService.
        fileUrl = await StorageService.generatePresignedDownloadUrl(storageKey, context, 5 * 60)
        logger.info(`[${requestId}] Generated presigned URL for ${context} file`)
      } catch (error) {
        logger.error(`[${requestId}] Failed to generate presigned URL:`, error)
        return NextResponse.json(
          {
            success: false,
            error: 'Failed to generate file access URL',
          },
          { status: 500 }
        )
      }
    } else if (validatedData.filePath.startsWith('/')) {
      // Any other root-relative path is resolved against this app's base URL.
      fileUrl = `${getBaseUrl()}${validatedData.filePath}`
    }

    const pulseResponse = await fetch('https://api.runpulse.com/extract', {
      method: 'POST',
      headers: {
        'x-api-key': validatedData.apiKey,
      },
      body: buildPulseFormData(fileUrl, validatedData),
    })

    if (!pulseResponse.ok) {
      const errorText = await pulseResponse.text()
      logger.error(`[${requestId}] Pulse API error:`, errorText)

      // statusText can be empty (for example over HTTP/2); fall back to the
      // numeric status so the client never receives a blank reason.
      const reason = pulseResponse.statusText || `HTTP ${pulseResponse.status}`

      return NextResponse.json(
        {
          success: false,
          error: `Pulse API error: ${reason}`,
        },
        { status: pulseResponse.status }
      )
    }

    const pulseData = await pulseResponse.json()

    logger.info(`[${requestId}] Pulse parse successful`)

    return NextResponse.json({
      success: true,
      output: pulseData,
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Pulse parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}
|
||||
167
apps/sim/app/api/tools/reducto/parse/route.ts
Normal file
167
apps/sim/app/api/tools/reducto/parse/route.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { checkHybridAuth } from '@/lib/auth/hybrid'
|
||||
import { generateRequestId } from '@/lib/core/utils/request'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { StorageService } from '@/lib/uploads'
|
||||
import { extractStorageKey, inferContextFromKey } from '@/lib/uploads/utils/file-utils'
|
||||
import { verifyFileAccess } from '@/app/api/files/authorization'
|
||||
|
||||
// Next.js route segment option: opt this handler out of static optimisation.
export const dynamic = 'force-dynamic'

const logger = createLogger('ReductoParseAPI')

/** Fields every Reducto parse request must carry; both must be non-empty strings. */
const reductoRequiredFields = {
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().min(1, 'File path is required'),
}

/** Optional settings: a list of page numbers and the table rendering format. */
const reductoOptionalFields = {
  pages: z.array(z.number()).optional(),
  tableOutputFormat: z.enum(['html', 'md']).optional(),
}

/** Validation schema for the JSON body accepted by the Reducto parse route. */
const ReductoParseSchema = z.object({
  ...reductoRequiredFields,
  ...reductoOptionalFields,
})
|
||||
|
||||
/**
 * Builds the JSON payload sent to the Reducto parse endpoint.
 *
 * `input` is always set. `settings.page_range` is added only for a non-empty
 * page list, and `formatting.table_output_format` only when a format was chosen.
 */
function buildReductoRequestBody(
  fileUrl: string,
  options: z.infer<typeof ReductoParseSchema>
): Record<string, unknown> {
  const reductoBody: Record<string, unknown> = {
    input: fileUrl,
  }

  if (options.pages && options.pages.length > 0) {
    reductoBody.settings = {
      page_range: options.pages,
    }
  }

  if (options.tableOutputFormat) {
    reductoBody.formatting = {
      table_output_format: options.tableOutputFormat,
    }
  }

  return reductoBody
}

/**
 * Proxies a document-parse request to the Reducto API.
 *
 * Steps: authenticate the caller, validate the JSON body against
 * `ReductoParseSchema`, turn `filePath` into a URL Reducto can fetch, forward
 * the request, and wrap Reducto's JSON reply as `{ success: true, output }`.
 *
 * Failure responses:
 * - 401 when authentication fails or yields no user id
 * - 400 when the body is not valid JSON or fails schema validation
 * - 404 when the caller has no access to the referenced stored file
 * - 500 when a download URL cannot be generated or an unexpected error occurs
 * - the upstream status code when Reducto itself rejects the request
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkHybridAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Reducto parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId

    // A body that is not valid JSON is a client mistake. Without this guard the
    // parser exception would fall through to the generic handler and be
    // reported as a 500 carrying the raw parser message.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      logger.warn(`[${requestId}] Request body is not valid JSON`)
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid JSON in request body',
        },
        { status: 400 }
      )
    }

    const validatedData = ReductoParseSchema.parse(body)
    const isWorkspaceFile = validatedData.filePath.includes('/api/files/serve/')

    logger.info(`[${requestId}] Reducto parse request`, {
      filePath: validatedData.filePath,
      isWorkspaceFile,
      userId,
    })

    let fileUrl = validatedData.filePath

    if (isWorkspaceFile) {
      // Files served by this app are exchanged for a presigned download URL,
      // but only after the caller's access to the underlying key is verified.
      try {
        const storageKey = extractStorageKey(validatedData.filePath)
        const context = inferContextFromKey(storageKey)

        const hasAccess = await verifyFileAccess(
          storageKey,
          userId,
          undefined, // customConfig
          context, // context
          false // isLocal
        )

        if (!hasAccess) {
          logger.warn(`[${requestId}] Unauthorized presigned URL generation attempt`, {
            userId,
            key: storageKey,
            context,
          })
          // Denied access is reported as "not found".
          return NextResponse.json(
            {
              success: false,
              error: 'File not found',
            },
            { status: 404 }
          )
        }

        // NOTE(review): the third argument is presumably the expiry in seconds
        // (five minutes) — confirm against StorageService.
        fileUrl = await StorageService.generatePresignedDownloadUrl(storageKey, context, 5 * 60)
        logger.info(`[${requestId}] Generated presigned URL for ${context} file`)
      } catch (error) {
        logger.error(`[${requestId}] Failed to generate presigned URL:`, error)
        return NextResponse.json(
          {
            success: false,
            error: 'Failed to generate file access URL',
          },
          { status: 500 }
        )
      }
    } else if (validatedData.filePath.startsWith('/')) {
      // Any other root-relative path is resolved against this app's base URL.
      fileUrl = `${getBaseUrl()}${validatedData.filePath}`
    }

    const reductoResponse = await fetch('https://platform.reducto.ai/parse', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${validatedData.apiKey}`,
      },
      body: JSON.stringify(buildReductoRequestBody(fileUrl, validatedData)),
    })

    if (!reductoResponse.ok) {
      const errorText = await reductoResponse.text()
      logger.error(`[${requestId}] Reducto API error:`, errorText)

      // statusText can be empty (for example over HTTP/2); fall back to the
      // numeric status so the client never receives a blank reason.
      const reason = reductoResponse.statusText || `HTTP ${reductoResponse.status}`

      return NextResponse.json(
        {
          success: false,
          error: `Reducto API error: ${reason}`,
        },
        { status: reductoResponse.status }
      )
    }

    const reductoData = await reductoResponse.json()

    logger.info(`[${requestId}] Reducto parse successful`)

    return NextResponse.json({
      success: true,
      output: reductoData,
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Reducto parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}
|
||||
143
apps/sim/blocks/blocks/pulse.ts
Normal file
143
apps/sim/blocks/blocks/pulse.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { PulseIcon } from '@/components/icons'
|
||||
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
|
||||
import type { PulseParserOutput } from '@/tools/pulse/types'
|
||||
|
||||
/**
 * Workflow block definition for the Pulse OCR integration.
 *
 * Declares the editor fields (input method, document URL or upload, page
 * selection, chunking options, API key), maps them onto the `pulse_parser`
 * tool's parameters, and lists the block's inputs and outputs.
 */
export const PulseBlock: BlockConfig<PulseParserOutput> = {
  type: 'pulse',
  name: 'Pulse',
  description: 'Extract text from documents using Pulse OCR',
  authMode: AuthMode.ApiKey,
  longDescription:
    'Integrate Pulse into the workflow. Extract text from PDF documents, images, and Office files via URL or upload.',
  docsLink: 'https://docs.sim.ai/tools/pulse',
  category: 'tools',
  bgColor: '#E0E0E0',
  icon: PulseIcon,
  subBlocks: [
    // Chooses which of the two document fields below is shown.
    {
      id: 'inputMethod',
      title: 'Select Input Method',
      type: 'dropdown' as SubBlockType,
      options: [
        { id: 'url', label: 'Document URL' },
        { id: 'upload', label: 'Upload Document' },
      ],
    },
    // Shown only for the "url" input method.
    {
      id: 'filePath',
      title: 'Document URL',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter full URL to a document (https://example.com/document.pdf)',
      condition: {
        field: 'inputMethod',
        value: 'url',
      },
    },
    // Shown only for the "upload" input method.
    {
      id: 'fileUpload',
      title: 'Upload Document',
      type: 'file-upload' as SubBlockType,
      acceptedTypes: 'application/pdf,image/*,.docx,.pptx,.xlsx',
      condition: {
        field: 'inputMethod',
        value: 'upload',
      },
      // NOTE(review): unit is presumably megabytes — confirm against the file-upload sub-block.
      maxSize: 50,
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 1-3,5 (leave empty for all pages)',
    },
    {
      id: 'chunking',
      title: 'Chunking Strategy',
      type: 'short-input',
      placeholder: 'e.g. semantic,header,page,recursive',
    },
    {
      id: 'chunkSize',
      title: 'Chunk Size',
      type: 'short-input',
      placeholder: 'Max characters per chunk',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Pulse API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['pulse_parser'],
    config: {
      tool: () => 'pulse_parser',
      /**
       * Converts the raw field values into the tool's parameter object.
       * Throws when the API key, or the document for the selected input
       * method, is missing or blank.
       */
      params: (params) => {
        const rawApiKey = params ? params.apiKey : undefined
        if (!rawApiKey || rawApiKey.trim() === '') {
          throw new Error('Pulse API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: rawApiKey.trim(),
        }

        // An unset input method is treated as "url".
        switch (params.inputMethod || 'url') {
          case 'url': {
            if (!params.filePath || params.filePath.trim() === '') {
              throw new Error('Document URL is required')
            }
            parameters.filePath = params.filePath.trim()
            break
          }
          case 'upload': {
            if (!params.fileUpload) {
              throw new Error('Please upload a document')
            }
            parameters.fileUpload = params.fileUpload
            break
          }
          default:
            // Any other value adds no document parameter.
            break
        }

        // Free-text options are forwarded trimmed, and only when non-blank.
        for (const field of ['pages', 'chunking'] as const) {
          const raw = params[field]
          if (raw && raw.trim() !== '') {
            parameters[field] = raw.trim()
          }
        }

        if (params.chunkSize && params.chunkSize.trim() !== '') {
          const parsedSize = Number.parseInt(params.chunkSize.trim(), 10)
          // NaN fails this comparison too, so unparsable sizes are dropped.
          if (parsedSize > 0) {
            parameters.chunkSize = parsedSize
          }
        }

        return parameters
      },
    },
  },
  inputs: {
    inputMethod: { type: 'string', description: 'Input method selection' },
    filePath: { type: 'string', description: 'Document URL' },
    fileUpload: { type: 'json', description: 'Uploaded document file' },
    apiKey: { type: 'string', description: 'Pulse API key' },
    pages: { type: 'string', description: 'Page range selection' },
    chunking: {
      type: 'string',
      description: 'Chunking strategies (semantic, header, page, recursive)',
    },
    chunkSize: { type: 'string', description: 'Maximum characters per chunk' },
  },
  outputs: {
    markdown: { type: 'string', description: 'Extracted content in markdown format' },
    page_count: { type: 'number', description: 'Number of pages in the document' },
    job_id: { type: 'string', description: 'Unique job identifier' },
    'plan-info': { type: 'json', description: 'Plan usage information' },
    bounding_boxes: { type: 'json', description: 'Bounding box layout information' },
    extraction_url: { type: 'string', description: 'URL for extraction results (large documents)' },
    html: { type: 'string', description: 'HTML content if requested' },
    structured_output: { type: 'json', description: 'Structured output if schema was provided' },
    chunks: { type: 'json', description: 'Chunked content if chunking was enabled' },
    figures: { type: 'json', description: 'Extracted figures if figure extraction was enabled' },
  },
}
|
||||
148
apps/sim/blocks/blocks/reducto.ts
Normal file
148
apps/sim/blocks/blocks/reducto.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
import { ReductoIcon } from '@/components/icons'
|
||||
import { AuthMode, type BlockConfig, type SubBlockType } from '@/blocks/types'
|
||||
import type { ReductoParserOutput } from '@/tools/reducto/types'
|
||||
|
||||
/**
 * Parses the comma-separated "Specific Pages" field into page numbers.
 *
 * Blank tokens are skipped. Every remaining token must consist of digits only
 * and be at least 1, matching the field's "1-indexed" placeholder. Relying on
 * `Number.parseInt` alone would silently read "1-3", "2.5" or "3abc" as 1, 2
 * or 3 and quietly parse the wrong pages.
 *
 * @param raw - The raw field text, e.g. "1, 2,3".
 * @returns The parsed page numbers, or `undefined` when no token remains.
 * @throws Error when a token is not a whole number of at least 1.
 */
function parseReductoPageList(raw: string): number[] | undefined {
  const pageNumbers = raw
    .split(',')
    .map((token) => token.trim())
    .filter((token) => token.length > 0)
    .map((token) => {
      const pageNumber = /^\d+$/.test(token) ? Number.parseInt(token, 10) : Number.NaN
      if (Number.isNaN(pageNumber) || pageNumber < 1) {
        throw new Error(`Page number format error: Invalid page number: ${token}`)
      }
      return pageNumber
    })

  return pageNumbers.length > 0 ? pageNumbers : undefined
}

/**
 * Workflow block definition for the Reducto document parser.
 *
 * Declares the editor fields (input method, PDF URL or upload, page
 * selection, table format, API key), maps them onto the `reducto_parser`
 * tool's parameters, and lists the block's inputs and outputs.
 */
export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
  type: 'reducto',
  name: 'Reducto',
  description: 'Extract text from PDF documents',
  authMode: AuthMode.ApiKey,
  longDescription: `Integrate Reducto Parse into the workflow. Can extract text from uploaded PDF documents, or from a URL.`,
  docsLink: 'https://docs.sim.ai/tools/reducto',
  category: 'tools',
  bgColor: '#5c0c5c',
  icon: ReductoIcon,
  subBlocks: [
    // Chooses which of the two document fields below is shown.
    {
      id: 'inputMethod',
      title: 'Select Input Method',
      type: 'dropdown' as SubBlockType,
      options: [
        { id: 'url', label: 'PDF Document URL' },
        { id: 'upload', label: 'Upload PDF Document' },
      ],
    },
    // Shown only for the "url" input method.
    {
      id: 'filePath',
      title: 'PDF Document URL',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter full URL to a PDF document (https://example.com/document.pdf)',
      condition: {
        field: 'inputMethod',
        value: 'url',
      },
    },
    // Shown only for the "upload" input method.
    {
      id: 'fileUpload',
      title: 'Upload PDF',
      type: 'file-upload' as SubBlockType,
      acceptedTypes: 'application/pdf',
      condition: {
        field: 'inputMethod',
        value: 'upload',
      },
      // NOTE(review): unit is presumably megabytes — confirm against the file-upload sub-block.
      maxSize: 50,
    },
    {
      id: 'pages',
      title: 'Specific Pages',
      type: 'short-input',
      placeholder: 'e.g. 1,2,3 (1-indexed, leave empty for all)',
    },
    {
      id: 'tableOutputFormat',
      title: 'Table Format',
      type: 'dropdown',
      options: [
        { id: 'md', label: 'Markdown' },
        { id: 'html', label: 'HTML' },
      ],
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Reducto API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['reducto_parser'],
    config: {
      tool: () => 'reducto_parser',
      /**
       * Converts the raw field values into the tool's parameter object.
       * Throws when the API key or the document for the selected input method
       * is missing or blank, or when the page list contains an invalid entry.
       */
      params: (params) => {
        if (!params || !params.apiKey || params.apiKey.trim() === '') {
          throw new Error('Reducto API key is required')
        }

        const parameters: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
        }

        // An unset input method is treated as "url".
        const inputMethod = params.inputMethod || 'url'
        if (inputMethod === 'url') {
          if (!params.filePath || params.filePath.trim() === '') {
            throw new Error('PDF Document URL is required')
          }
          parameters.filePath = params.filePath.trim()
        } else if (inputMethod === 'upload') {
          if (!params.fileUpload) {
            throw new Error('Please upload a PDF document')
          }
          parameters.fileUpload = params.fileUpload
        }

        // The page list is optional; it is forwarded only when it yields at
        // least one valid page number.
        if (params.pages && params.pages.trim() !== '') {
          const pagesArray = parseReductoPageList(params.pages)
          if (pagesArray) {
            parameters.pages = pagesArray
          }
        }

        if (params.tableOutputFormat) {
          parameters.tableOutputFormat = params.tableOutputFormat
        }

        return parameters
      },
    },
  },
  inputs: {
    inputMethod: { type: 'string', description: 'Input method selection' },
    filePath: { type: 'string', description: 'PDF document URL' },
    fileUpload: { type: 'json', description: 'Uploaded PDF file' },
    apiKey: { type: 'string', description: 'Reducto API key' },
    pages: { type: 'string', description: 'Page selection' },
    tableOutputFormat: { type: 'string', description: 'Table output format' },
  },
  outputs: {
    job_id: { type: 'string', description: 'Unique identifier for the processing job' },
    duration: { type: 'number', description: 'Processing time in seconds' },
    usage: { type: 'json', description: 'Resource consumption data (num_pages, credits)' },
    result: { type: 'json', description: 'Parsed document content with chunks and blocks' },
    pdf_url: { type: 'string', description: 'Storage URL of converted PDF' },
    studio_link: { type: 'string', description: 'Link to Reducto studio interface' },
  },
}
|
||||
@@ -93,9 +93,11 @@ import { PipedriveBlock } from '@/blocks/blocks/pipedrive'
|
||||
import { PolymarketBlock } from '@/blocks/blocks/polymarket'
|
||||
import { PostgreSQLBlock } from '@/blocks/blocks/postgresql'
|
||||
import { PostHogBlock } from '@/blocks/blocks/posthog'
|
||||
import { PulseBlock } from '@/blocks/blocks/pulse'
|
||||
import { QdrantBlock } from '@/blocks/blocks/qdrant'
|
||||
import { RDSBlock } from '@/blocks/blocks/rds'
|
||||
import { RedditBlock } from '@/blocks/blocks/reddit'
|
||||
import { ReductoBlock } from '@/blocks/blocks/reducto'
|
||||
import { ResendBlock } from '@/blocks/blocks/resend'
|
||||
import { ResponseBlock } from '@/blocks/blocks/response'
|
||||
import { RouterBlock, RouterV2Block } from '@/blocks/blocks/router'
|
||||
@@ -237,6 +239,7 @@ export const registry: Record<string, BlockConfig> = {
|
||||
microsoft_planner: MicrosoftPlannerBlock,
|
||||
microsoft_teams: MicrosoftTeamsBlock,
|
||||
mistral_parse: MistralParseBlock,
|
||||
reducto: ReductoBlock,
|
||||
mongodb: MongoDBBlock,
|
||||
mysql: MySQLBlock,
|
||||
neo4j: Neo4jBlock,
|
||||
@@ -253,6 +256,7 @@ export const registry: Record<string, BlockConfig> = {
|
||||
polymarket: PolymarketBlock,
|
||||
postgresql: PostgreSQLBlock,
|
||||
posthog: PostHogBlock,
|
||||
pulse: PulseBlock,
|
||||
qdrant: QdrantBlock,
|
||||
rds: RDSBlock,
|
||||
sqs: SQSBlock,
|
||||
|
||||
@@ -4678,3 +4678,349 @@ export function BedrockIcon(props: SVGProps<SVGSVGElement>) {
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Reducto logo rendered as a single white SVG path.
 *
 * Caller props are spread first, so the attributes written after the spread
 * take precedence over same-named props.
 */
export function ReductoIcon(props: SVGProps<SVGSVGElement>) {
  // One even-odd filled path made of two sub-paths.
  const logoPath =
    'M85.3434 70.7805H314.657V240.307L226.44 329.219H85.3434V70.7805ZM107.796 93.2319H292.205V204.487H206.493V306.767H107.801L107.796 93.2319Z'

  return (
    <svg
      {...props}
      width='400'
      height='400'
      viewBox='50 40 300 320'
      fill='none'
      xmlns='http://www.w3.org/2000/svg'
    >
      <path fillRule='evenodd' clipRule='evenodd' d={logoPath} fill='#FFFFFF' />
    </svg>
  )
}
|
||||
|
||||
/**
 * X coordinates of each grid column, as they appear in the exported path data.
 * Tuple layout: [left, topLeftCurveEnd, innerLeft, innerRight, rightCurveCtrl, right, bottomLeftCurveCtrl].
 * The last entry differs from the second only in the first column (export rounding).
 */
const PULSE_ICON_COLUMNS = [
  ['0', '0.284685', '0.635863', '1.54133', '1.89251', '2.17719', '0.284686'],
  ['3.11318', '3.39787', '3.74905', '4.65452', '5.00569', '5.29038', '3.39787'],
  ['6.22637', '6.51105', '6.86223', '7.7677', '8.11888', '8.40356', '6.51105'],
  ['9.33955', '9.62424', '9.97541', '10.8809', '11.2321', '11.5167', '9.62424'],
  ['12.4527', '12.7374', '13.0886', '13.9941', '14.3452', '14.6299', '12.7374'],
  ['15.5659', '15.8506', '16.2018', '17.1073', '17.4584', '17.7431', '15.8506'],
  ['18.6791', '18.9638', '19.315', '20.2204', '20.5716', '20.8563', '18.9638'],
  ['21.7923', '22.077', '22.4282', '23.3336', '23.6848', '23.9695', '22.077'],
] as const

/**
 * Y coordinates of each grid row.
 * Tuple layout: [innerTop, topCurveCtrl, top, innerBottom, bottomCurveCtrl, bottom].
 */
const PULSE_ICON_ROWS = [
  ['6.63667', '6.28505', '6', '7.54329', '7.89492', '8.17997'],
  ['9.75382', '9.4022', '9.11715', '10.6604', '11.0121', '11.2971'],
  ['12.871', '12.5193', '12.2343', '13.7776', '14.1292', '14.4143'],
  ['15.9881', '15.6365', '15.3514', '16.8947', '17.2464', '17.5314'],
  ['19.1053', '18.7536', '18.4686', '20.0119', '20.3635', '20.6486'],
  ['22.2224', '21.8708', '21.5857', '23.129', '23.4807', '23.7657'],
  ['25.3396', '24.9879', '24.7029', '26.2462', '26.5978', '26.8829'],
  ['28.4567', '28.1051', '27.82', '29.3633', '29.715', '30'],
] as const

/** Cells (as `row-column`, zero-based) drawn in the solid accent colour; all others are faint. */
const PULSE_ICON_ACCENT_CELLS: ReadonlySet<string> = new Set([
  '2-3',
  '3-2',
  '3-4',
  '4-0',
  '4-1',
  '4-5',
  '4-7',
  '5-6',
])

/**
 * Pulse logo: an 8x8 grid of rounded squares where a handful of accent-coloured
 * cells trace a pulse line over a faint background grid.
 *
 * Cells are emitted row by row, left to right. Caller props are spread onto the
 * `<svg>` first, so the fixed attributes listed after them take precedence.
 */
export function PulseIcon(props: SVGProps<SVGSVGElement>) {
  return (
    <svg
      {...props}
      width='24'
      height='24'
      viewBox='0 6 24 24'
      fill='none'
      xmlns='http://www.w3.org/2000/svg'
    >
      {PULSE_ICON_ROWS.flatMap(([ya, yb, yc, yd, ye, yf], rowIndex) =>
        PULSE_ICON_COLUMNS.map(([x0, x1, x2, x3, x4, x5, x1b], columnIndex) => {
          const cellKey = `${rowIndex}-${columnIndex}`
          // One rounded square: start on the left edge, go clockwise around the four corners.
          const cellPath = `M${x0} ${ya}C${x0} ${yb} ${x1} ${yc} ${x2} ${yc}H${x3}C${x4} ${yc} ${x5} ${yb} ${x5} ${ya}V${yd}C${x5} ${ye} ${x4} ${yf} ${x3} ${yf}H${x2}C${x1b} ${yf} ${x0} ${ye} ${x0} ${yd}V${ya}Z`

          if (PULSE_ICON_ACCENT_CELLS.has(cellKey)) {
            return <path key={cellKey} d={cellPath} fill='#0E7BC9' />
          }
          return <path key={cellKey} d={cellPath} fill='#030712' fillOpacity='0.1' />
        })
      )}
    </svg>
  )
}
|
||||
|
||||
2
apps/sim/tools/pulse/index.ts
Normal file
2
apps/sim/tools/pulse/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export { pulseParserTool } from '@/tools/pulse/parser'
|
||||
export * from './types'
|
||||
283
apps/sim/tools/pulse/parser.ts
Normal file
283
apps/sim/tools/pulse/parser.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import type { PulseParserInput, PulseParserOutput } from '@/tools/pulse/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
const logger = createLogger('PulseParserTool')
|
||||
|
||||
/**
 * Prefixes a root-relative path (one starting with '/') with the application
 * base URL. Any other value is returned unchanged.
 *
 * @throws Error if the base URL cannot be determined.
 */
function toAbsoluteFileUrl(pathOrUrl: string): string {
  if (!pathOrUrl.startsWith('/')) return pathOrUrl

  const baseUrl = getBaseUrl()
  if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
  return `${baseUrl}${pathOrUrl}`
}

/**
 * Tool definition for parsing documents through the internal Pulse route
 * (`POST /api/tools/pulse/parse`).
 *
 * `request.body` validates the parameters and builds the JSON payload;
 * `transformResponse` maps the route's JSON reply onto `PulseParserOutput`.
 */
export const pulseParserTool: ToolConfig<PulseParserInput, PulseParserOutput> = {
  id: 'pulse_parser',
  name: 'Pulse Document Parser',
  description: 'Parse documents (PDF, images, Office docs) using Pulse OCR API',
  version: '1.0.0',

  params: {
    filePath: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'URL to a document to be processed',
    },
    fileUpload: {
      type: 'object',
      required: false,
      visibility: 'hidden',
      description: 'File upload data from file-upload component',
    },
    pages: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description: 'Page range to process (1-indexed, e.g., "1-2,5")',
    },
    extractFigure: {
      type: 'boolean',
      required: false,
      visibility: 'hidden',
      description: 'Enable figure extraction from the document',
    },
    figureDescription: {
      type: 'boolean',
      required: false,
      visibility: 'hidden',
      description: 'Generate descriptions/captions for extracted figures',
    },
    returnHtml: {
      type: 'boolean',
      required: false,
      visibility: 'hidden',
      description: 'Include HTML in the response',
    },
    chunking: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description: 'Chunking strategies (comma-separated: semantic, header, page, recursive)',
    },
    chunkSize: {
      type: 'number',
      required: false,
      visibility: 'user-only',
      description: 'Maximum characters per chunk when chunking is enabled',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Pulse API key',
    },
  },

  request: {
    url: '/api/tools/pulse/parse',
    method: 'POST',
    headers: () => {
      return {
        'Content-Type': 'application/json',
        Accept: 'application/json',
      }
    },
    /**
     * Validates the tool parameters and builds the JSON body for the parse route.
     *
     * The document location comes from `filePath`, or from `fileUpload`
     * (`url`, falling back to `path`) when `filePath` is empty or the string
     * 'null'. Root-relative locations are made absolute with the app base URL.
     * The caller's `params` object is not modified.
     *
     * @throws Error when params, the API key, the upload data or the file URL are invalid.
     */
    body: (params) => {
      if (!params || typeof params !== 'object') {
        throw new Error('Invalid parameters: Parameters must be provided as an object')
      }

      if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
        throw new Error('Missing or invalid API key: A valid Pulse API key is required')
      }

      // Work on a local copy so the caller's params object is left untouched.
      let filePath: string = params.filePath

      // Fall back to the uploaded file when no direct URL was supplied.
      if (params.fileUpload && (!filePath || filePath === 'null')) {
        const upload = params.fileUpload
        // `||` (not `??`) so an empty-string `url` still falls through to `path`.
        const uploadedFilePath =
          typeof upload === 'object' && upload !== null ? upload.url || upload.path || '' : ''

        if (!uploadedFilePath) {
          throw new Error('Invalid file upload: Upload data is missing or invalid')
        }

        filePath = toAbsoluteFileUrl(uploadedFilePath)
        logger.info('Using uploaded file:', filePath)
      }

      if (!filePath || typeof filePath !== 'string' || filePath.trim() === '') {
        throw new Error('Missing or invalid file path: Please provide a URL to a document')
      }

      const filePathToValidate = toAbsoluteFileUrl(filePath.trim())

      let url: URL
      try {
        url = new URL(filePathToValidate)

        if (!['http:', 'https:'].includes(url.protocol)) {
          throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`)
        }

        if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
          throw new Error(
            'Google Drive links are not supported. ' +
              'Please upload your document or provide a direct download link.'
          )
        }
      } catch (error) {
        // Parse failures and the checks above are all reported under one message prefix.
        const errorMessage = error instanceof Error ? error.message : String(error)
        throw new Error(
          `Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a document`
        )
      }

      const requestBody: Record<string, unknown> = {
        apiKey: params.apiKey.trim(),
        filePath: url.toString(),
      }

      // Internal workspace files are sent as their serve path rather than an absolute URL.
      if (params.fileUpload?.path?.startsWith('/api/files/serve/')) {
        requestBody.filePath = params.fileUpload.path
      }

      // Optional parameters are only included when they carry a usable value.
      if (params.pages && typeof params.pages === 'string' && params.pages.trim() !== '') {
        requestBody.pages = params.pages.trim()
      }

      if (params.extractFigure !== undefined) {
        requestBody.extractFigure = params.extractFigure
      }

      if (params.figureDescription !== undefined) {
        requestBody.figureDescription = params.figureDescription
      }

      if (params.returnHtml !== undefined) {
        requestBody.returnHtml = params.returnHtml
      }

      if (params.chunking && typeof params.chunking === 'string' && params.chunking.trim() !== '') {
        requestBody.chunking = params.chunking.trim()
      }

      if (params.chunkSize !== undefined && params.chunkSize > 0) {
        requestBody.chunkSize = params.chunkSize
      }

      return requestBody
    },
  },

  /**
   * Reads the route's JSON reply and maps it onto the tool output.
   *
   * If the reply has an object-valued `output` field, that object is used;
   * otherwise the reply itself is treated as the Pulse data. Missing fields
   * get defaults (empty string, 0 or null).
   *
   * @throws Error when the body is not valid JSON or is not an object.
   */
  transformResponse: async (response) => {
    let parseResult
    try {
      parseResult = await response.json()
    } catch (jsonError) {
      throw new Error(
        `Failed to parse Pulse response: ${jsonError instanceof Error ? jsonError.message : String(jsonError)}`
      )
    }

    if (!parseResult || typeof parseResult !== 'object') {
      throw new Error('Invalid response format from Pulse API')
    }

    // Pass through the native Pulse API response
    const pulseData =
      parseResult.output && typeof parseResult.output === 'object'
        ? parseResult.output
        : parseResult

    return {
      success: true,
      output: {
        markdown: pulseData.markdown ?? '',
        page_count: pulseData.page_count ?? 0,
        job_id: pulseData.job_id ?? '',
        'plan-info': pulseData['plan-info'] ?? { pages_used: 0, tier: 'unknown' },
        bounding_boxes: pulseData.bounding_boxes ?? null,
        extraction_url: pulseData.extraction_url ?? null,
        html: pulseData.html ?? null,
        structured_output: pulseData.structured_output ?? null,
        chunks: pulseData.chunks ?? null,
        figures: pulseData.figures ?? null,
      },
    }
  },

  outputs: {
    markdown: {
      type: 'string',
      description: 'Extracted content in markdown format',
    },
    page_count: {
      type: 'number',
      description: 'Number of pages in the document',
    },
    job_id: {
      type: 'string',
      description: 'Unique job identifier',
    },
    'plan-info': {
      type: 'object',
      description: 'Plan usage information',
      properties: {
        pages_used: { type: 'number', description: 'Number of pages used' },
        tier: { type: 'string', description: 'Plan tier' },
        note: { type: 'string', description: 'Optional note', optional: true },
      },
    },
    bounding_boxes: {
      type: 'json',
      description: 'Bounding box layout information',
      optional: true,
    },
    extraction_url: {
      type: 'string',
      description: 'URL for extraction results (for large documents)',
      optional: true,
    },
    html: {
      type: 'string',
      description: 'HTML content if requested',
      optional: true,
    },
    structured_output: {
      type: 'json',
      description: 'Structured output if schema was provided',
      optional: true,
    },
    chunks: {
      type: 'json',
      description: 'Chunked content if chunking was enabled',
      optional: true,
    },
    figures: {
      type: 'json',
      description: 'Extracted figures if figure extraction was enabled',
      optional: true,
    },
  },
}
|
||||
93
apps/sim/tools/pulse/types.ts
Normal file
93
apps/sim/tools/pulse/types.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import type { ToolResponse } from '@/tools/types'
|
||||
|
||||
/**
 * Parameters accepted by the Pulse parser tool.
 */
export interface PulseParserInput {
  /** Pulse API key used to authenticate the request */
  apiKey: string

  /** URL of the document to process */
  filePath: string

  /** Data produced by the file-upload component, used when no URL is given */
  fileUpload?: {
    url?: string
    path?: string
  }

  /** 1-indexed page range to process, e.g. "1-2,5" */
  pages?: string

  /** Extract figures from the document */
  extractFigure?: boolean

  /** Generate descriptions/captions for the extracted figures */
  figureDescription?: boolean

  /** Include HTML in the response */
  returnHtml?: boolean

  /** Comma-separated chunking strategies: semantic, header, page, recursive */
  chunking?: string

  /** Maximum number of characters per chunk when chunking is enabled */
  chunkSize?: number
}
|
||||
|
||||
/**
 * Plan usage details reported by the Pulse API alongside a parse result.
 */
export interface PulsePlanInfo {
  /** Plan tier name */
  tier: string

  /** How many pages have been used */
  pages_used: number

  /** Free-form note, when present */
  note?: string
}
|
||||
|
||||
/**
 * Native output structure from the Pulse API, as returned by the parser tool.
 *
 * The optional fields allow `null` because the tool's `transformResponse`
 * fills them with `null` when the Pulse reply does not contain them.
 */
export interface PulseParserOutputData {
  /** Extracted content in markdown format */
  markdown: string

  /** Number of pages in the document */
  page_count: number

  /** Unique job identifier */
  job_id: string

  /** Plan usage information */
  'plan-info': PulsePlanInfo

  /** Bounding box layout information */
  bounding_boxes?: Record<string, unknown> | null

  /** URL for extraction results (for large documents) */
  extraction_url?: string | null

  /** HTML content if requested */
  html?: string | null

  /** Structured output if schema was provided */
  structured_output?: Record<string, unknown> | null

  /** Chunked content if chunking was enabled */
  chunks?: unknown[] | null

  /** Extracted figures if figure extraction was enabled */
  figures?: unknown[] | null
}
|
||||
|
||||
/**
 * Full result of the Pulse parser tool: the generic tool response envelope
 * with `output` narrowed to the Pulse data.
 */
export interface PulseParserOutput extends ToolResponse {
  /** Parsed document data in the Pulse API's native shape */
  output: PulseParserOutputData
}
|
||||
3
apps/sim/tools/reducto/index.ts
Normal file
3
apps/sim/tools/reducto/index.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
import { reductoParserTool } from '@/tools/reducto/parser'
|
||||
|
||||
export { reductoParserTool }
|
||||
203
apps/sim/tools/reducto/parser.ts
Normal file
203
apps/sim/tools/reducto/parser.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import type { ReductoParserInput, ReductoParserOutput } from '@/tools/reducto/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
// Logger for this module, created with the name 'ReductoParserTool'.
const logger = createLogger('ReductoParserTool')
|
||||
|
||||
/**
 * Resolves a root-relative path (one starting with `/`) against the
 * application base URL. Any other value is returned unchanged.
 *
 * @throws Error when the base URL cannot be obtained.
 */
const toAbsoluteUrl = (pathOrUrl: string): string => {
  if (!pathOrUrl.startsWith('/')) return pathOrUrl

  const baseUrl = getBaseUrl()
  if (!baseUrl) throw new Error('Failed to get base URL for file path conversion')
  return `${baseUrl}${pathOrUrl}`
}

/**
 * Tool definition that parses documents with Reducto via the internal
 * `/api/tools/reducto/parse` route.
 *
 * `request.body` validates the parameters and builds the JSON payload:
 * - requires a non-empty `apiKey`;
 * - takes the document location from `filePath`, falling back to
 *   `fileUpload.url` / `fileUpload.path` when `filePath` is empty or `'null'`;
 * - resolves root-relative paths against the application base URL;
 * - only accepts `http:` / `https:` URLs and rejects Google Drive/Docs hosts;
 * - forwards `tableOutputFormat` only when it is `'html'` or `'md'`;
 * - forwards `pages` only for positive integers (pages are 1-indexed).
 *
 * The input `params` object is never mutated.
 *
 * `transformResponse` returns the native Reducto fields, reading them from
 * `data.output` when present and from `data` otherwise.
 */
export const reductoParserTool: ToolConfig<ReductoParserInput, ReductoParserOutput> = {
  id: 'reducto_parser',
  name: 'Reducto PDF Parser',
  description: 'Parse PDF documents using Reducto OCR API',
  version: '1.0.0',

  params: {
    filePath: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'URL to a PDF document to be processed',
    },
    fileUpload: {
      type: 'object',
      required: false,
      visibility: 'hidden',
      description: 'File upload data from file-upload component',
    },
    pages: {
      type: 'array',
      required: false,
      visibility: 'user-only',
      description: 'Specific pages to process (1-indexed page numbers)',
    },
    tableOutputFormat: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      // The accepted values are exactly 'html' and 'md'; advertise those so
      // callers (including LLMs) do not send an unsupported 'markdown' value.
      description: 'Table output format (html or md). Defaults to md (markdown).',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Reducto API key (REDUCTO_API_KEY)',
    },
  },

  request: {
    url: '/api/tools/reducto/parse',
    method: 'POST',
    headers: (params) => {
      return {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${params.apiKey}`,
      }
    },
    body: (params) => {
      if (!params || typeof params !== 'object') {
        throw new Error('Invalid parameters: Parameters must be provided as an object')
      }

      if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
        throw new Error('Missing or invalid API key: A valid Reducto API key is required')
      }

      // Work on a local copy so the caller's params object is left untouched.
      let filePath: string | undefined = params.filePath
      const upload = params.fileUpload

      // Fall back to the uploaded file when no direct URL was supplied.
      if (upload && (!filePath || filePath === 'null' || filePath === '')) {
        const uploadedLocation =
          typeof upload === 'object' ? upload.url || upload.path : undefined

        if (!uploadedLocation) {
          throw new Error('Invalid file upload: Upload data is missing or invalid')
        }

        filePath = toAbsoluteUrl(uploadedLocation)
        logger.info('Using uploaded file:', filePath)
      }

      if (!filePath || typeof filePath !== 'string' || filePath.trim() === '') {
        throw new Error('Missing or invalid file path: Please provide a URL to a PDF document')
      }

      const candidate = toAbsoluteUrl(filePath.trim())

      // Only URL *parsing* failures are reported as "Invalid URL format";
      // the policy checks below throw their own, unwrapped messages.
      let url: URL
      try {
        url = new URL(candidate)
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error)
        throw new Error(
          `Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a PDF document.`
        )
      }

      if (!['http:', 'https:'].includes(url.protocol)) {
        throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`)
      }

      if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
        throw new Error(
          'Google Drive links are not supported by the Reducto API. ' +
            'Please upload your PDF to a public web server or provide a direct download link.'
        )
      }

      const requestBody: Record<string, unknown> = {
        apiKey: params.apiKey,
        filePath: url.toString(),
      }

      // Internal workspace files are sent as their original serve path
      // rather than the absolute URL built above.
      if (params.fileUpload?.path?.startsWith('/api/files/serve/')) {
        requestBody.filePath = params.fileUpload.path
      }

      if (params.tableOutputFormat && ['html', 'md'].includes(params.tableOutputFormat)) {
        requestBody.tableOutputFormat = params.tableOutputFormat
      }

      // Page selection: pages are documented as 1-indexed, so 0, negatives,
      // and non-integers are discarded.
      if (Array.isArray(params.pages)) {
        const validPages = params.pages.filter(
          (page) => typeof page === 'number' && Number.isInteger(page) && page >= 1
        )

        if (validPages.length > 0) {
          requestBody.pages = validPages
        }
      }

      return requestBody
    },
  },

  transformResponse: async (response) => {
    const data = await response.json()

    if (!data || typeof data !== 'object') {
      throw new Error('Invalid response format from Reducto API')
    }

    // Pass through the native Reducto response; the payload may be nested
    // under `output` or be the top-level object itself.
    const reductoData = data.output ?? data

    return {
      success: true,
      output: {
        job_id: reductoData.job_id,
        duration: reductoData.duration,
        usage: reductoData.usage,
        result: reductoData.result,
        pdf_url: reductoData.pdf_url ?? null,
        studio_link: reductoData.studio_link ?? null,
      },
    }
  },

  outputs: {
    job_id: { type: 'string', description: 'Unique identifier for the processing job' },
    duration: { type: 'number', description: 'Processing time in seconds' },
    usage: {
      type: 'json',
      description: 'Resource consumption data',
    },
    result: {
      type: 'json',
      description: 'Parsed document content with chunks and blocks',
    },
    pdf_url: {
      type: 'string',
      description: 'Storage URL of converted PDF',
      optional: true,
    },
    studio_link: {
      type: 'string',
      description: 'Link to Reducto studio interface',
      optional: true,
    },
  },
}
|
||||
160
apps/sim/tools/reducto/types.ts
Normal file
160
apps/sim/tools/reducto/types.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import type { ToolResponse } from '@/tools/types'
|
||||
|
||||
/**
 * Parameters accepted by the Reducto parser tool.
 */
export interface ReductoParserInput {
  /** Location (URL) of the document to parse */
  filePath: string

  /** Data provided by the file-upload component, if a file was uploaded */
  fileUpload?: {
    url?: string
    path?: string
  }

  /** API key used to authenticate with Reducto */
  apiKey: string

  /** Page numbers to process (1-indexed) */
  pages?: number[]

  /** Format for tables in the output: `html` or `md` */
  tableOutputFormat?: 'html' | 'md'
}
|
||||
|
||||
/**
 * Rectangle describing where a piece of content sits in the document,
 * together with the page it belongs to. Coordinate units are not specified here.
 */
export interface ReductoBoundingBox {
  left: number
  top: number
  width: number
  height: number
  page: number
}
|
||||
|
||||
/**
 * Fine-grained confidence values, broken down by `ocr`, `layout`, and `order`.
 * Each entry may be `null`.
 */
export interface ReductoGranularConfidence {
  ocr: string | null
  layout: string | null
  order: string | null
}
|
||||
|
||||
/**
 * Classification label assigned to a parsed block.
 */
export type ReductoBlockType =
  | 'Header'
  | 'Footer'
  | 'Title'
  | 'SectionHeader'
  | 'Text'
  | 'ListItem'
  | 'Table'
  | 'Figure'
  | 'Caption'
  | 'Equation'
  | 'Code'
  | 'PageNumber'
  | 'Watermark'
  | 'Handwriting'
  | 'Other'
|
||||
|
||||
/**
 * A single structured content element produced by parsing: its type,
 * location, content, and optional (nullable) metadata.
 */
export interface ReductoParseBlock {
  type: ReductoBlockType
  bbox: ReductoBoundingBox
  content: string
  image_url: string | null
  chart_data: string[] | null
  confidence: string | null
  granular_confidence: ReductoGranularConfidence | null
  extra: Record<string, unknown> | null
}
|
||||
|
||||
/**
 * A segment of the parsed document, carrying its text variants and the
 * blocks that make it up.
 */
export interface ReductoParseChunk {
  content: string
  embed: string
  enriched: string | null
  blocks: ReductoParseBlock[]
  enrichment_success: boolean
}
|
||||
|
||||
/**
 * One word recognized by OCR, with its location and a numeric confidence.
 */
export interface ReductoOcrWord {
  text: string
  bbox: ReductoBoundingBox
  confidence: number
}
|
||||
|
||||
/**
 * One line recognized by OCR: its text, location, and the words it contains.
 */
export interface ReductoOcrLine {
  text: string
  bbox: ReductoBoundingBox
  words: ReductoOcrWord[]
}
|
||||
|
||||
/**
 * OCR output, exposed both as lines and as individual words.
 */
export interface ReductoOcrResult {
  lines: ReductoOcrLine[]
  words: ReductoOcrWord[]
}
|
||||
|
||||
/**
 * Inline result variant (`type: 'full'`), used when the response fits in the payload.
 */
export interface ReductoFullResult {
  type: 'full'
  chunks: ReductoParseChunk[]
  ocr: ReductoOcrResult | null
  custom: unknown
}
|
||||
|
||||
/**
 * By-reference result variant (`type: 'url'`), used when the response
 * exceeds size limits; the content is referenced by `url`.
 */
export interface ReductoUrlResult {
  type: 'url'
  url: string
}
|
||||
|
||||
/**
 * Usage figures reported by the Reducto API; `credits` may be `null`.
 */
export interface ReductoUsage {
  num_pages: number
  credits: number | null
}
|
||||
|
||||
/**
 * Response structure as returned natively by the Reducto API.
 * `result` is either the inline (`full`) or the by-reference (`url`) variant.
 */
export interface ReductoParserOutputData {
  job_id: string
  duration: number
  usage: ReductoUsage
  result: ReductoFullResult | ReductoUrlResult
  pdf_url: string | null
  studio_link: string | null
}
|
||||
|
||||
/**
 * Full tool response for the Reducto parser: the common `ToolResponse`
 * envelope with the native Reducto payload as its `output`.
 */
export interface ReductoParserOutput extends ToolResponse {
  output: ReductoParserOutputData
}
|
||||
@@ -1032,6 +1032,7 @@ import {
|
||||
posthogUpdatePropertyDefinitionTool,
|
||||
posthogUpdateSurveyTool,
|
||||
} from '@/tools/posthog'
|
||||
import { pulseParserTool } from '@/tools/pulse'
|
||||
import { qdrantFetchTool, qdrantSearchTool, qdrantUpsertTool } from '@/tools/qdrant'
|
||||
import {
|
||||
rdsDeleteTool,
|
||||
@@ -1056,6 +1057,7 @@ import {
|
||||
redditUnsaveTool,
|
||||
redditVoteTool,
|
||||
} from '@/tools/reddit'
|
||||
import { reductoParserTool } from '@/tools/reducto'
|
||||
import { mailSendTool } from '@/tools/resend'
|
||||
import {
|
||||
s3CopyObjectTool,
|
||||
@@ -2126,6 +2128,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
google_slides_add_image: googleSlidesAddImageTool,
|
||||
perplexity_chat: perplexityChatTool,
|
||||
perplexity_search: perplexitySearchTool,
|
||||
pulse_parser: pulseParserTool,
|
||||
posthog_capture_event: posthogCaptureEventTool,
|
||||
posthog_batch_events: posthogBatchEventsTool,
|
||||
posthog_list_persons: posthogListPersonsTool,
|
||||
@@ -2248,6 +2251,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
apollo_task_search: apolloTaskSearchTool,
|
||||
apollo_email_accounts: apolloEmailAccountsTool,
|
||||
mistral_parser: mistralParserTool,
|
||||
reducto_parser: reductoParserTool,
|
||||
thinking_tool: thinkingTool,
|
||||
tinybird_events: tinybirdEventsTool,
|
||||
tinybird_query: tinybirdQueryTool,
|
||||
|
||||
Reference in New Issue
Block a user