feat(extend): add Extend AI document processing integration (#3869)

* feat(extend): add Extend AI document processing integration

* fix(extend): cast json response to fix type error

* fix(extend): correct API request body structure per Extend docs

* fix(extend): address PR review comments

* fix(extend): sync integrations.json bgColor to #000000

* lint
This commit is contained in:
Waleed
2026-03-31 16:26:34 -07:00
committed by GitHub
parent d99dd86bf2
commit 72e28baa07
14 changed files with 822 additions and 1 deletions

View File

@@ -2080,6 +2080,19 @@ export function Mem0Icon(props: SVGProps<SVGSVGElement>) {
)
}
/**
 * Extend logo rendered as a single-path inline SVG (viewBox `0 0 33 18`).
 *
 * The path is filled with `currentColor`, so the icon takes the text color of its container.
 * Caller props are spread first; the `xmlns`, `viewBox` and `fill` attributes written after
 * the spread therefore take precedence over same-named props.
 *
 * @param props - Standard SVG element props forwarded to the root `<svg>`.
 */
export function ExtendIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 33 18' fill='none'>
<path
clipRule='evenodd'
d='M16.2893 0C16.6984 1.91708e-05 17.1074 0.0970011 17.5103 0.293745C22.3018 2.63326 27.0841 4.98521 31.8693 7.33722C32.3003 7.54649 32.5721 7.9868 32.5721 8.46461V9.51422C32.5721 9.99522 32.3004 10.4357 31.8693 10.645C31.8693 10.645 19.5816 16.6732 17.5542 17.6634C17.1357 17.8696 16.692 17.9727 16.2859 17.9727C15.8799 17.9727 15.4707 17.8758 15.0615 17.6759C12.8124 16.5795 1.9646 11.2604 0.705842 10.6419C0.274826 10.4295 2.31482e-05 9.99216 0 9.51117V8.46461C4.59913e-05 7.98366 0.271816 7.54656 0.702792 7.33417C5.8977 4.7819 15.0599 0.301869 15.1021 0.281239C15.4957 0.0938275 15.8801 0 16.2893 0ZM16.2859 2.96124C16.1516 2.96126 16.0173 2.98909 15.8924 3.05153L4.28874 8.77696C4.11382 8.86442 4.11382 9.10831 4.28874 9.19577L15.8924 14.9209C16.0173 14.9802 16.1516 15.0115 16.2859 15.0115C16.4202 15.0115 16.5548 14.9802 16.6797 14.9209L28.2864 9.19577C28.4582 9.10831 28.4582 8.86442 28.2864 8.77696L16.6797 3.05153C16.5548 2.98906 16.4202 2.96124 16.2859 2.96124Z'
fill='currentColor'
fillRule='evenodd'
/>
</svg>
)
}
export function EvernoteIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32' fill='#7fce2c'>

View File

@@ -45,6 +45,7 @@ import {
EnrichSoIcon,
EvernoteIcon,
ExaAIIcon,
ExtendIcon,
EyeIcon,
FathomIcon,
FirecrawlIcon,
@@ -223,6 +224,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
enrich: EnrichSoIcon,
evernote: EvernoteIcon,
exa: ExaAIIcon,
extend_v2: ExtendIcon,
fathom: FathomIcon,
file_v3: DocumentIcon,
firecrawl: FirecrawlIcon,

View File

@@ -0,0 +1,39 @@
---
title: Extend
description: Parse and extract content from documents
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="extend_v2"
color="#000000"
/>
## Usage Instructions
Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.
## Tools
### `extend_parser`
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `filePath` | string | No | URL to a document to be processed |
| `file` | file | No | Document file to be processed |
| `fileUpload` | object | No | File upload data from file-upload component |
| `outputFormat` | string | No | Target output format \(markdown or spatial\). Defaults to markdown. |
| `chunking` | string | No | Chunking strategy \(page, document, or section\). Defaults to page. |
| `engine` | string | No | Parsing engine \(parse_performance or parse_light\). Defaults to parse_performance. |
| `apiKey` | string | Yes | Extend API key |
#### Output
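| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `id` | string | Unique identifier for the parser run |
| `status` | string | Processing status |
| `chunks` | json | Parsed document content chunks |
| `blocks` | json | Block-level document elements with type and content |
| `pageCount` | number | Number of pages processed |
| `creditsUsed` | number | API credits consumed |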

View File

@@ -39,6 +39,7 @@
"enrich",
"evernote",
"exa",
"extend",
"fathom",
"file",
"firecrawl",

View File

@@ -45,6 +45,7 @@ import {
EnrichSoIcon,
EvernoteIcon,
ExaAIIcon,
ExtendIcon,
EyeIcon,
FathomIcon,
FirecrawlIcon,
@@ -223,6 +224,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
enrich: EnrichSoIcon,
evernote: EvernoteIcon,
exa: ExaAIIcon,
extend_v2: ExtendIcon,
fathom: FathomIcon,
file_v3: DocumentIcon,
firecrawl: FirecrawlIcon,

View File

@@ -2978,6 +2978,24 @@
"integrationType": "search",
"tags": ["web-scraping", "enrichment"]
},
{
"type": "extend_v2",
"slug": "extend",
"name": "Extend",
"description": "Parse and extract content from documents",
"longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.",
"bgColor": "#000000",
"iconName": "ExtendIcon",
"docsUrl": "https://docs.sim.ai/tools/extend",
"operations": [],
"operationCount": 0,
"triggers": [],
"triggerCount": 0,
"authType": "api-key",
"category": "tools",
"integrationType": "ai",
"tags": ["document-processing", "ocr"]
},
{
"type": "fathom",
"slug": "fathom",

View File

@@ -0,0 +1,188 @@
import { createLogger } from '@sim/logger'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { checkInternalAuth } from '@/lib/auth/hybrid'
import {
secureFetchWithPinnedIP,
validateUrlWithDNS,
} from '@/lib/core/security/input-validation.server'
import { generateRequestId } from '@/lib/core/utils/request'
import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas'
import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server'
// Next.js route segment config: render this route dynamically on every request.
export const dynamic = 'force-dynamic'
// Logger used by this route, created with the 'ExtendParseAPI' label.
const logger = createLogger('ExtendParseAPI')
/**
 * Validation schema for the JSON body accepted by the POST handler in this file.
 *
 * - `apiKey` is mandatory and must be a non-empty string.
 * - `filePath` and `file` are both optional at the schema level; the handler rejects the
 *   request later if neither resolves to a file URL.
 * - `outputFormat`, `chunking` and `engine` are optional and restricted to the listed values.
 */
const ExtendParseSchema = z.object({
apiKey: z.string().min(1, 'API key is required'),
filePath: z.string().optional(),
file: RawFileInputSchema.optional(),
outputFormat: z.enum(['markdown', 'spatial']).optional(),
chunking: z.enum(['page', 'document', 'section']).optional(),
engine: z.enum(['parse_performance', 'parse_light']).optional(),
})
/**
 * Extracts a client-facing message from an Extend error response body.
 * Returns the first non-empty string found in `message` or `error`; otherwise `fallback`.
 */
function extractExtendErrorMessage(errorText: string, fallback: string): string {
  try {
    const parsed: unknown = JSON.parse(errorText)
    if (parsed !== null && typeof parsed === 'object') {
      const { message, error } = parsed as { message?: unknown; error?: unknown }
      for (const candidate of [message, error]) {
        // Only surface real strings; nested objects or empty values fall through to the fallback.
        if (typeof candidate === 'string' && candidate.trim() !== '') {
          return candidate
        }
      }
    }
  } catch {
    // errorText is not JSON; keep generic message
  }
  return fallback
}

/**
 * Proxies a document-parse request to the Extend API.
 *
 * Flow: authenticate the internal caller, validate the JSON body against
 * `ExtendParseSchema`, resolve the file input to a URL, build the Extend request
 * (`file.fileUrl` plus an optional `config`), call `https://api.extend.ai/parse` through the
 * DNS-validated, IP-pinned fetch helper, and return a normalized `output` object.
 *
 * Responses:
 * - 401 when internal auth fails or no user id is attached.
 * - 400 when the body is not valid JSON, fails schema validation, or yields no file URL.
 * - The status reported by file resolution when it returns an error.
 * - 502 when the Extend endpoint cannot be validated/resolved.
 * - The upstream status when Extend responds with a non-2xx status.
 * - 500 for any other unexpected failure.
 *
 * @param request - Incoming request carrying the JSON parse parameters.
 * @returns A JSON response of the form `{ success, output }` or `{ success: false, error }`.
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId

    // A malformed body is a client error, not a server failure: answer 400 instead of
    // letting the SyntaxError fall through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      logger.warn(`[${requestId}] Invalid JSON in Extend parse request body`)
      return NextResponse.json(
        { success: false, error: 'Invalid JSON in request body' },
        { status: 400 }
      )
    }

    const validatedData = ExtendParseSchema.parse(body)

    logger.info(`[${requestId}] Extend parse request`, {
      fileName: validatedData.file?.name,
      filePath: validatedData.filePath,
      isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false,
      userId,
    })

    const resolution = await resolveFileInputToUrl({
      file: validatedData.file,
      filePath: validatedData.filePath,
      userId,
      requestId,
      logger,
    })

    if (resolution.error) {
      return NextResponse.json(
        { success: false, error: resolution.error.message },
        { status: resolution.error.status }
      )
    }

    const fileUrl = resolution.fileUrl
    if (!fileUrl) {
      return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
    }

    const extendBody: Record<string, unknown> = {
      file: { fileUrl },
    }

    // Only forward options the caller actually set so Extend applies its own defaults otherwise.
    const config: Record<string, unknown> = {}
    if (validatedData.outputFormat) {
      config.target = validatedData.outputFormat
    }
    if (validatedData.chunking) {
      config.chunkingStrategy = { type: validatedData.chunking }
    }
    if (validatedData.engine) {
      config.engine = validatedData.engine
    }
    if (Object.keys(config).length > 0) {
      extendBody.config = config
    }

    const extendEndpoint = 'https://api.extend.ai/parse'
    const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL')
    const resolvedIP = extendValidation.resolvedIP

    // Guard the resolved IP explicitly rather than asserting it is present.
    if (!extendValidation.isValid || !resolvedIP) {
      logger.error(`[${requestId}] Extend API URL validation failed`, {
        error: extendValidation.error,
      })
      return NextResponse.json(
        {
          success: false,
          error: 'Failed to reach Extend API',
        },
        { status: 502 }
      )
    }

    const extendResponse = await secureFetchWithPinnedIP(extendEndpoint, resolvedIP, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${validatedData.apiKey}`,
        'x-extend-api-version': '2025-04-21',
      },
      body: JSON.stringify(extendBody),
    })

    if (!extendResponse.ok) {
      const errorText = await extendResponse.text()
      logger.error(`[${requestId}] Extend API error:`, errorText)

      const clientError = extractExtendErrorMessage(
        errorText,
        `Extend API error: ${extendResponse.statusText || extendResponse.status}`
      )

      return NextResponse.json(
        {
          success: false,
          error: clientError,
        },
        { status: extendResponse.status }
      )
    }

    const extendData = (await extendResponse.json()) as Record<string, unknown>

    logger.info(`[${requestId}] Extend parse successful`)

    // Accept both camelCase and snake_case field names for the counters.
    return NextResponse.json({
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? 'PROCESSED',
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Extend parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}

View File

@@ -0,0 +1,199 @@
import { ExtendIcon } from '@/components/icons'
import { AuthMode, type BlockConfig, IntegrationType, type SubBlockType } from '@/blocks/types'
import { createVersionedToolSelector, normalizeFileInput } from '@/blocks/utils'
import type { ExtendParserOutput } from '@/tools/extend/types'
/**
 * Original Extend block definition (`extend`), kept registered but hidden from the toolbar.
 *
 * The document can be supplied either as an uploaded file (basic mode) or as a URL
 * (advanced mode); both sub-blocks feed the canonical `document` param, which is mapped to
 * the `extend_parser` tool's `file` or `filePath` parameter depending on its runtime type.
 */
export const ExtendBlock: BlockConfig<ExtendParserOutput> = {
  type: 'extend',
  name: 'Extend',
  description: 'Parse and extract content from documents',
  hideFromToolbar: true,
  authMode: AuthMode.ApiKey,
  longDescription:
    'Integrate Extend AI into the workflow. Parse and extract structured content from documents including PDFs, images, and Office files.',
  docsLink: 'https://docs.sim.ai/tools/extend',
  category: 'tools',
  integrationType: IntegrationType.AI,
  tags: ['document-processing', 'ocr'],
  bgColor: '#000000',
  icon: ExtendIcon,
  subBlocks: [
    // Basic mode: upload the document directly.
    {
      id: 'fileUpload',
      title: 'Document',
      type: 'file-upload' as SubBlockType,
      canonicalParamId: 'document',
      acceptedTypes:
        'application/pdf,image/jpeg,image/png,image/tiff,image/gif,image/bmp,image/webp,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
      placeholder: 'Upload a document',
      mode: 'basic',
      maxSize: 50,
      required: true,
    },
    // Advanced mode: point at the document by URL instead.
    {
      id: 'filePath',
      title: 'Document',
      type: 'short-input' as SubBlockType,
      canonicalParamId: 'document',
      placeholder: 'Document URL',
      mode: 'advanced',
      required: true,
    },
    {
      id: 'outputFormat',
      title: 'Output Format',
      type: 'dropdown',
      options: [
        { id: 'markdown', label: 'Markdown' },
        { id: 'spatial', label: 'Spatial' },
      ],
    },
    {
      id: 'chunking',
      title: 'Chunking Strategy',
      type: 'dropdown',
      options: [
        { id: 'page', label: 'Page' },
        { id: 'document', label: 'Document' },
        { id: 'section', label: 'Section' },
      ],
    },
    {
      id: 'engine',
      title: 'Engine',
      type: 'dropdown',
      mode: 'advanced',
      options: [
        { id: 'parse_performance', label: 'Performance' },
        { id: 'parse_light', label: 'Light' },
      ],
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input' as SubBlockType,
      placeholder: 'Enter your Extend API key',
      password: true,
      required: true,
    },
  ],
  tools: {
    access: ['extend_parser'],
    config: {
      tool: () => 'extend_parser',
      // Translate block inputs into tool params: an object document becomes `file`,
      // a string document becomes a trimmed `filePath`; optional settings are copied
      // across only when they hold a truthy value.
      params: (params) => {
        const toolParams: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
        }

        const doc = params.document
        switch (typeof doc) {
          case 'object':
            toolParams.file = doc
            break
          case 'string':
            toolParams.filePath = doc.trim()
            break
        }

        for (const optionName of ['outputFormat', 'chunking', 'engine'] as const) {
          const optionValue = params[optionName]
          if (optionValue) {
            toolParams[optionName] = optionValue
          }
        }

        return toolParams
      },
    },
  },
  inputs: {
    document: {
      type: 'json',
      description: 'Document input (canonical param for file upload or URL)',
    },
    apiKey: { type: 'string', description: 'Extend API key' },
    outputFormat: { type: 'string', description: 'Output format (markdown or spatial)' },
    chunking: { type: 'string', description: 'Chunking strategy' },
    engine: { type: 'string', description: 'Parsing engine' },
  },
  outputs: {
    id: { type: 'string', description: 'Unique identifier for the parser run' },
    status: { type: 'string', description: 'Processing status' },
    chunks: { type: 'json', description: 'Parsed document content chunks' },
    blocks: { type: 'json', description: 'Block-level document elements' },
    pageCount: { type: 'number', description: 'Number of pages processed' },
    creditsUsed: { type: 'number', description: 'API credits consumed' },
  },
}
// The v2 block exposes exactly the same inputs as the original block.
const extendV2Inputs = ExtendBlock.inputs

// Derive the v2 sub-blocks from the original list: the URL field (`filePath`) is removed,
// and a `fileReference` field is inserted right after the upload field so the document can
// come from another block's file output. Every other sub-block is passed through untouched.
const extendV2SubBlocks = (ExtendBlock.subBlocks || []).flatMap((entry) => {
  switch (entry.id) {
    case 'filePath':
      return []
    case 'fileUpload':
      return [
        entry,
        {
          id: 'fileReference',
          title: 'Document',
          type: 'short-input' as SubBlockType,
          canonicalParamId: 'document',
          placeholder: 'Connect a file output from another block',
          mode: 'advanced' as const,
          required: true,
        },
      ]
    default:
      return [entry]
  }
})
/**
 * Toolbar-visible Extend block (`extend_v2`).
 *
 * Inherits everything from `ExtendBlock` and overrides the type, visibility, description,
 * sub-blocks and tool wiring. The document must normalize to a single file object; it is
 * always forwarded as `file` to the `extend_parser_v2` tool.
 */
export const ExtendV2Block: BlockConfig<ExtendParserOutput> = {
  ...ExtendBlock,
  type: 'extend_v2',
  name: 'Extend',
  hideFromToolbar: false,
  longDescription:
    'Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.',
  subBlocks: extendV2SubBlocks,
  tools: {
    access: ['extend_parser_v2'],
    config: {
      tool: createVersionedToolSelector({
        baseToolSelector: () => 'extend_parser',
        suffix: '_v2',
        fallbackToolId: 'extend_parser_v2',
      }),
      // Build the tool params: trimmed API key, the normalized file, then any optional
      // settings that hold a truthy value.
      params: (params) => {
        const toolParams: Record<string, unknown> = {
          apiKey: params.apiKey.trim(),
        }

        const normalizedFile = normalizeFileInput(params.document, { single: true })
        if (!normalizedFile) {
          throw new Error('Document file is required')
        }
        toolParams.file = normalizedFile

        for (const optionName of ['outputFormat', 'chunking', 'engine'] as const) {
          const optionValue = params[optionName]
          if (optionValue) {
            toolParams[optionName] = optionValue
          }
        }

        return toolParams
      },
    },
  },
  inputs: extendV2Inputs,
}

View File

@@ -42,6 +42,7 @@ import { EnrichBlock } from '@/blocks/blocks/enrich'
import { EvaluatorBlock } from '@/blocks/blocks/evaluator'
import { EvernoteBlock } from '@/blocks/blocks/evernote'
import { ExaBlock } from '@/blocks/blocks/exa'
import { ExtendBlock, ExtendV2Block } from '@/blocks/blocks/extend'
import { FathomBlock } from '@/blocks/blocks/fathom'
import { FileBlock, FileV2Block, FileV3Block } from '@/blocks/blocks/file'
import { FirecrawlBlock } from '@/blocks/blocks/firecrawl'
@@ -256,9 +257,11 @@ export const registry: Record<string, BlockConfig> = {
elevenlabs: ElevenLabsBlock,
fathom: FathomBlock,
enrich: EnrichBlock,
evernote: EvernoteBlock,
evaluator: EvaluatorBlock,
evernote: EvernoteBlock,
exa: ExaBlock,
extend: ExtendBlock,
extend_v2: ExtendV2Block,
file: FileBlock,
file_v2: FileV2Block,
file_v3: FileV3Block,

View File

@@ -2080,6 +2080,19 @@ export function Mem0Icon(props: SVGProps<SVGSVGElement>) {
)
}
/**
 * Extend logo rendered as a single-path inline SVG (viewBox `0 0 33 18`).
 *
 * The path is filled with `currentColor`, so the icon takes the text color of its container.
 * Caller props are spread first; the `xmlns`, `viewBox` and `fill` attributes written after
 * the spread therefore take precedence over same-named props.
 *
 * @param props - Standard SVG element props forwarded to the root `<svg>`.
 */
export function ExtendIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 33 18' fill='none'>
<path
clipRule='evenodd'
d='M16.2893 0C16.6984 1.91708e-05 17.1074 0.0970011 17.5103 0.293745C22.3018 2.63326 27.0841 4.98521 31.8693 7.33722C32.3003 7.54649 32.5721 7.9868 32.5721 8.46461V9.51422C32.5721 9.99522 32.3004 10.4357 31.8693 10.645C31.8693 10.645 19.5816 16.6732 17.5542 17.6634C17.1357 17.8696 16.692 17.9727 16.2859 17.9727C15.8799 17.9727 15.4707 17.8758 15.0615 17.6759C12.8124 16.5795 1.9646 11.2604 0.705842 10.6419C0.274826 10.4295 2.31482e-05 9.99216 0 9.51117V8.46461C4.59913e-05 7.98366 0.271816 7.54656 0.702792 7.33417C5.8977 4.7819 15.0599 0.301869 15.1021 0.281239C15.4957 0.0938275 15.8801 0 16.2893 0ZM16.2859 2.96124C16.1516 2.96126 16.0173 2.98909 15.8924 3.05153L4.28874 8.77696C4.11382 8.86442 4.11382 9.10831 4.28874 9.19577L15.8924 14.9209C16.0173 14.9802 16.1516 15.0115 16.2859 15.0115C16.4202 15.0115 16.5548 14.9802 16.6797 14.9209L28.2864 9.19577C28.4582 9.10831 28.4582 8.86442 28.2864 8.77696L16.6797 3.05153C16.5548 2.98906 16.4202 2.96124 16.2859 2.96124Z'
fill='currentColor'
fillRule='evenodd'
/>
</svg>
)
}
export function EvernoteIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32' fill='#7fce2c'>

View File

@@ -0,0 +1 @@
export { extendParserTool, extendParserV2Tool } from '@/tools/extend/parser'

View File

@@ -0,0 +1,250 @@
import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
import type {
ExtendParserInput,
ExtendParserOutput,
ExtendParserV2Input,
} from '@/tools/extend/types'
import type { ToolConfig } from '@/tools/types'
/**
 * Tool definition for the Extend document parser (`extend_parser`).
 *
 * The tool posts to the internal `/api/tools/extend/parse` route. The document may be given
 * as a URL / internal upload path (`filePath`) or as a file object (`file`, falling back to
 * `fileUpload`); a usable `filePath` takes precedence over a file object.
 */
export const extendParserTool: ToolConfig<ExtendParserInput, ExtendParserOutput> = {
  id: 'extend_parser',
  name: 'Extend Document Parser',
  description: 'Parse and extract content from documents using Extend AI',
  version: '1.0.0',

  params: {
    filePath: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description: 'URL to a document to be processed',
    },
    file: {
      type: 'file',
      required: false,
      visibility: 'user-only',
      description: 'Document file to be processed',
    },
    fileUpload: {
      type: 'object',
      required: false,
      visibility: 'hidden',
      description: 'File upload data from file-upload component',
    },
    outputFormat: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Target output format (markdown or spatial). Defaults to markdown.',
    },
    chunking: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Chunking strategy (page, document, or section). Defaults to page.',
    },
    engine: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description:
        'Parsing engine (parse_performance or parse_light). Defaults to parse_performance.',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Extend API key',
    },
  },

  request: {
    url: '/api/tools/extend/parse',
    method: 'POST',
    headers: (params) => ({
      'Content-Type': 'application/json',
      Accept: 'application/json',
      Authorization: `Bearer ${params.apiKey}`,
    }),
    /**
     * Builds the JSON body for the parse route.
     *
     * @throws Error when params are not an object, the API key is blank, the file path is
     *   neither an internal upload path nor a valid HTTP(S) URL, or no document is supplied.
     */
    body: (params) => {
      if (!params || typeof params !== 'object') {
        throw new Error('Invalid parameters: Parameters must be provided as an object')
      }

      if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
        throw new Error('Missing or invalid API key: A valid Extend API key is required')
      }

      // Forward the key in the same trimmed form that was just validated, so stray
      // whitespace from copy/paste never reaches the upstream Authorization header.
      const requestBody: Record<string, unknown> = {
        apiKey: params.apiKey.trim(),
      }

      const fileInput =
        params.file && typeof params.file === 'object' ? params.file : params.fileUpload
      const hasFileUpload = fileInput && typeof fileInput === 'object'

      // The literal string 'null' is treated as "no path supplied".
      const trimmedFilePath =
        typeof params.filePath === 'string' && params.filePath !== 'null'
          ? params.filePath.trim()
          : ''

      if (trimmedFilePath !== '') {
        if (trimmedFilePath.startsWith('/')) {
          // Relative paths are only accepted when they point at an internally uploaded file.
          if (!isInternalFileUrl(trimmedFilePath)) {
            throw new Error(
              'Invalid file path. Only uploaded files are supported for internal paths.'
            )
          }
          requestBody.filePath = trimmedFilePath
        } else {
          let parsedUrl: URL
          try {
            parsedUrl = new URL(trimmedFilePath)
          } catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error)
            throw new Error(
              `Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a document.`
            )
          }

          // Checked outside the try block so a well-formed URL with the wrong scheme is
          // reported as a protocol problem rather than re-wrapped as a URL format error.
          if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
            throw new Error(
              `Invalid protocol: ${parsedUrl.protocol}. URL must use HTTP or HTTPS protocol`
            )
          }

          requestBody.filePath = parsedUrl.toString()
        }
      } else if (hasFileUpload) {
        requestBody.file = fileInput
      } else {
        throw new Error('Missing file input: Please provide a document URL or upload a file')
      }

      // Optional settings are forwarded only when they hold one of the supported values.
      if (params.outputFormat && ['markdown', 'spatial'].includes(params.outputFormat)) {
        requestBody.outputFormat = params.outputFormat
      }

      if (params.chunking && ['page', 'document', 'section'].includes(params.chunking)) {
        requestBody.chunking = params.chunking
      }

      if (params.engine && ['parse_performance', 'parse_light'].includes(params.engine)) {
        requestBody.engine = params.engine
      }

      return requestBody
    },
  },

  /**
   * Normalizes the route response into the tool output. Reads from `data.output` when
   * present (otherwise from the top-level object) and accepts camelCase or snake_case
   * names for the page and credit counters.
   *
   * @throws Error when the response body is not a JSON object.
   */
  transformResponse: async (response) => {
    const data = await response.json()

    if (!data || typeof data !== 'object') {
      throw new Error('Invalid response format from Extend API')
    }

    const extendData = data.output ?? data

    return {
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? null,
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    }
  },

  outputs: {
    id: { type: 'string', description: 'Unique identifier for the parser run' },
    status: { type: 'string', description: 'Processing status' },
    chunks: {
      type: 'json',
      description: 'Parsed document content chunks',
    },
    blocks: {
      type: 'json',
      description: 'Block-level document elements with type and content',
    },
    pageCount: {
      type: 'number',
      description: 'Number of pages processed',
      optional: true,
    },
    creditsUsed: {
      type: 'number',
      description: 'API credits consumed',
      optional: true,
    },
  },
}
/**
 * Tool definition for the v2 Extend document parser (`extend_parser_v2`).
 *
 * Reuses `extendParserTool` (including its outputs and response transformation) but accepts
 * only a required `file` object as the document input; there is no URL/path variant.
 */
export const extendParserV2Tool: ToolConfig<ExtendParserV2Input, ExtendParserOutput> = {
  ...extendParserTool,
  id: 'extend_parser_v2',
  name: 'Extend Document Parser',
  // Explicitly cleared so nothing is inherited from the spread base tool for these hooks.
  postProcess: undefined,
  directExecution: undefined,
  // Delegate to the base transform; the cast bridges the differing input param types.
  transformResponse: extendParserTool.transformResponse
    ? (response: Response, params?: ExtendParserV2Input) =>
        extendParserTool.transformResponse!(response, params as unknown as ExtendParserInput)
    : undefined,

  params: {
    file: {
      type: 'file',
      required: true,
      visibility: 'user-only',
      description: 'Document to be processed',
    },
    outputFormat: extendParserTool.params.outputFormat,
    chunking: extendParserTool.params.chunking,
    engine: extendParserTool.params.engine,
    apiKey: extendParserTool.params.apiKey,
  },

  request: {
    url: '/api/tools/extend/parse',
    method: 'POST',
    headers: (params) => ({
      'Content-Type': 'application/json',
      Accept: 'application/json',
      Authorization: `Bearer ${params.apiKey}`,
    }),
    /**
     * Builds the JSON body for the parse route.
     *
     * @throws Error when params are not an object, the API key is blank, or `file` is not
     *   an object.
     */
    body: (params: ExtendParserV2Input) => {
      if (!params || typeof params !== 'object') {
        throw new Error('Invalid parameters: Parameters must be provided as an object')
      }

      if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
        throw new Error('Missing or invalid API key: A valid Extend API key is required')
      }

      if (!params.file || typeof params.file !== 'object') {
        throw new Error('Missing or invalid file: Please provide a file object')
      }

      // Forward the key in the same trimmed form that was just validated, so stray
      // whitespace from copy/paste never reaches the upstream Authorization header.
      const requestBody: Record<string, unknown> = {
        apiKey: params.apiKey.trim(),
        file: params.file,
      }

      // Optional settings are forwarded only when they hold one of the supported values.
      if (params.outputFormat && ['markdown', 'spatial'].includes(params.outputFormat)) {
        requestBody.outputFormat = params.outputFormat
      }

      if (params.chunking && ['page', 'document', 'section'].includes(params.chunking)) {
        requestBody.chunking = params.chunking
      }

      if (params.engine && ['parse_performance', 'parse_light'].includes(params.engine)) {
        requestBody.engine = params.engine
      }

      return requestBody
    },
  },
}

View File

@@ -0,0 +1,89 @@
import type { RawFileInput } from '@/lib/uploads/utils/file-utils'
import type { UserFile } from '@/executor/types'
import type { ToolResponse } from '@/tools/types'
/**
 * Input parameters for the Extend parser tool (`extend_parser`).
 *
 * The document can be supplied through `filePath`, `file`, or `fileUpload`; all three are
 * optional at the type level, and the tool's request builder throws if none is usable.
 */
export interface ExtendParserInput {
/** URL to a document to be processed */
filePath?: string
/** Document file to be processed */
file?: RawFileInput
/** File upload data (from file-upload component) */
fileUpload?: RawFileInput
/** Extend API key for authentication */
apiKey: string
/** Target output format */
outputFormat?: 'markdown' | 'spatial'
/** Chunking strategy */
chunking?: 'page' | 'document' | 'section'
/** Parsing engine */
engine?: 'parse_performance' | 'parse_light'
}
/**
 * Input parameters for the v2 Extend parser tool (`extend_parser_v2`).
 *
 * Unlike `ExtendParserInput`, the document is always a required `file` object.
 */
export interface ExtendParserV2Input {
/** File to be processed */
file: UserFile
/** Extend API key for authentication */
apiKey: string
/** Target output format */
outputFormat?: 'markdown' | 'spatial'
/** Chunking strategy */
chunking?: 'page' | 'document' | 'section'
/** Parsing engine */
engine?: 'parse_performance' | 'parse_light'
}
/**
 * Chunk from parsed document
 */
export interface ExtendParseChunk {
/** Text content of the chunk */
content: string
/** Presumably the page the chunk came from — TODO confirm against the Extend API response */
page?: number
/** Additional chunk metadata as returned by the API (shape not constrained here) */
metadata?: Record<string, unknown>
}
/**
 * Block-level element from parsed document
 */
export interface ExtendParseBlock {
/** Element type label (free-form string; allowed values are not constrained here) */
type: string
/** Content of the element */
content: string
/**
 * Optional bounding box of the element on a page.
 * NOTE(review): units and coordinate origin are not defined in this file — confirm against
 * the Extend API documentation.
 */
bbox?: {
left: number
top: number
width: number
height: number
page: number
}
/** Additional block metadata as returned by the API (shape not constrained here) */
metadata?: Record<string, unknown>
}
/**
 * Parsed-document payload carried in the `output` field of the Extend parser tool response.
 *
 * NOTE(review): the parser tool's `transformResponse` falls back to `null` for `id` and
 * `status` when they are absent from the response, which the non-nullable types below do
 * not reflect — confirm whether these should be `string | null`.
 */
export interface ExtendParserOutputData {
/** Unique identifier for the parser run */
id: string
/** Processing status */
status: string
/** Parsed document content chunks */
chunks: ExtendParseChunk[]
/** Block-level document elements */
blocks: ExtendParseBlock[]
/** Number of pages processed, or null when not reported */
pageCount: number | null
/** API credits consumed, or null when not reported */
creditsUsed: number | null
}
/**
 * Complete response from the Extend parser tool: the shared `ToolResponse` envelope with
 * its `output` typed as the parsed-document payload.
 */
export interface ExtendParserOutput extends ToolResponse {
/** Parsed-document payload */
output: ExtendParserOutputData
}

View File

@@ -491,6 +491,7 @@ import {
exaResearchTool,
exaSearchTool,
} from '@/tools/exa'
import { extendParserTool, extendParserV2Tool } from '@/tools/extend'
import {
fathomGetSummaryTool,
fathomGetTranscriptTool,
@@ -3533,6 +3534,8 @@ export const tools: Record<string, ToolConfig> = {
enrich_search_posts: enrichSearchPostsTool,
enrich_search_similar_companies: enrichSearchSimilarCompaniesTool,
enrich_verify_email: enrichVerifyEmailTool,
extend_parser: extendParserTool,
extend_parser_v2: extendParserV2Tool,
exa_search: exaSearchTool,
exa_get_contents: exaGetContentsTool,
exa_find_similar_links: exaFindSimilarLinksTool,