mirror of
https://github.com/simstudioai/sim.git
synced 2026-02-14 08:25:03 -05:00
176 lines
5.0 KiB
TypeScript
176 lines
5.0 KiB
TypeScript
import { createLogger } from '@sim/logger'
|
|
import type { ExaGetContentsParams, ExaGetContentsResponse } from '@/tools/exa/types'
|
|
import type { ToolConfig } from '@/tools/types'
|
|
|
|
// Module-level logger tagged 'ExaGetContentsTool'; used by the pricing fallback to warn when the response lacks cost data.
const logger = createLogger('ExaGetContentsTool')
|
|
|
|
/**
 * Splits a comma-separated string into trimmed, non-empty entries.
 *
 * @param value - Raw comma-separated input (e.g. `"a, b,,c"`).
 * @returns The individual entries with surrounding whitespace removed and blanks dropped.
 */
const splitCommaSeparated = (value: string): string[] =>
  value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)

/**
 * Tool definition that POSTs to Exa's `/contents` endpoint to retrieve page
 * contents for one or more URLs.
 *
 * - `request.body` converts the comma-separated `urls` / `subpageTarget`
 *   strings into arrays and only forwards optional settings that were provided.
 * - `transformResponse` maps each result to `{ url, title, text, summary, highlights }`
 *   and carries the API's `costDollars` along as `_costDollars` for pricing.
 * - `hosting.pricing.getCost` prefers the cost reported by Exa and otherwise
 *   falls back to a flat per-result rate.
 */
export const getContentsTool: ToolConfig<ExaGetContentsParams, ExaGetContentsResponse> = {
  id: 'exa_get_contents',
  name: 'Exa Get Contents',
  description:
    'Retrieve the contents of webpages using Exa AI. Returns the title, text content, and optional summaries for each URL.',
  version: '1.0.0',

  params: {
    urls: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Comma-separated list of URLs to retrieve content from',
    },
    text: {
      type: 'boolean',
      required: false,
      visibility: 'user-only',
      description:
        'If true, returns full page text with default settings. If false, disables text return.',
    },
    summaryQuery: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Query to guide the summary generation',
    },
    subpages: {
      type: 'number',
      required: false,
      visibility: 'user-only',
      description: 'Number of subpages to crawl from the provided URLs',
    },
    subpageTarget: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description:
        'Comma-separated keywords to target specific subpages (e.g., "docs,tutorial,about")',
    },
    highlights: {
      type: 'boolean',
      required: false,
      visibility: 'user-only',
      description: 'Include highlighted snippets in results (default: false)',
    },
    livecrawl: {
      type: 'string',
      required: false,
      visibility: 'user-only',
      description:
        'Live crawling mode: never (default), fallback, always, or preferred (always try livecrawl, fall back to cache if fails)',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Exa AI API Key',
    },
  },

  hosting: {
    envKeys: ['EXA_API_KEY_1', 'EXA_API_KEY_2', 'EXA_API_KEY_3'],
    apiKeyParam: 'apiKey',
    byokProviderId: 'exa',
    pricing: {
      type: 'custom',
      getCost: (_params, output) => {
        // Use _costDollars from Exa API response (internal field, stripped from final output).
        // Check the type rather than truthiness so that a reported total of 0 is
        // honoured instead of being mistaken for a missing value.
        const reportedTotal = output._costDollars?.total
        if (typeof reportedTotal === 'number' && Number.isFinite(reportedTotal)) {
          return { cost: reportedTotal, metadata: { costDollars: output._costDollars } }
        }

        // Fallback: $1/1000 pages
        logger.warn('Exa get_contents response missing costDollars, using fallback pricing')
        return (output.results?.length || 0) * 0.001
      },
    },
  },

  request: {
    url: 'https://api.exa.ai/contents',
    method: 'POST',
    headers: (params) => ({
      'Content-Type': 'application/json',
      'x-api-key': params.apiKey,
    }),
    body: (params) => {
      // The URLs arrive as one comma-separated string; the API expects an array.
      const body: Record<string, unknown> = {
        urls: splitCommaSeparated(params.urls),
      }

      // `text` may legitimately be `false`, so only skip it when it was not provided.
      if (params.text !== undefined) {
        body.text = params.text
      }

      // Add summary with query if provided
      if (params.summaryQuery) {
        body.summary = {
          query: params.summaryQuery,
        }
      }

      // Subpages crawling: forward only a real number. A non-numeric value would
      // become NaN, which JSON serializes as null.
      if (params.subpages !== undefined && params.subpages !== null) {
        const subpageCount = Number(params.subpages)
        if (Number.isFinite(subpageCount)) {
          body.subpages = subpageCount
        }
      }

      if (params.subpageTarget) {
        body.subpageTarget = splitCommaSeparated(params.subpageTarget)
      }

      // Content options: `highlights` may legitimately be `false`.
      if (params.highlights !== undefined) {
        body.highlights = params.highlights
      }

      // Live crawl mode
      if (params.livecrawl) {
        body.livecrawl = params.livecrawl
      }

      return body
    },
  },

  transformResponse: async (response: Response) => {
    const data = await response.json()

    // Tolerate payloads without a `results` array instead of throwing a TypeError.
    const rawResults = Array.isArray(data?.results) ? data.results : []

    return {
      success: true,
      output: {
        results: rawResults.map((result: any) => ({
          url: result.url,
          title: result.title || '',
          text: result.text || '',
          summary: result.summary || '',
          highlights: result.highlights,
        })),
        // Internal field consumed by hosting.pricing.getCost.
        _costDollars: data?.costDollars,
      },
    }
  },

  outputs: {
    results: {
      type: 'array',
      description: 'Retrieved content from URLs with title, text, and summaries',
      items: {
        type: 'object',
        properties: {
          url: { type: 'string', description: 'The URL that content was retrieved from' },
          title: { type: 'string', description: 'The title of the webpage' },
          text: { type: 'string', description: 'The full text content of the webpage' },
          summary: { type: 'string', description: 'AI-generated summary of the webpage content' },
          highlights: {
            type: 'array',
            description: 'Highlighted snippets from the webpage (present when highlights are requested)',
            items: { type: 'string' },
          },
        },
      },
    },
  },
}
|