mirror of
https://github.com/simstudioai/sim.git
synced 2026-01-08 22:48:14 -05:00
feat(tools): added new firecrawl agent endpoint (#2603)
This commit is contained in:
@@ -462,6 +462,19 @@ export function SlackIcon(props: SVGProps<SVGSVGElement>) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Single-colour Slack logo.
 *
 * The glyph is filled with `currentColor`, so it takes the text colour of its
 * surroundings. Any SVG props supplied by the caller are spread last and
 * therefore override the defaults set here.
 */
export function SlackMonoIcon(props: SVGProps<SVGSVGElement>) {
  // One path per quadrant of the 256x256 view box.
  const lowerLeftArm =
    'M53.8412698,161.320635 C53.8412698,176.152381 41.8539683,188.139683 27.0222222,188.139683 C12.1904762,188.139683 0.203174603,176.152381 0.203174603,161.320635 C0.203174603,146.488889 12.1904762,134.501587 27.0222222,134.501587 L53.8412698,134.501587 L53.8412698,161.320635 Z M67.2507937,161.320635 C67.2507937,146.488889 79.2380952,134.501587 94.0698413,134.501587 C108.901587,134.501587 120.888889,146.488889 120.888889,161.320635 L120.888889,228.368254 C120.888889,243.2 108.901587,255.187302 94.0698413,255.187302 C79.2380952,255.187302 67.2507937,243.2 67.2507937,228.368254 L67.2507937,161.320635 Z'
  const upperLeftArm =
    'M94.0698413,53.6380952 C79.2380952,53.6380952 67.2507937,41.6507937 67.2507937,26.8190476 C67.2507937,11.9873016 79.2380952,-7.10542736e-15 94.0698413,-7.10542736e-15 C108.901587,-7.10542736e-15 120.888889,11.9873016 120.888889,26.8190476 L120.888889,53.6380952 L94.0698413,53.6380952 Z M94.0698413,67.2507937 C108.901587,67.2507937 120.888889,79.2380952 120.888889,94.0698413 C120.888889,108.901587 108.901587,120.888889 94.0698413,120.888889 L26.8190476,120.888889 C11.9873016,120.888889 0,108.901587 0,94.0698413 C0,79.2380952 11.9873016,67.2507937 26.8190476,67.2507937 L94.0698413,67.2507937 Z'
  const upperRightArm =
    'M201.549206,94.0698413 C201.549206,79.2380952 213.536508,67.2507937 228.368254,67.2507937 C243.2,67.2507937 255.187302,79.2380952 255.187302,94.0698413 C255.187302,108.901587 243.2,120.888889 228.368254,120.888889 L201.549206,120.888889 L201.549206,94.0698413 Z M188.139683,94.0698413 C188.139683,108.901587 176.152381,120.888889 161.320635,120.888889 C146.488889,120.888889 134.501587,108.901587 134.501587,94.0698413 L134.501587,26.8190476 C134.501587,11.9873016 146.488889,-1.42108547e-14 161.320635,-1.42108547e-14 C176.152381,-1.42108547e-14 188.139683,11.9873016 188.139683,26.8190476 L188.139683,94.0698413 Z'
  const lowerRightArm =
    'M161.320635,201.549206 C176.152381,201.549206 188.139683,213.536508 188.139683,228.368254 C188.139683,243.2 176.152381,255.187302 161.320635,255.187302 C146.488889,255.187302 134.501587,243.2 134.501587,228.368254 L134.501587,201.549206 L161.320635,201.549206 Z M161.320635,188.139683 C146.488889,188.139683 134.501587,176.152381 134.501587,161.320635 C134.501587,146.488889 146.488889,134.501587 161.320635,134.501587 L228.571429,134.501587 C243.403175,134.501587 255.390476,146.488889 255.390476,161.320635 C255.390476,176.152381 243.403175,188.139683 228.571429,188.139683 L161.320635,188.139683 Z'

  return (
    <svg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg' fill='currentColor' {...props}>
      <g>
        <path d={lowerLeftArm} />
        <path d={upperLeftArm} />
        <path d={upperRightArm} />
        <path d={lowerRightArm} />
      </g>
    </svg>
  )
}
|
||||
|
||||
export function GithubIcon(props: SVGProps<SVGSVGElement>) {
|
||||
return (
|
||||
<svg {...props} width='26' height='26' viewBox='0 0 26 26' xmlns='http://www.w3.org/2000/svg'>
|
||||
|
||||
@@ -149,6 +149,32 @@ Extract structured data from entire webpages using natural language prompts and
|
||||
| `success` | boolean | Whether the extraction operation was successful |
|
||||
| `data` | object | Extracted structured data according to the schema or prompt |
|
||||
|
||||
### `firecrawl_agent`
|
||||
|
||||
Autonomous web data extraction agent. Searches and gathers information based on natural language prompts without requiring specific URLs.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `prompt` | string | Yes | Natural language description of the data to extract \(max 10,000 characters\) |
|
||||
| `urls` | json | No | Optional array of URLs to focus the agent on |
|
||||
| `schema` | json | No | JSON Schema defining the structure of data to extract |
|
||||
| `maxCredits` | number | No | Maximum credits to spend on this agent task |
|
||||
| `strictConstrainToURLs` | boolean | No | If true, agent will only visit URLs provided in the urls array |
|
||||
| `apiKey` | string | Yes | Firecrawl API key |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Whether the agent operation was successful |
|
||||
| `status` | string | Current status of the agent job \(processing, completed, failed\) |
|
||||
| `data` | object | Extracted data from the agent |
|
||||
| `creditsUsed` | number | Number of credits consumed by this agent task |
|
||||
| `expiresAt` | string | Timestamp when the results expire \(24 hours\) |
|
||||
| `sources` | object | Array of source URLs used by the agent |
|
||||
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
@@ -58,8 +58,7 @@ Create a new contact in Intercom with email, external_id, or role
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Created contact data |
|
||||
| `contact` | object | Created contact object |
|
||||
|
||||
### `intercom_get_contact`
|
||||
|
||||
@@ -75,8 +74,7 @@ Get a single contact by ID from Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Contact data |
|
||||
| `contact` | object | Contact object |
|
||||
|
||||
### `intercom_update_contact`
|
||||
|
||||
@@ -104,8 +102,7 @@ Update an existing contact in Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Updated contact data |
|
||||
| `contact` | object | Updated contact object |
|
||||
|
||||
### `intercom_list_contacts`
|
||||
|
||||
@@ -122,8 +119,7 @@ List all contacts from Intercom with pagination support
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | List of contacts |
|
||||
| `contacts` | array | Array of contact objects |
|
||||
|
||||
### `intercom_search_contacts`
|
||||
|
||||
@@ -143,8 +139,7 @@ Search for contacts in Intercom using a query
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Search results |
|
||||
| `contacts` | array | Array of matching contact objects |
|
||||
|
||||
### `intercom_delete_contact`
|
||||
|
||||
@@ -160,8 +155,9 @@ Delete a contact from Intercom by ID
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Deletion result |
|
||||
| `id` | string | ID of deleted contact |
|
||||
| `deleted` | boolean | Whether the contact was deleted |
|
||||
| `metadata` | object | Operation metadata |
|
||||
|
||||
### `intercom_create_company`
|
||||
|
||||
@@ -185,8 +181,7 @@ Create or update a company in Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Created or updated company data |
|
||||
| `company` | object | Created or updated company object |
|
||||
|
||||
### `intercom_get_company`
|
||||
|
||||
@@ -202,8 +197,7 @@ Retrieve a single company by ID from Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Company data |
|
||||
| `company` | object | Company object |
|
||||
|
||||
### `intercom_list_companies`
|
||||
|
||||
@@ -221,8 +215,7 @@ List all companies from Intercom with pagination support. Note: This endpoint ha
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | List of companies |
|
||||
| `companies` | array | Array of company objects |
|
||||
|
||||
### `intercom_get_conversation`
|
||||
|
||||
@@ -240,8 +233,7 @@ Retrieve a single conversation by ID from Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Conversation data |
|
||||
| `conversation` | object | Conversation object |
|
||||
|
||||
### `intercom_list_conversations`
|
||||
|
||||
@@ -260,8 +252,7 @@ List all conversations from Intercom with pagination support
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | List of conversations |
|
||||
| `conversations` | array | Array of conversation objects |
|
||||
|
||||
### `intercom_reply_conversation`
|
||||
|
||||
@@ -282,8 +273,7 @@ Reply to a conversation as an admin in Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Updated conversation with reply |
|
||||
| `conversation` | object | Updated conversation object |
|
||||
|
||||
### `intercom_search_conversations`
|
||||
|
||||
@@ -303,8 +293,7 @@ Search for conversations in Intercom using a query
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Search results |
|
||||
| `conversations` | array | Array of matching conversation objects |
|
||||
|
||||
### `intercom_create_ticket`
|
||||
|
||||
@@ -326,8 +315,7 @@ Create a new ticket in Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Created ticket data |
|
||||
| `ticket` | object | Created ticket object |
|
||||
|
||||
### `intercom_get_ticket`
|
||||
|
||||
@@ -343,8 +331,7 @@ Retrieve a single ticket by ID from Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Ticket data |
|
||||
| `ticket` | object | Ticket object |
|
||||
|
||||
### `intercom_create_message`
|
||||
|
||||
@@ -368,8 +355,7 @@ Create and send a new admin-initiated message in Intercom
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `success` | boolean | Operation success status |
|
||||
| `output` | object | Created message data |
|
||||
| `message` | object | Created message object |
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -74,7 +74,6 @@ Insert or update text records in a Pinecone index
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `statusText` | string | Status of the upsert operation |
|
||||
| `upsertedCount` | number | Number of records successfully upserted |
|
||||
|
||||
### `pinecone_search_text`
|
||||
|
||||
|
||||
@@ -269,7 +269,8 @@ Upload a file to a Supabase storage bucket
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `projectId` | string | Yes | Your Supabase project ID \(e.g., jdrkgepadsdopsntdlom\) |
|
||||
| `bucket` | string | Yes | The name of the storage bucket |
|
||||
| `path` | string | Yes | The path where the file will be stored \(e.g., "folder/file.jpg"\) |
|
||||
| `fileName` | string | Yes | The name of the file \(e.g., "document.pdf", "image.jpg"\) |
|
||||
| `path` | string | No | Optional folder path \(e.g., "folder/subfolder/"\) |
|
||||
| `fileContent` | string | Yes | The file content \(base64 encoded for binary files, or plain text\) |
|
||||
| `contentType` | string | No | MIME type of the file \(e.g., "image/jpeg", "text/plain"\) |
|
||||
| `upsert` | boolean | No | If true, overwrites existing file \(default: false\) |
|
||||
|
||||
@@ -139,8 +139,11 @@ Retrieve complete details and structure of a specific form
|
||||
| `theme` | object | Theme reference |
|
||||
| `workspace` | object | Workspace reference |
|
||||
| `fields` | array | Array of form fields/questions |
|
||||
| `welcome_screens` | array | Array of welcome screens |
|
||||
| `welcome_screens` | array | Array of welcome screens \(empty if none configured\) |
|
||||
| `thankyou_screens` | array | Array of thank you screens |
|
||||
| `created_at` | string | Form creation timestamp \(ISO 8601 format\) |
|
||||
| `last_updated_at` | string | Form last update timestamp \(ISO 8601 format\) |
|
||||
| `published_at` | string | Form publication timestamp \(ISO 8601 format\) |
|
||||
| `_links` | object | Related resource links including public form URL |
|
||||
|
||||
### `typeform_create_form`
|
||||
@@ -166,7 +169,12 @@ Create a new form with fields and settings
|
||||
| `id` | string | Created form unique identifier |
|
||||
| `title` | string | Form title |
|
||||
| `type` | string | Form type |
|
||||
| `fields` | array | Array of created form fields |
|
||||
| `settings` | object | Form settings object |
|
||||
| `theme` | object | Theme reference |
|
||||
| `workspace` | object | Workspace reference |
|
||||
| `fields` | array | Array of created form fields \(empty if none added\) |
|
||||
| `welcome_screens` | array | Array of welcome screens \(empty if none configured\) |
|
||||
| `thankyou_screens` | array | Array of thank you screens |
|
||||
| `_links` | object | Related resource links including public form URL |
|
||||
|
||||
### `typeform_update_form`
|
||||
@@ -185,16 +193,7 @@ Update an existing form using JSON Patch operations
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `id` | string | Updated form unique identifier |
|
||||
| `title` | string | Form title |
|
||||
| `type` | string | Form type |
|
||||
| `settings` | object | Form settings |
|
||||
| `theme` | object | Theme reference |
|
||||
| `workspace` | object | Workspace reference |
|
||||
| `fields` | array | Array of form fields |
|
||||
| `welcome_screens` | array | Array of welcome screens |
|
||||
| `thankyou_screens` | array | Array of thank you screens |
|
||||
| `_links` | object | Related resource links |
|
||||
| `message` | string | Success confirmation message |
|
||||
|
||||
### `typeform_delete_form`
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
{ label: 'Crawl', id: 'crawl' },
|
||||
{ label: 'Map', id: 'map' },
|
||||
{ label: 'Extract', id: 'extract' },
|
||||
{ label: 'Agent', id: 'agent' },
|
||||
],
|
||||
value: () => 'scrape',
|
||||
},
|
||||
@@ -61,6 +62,116 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
value: 'extract',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'agentPrompt',
|
||||
title: 'Agent Prompt',
|
||||
type: 'long-input',
|
||||
placeholder:
|
||||
'Describe what data to find and extract (e.g., "Find the founders of Firecrawl and their backgrounds")',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: 'agent',
|
||||
},
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
id: 'agentUrls',
|
||||
title: 'Focus URLs',
|
||||
type: 'long-input',
|
||||
placeholder: '["https://example.com/page1", "https://example.com/page2"]',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: 'agent',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'schema',
|
||||
title: 'Output Schema',
|
||||
type: 'code',
|
||||
placeholder: 'Enter JSON schema...',
|
||||
language: 'json',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: 'agent',
|
||||
},
|
||||
wandConfig: {
|
||||
enabled: true,
|
||||
maintainHistory: true,
|
||||
prompt: `You are an expert programmer specializing in creating JSON schemas for web data extraction.
|
||||
Generate ONLY the JSON schema based on the user's request.
|
||||
The output MUST be a single, valid JSON object, starting with { and ending with }.
|
||||
The JSON object should define the structure of data to extract from web pages.
|
||||
Use standard JSON Schema properties (type, description, enum, items for arrays, etc.).
|
||||
|
||||
Current schema: {context}
|
||||
|
||||
Do not include any explanations, markdown formatting, or other text outside the JSON object.
|
||||
|
||||
Valid Schema Examples:
|
||||
|
||||
Example 1 - Company Information:
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"company_name": {
|
||||
"type": "string",
|
||||
"description": "The name of the company"
|
||||
},
|
||||
"founders": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"role": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["company_name"]
|
||||
}
|
||||
|
||||
Example 2 - Product Data:
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"products": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"price": { "type": "number" },
|
||||
"description": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
`,
|
||||
placeholder: 'Describe the data structure you want to extract...',
|
||||
generationType: 'json-schema',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'maxCredits',
|
||||
title: 'Max Credits',
|
||||
type: 'short-input',
|
||||
placeholder: 'Maximum credits to spend',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: 'agent',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'strictConstrainToURLs',
|
||||
title: 'Strict URL Constraint',
|
||||
type: 'switch',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: 'agent',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'onlyMainContent',
|
||||
title: 'Only Main Content',
|
||||
@@ -146,6 +257,7 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
'firecrawl_crawl',
|
||||
'firecrawl_map',
|
||||
'firecrawl_extract',
|
||||
'firecrawl_agent',
|
||||
],
|
||||
config: {
|
||||
tool: (params) => {
|
||||
@@ -160,6 +272,8 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
return 'firecrawl_map'
|
||||
case 'extract':
|
||||
return 'firecrawl_extract'
|
||||
case 'agent':
|
||||
return 'firecrawl_agent'
|
||||
default:
|
||||
return 'firecrawl_scrape'
|
||||
}
|
||||
@@ -178,6 +292,11 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
mobile,
|
||||
prompt,
|
||||
apiKey,
|
||||
agentPrompt,
|
||||
agentUrls,
|
||||
schema,
|
||||
maxCredits,
|
||||
strictConstrainToURLs,
|
||||
} = params
|
||||
|
||||
const result: Record<string, any> = { apiKey }
|
||||
@@ -235,6 +354,35 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
}
|
||||
if (prompt) result.prompt = prompt
|
||||
break
|
||||
|
||||
case 'agent':
|
||||
if (agentPrompt) result.prompt = agentPrompt
|
||||
if (agentUrls) {
|
||||
if (Array.isArray(agentUrls)) {
|
||||
result.urls = agentUrls
|
||||
} else if (typeof agentUrls === 'string') {
|
||||
try {
|
||||
const parsed = JSON.parse(agentUrls)
|
||||
result.urls = Array.isArray(parsed) ? parsed : [parsed]
|
||||
} catch {
|
||||
result.urls = [agentUrls]
|
||||
}
|
||||
}
|
||||
}
|
||||
if (schema) {
|
||||
if (typeof schema === 'object') {
|
||||
result.schema = schema
|
||||
} else if (typeof schema === 'string') {
|
||||
try {
|
||||
result.schema = JSON.parse(schema)
|
||||
} catch {
|
||||
// Skip invalid schema
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxCredits) result.maxCredits = Number.parseInt(maxCredits)
|
||||
if (strictConstrainToURLs != null) result.strictConstrainToURLs = strictConstrainToURLs
|
||||
break
|
||||
}
|
||||
|
||||
return result
|
||||
@@ -255,6 +403,34 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
mobile: { type: 'boolean', description: 'Use mobile emulation' },
|
||||
onlyMainContent: { type: 'boolean', description: 'Extract only main content' },
|
||||
scrapeOptions: { type: 'json', description: 'Advanced scraping options' },
|
||||
agentPrompt: { type: 'string', description: 'Agent prompt describing data to extract' },
|
||||
agentUrls: { type: 'json', description: 'Optional URLs to focus the agent on' },
|
||||
schema: {
|
||||
type: 'json',
|
||||
description: 'JSON schema for structured output',
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
type: {
|
||||
type: 'string',
|
||||
enum: ['object'],
|
||||
description: 'Must be "object" for a valid JSON Schema',
|
||||
},
|
||||
properties: {
|
||||
type: 'object',
|
||||
description: 'Object containing property definitions',
|
||||
},
|
||||
required: {
|
||||
type: 'array',
|
||||
items: { type: 'string' },
|
||||
description: 'Array of required property names',
|
||||
},
|
||||
},
|
||||
required: ['type', 'properties'],
|
||||
},
|
||||
},
|
||||
maxCredits: { type: 'number', description: 'Maximum credits to spend' },
|
||||
strictConstrainToURLs: { type: 'boolean', description: 'Limit agent to provided URLs only' },
|
||||
},
|
||||
outputs: {
|
||||
// Scrape output
|
||||
@@ -273,5 +449,8 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
links: { type: 'json', description: 'Discovered URLs array' },
|
||||
// Extract output
|
||||
sources: { type: 'json', description: 'Data sources array' },
|
||||
// Agent output
|
||||
status: { type: 'string', description: 'Agent job status' },
|
||||
expiresAt: { type: 'string', description: 'Result expiration timestamp' },
|
||||
},
|
||||
}
|
||||
|
||||
203
apps/sim/tools/firecrawl/agent.ts
Normal file
203
apps/sim/tools/firecrawl/agent.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { AgentParams, AgentResponse } from '@/tools/firecrawl/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
const logger = createLogger('FirecrawlAgentTool')
|
||||
|
||||
const POLL_INTERVAL_MS = 5000 // 5 seconds between polls
|
||||
const MAX_POLL_TIME_MS = 300000 // 5 minutes maximum polling time
|
||||
|
||||
/**
 * Coerces the `urls` parameter into an array for the request body.
 *
 * Accepts an array (returned as-is), a JSON-encoded string (parsed; a non-array
 * result is wrapped in a one-element array), or a plain string (treated as a
 * single URL). Falsy values and any other type yield `undefined`, meaning the
 * field is omitted from the request.
 */
function normalizeAgentUrls(urls: unknown): unknown[] | undefined {
  if (!urls) return undefined
  if (Array.isArray(urls)) return urls
  if (typeof urls !== 'string') return undefined

  try {
    const parsed: unknown = JSON.parse(urls)
    return Array.isArray(parsed) ? parsed : [parsed]
  } catch {
    // Not JSON: interpret the raw text as one URL.
    return [urls]
  }
}

/**
 * Decodes a schema that arrives as a JSON string so the API receives an object.
 *
 * Non-string values are returned untouched. A string that does not decode to a
 * JSON object is forwarded unchanged (after a warning) so the remote API, not
 * this tool, decides how to reject it.
 */
function normalizeAgentSchema(schema: unknown): unknown {
  if (typeof schema !== 'string') return schema

  try {
    const parsed: unknown = JSON.parse(schema)
    if (parsed !== null && typeof parsed === 'object') return parsed
  } catch {
    // Handled by the warning below.
  }

  logger.warn('Firecrawl agent schema is not a valid JSON object; forwarding it unchanged')
  return schema
}

/** Extracts a printable message from a caught value of unknown type. */
function describeAgentError(error: unknown): string {
  if (error instanceof Error && error.message) return error.message
  return 'Unknown error'
}

/**
 * Tool definition for the Firecrawl agent endpoint.
 *
 * The initial POST to `/v2/agent` only creates a job; `postProcess` then polls
 * `/v2/agent/{jobId}` every `POLL_INTERVAL_MS` until the job reports
 * `completed` or `failed`, or until `MAX_POLL_TIME_MS` of wall-clock time has
 * passed.
 */
export const agentTool: ToolConfig<AgentParams, AgentResponse> = {
  id: 'firecrawl_agent',
  name: 'Firecrawl Agent',
  description:
    'Autonomous web data extraction agent. Searches and gathers information based on natural language prompts without requiring specific URLs.',
  version: '1.0.0',

  params: {
    prompt: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Natural language description of the data to extract (max 10,000 characters)',
    },
    urls: {
      type: 'json',
      required: false,
      visibility: 'user-or-llm',
      description: 'Optional array of URLs to focus the agent on',
    },
    schema: {
      type: 'json',
      required: false,
      visibility: 'user-or-llm',
      description: 'JSON Schema defining the structure of data to extract',
    },
    maxCredits: {
      type: 'number',
      required: false,
      visibility: 'user-only',
      description: 'Maximum credits to spend on this agent task',
    },
    strictConstrainToURLs: {
      type: 'boolean',
      required: false,
      visibility: 'user-only',
      description: 'If true, agent will only visit URLs provided in the urls array',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Firecrawl API key',
    },
  },

  request: {
    method: 'POST',
    url: 'https://api.firecrawl.dev/v2/agent',
    headers: (params) => ({
      'Content-Type': 'application/json',
      Authorization: `Bearer ${params.apiKey}`,
    }),
    body: (params) => {
      const body: Record<string, unknown> = {
        prompt: params.prompt,
      }

      const urls = normalizeAgentUrls(params.urls)
      if (urls) body.urls = urls

      if (params.schema) body.schema = normalizeAgentSchema(params.schema)

      if (params.maxCredits) {
        const maxCredits = Number(params.maxCredits)
        // NaN/Infinity serialise to `null`, which would silently drop the
        // spending cap. Fail loudly instead of running uncapped.
        if (!Number.isFinite(maxCredits)) {
          throw new Error(`Invalid maxCredits value: ${String(params.maxCredits)}`)
        }
        body.maxCredits = maxCredits
      }

      if (typeof params.strictConstrainToURLs === 'boolean') {
        body.strictConstrainToURLs = params.strictConstrainToURLs
      }

      return body
    },
  },

  transformResponse: async (response: Response) => {
    const data = await response.json()

    // Without a job id there is nothing to poll; report the failure here
    // rather than letting postProcess query `/v2/agent/undefined`.
    if (!data?.id || data.success === false) {
      const reason = typeof data?.error === 'string' && data.error ? data.error : 'no job id returned'
      return {
        success: false,
        output: {
          jobId: '',
          success: false,
          status: 'failed',
          data: {},
        },
        error: `Failed to start Firecrawl agent job: ${reason}`,
      }
    }

    return {
      success: true,
      output: {
        jobId: data.id,
        success: false,
        status: 'processing',
        data: {},
      },
    }
  },

  postProcess: async (result, params) => {
    if (!result.success) {
      return result
    }

    const jobId = result.output.jobId
    logger.info(`Firecrawl agent job ${jobId} created, polling for completion...`)

    // Use a wall-clock deadline so slow status requests count against the
    // budget, not just the sleep intervals between them.
    const deadline = Date.now() + MAX_POLL_TIME_MS

    while (Date.now() < deadline) {
      try {
        const statusResponse = await fetch(
          `https://api.firecrawl.dev/v2/agent/${encodeURIComponent(jobId)}`,
          {
            method: 'GET',
            headers: {
              Authorization: `Bearer ${params.apiKey}`,
              'Content-Type': 'application/json',
            },
          }
        )

        if (!statusResponse.ok) {
          // statusText may be empty, so include the numeric status code.
          throw new Error(
            `Failed to get agent status: ${statusResponse.status} ${statusResponse.statusText}`.trim()
          )
        }

        const agentData = await statusResponse.json()
        logger.info(`Firecrawl agent job ${jobId} status: ${agentData.status}`)

        if (agentData.status === 'completed') {
          result.output = {
            jobId,
            success: true,
            status: 'completed',
            data: agentData.data || {},
            creditsUsed: agentData.creditsUsed,
            expiresAt: agentData.expiresAt,
            sources: agentData.sources,
          }
          return result
        }

        if (agentData.status === 'failed') {
          return {
            ...result,
            success: false,
            // Reflect the terminal state instead of the initial 'processing'.
            output: { ...result.output, status: 'failed' },
            error: `Agent job failed: ${agentData.error || 'Unknown error'}`,
          }
        }
      } catch (error: unknown) {
        const message = describeAgentError(error)
        logger.error('Error polling for agent job status:', {
          message,
          jobId,
        })

        return {
          ...result,
          success: false,
          error: `Error polling for agent job status: ${message}`,
        }
      }

      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
    }

    logger.warn(
      `Agent job ${jobId} did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)`
    )
    return {
      ...result,
      success: false,
      error: `Agent job did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)`,
    }
  },

  outputs: {
    success: {
      type: 'boolean',
      description: 'Whether the agent operation was successful',
    },
    status: {
      type: 'string',
      description: 'Current status of the agent job (processing, completed, failed)',
    },
    data: {
      type: 'object',
      description: 'Extracted data from the agent',
    },
    creditsUsed: {
      type: 'number',
      description: 'Number of credits consumed by this agent task',
    },
    expiresAt: {
      type: 'string',
      description: 'Timestamp when the results expire (24 hours)',
    },
    sources: {
      // Declared as an array to match both the description and AgentResponse.
      type: 'array',
      description: 'Array of source URLs used by the agent',
    },
  },
}
|
||||
@@ -1,3 +1,4 @@
|
||||
import { agentTool } from '@/tools/firecrawl/agent'
|
||||
import { crawlTool } from '@/tools/firecrawl/crawl'
|
||||
import { extractTool } from '@/tools/firecrawl/extract'
|
||||
import { mapTool } from '@/tools/firecrawl/map'
|
||||
@@ -9,3 +10,4 @@ export const firecrawlSearchTool = searchTool
|
||||
export const firecrawlCrawlTool = crawlTool
|
||||
export const firecrawlMapTool = mapTool
|
||||
export const firecrawlExtractTool = extractTool
|
||||
export const firecrawlAgentTool = agentTool
|
||||
|
||||
@@ -122,6 +122,15 @@ export interface ExtractParams {
|
||||
scrapeOptions?: ScrapeOptions
|
||||
}
|
||||
|
||||
/**
 * Input parameters accepted by the Firecrawl agent tool.
 */
export interface AgentParams {
  /** Natural language description of the data to extract. */
  prompt: string
  /** Optional URLs to focus the agent on. */
  urls?: string[]
  /** Optional JSON Schema describing the structure of the extracted data. */
  schema?: Record<string, any>
  /** Optional upper bound on the credits the agent task may spend. */
  maxCredits?: number
  /** When true, the agent only visits the URLs listed in `urls`. */
  strictConstrainToURLs?: boolean
  /** Firecrawl API key, sent as a bearer token. */
  apiKey: string
}
|
||||
|
||||
export interface ScrapeResponse extends ToolResponse {
|
||||
output: {
|
||||
markdown: string
|
||||
@@ -200,9 +209,22 @@ export interface ExtractResponse extends ToolResponse {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Result shape produced by the Firecrawl agent tool.
 */
export interface AgentResponse extends ToolResponse {
  output: {
    /** Identifier of the agent job. */
    jobId: string
    /** Job status string, e.g. processing, completed or failed. */
    status: string
    /** Whether the agent operation was successful. */
    success: boolean
    /** Data extracted by the agent. */
    data: Record<string, any>
    /** Source URLs used by the agent, when reported. */
    sources?: string[]
    /** Credits consumed by the agent task, when reported. */
    creditsUsed?: number
    /** Timestamp at which the results expire, when reported. */
    expiresAt?: string
  }
}
|
||||
|
||||
/**
 * Any response a Firecrawl tool can produce, one member per operation.
 */
export type FirecrawlResponse =
  | AgentResponse
  | ExtractResponse
  | FirecrawlCrawlResponse
  | MapResponse
  | ScrapeResponse
  | SearchResponse
|
||||
|
||||
@@ -187,6 +187,7 @@ import {
|
||||
} from '@/tools/exa'
|
||||
import { fileParseTool } from '@/tools/file'
|
||||
import {
|
||||
firecrawlAgentTool,
|
||||
firecrawlCrawlTool,
|
||||
firecrawlExtractTool,
|
||||
firecrawlMapTool,
|
||||
@@ -1396,6 +1397,7 @@ export const tools: Record<string, ToolConfig> = {
|
||||
firecrawl_crawl: firecrawlCrawlTool,
|
||||
firecrawl_map: firecrawlMapTool,
|
||||
firecrawl_extract: firecrawlExtractTool,
|
||||
firecrawl_agent: firecrawlAgentTool,
|
||||
grafana_get_dashboard: grafanaGetDashboardTool,
|
||||
grafana_list_dashboards: grafanaListDashboardsTool,
|
||||
grafana_create_dashboard: grafanaCreateDashboardTool,
|
||||
|
||||
Reference in New Issue
Block a user