fix(browserbase): consolidated stagehand agent and extract, updated wand UI to resize based on panel size (#2340)

This commit is contained in:
Waleed
2025-12-12 16:58:51 -08:00
committed by GitHub
parent deb085881f
commit 0ea600d0e8
19 changed files with 391 additions and 1244 deletions

View File

@@ -142,7 +142,6 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
supabase: SupabaseIcon,
stt: STTIcon,
stripe: StripeIcon,
stagehand_agent: StagehandIcon,
stagehand: StagehandIcon,
ssh: SshIcon,
sqs: SQSIcon,

View File

@@ -1,59 +0,0 @@
---
title: Stagehand Agent
description: Autonomous web browsing agent
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) is an autonomous web agent platform that enables AI systems to navigate and interact with websites just like a human would. It provides a powerful solution for automating complex web tasks without requiring custom code or browser automation scripts.
With Stagehand, you can:
- **Automate web navigation**: Enable AI to browse websites, click links, fill forms, and interact with web elements
- **Extract structured data**: Collect specific information from websites in a structured, usable format
- **Complete complex workflows**: Perform multi-step tasks across different websites and web applications
- **Handle authentication**: Navigate login processes and maintain sessions across websites
- **Process dynamic content**: Interact with JavaScript-heavy sites and single-page applications
- **Maintain context awareness**: Keep track of the current state and history while navigating
- **Generate detailed reports**: Receive comprehensive logs of actions taken and data collected
In Sim, the Stagehand integration enables your agents to seamlessly interact with web-based systems as part of their workflows. This allows for sophisticated automation scenarios that bridge the gap between your AI agents and the vast information and functionality available on the web. Your agents can search for information, interact with web applications, extract data from websites, and incorporate these capabilities into their decision-making processes. By connecting Sim with Stagehand, you can create agents that extend beyond API-based integrations to navigate the web just as a human would - filling forms, clicking buttons, reading content, and extracting valuable information to complete their tasks more effectively.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Integrate Stagehand Agent into the workflow. Can navigate the web and perform tasks.
## Tools
### `stagehand_agent`
Run an autonomous web agent to complete tasks and extract structured data
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `startUrl` | string | Yes | URL of the webpage to start the agent on |
| `task` | string | Yes | The task to complete or goal to achieve on the website |
| `variables` | json | No | Optional variables to substitute in the task \(format: \{key: value\}\). Reference in task using %key% |
| `format` | string | No | No description |
| `apiKey` | string | Yes | OpenAI API key for agent execution \(required by Stagehand\) |
| `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `agentResult` | object | Result from the Stagehand agent execution |
## Notes
- Category: `tools`
- Type: `stagehand_agent`

View File

@@ -88,7 +88,6 @@
"sqs",
"ssh",
"stagehand",
"stagehand_agent",
"stripe",
"stt",
"supabase",

View File

@@ -1,6 +1,6 @@
---
title: Stagehand Extract
description: Extract data from websites
title: Stagehand
description: Web automation and data extraction
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
@@ -11,21 +11,28 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://stagehand.com) is a tool that allows you to extract structured data from webpages using Browserbase and OpenAI.
[Stagehand](https://stagehand.com) is a tool that enables both extraction of structured data from webpages and autonomous web automation using Browserbase and modern LLMs (OpenAI or Anthropic).
With Stagehand, you can:
Stagehand offers two main capabilities in Sim:
- **Extract structured data**: Extract structured data from webpages using Browserbase and OpenAI
- **Save data to a database**: Save the extracted data to a database
- **Automate workflows**: Automate workflows to extract data from webpages
- **stagehand_extract**: Extract structured data from a single webpage. You specify what you want (a schema), and the AI retrieves and parses the data in that shape from the page. This is best for extracting lists, fields, or objects when you know exactly what information you need and where to get it.
In Sim, the Stagehand integration enables your agents to extract structured data from webpages using Browserbase and OpenAI. This allows for powerful automation scenarios such as data extraction, data analysis, and data integration. Your agents can extract structured data from webpages, save the extracted data to a database, and automate workflows to extract data from webpages. This integration bridges the gap between your AI workflows and your data management system, enabling seamless data extraction and integration. By connecting Sim with Stagehand, you can automate data extraction processes, maintain up-to-date information repositories, generate reports, and organize information intelligently - all through your intelligent agents.
- **stagehand_agent**: Run an autonomous web agent capable of completing multi-step tasks, interacting with elements, navigating between pages, and returning structured results. This is much more flexible: the agent can do things like log in, search, fill forms, gather data from multiple places, and output a final result according to a requested schema.
**Key Differences:**
- *stagehand_extract* is a rapid “extract this data from this page” operation. It works best for direct, one-step extraction tasks.
- *stagehand_agent* performs complex, multi-step autonomous tasks on the web — such as navigation, searching, or even transactions — and can dynamically extract data according to your instructions and an optional schema.
In practice, use **stagehand_extract** when you know what you want and where, and use **stagehand_agent** when you need a bot to think through and execute interactive workflows.
By integrating Stagehand, Sim agents can automate data gathering, analysis, and workflow execution on the web: updating databases, organizing information, and generating custom reports—seamlessly and autonomously.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Integrate Stagehand into the workflow. Can extract structured data from webpages.
Integrate Stagehand into the workflow. Can extract structured data from webpages or run an autonomous agent to perform tasks.
@@ -41,7 +48,8 @@ Extract structured data from a webpage using Stagehand
| --------- | ---- | -------- | ----------- |
| `url` | string | Yes | URL of the webpage to extract data from |
| `instruction` | string | Yes | Instructions for extraction |
| `apiKey` | string | Yes | OpenAI API key for extraction \(required by Stagehand\) |
| `provider` | string | No | AI provider to use: openai or anthropic |
| `apiKey` | string | Yes | API key for the selected provider |
| `schema` | json | Yes | JSON schema defining the structure of the data to extract |
#### Output
@@ -50,6 +58,28 @@ Extract structured data from a webpage using Stagehand
| --------- | ---- | ----------- |
| `data` | object | Extracted structured data matching the provided schema |
### `stagehand_agent`
Run an autonomous web agent to complete tasks and extract structured data
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `startUrl` | string | Yes | URL of the webpage to start the agent on |
| `task` | string | Yes | The task to complete or goal to achieve on the website |
| `variables` | json | No | Optional variables to substitute in the task \(format: \{key: value\}\). Reference in task using %key% |
| `format` | string | No | No description |
| `provider` | string | No | AI provider to use: openai or anthropic |
| `apiKey` | string | Yes | API key for the selected provider |
| `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `agentResult` | object | Result from the Stagehand agent execution |
## Notes

View File

@@ -1,64 +0,0 @@
---
title: Stagehand Agent
description: Autonomous web browsing agent
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) is an autonomous web agent platform that enables AI systems to navigate and interact with websites just like a human would. It provides a powerful solution for automating complex web tasks without requiring custom code or browser automation scripts.
With Stagehand, you can:
- **Automate web navigation**: Enable AI to browse websites, click links, fill forms, and interact with web elements
- **Extract structured data**: Collect specific information from websites in a structured, usable format
- **Complete complex workflows**: Perform multi-step tasks across different websites and web applications
- **Handle authentication**: Navigate login processes and maintain sessions across websites
- **Process dynamic content**: Interact with JavaScript-heavy sites and single-page applications
- **Maintain context awareness**: Keep track of the current state and history while navigating
- **Generate detailed reports**: Receive comprehensive logs of actions taken and data collected
In Sim, the Stagehand integration enables your agents to seamlessly interact with web-based systems as part of their workflows. This allows for sophisticated automation scenarios that bridge the gap between your AI agents and the vast information and functionality available on the web. Your agents can search for information, interact with web applications, extract data from websites, and incorporate these capabilities into their decision-making processes. By connecting Sim with Stagehand, you can create agents that extend beyond API-based integrations to navigate the web just as a human would - filling forms, clicking buttons, reading content, and extracting valuable information to complete their tasks more effectively.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Integrate Stagehand Agent into the workflow. Can navigate the web and perform tasks.
## Tools
### `stagehand_agent`
Run an autonomous web agent to complete tasks and extract structured data
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `startUrl` | string | Yes | URL of the webpage to start the agent on |
| `task` | string | Yes | The task to complete or goal to achieve on the website |
| `variables` | json | No | Optional variables to substitute in the task \(format: \{key: value\}\). Reference in task using %key% |
| `format` | string | No | No description |
| `apiKey` | string | Yes | OpenAI API key for agent execution \(required by Stagehand\) |
| `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `agentResult` | object | Result from the Stagehand agent execution |
## Notes
- Category: `tools`
- Type: `stagehand_agent`

View File

@@ -1,59 +0,0 @@
---
title: Stagehand Agent
description: Agente autónomo de navegación web
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) es una plataforma de agentes web autónomos que permite a los sistemas de IA navegar e interactuar con sitios web tal como lo haría un humano. Proporciona una solución potente para automatizar tareas web complejas sin necesidad de código personalizado o scripts de automatización de navegador.
Con Stagehand, puedes:
- **Automatizar la navegación web**: Permitir que la IA navegue por sitios web, haga clic en enlaces, complete formularios e interactúe con elementos web
- **Extraer datos estructurados**: Recopilar información específica de sitios web en un formato estructurado y utilizable
- **Completar flujos de trabajo complejos**: Realizar tareas de múltiples pasos en diferentes sitios web y aplicaciones web
- **Gestionar la autenticación**: Navegar por procesos de inicio de sesión y mantener sesiones en sitios web
- **Procesar contenido dinámico**: Interactuar con sitios con uso intensivo de JavaScript y aplicaciones de una sola página
- **Mantener la conciencia del contexto**: Realizar un seguimiento del estado actual y del historial durante la navegación
- **Generar informes detallados**: Recibir registros completos de las acciones realizadas y los datos recopilados
En Sim, la integración de Stagehand permite que tus agentes interactúen sin problemas con sistemas basados en web como parte de sus flujos de trabajo. Esto permite escenarios de automatización sofisticados que conectan a tus agentes de IA con la amplia información y funcionalidad disponible en la web. Tus agentes pueden buscar información, interactuar con aplicaciones web, extraer datos de sitios web e incorporar estas capacidades en sus procesos de toma de decisiones. Al conectar Sim con Stagehand, puedes crear agentes que van más allá de las integraciones basadas en API para navegar por la web tal como lo haría un humano: completando formularios, haciendo clic en botones, leyendo contenido y extrayendo información valiosa para completar sus tareas de manera más efectiva.
{/* MANUAL-CONTENT-END */}
## Instrucciones de uso
Integra el Agente Stagehand en el flujo de trabajo. Puede navegar por la web y realizar tareas. Requiere clave API.
## Herramientas
### `stagehand_agent`
Ejecuta un agente web autónomo para completar tareas y extraer datos estructurados
#### Entrada
| Parámetro | Tipo | Obligatorio | Descripción |
| --------- | ---- | ----------- | ----------- |
| `startUrl` | string | Sí | URL de la página web donde iniciará el agente |
| `task` | string | Sí | La tarea a completar o el objetivo a lograr en el sitio web |
| `variables` | json | No | Variables opcionales para sustituir en la tarea \(formato: \{key: value\}\). Referencia en la tarea usando %key% |
| `format` | string | No | Sin descripción |
| `apiKey` | string | Sí | Clave API de OpenAI para la ejecución del agente \(requerida por Stagehand\) |
| `outputSchema` | json | No | Esquema JSON opcional que define la estructura de los datos que el agente debe devolver |
#### Salida
| Parámetro | Tipo | Descripción |
| --------- | ---- | ----------- |
| `agentResult` | object | Resultado de la ejecución del agente Stagehand |
## Notas
- Categoría: `tools`
- Tipo: `stagehand_agent`

View File

@@ -1,59 +0,0 @@
---
title: Agent Stagehand
description: Agent de navigation web autonome
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) est une plateforme d'agent web autonome qui permet aux systèmes d'IA de naviguer et d'interagir avec des sites web comme le ferait un humain. Elle offre une solution puissante pour automatiser des tâches web complexes sans nécessiter de code personnalisé ou de scripts d'automatisation de navigateur.
Avec Stagehand, vous pouvez :
- **Automatiser la navigation web** : permettre à l'IA de parcourir des sites web, cliquer sur des liens, remplir des formulaires et interagir avec des éléments web
- **Extraire des données structurées** : collecter des informations spécifiques à partir de sites web dans un format structuré et utilisable
- **Réaliser des flux de travail complexes** : effectuer des tâches en plusieurs étapes sur différents sites web et applications web
- **Gérer l'authentification** : naviguer dans les processus de connexion et maintenir les sessions sur les sites web
- **Traiter du contenu dynamique** : interagir avec des sites à forte composante JavaScript et des applications à page unique
- **Maintenir une conscience contextuelle** : suivre l'état actuel et l'historique pendant la navigation
- **Générer des rapports détaillés** : recevoir des journaux complets des actions entreprises et des données collectées
Dans Sim, l'intégration de Stagehand permet à vos agents d'interagir de manière transparente avec des systèmes basés sur le web dans le cadre de leurs flux de travail. Cela permet des scénarios d'automatisation sophistiqués qui comblent le fossé entre vos agents d'IA et la vaste quantité d'informations et de fonctionnalités disponibles sur le web. Vos agents peuvent rechercher des informations, interagir avec des applications web, extraire des données de sites web et intégrer ces capacités dans leurs processus de prise de décision. En connectant Sim avec Stagehand, vous pouvez créer des agents qui vont au-delà des intégrations basées sur API pour naviguer sur le web comme le ferait un humain - remplir des formulaires, cliquer sur des boutons, lire du contenu et extraire des informations précieuses pour accomplir leurs tâches plus efficacement.
{/* MANUAL-CONTENT-END */}
## Instructions d'utilisation
Intégrez l'agent Stagehand dans le flux de travail. Peut naviguer sur le web et effectuer des tâches. Nécessite une clé API.
## Outils
### `stagehand_agent`
Exécuter un agent web autonome pour accomplir des tâches et extraire des données structurées
#### Entrée
| Paramètre | Type | Obligatoire | Description |
| --------- | ---- | ----------- | ----------- |
| `startUrl` | string | Oui | URL de la page web sur laquelle démarrer l'agent |
| `task` | string | Oui | La tâche à accomplir ou l'objectif à atteindre sur le site web |
| `variables` | json | Non | Variables optionnelles à substituer dans la tâche \(format : \{key: value\}\). Référencez dans la tâche en utilisant %key% |
| `format` | string | Non | Pas de description |
| `apiKey` | string | Oui | Clé API OpenAI pour l'exécution de l'agent \(requise par Stagehand\) |
| `outputSchema` | json | Non | Schéma JSON optionnel définissant la structure des données que l'agent doit renvoyer |
#### Sortie
| Paramètre | Type | Description |
| --------- | ---- | ----------- |
| `agentResult` | object | Résultat de l'exécution de l'agent Stagehand |
## Remarques
- Catégorie : `tools`
- Type : `stagehand_agent`

View File

@@ -1,59 +0,0 @@
---
title: Stagehand Agent
description: Autonomous web browsing agent
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) is an autonomous web agent platform that enables AI systems to navigate and interact with websites just like a human would. It provides a powerful solution for automating complex web tasks without requiring custom code or browser automation scripts.
With Stagehand, you can:
- **Automate web navigation**: Enable AI to browse websites, click links, fill forms, and interact with web elements
- **Extract structured data**: Collect specific information from websites in a structured, usable format
- **Complete complex workflows**: Perform multi-step tasks across different websites and web applications
- **Handle authentication**: Navigate login processes and maintain sessions across websites
- **Process dynamic content**: Interact with JavaScript-heavy sites and single-page applications
- **Maintain context awareness**: Keep track of the current state and history while navigating
- **Generate detailed reports**: Receive comprehensive logs of actions taken and data collected
In Sim, the Stagehand integration enables your agents to seamlessly interact with web-based systems as part of their workflows. This allows for sophisticated automation scenarios that bridge the gap between your AI agents and the vast information and functionality available on the web. Your agents can search for information, interact with web applications, extract data from websites, and incorporate these capabilities into their decision-making processes. By connecting Sim with Stagehand, you can create agents that extend beyond API-based integrations to navigate the web just as a human would - filling forms, clicking buttons, reading content, and extracting valuable information to complete their tasks more effectively.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Integrate Stagehand Agent into the workflow. Can navigate the web and perform tasks.
## Tools
### `stagehand_agent`
Run an autonomous web agent to complete tasks and extract structured data
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `startUrl` | string | Yes | URL of the webpage to start the agent on |
| `task` | string | Yes | The task to complete or goal to achieve on the website |
| `variables` | json | No | Optional variables to substitute in the task \(format: \{key: value\}\). Reference in task using %key% |
| `format` | string | No | No description |
| `apiKey` | string | Yes | OpenAI API key for agent execution \(required by Stagehand\) |
| `outputSchema` | json | No | Optional JSON schema defining the structure of data the agent should return |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `agentResult` | object | Result from the Stagehand agent execution |
## Notes
- Category: `tools`
- Type: `stagehand_agent`

View File

@@ -1,59 +0,0 @@
---
title: Stagehand Agent
description: 自主网页浏览代理
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="stagehand_agent"
color="#FFC83C"
/>
{/* MANUAL-CONTENT-START:intro */}
[Stagehand](https://www.stagehand.dev/) 是一个自主的网页代理平台,使 AI 系统能够像人类一样浏览和与网站交互。它为自动化复杂的网页任务提供了强大的解决方案,无需定制代码或浏览器自动化脚本。
使用 Stagehand,您可以:
- **自动化网页导航**:使 AI 能够浏览网站、点击链接、填写表单并与网页元素交互
- **提取结构化数据**:从网站收集特定信息并以结构化、可用的格式呈现
- **完成复杂工作流程**:在不同网站和网页应用程序之间执行多步骤任务
- **处理身份验证**:导航登录流程并维护跨网站的会话
- **处理动态内容**:与 JavaScript 密集型网站和单页应用程序交互
- **保持上下文感知**:在导航时跟踪当前状态和历史记录
- **生成详细报告**:接收所采取操作和收集数据的全面日志
在 Sim 中,Stagehand 集成使您的代理能够无缝地与基于网页的系统交互,作为其工作流程的一部分。这允许复杂的自动化场景,将您的 AI 代理与网络上广泛的信息和功能连接起来。您的代理可以搜索信息、与网页应用程序交互、从网站提取数据,并将这些能力融入其决策过程中。通过将 Sim 与 Stagehand 连接,您可以创建超越基于 API 集成的代理,使其能够像人类一样浏览网页——填写表单、点击按钮、阅读内容并提取有价值的信息,从而更高效地完成任务。
{/* MANUAL-CONTENT-END */}
## 使用说明
将 Stagehand Agent 集成到工作流程中。可以浏览网页并执行任务。需要 API 密钥。
## 工具
### `stagehand_agent`
运行一个自主的网页代理来完成任务并提取结构化数据
#### 输入
| 参数 | 类型 | 必需 | 描述 |
| --------- | ---- | -------- | ----------- |
| `startUrl` | string | 是 | 启动代理的网页 URL |
| `task` | string | 是 | 在网站上完成的任务或实现的目标 |
| `variables` | json | 否 | 替代任务中的可选变量(格式:\{key: value\})。在任务中使用 %key% 引用 |
| `format` | string | 否 | 无描述 |
| `apiKey` | string | 是 | 用于代理执行的 OpenAI API 密钥(Stagehand 必需) |
| `outputSchema` | json | 否 | 定义代理应返回的数据结构的可选 JSON 架构 |
#### 输出
| 参数 | 类型 | 描述 |
| --------- | ---- | ----------- |
| `agentResult` | object | Stagehand 代理执行的结果 |
## 注意
- 类别:`tools`
- 类型:`stagehand_agent`

View File

@@ -15,9 +15,13 @@ const requestSchema = z.object({
startUrl: z.string().url(),
outputSchema: z.any(),
variables: z.any(),
provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
apiKey: z.string(),
})
/**
* Extracts the inner schema object from a potentially nested schema structure
*/
function getSchemaObject(outputSchema: Record<string, any>): Record<string, any> {
if (outputSchema.schema && typeof outputSchema.schema === 'object') {
return outputSchema.schema
@@ -25,6 +29,9 @@ function getSchemaObject(outputSchema: Record<string, any>): Record<string, any>
return outputSchema
}
/**
* Formats a schema object as a string for inclusion in agent instructions
*/
function formatSchemaForInstructions(schema: Record<string, any>): string {
try {
return JSON.stringify(schema, null, 2)
@@ -34,355 +41,51 @@ function formatSchemaForInstructions(schema: Record<string, any>): string {
}
}
function extractActionDirectives(task: string): {
processedTask: string
actionDirectives: Array<{ index: number; action: string }>
} {
const actionRegex = /\[\[ACTION:(.*?)\]\]/g
const actionDirectives: Array<{ index: number; action: string }> = []
let match
let processedTask = task
/**
* Processes variables from various input formats into a standardized key-value object
*/
function processVariables(variables: any): Record<string, string> | undefined {
if (!variables) return undefined
while ((match = actionRegex.exec(task)) !== null) {
const actionText = match[1].trim()
const index = match.index
let variablesObject: Record<string, string> = {}
actionDirectives.push({
index,
action: actionText,
if (Array.isArray(variables)) {
variables.forEach((item: any) => {
if (item?.cells?.Key && typeof item.cells.Key === 'string') {
variablesObject[item.cells.Key] = item.cells.Value || ''
}
})
}
if (actionDirectives.length > 0) {
let offset = 0
for (let i = 0; i < actionDirectives.length; i++) {
const directive = actionDirectives[i]
const originalIndex = directive.index
const placeholder = `[SECURE ACTION ${i + 1}]`
const adjustedIndex = originalIndex - offset
const fullMatch = task.substring(
originalIndex,
originalIndex + task.substring(originalIndex).indexOf(']]') + 2
)
processedTask =
processedTask.substring(0, adjustedIndex) +
placeholder +
processedTask.substring(adjustedIndex + fullMatch.length)
offset += fullMatch.length - placeholder.length
} else if (typeof variables === 'object' && variables !== null) {
variablesObject = { ...variables }
} else if (typeof variables === 'string') {
try {
variablesObject = JSON.parse(variables)
} catch (_e) {
logger.warn('Failed to parse variables string as JSON', { variables })
return undefined
}
}
return { processedTask, actionDirectives }
if (Object.keys(variablesObject).length === 0) {
return undefined
}
return variablesObject
}
async function processSecureActions(
message: string,
stagehand: Stagehand,
actionDirectives: Array<{ index: number; action: string }>,
variables: Record<string, string> | undefined
): Promise<{
modifiedMessage: string
executedActions: Array<{ action: string; result: { success: boolean; message: string } }>
}> {
const executedActions: Array<{ action: string; result: { success: boolean; message: string } }> =
[]
let modifiedMessage = message
/**
* Substitutes variable placeholders in text with their actual values
* Variables are referenced using %key% syntax
*/
function substituteVariables(text: string, variables: Record<string, string> | undefined): string {
if (!variables) return text
const secureActionMatches = [...message.matchAll(/EXECUTE SECURE ACTION (\d+)/gi)]
for (const match of secureActionMatches) {
const fullMatch = match[0]
const actionIndex = Number.parseInt(match[1], 10) - 1
if (actionDirectives[actionIndex]) {
const actionDirective = actionDirectives[actionIndex]
let resultMessage = ''
try {
logger.info(`Executing secure action ${actionIndex + 1}`, {
action: actionDirective.action,
})
const result = await stagehand.act(actionDirective.action, {
variables: variables || {},
})
executedActions.push({
action: actionDirective.action,
result: {
success: result.success,
message: result.message,
},
})
resultMessage = `\nSecure action ${actionIndex + 1} executed successfully.\n`
} catch (error) {
logger.error(`Error executing secure action ${actionIndex + 1}`, {
error,
action: actionDirective.action,
})
executedActions.push({
action: actionDirective.action,
result: {
success: false,
message: error instanceof Error ? error.message : 'Unknown error',
},
})
resultMessage = `\nError executing secure action ${actionIndex + 1}: ${error instanceof Error ? error.message : 'Unknown error'}\n`
}
modifiedMessage = modifiedMessage.replace(fullMatch, resultMessage)
} else {
const errorMessage = `\nError: Secure action ${actionIndex + 1} does not exist.\n`
modifiedMessage = modifiedMessage.replace(fullMatch, errorMessage)
}
}
return { modifiedMessage, executedActions }
}
async function attemptDirectLogin(
stagehand: Stagehand,
variables: Record<string, string> | undefined
): Promise<{
attempted: boolean
success: boolean
message: string
}> {
if (!stagehand || !variables) {
return {
attempted: false,
success: false,
message: 'Login not attempted: missing stagehand or variables',
}
}
const usernameKeys = ['username', 'email', 'user']
const passwordKeys = ['password', 'pass', 'secret']
const usernameKey = usernameKeys.find((key) => variables[key] !== undefined)
const passwordKey = passwordKeys.find((key) => variables[key] !== undefined)
if (!usernameKey || !passwordKey) {
logger.info('Direct login skipped: Missing username or password variable.')
return {
attempted: false,
success: false,
message: 'Login not attempted: Missing username or password variable.',
}
}
const usernameValue = variables[usernameKey]
const passwordValue = variables[passwordKey]
logger.info('Attempting direct login with provided variables.')
try {
const page = stagehand.context.pages()[0]
const usernameSelectors = [
'input[type="text"][name*="user"]',
'input[type="email"]',
'input[name*="email"]',
'input[id*="user"]',
'input[id*="email"]',
'input[placeholder*="user" i]',
'input[placeholder*="email" i]',
'input[aria-label*="user" i]',
'input[aria-label*="email" i]',
]
const passwordSelectors = [
'input[type="password"]',
'input[name*="pass"]',
'input[id*="pass"]',
'input[placeholder*="pass" i]',
'input[aria-label*="pass" i]',
]
const submitSelectors = [
'button[type="submit"]',
'input[type="submit"]',
'button:has-text("Login")',
'button:has-text("Sign in")',
'button[id*="login"]',
'button[id*="submit"]',
'button[name*="login"]',
'button[name*="submit"]',
]
let usernameFilled = false
for (const selector of usernameSelectors) {
const input = page.locator(selector).first()
if ((await input.count()) > 0 && (await input.isVisible())) {
logger.info(`Found username field: ${selector}`)
await input.fill(usernameValue)
usernameFilled = true
break
}
}
if (!usernameFilled) {
logger.warn('Could not find a visible username/email field for direct login.')
return {
attempted: false,
success: false,
message: 'Login not attempted: Could not find a username field.',
}
}
let passwordFilled = false
for (const selector of passwordSelectors) {
const input = page.locator(selector).first()
if ((await input.count()) > 0 && (await input.isVisible())) {
logger.info(`Found password field: ${selector}`)
await input.fill(passwordValue)
passwordFilled = true
break
}
}
if (!passwordFilled) {
logger.warn('Could not find a visible password field for direct login.')
return {
attempted: true,
success: false,
message:
'Login attempt incomplete: Found and filled username but could not find password field.',
}
}
let submitClicked = false
for (const selector of submitSelectors) {
const button = page.locator(selector).first()
if ((await button.count()) > 0 && (await button.isVisible())) {
logger.info(`Found submit button: ${selector}`)
await button.click()
await new Promise((resolve) => setTimeout(resolve, 3000))
submitClicked = true
break
}
}
if (!submitClicked) {
logger.warn('Could not find a visible/enabled submit button for direct login.')
return {
attempted: true,
success: false,
message:
'Login attempt incomplete: Found and filled form fields but could not find submit button.',
}
}
logger.info(
'Direct login attempt completed (fields filled, submit clicked). Verifying result...'
)
const currentUrl = page.url()
const isStillOnLoginPage =
currentUrl.includes('login') ||
currentUrl.includes('signin') ||
currentUrl.includes('auth') ||
currentUrl.includes('signup') ||
currentUrl.includes('register')
const hasLoginError = await page.evaluate(() => {
const errorSelectors = [
'[class*="error" i]',
'[id*="error" i]',
'[role="alert"]',
'.alert-danger',
'.text-danger',
'.text-error',
'.notification-error',
]
for (const selector of errorSelectors) {
const elements = document.querySelectorAll(selector)
for (const element of elements) {
const text = element.textContent || ''
if (
text.toLowerCase().includes('password') ||
text.toLowerCase().includes('login failed') ||
text.toLowerCase().includes('incorrect') ||
text.toLowerCase().includes('invalid') ||
text.toLowerCase().includes("doesn't match") ||
text.toLowerCase().includes('does not match')
) {
return true
}
}
}
return false
})
const hasSuccessIndicators = await page.evaluate(() => {
const userMenuSelectors = [
'[class*="avatar" i]',
'[class*="profile" i]',
'[class*="user-menu" i]',
'[class*="account" i]',
'[aria-label*="account" i]',
'[aria-label*="profile" i]',
]
for (const selector of userMenuSelectors) {
if (document.querySelector(selector)) {
return true
}
}
return false
})
if (!isStillOnLoginPage && !hasLoginError && hasSuccessIndicators) {
logger.info('Login verification successful: Detected successful login.')
return {
attempted: true,
success: true,
message: 'Login successful. User is now authenticated.',
}
}
if (hasLoginError) {
logger.warn('Login verification failed: Detected login error message.')
return {
attempted: true,
success: false,
message:
'Login attempted but failed: Detected error message on page. Likely invalid credentials.',
}
}
if (isStillOnLoginPage) {
logger.warn('Login verification inconclusive: Still on login page.')
return {
attempted: true,
success: false,
message: 'Login attempted but failed: Still on login/authentication page.',
}
}
logger.info('Login verification inconclusive. Proceeding as if login was successful.')
return {
attempted: true,
success: true,
message: 'Login likely successful, but could not verify with certainty.',
}
} catch (error) {
logger.error('Error during direct login attempt', {
error: error instanceof Error ? error.message : String(error),
})
return {
attempted: true,
success: false,
message: `Login attempt encountered an error: ${error instanceof Error ? error.message : String(error)}`,
}
let result = text
for (const [key, value] of Object.entries(variables)) {
const placeholder = `%${key}%`
result = result.split(placeholder).join(value)
}
return result
}
export async function POST(request: NextRequest) {
@@ -408,70 +111,17 @@ export async function POST(request: NextRequest) {
}
const params = validationResult.data
let variablesObject: Record<string, string> | undefined
const { task, startUrl: rawStartUrl, outputSchema, provider, apiKey } = params
const variablesObject = processVariables(params.variables)
if (params.variables) {
if (Array.isArray(params.variables)) {
variablesObject = {}
params.variables.forEach((item: any) => {
if (item?.cells?.Key && typeof item.cells.Key === 'string') {
variablesObject![item.cells.Key] = item.cells.Value || ''
}
})
} else if (typeof params.variables === 'object' && params.variables !== null) {
variablesObject = { ...params.variables }
} else if (typeof params.variables === 'string') {
try {
variablesObject = JSON.parse(params.variables)
} catch (_e) {
logger.warn('Failed to parse variables string as JSON', { variables: params.variables })
}
}
if (!variablesObject || Object.keys(variablesObject).length === 0) {
logger.warn('Variables object is empty after processing', {
originalVariables: params.variables,
variablesType: typeof params.variables,
})
if (typeof params.variables === 'object' && params.variables !== null) {
variablesObject = {}
for (const key in params.variables) {
if (typeof params.variables[key] === 'string') {
variablesObject[key] = params.variables[key]
}
}
logger.info('Recovered variables from raw object', {
recoveredCount: Object.keys(variablesObject).length,
})
}
}
if (variablesObject) {
const safeVarKeys = Object.keys(variablesObject).map((key) => {
return key.toLowerCase().includes('password')
? `${key}: [REDACTED]`
: `${key}: ${variablesObject?.[key]}`
})
logger.info('Collected variables for substitution', {
variableCount: Object.keys(variablesObject).length,
safeVariables: safeVarKeys,
})
}
}
const { task, startUrl: rawStartUrl, outputSchema, apiKey } = params
let startUrl = rawStartUrl
startUrl = normalizeUrl(startUrl)
const startUrl = normalizeUrl(rawStartUrl)
logger.info('Starting Stagehand agent process', {
rawStartUrl,
startUrl,
hasTask: !!task,
hasVariables: !!variablesObject && Object.keys(variablesObject).length > 0,
hasVariables: !!variablesObject,
provider,
})
if (!BROWSERBASE_API_KEY || !BROWSERBASE_PROJECT_ID) {
@@ -486,17 +136,36 @@ export async function POST(request: NextRequest) {
)
}
if (!apiKey || typeof apiKey !== 'string') {
logger.error('API key is required')
return NextResponse.json({ error: 'API key is required' }, { status: 400 })
}
if (provider === 'openai' && !apiKey.startsWith('sk-')) {
logger.error('Invalid OpenAI API key format')
return NextResponse.json({ error: 'Invalid OpenAI API key format' }, { status: 400 })
}
if (provider === 'anthropic' && !apiKey.startsWith('sk-ant-')) {
logger.error('Invalid Anthropic API key format')
return NextResponse.json({ error: 'Invalid Anthropic API key format' }, { status: 400 })
}
const modelName =
provider === 'anthropic' ? 'anthropic/claude-3-7-sonnet-latest' : 'openai/gpt-4.1'
try {
logger.info('Initializing Stagehand with Browserbase (v3)')
logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })
stagehand = new Stagehand({
env: 'BROWSERBASE',
apiKey: BROWSERBASE_API_KEY,
projectId: BROWSERBASE_PROJECT_ID,
verbose: 1,
disableAPI: true, // Use local agent handler instead of Browserbase API
logger: (msg) => logger.info(typeof msg === 'string' ? msg : JSON.stringify(msg)),
model: {
modelName: 'anthropic/claude-3-7-sonnet-latest',
modelName,
apiKey: apiKey,
},
})
@@ -506,288 +175,47 @@ export async function POST(request: NextRequest) {
logger.info('Stagehand initialized successfully')
const page = stagehand.context.pages()[0]
logger.info(`Navigating to ${startUrl}`)
await page.goto(startUrl, { waitUntil: 'networkidle' })
logger.info('Navigation complete')
const ensureLoginPage = async (): Promise<boolean> => {
if (!stagehand) {
logger.error('Stagehand instance is null')
return false
}
const taskWithVariables = substituteVariables(task, variablesObject)
const currentPage = stagehand.context.pages()[0]
logger.info('Checking if we need to navigate to login page')
let agentInstructions = `You are a helpful web browsing assistant. Complete the following task: ${taskWithVariables}`
try {
const loginFormExists = await currentPage.evaluate(() => {
const usernameInput = document.querySelector(
'input[type="text"], input[type="email"], input[name="username"], input[id="username"]'
)
const passwordInput = document.querySelector('input[type="password"]')
return !!(usernameInput && passwordInput)
})
if (loginFormExists) {
logger.info('Already on login page with username/password fields')
return true
}
const loginElements = await stagehand.observe('Find login buttons or links on this page')
if (loginElements && loginElements.length > 0) {
for (const element of loginElements) {
if (
element.description.toLowerCase().includes('login') ||
element.description.toLowerCase().includes('sign in')
) {
logger.info(`Found login element: ${element.description}`)
if (element.selector) {
logger.info(`Clicking login element: ${element.selector}`)
await stagehand.act(`Click on the ${element.description}`)
await new Promise((resolve) => setTimeout(resolve, 2000))
const loginPageAfterClick = await currentPage.evaluate(() => {
const usernameInput = document.querySelector(
'input[type="text"], input[type="email"], input[name="username"], input[id="username"]'
)
const passwordInput = document.querySelector('input[type="password"]')
return !!(usernameInput && passwordInput)
})
if (loginPageAfterClick) {
logger.info('Successfully navigated to login page')
return true
}
}
}
}
}
logger.info('Trying direct navigation to /login path')
const currentUrl = currentPage.url()
const loginUrl = new URL('/login', currentUrl).toString()
await currentPage.goto(loginUrl, { waitUntil: 'networkidle' })
const loginPageAfterDirectNav = await currentPage.evaluate(() => {
const usernameInput = document.querySelector(
'input[type="text"], input[type="email"], input[name="username"], input[id="username"]'
)
const passwordInput = document.querySelector('input[type="password"]')
return !!(usernameInput && passwordInput)
})
if (loginPageAfterDirectNav) {
logger.info('Successfully navigated to login page via direct URL')
return true
}
logger.warn('Could not navigate to login page')
return false
} catch (error) {
logger.error('Error finding login page', { error })
return false
}
if (variablesObject && Object.keys(variablesObject).length > 0) {
const safeVarKeys = Object.keys(variablesObject).map((key) => {
return key.toLowerCase().includes('password') ? `${key}: [REDACTED]` : key
})
logger.info('Variables available for task', { variables: safeVarKeys })
}
let directLoginAttempted = false
let directLoginSuccess = false
let loginMessage = ''
let taskForAgent = task
let agentInstructions = ''
const hasLoginVars =
variablesObject &&
Object.keys(variablesObject).some((k) =>
['username', 'email', 'user'].includes(k.toLowerCase())
) &&
Object.keys(variablesObject).some((k) =>
['password', 'pass', 'secret'].includes(k.toLowerCase())
)
if (hasLoginVars) {
logger.info('Login variables detected, checking if login page navigation is needed.')
const isOnLoginPage = await ensureLoginPage()
if (isOnLoginPage && stagehand) {
logger.info('Attempting direct login before involving the agent.')
const loginResult = await attemptDirectLogin(stagehand, variablesObject)
directLoginAttempted = loginResult.attempted
directLoginSuccess = loginResult.success
loginMessage = loginResult.message
logger.info('Direct login attempt result', {
attempted: directLoginAttempted,
success: directLoginSuccess,
message: loginMessage,
})
if (directLoginAttempted) {
if (directLoginSuccess) {
taskForAgent = `Login has been completed programmatically and was successful. Please verify that you are logged in and then proceed with the original task: ${task}`
} else {
taskForAgent = `Login was attempted programmatically but failed (${loginMessage}). You will need to check the current state and either:
1. Try to login again if you see a login form
2. Or proceed with the task if login actually succeeded: ${task}`
}
logger.info('Task modified for agent after direct login attempt.')
}
} else {
logger.info('Skipping direct login attempt: Not on login page or stagehand unavailable.')
}
} else {
logger.info('Skipping direct login: No relevant username/password variables found.')
if (outputSchema && typeof outputSchema === 'object' && outputSchema !== null) {
const schemaObj = getSchemaObject(outputSchema)
agentInstructions += `\n\nIMPORTANT: You MUST return your final result in the following JSON format exactly:\n${formatSchemaForInstructions(schemaObj)}\n\nYour response should consist of valid JSON only, with no additional text.`
}
const { processedTask, actionDirectives } = extractActionDirectives(task)
logger.info('Extracted action directives', {
actionCount: actionDirectives.length,
hasActionDirectives: actionDirectives.length > 0,
})
if (directLoginAttempted) {
const loginInstructions = directLoginSuccess
? 'Login was completed programmatically and appears successful. Please VERIFY if the login was successful by checking for elements that only appear when logged in.'
: `Login was attempted programmatically but appears to have FAILED (${loginMessage}).
IMPORTANT: Check if you see a login form, and if so:
1. Username and password fields may already be filled (but may contain placeholder text if the login failed)
2. If you need to attempt login again, make sure you use the actual variable placeholders (%username%, %password%) so they are properly substituted.
3. Check for any error messages to understand why the login failed.`
agentInstructions = `You are a helpful web browsing assistant. ${loginInstructions}
Once you've verified the login state, proceed with the following task: ${task}
${actionDirectives.length > 0 ? `\n\nNote on Secure Actions: You might see [SECURE ACTION X] placeholders. Handle these by outputting "EXECUTE SECURE ACTION X" when appropriate.` : ''}
${outputSchema && typeof outputSchema === 'object' && outputSchema !== null ? `\n\nIMPORTANT: You MUST return your final result in the following JSON format exactly:\n${formatSchemaForInstructions(getSchemaObject(outputSchema))}\n\nYour response should consist of valid JSON only, with no additional text.` : ''}`
} else {
agentInstructions = `You are a helpful web browsing assistant that will complete tasks on websites. Your goal is to accomplish the following task: ${processedTask}\n
${actionDirectives.length > 0 ? `\n\nYou'll see [SECURE ACTION X] placeholders in the task. These represent secure actions that will be handled automatically when you navigate to the appropriate page. When you reach a point where a secure action should be performed, output a line with exactly: "EXECUTE SECURE ACTION X" (where X is the action number). Then wait for confirmation before proceeding.` : ''}\n
IMPORTANT: For any form fields that require sensitive information like usernames or passwords:
1. If you see placeholders like %username% or %password% in the task, DO NOT ask for the actual values.
2. If you need to type in login forms, use the EXACT placeholder text (e.g., "%username%" or "%password%") UNLESS instructed otherwise.
3. The system will automatically substitute the real values when you use these placeholders IF direct login was not attempted.
4. Example correct approach: "type %username% in the username field".${
variablesObject && Object.keys(variablesObject).length > 0
? `\n5. Available variables: ${Object.keys(variablesObject)
.map((k) => `%${k}%`)
.join(', ')}`
: ''
}\n
WEBSITE NAVIGATION GUIDANCE:
1. If you need to log in but don't see a login form, LOOK for login buttons or links (they might say "Login" or "Sign in").
2. If you're on a login page but don't see a username/password form, try scrolling or looking for "Continue with email" or similar options.
3. Always TYPE carefully in form fields - use accurate coordinates for clicking if necessary.
4. Use specific actions like "type %username% in the username field".
5. After logging in, verify you've successfully authenticated before proceeding.\n
${outputSchema && typeof outputSchema === 'object' && outputSchema !== null ? `\n\nIMPORTANT: You MUST return your final result in the following JSON format exactly:\n${formatSchemaForInstructions(getSchemaObject(outputSchema))}\n\nYour response should consist of valid JSON only, with no additional text. Ensure the data in your response adheres strictly to the schema provided.` : ''}`
}
logger.info('Creating Stagehand agent', {
directLoginAttempted,
directLoginSuccess,
loginMessage,
})
const additionalContext = directLoginAttempted
? `Login was ${directLoginSuccess ? 'successfully completed' : 'attempted but failed'}.
${loginMessage}
First check the current state of the page.
If login failed, you may need to click the login button again after ensuring fields are properly filled.`
: `
This task may contain placeholder variables like %username% and %password%.
When you need to fill form fields, use these placeholders directly (e.g., type "%username%").
The system will substitute actual values when these placeholders are used, keeping sensitive data secure.
`.trim()
logger.info('Creating Stagehand agent')
const agent = stagehand.agent({
model: {
modelName: 'anthropic/claude-3-7-sonnet-latest',
modelName,
apiKey: apiKey,
},
executionModel: {
modelName: 'anthropic/claude-3-7-sonnet-latest',
modelName,
apiKey: apiKey,
},
systemPrompt: `${agentInstructions}\n\n${additionalContext}`,
systemPrompt: agentInstructions,
})
const runAgentWithSecureActions = async (): Promise<any> => {
let currentResult = await agent.execute({ instruction: taskForAgent })
let allExecutedActions: Array<{
action: string
result: { success: boolean; message: string }
}> = []
let iterationCount = 0
const maxIterations = 10 // Safety limit for iterations
logger.info('Executing agent task', { task: taskWithVariables })
while (iterationCount < maxIterations && stagehand !== null) {
if (!currentResult.message) {
break
}
if (!/EXECUTE SECURE ACTION \d+/i.test(currentResult.message)) {
break
}
const { modifiedMessage, executedActions } = await processSecureActions(
currentResult.message,
stagehand,
actionDirectives,
variablesObject
)
allExecutedActions = [...allExecutedActions, ...executedActions]
if (executedActions.length === 0) {
break
}
iterationCount++
const hasStructuredOutput = /```json|^\s*{/.test(modifiedMessage)
if (hasStructuredOutput) {
currentResult.message = modifiedMessage
break
}
logger.info(
`Continuing agent execution with processed actions, iteration ${iterationCount}`
)
try {
const continuationPrompt = `${modifiedMessage}\n\nPlease continue with the task.`
const nextResult = await agent.execute({ instruction: continuationPrompt })
currentResult = {
...nextResult,
actions: [...currentResult.actions, ...nextResult.actions],
}
} catch (error) {
logger.error('Error continuing agent execution', { error })
break
}
}
return {
...currentResult,
secureActions: allExecutedActions,
}
}
logger.info('Executing agent task', {
task: taskForAgent,
actionDirectiveCount: actionDirectives.length,
directLoginAttempted,
directLoginSuccess,
loginMessage,
const agentExecutionResult = await agent.execute({
instruction: taskWithVariables,
maxSteps: 20,
})
const agentExecutionResult = await runAgentWithSecureActions()
const agentResult = {
success: agentExecutionResult.success,
completed: agentExecutionResult.completed,
@@ -798,7 +226,7 @@ The system will substitute actual values when these placeholders are used, keepi
logger.info('Agent execution complete', {
success: agentResult.success,
completed: agentResult.completed,
executedActionCount: agentExecutionResult.secureActions?.length || 0,
actionCount: agentResult.actions?.length || 0,
})
let structuredOutput = null
@@ -849,7 +277,6 @@ The system will substitute actual values when these placeholders are used, keepi
return NextResponse.json({
agentResult,
structuredOutput,
secureActions: agentExecutionResult.secureActions || [],
})
} catch (error) {
logger.error('Stagehand agent execution error', {

View File

@@ -15,6 +15,7 @@ const requestSchema = z.object({
schema: z.record(z.any()),
useTextExtract: z.boolean().optional().default(false),
selector: z.string().nullable().optional(),
provider: z.enum(['openai', 'anthropic']).optional().default('openai'),
apiKey: z.string(),
url: z.string().url(),
})
@@ -41,7 +42,7 @@ export async function POST(request: NextRequest) {
}
const params = validationResult.data
const { url: rawUrl, instruction, selector, apiKey, schema } = params
const { url: rawUrl, instruction, selector, provider, apiKey, schema } = params
const url = normalizeUrl(rawUrl)
logger.info('Starting Stagehand extraction process', {
@@ -71,13 +72,26 @@ export async function POST(request: NextRequest) {
)
}
if (!apiKey || typeof apiKey !== 'string' || !apiKey.startsWith('sk-')) {
if (!apiKey || typeof apiKey !== 'string') {
logger.error('API key is required')
return NextResponse.json({ error: 'API key is required' }, { status: 400 })
}
if (provider === 'openai' && !apiKey.startsWith('sk-')) {
logger.error('Invalid OpenAI API key format')
return NextResponse.json({ error: 'Invalid OpenAI API key format' }, { status: 400 })
}
if (provider === 'anthropic' && !apiKey.startsWith('sk-ant-')) {
logger.error('Invalid Anthropic API key format')
return NextResponse.json({ error: 'Invalid Anthropic API key format' }, { status: 400 })
}
try {
logger.info('Initializing Stagehand with Browserbase (v3)')
const modelName =
provider === 'anthropic' ? 'anthropic/claude-3-7-sonnet-latest' : 'openai/gpt-4.1'
logger.info('Initializing Stagehand with Browserbase (v3)', { provider, modelName })
stagehand = new Stagehand({
env: 'BROWSERBASE',
@@ -86,7 +100,7 @@ export async function POST(request: NextRequest) {
verbose: 1,
logger: (msg) => logger.info(typeof msg === 'string' ? msg : JSON.stringify(msg)),
model: {
modelName: 'openai/gpt-4o',
modelName,
apiKey: apiKey,
},
})

View File

@@ -209,11 +209,11 @@ const renderLabel = (
{/* Wand inline prompt */}
{isWandEnabled && !isPreview && (
<div className='flex items-center pr-[4px]'>
<div className='flex min-w-0 flex-1 items-center justify-end pr-[4px]'>
{!isSearchActive ? (
<Button
variant='ghost'
className='h-[12px] w-[12px] p-0 hover:bg-transparent'
className='h-[12px] w-[12px] flex-shrink-0 p-0 hover:bg-transparent'
aria-label='Generate with AI'
onClick={onSearchClick}
>
@@ -235,7 +235,7 @@ const renderLabel = (
}}
disabled={isStreaming}
className={cn(
'h-[12px] w-full max-w-[200px] border-none bg-transparent py-0 pr-[2px] text-right font-medium text-[12px] text-[var(--text-primary)] leading-[14px] placeholder:text-[var(--text-muted)] focus:outline-none',
'h-[12px] w-full min-w-[100px] border-none bg-transparent py-0 pr-[2px] text-right font-medium text-[12px] text-[var(--text-primary)] leading-[14px] placeholder:text-[var(--text-muted)] focus:outline-none',
isStreaming && 'text-muted-foreground'
)}
placeholder='Describe...'

View File

@@ -8,23 +8,65 @@ export interface StagehandExtractResponse extends ToolResponse {
}
}
export const StagehandBlock: BlockConfig<StagehandExtractResponse> = {
/**
 * Tool response shape for the Stagehand agent operation.
 *
 * `output` carries the agent execution result and, optionally, structured output data.
 */
export interface StagehandAgentResponse extends ToolResponse {
output: {
/** Agent execution result. */
agentResult: {
success: boolean
completed: boolean
message: string
/** Optional list of action records: each has a type, a description and an optional result string. */
actions?: Array<{
type: string
description: string
result?: string
}>
}
/**
 * Structured output data; optional.
 * NOTE(review): presumably shaped by the agent's `outputSchema` input — confirm against the tool implementation.
 */
structuredOutput?: Record<string, any>
}
}
/** Union of the response types for the two Stagehand operations (extract and agent). */
export type StagehandResponse = StagehandExtractResponse | StagehandAgentResponse
export const StagehandBlock: BlockConfig<StagehandResponse> = {
type: 'stagehand',
name: 'Stagehand Extract',
description: 'Extract data from websites',
name: 'Stagehand',
description: 'Web automation and data extraction',
authMode: AuthMode.ApiKey,
longDescription:
'Integrate Stagehand into the workflow. Can extract structured data from webpages.',
'Integrate Stagehand into the workflow. Can extract structured data from webpages or run an autonomous agent to perform tasks.',
docsLink: 'https://docs.sim.ai/tools/stagehand',
category: 'tools',
bgColor: '#FFC83C',
icon: StagehandIcon,
subBlocks: [
// Operation selection
{
id: 'operation',
title: 'Operation',
type: 'dropdown',
options: [
{ label: 'Extract Data', id: 'extract' },
{ label: 'Run Agent', id: 'agent' },
],
value: () => 'extract',
},
// Provider selection
{
id: 'provider',
title: 'AI Provider',
type: 'dropdown',
options: [
{ label: 'OpenAI', id: 'openai' },
{ label: 'Anthropic', id: 'anthropic' },
],
value: () => 'openai',
},
// Extract operation fields
{
id: 'url',
title: 'URL',
type: 'short-input',
placeholder: 'Enter the URL of the website to extract data from',
condition: { field: 'operation', value: 'extract' },
required: true,
},
{
@@ -32,14 +74,7 @@ export const StagehandBlock: BlockConfig<StagehandExtractResponse> = {
title: 'Instructions',
type: 'long-input',
placeholder: 'Enter detailed instructions for what data to extract from the page...',
required: true,
},
{
id: 'apiKey',
title: 'OpenAI API Key',
type: 'short-input',
placeholder: 'Enter your OpenAI API key',
password: true,
condition: { field: 'operation', value: 'extract' },
required: true,
},
{
@@ -48,6 +83,7 @@ export const StagehandBlock: BlockConfig<StagehandExtractResponse> = {
type: 'code',
placeholder: 'Enter JSON Schema...',
language: 'json',
condition: { field: 'operation', value: 'extract' },
required: true,
wandConfig: {
enabled: true,
@@ -162,20 +198,188 @@ Example 3 (List Extraction):
generationType: 'json-schema',
},
},
// Agent operation fields
{
id: 'startUrl',
title: 'Starting URL',
type: 'short-input',
placeholder: 'Enter the starting URL for the agent',
condition: { field: 'operation', value: 'agent' },
required: true,
},
{
id: 'task',
title: 'Task',
type: 'long-input',
placeholder:
'Enter the task or goal for the agent to achieve. Reference variables using %key% syntax.',
condition: { field: 'operation', value: 'agent' },
required: true,
},
{
id: 'variables',
title: 'Variables',
type: 'table',
columns: ['Key', 'Value'],
condition: { field: 'operation', value: 'agent' },
},
{
id: 'outputSchema',
title: 'Output Schema',
type: 'code',
placeholder: 'Enter JSON Schema...',
language: 'json',
condition: { field: 'operation', value: 'agent' },
wandConfig: {
enabled: true,
maintainHistory: true,
prompt: `You are an expert programmer specializing in creating JSON schemas for web automation agents.
Generate ONLY the JSON schema based on the user's request.
The output MUST be a single, valid JSON object, starting with { and ending with }.
The JSON object MUST have the following top-level properties: 'name' (string), 'description' (string), 'strict' (boolean, usually true), and 'schema' (object).
The 'schema' object must define the structure and MUST contain 'type': 'object', 'properties': {...}, 'additionalProperties': false, and 'required': [...].
Inside 'properties', use standard JSON Schema properties (type, description, enum, items for arrays, etc.).
Current schema: {context}
Do not include any explanations, markdown formatting, or other text outside the JSON object.
Valid Schema Examples:
Example 1 (Login Result):
{
"name": "login_result",
"description": "Result of a login task performed by the agent",
"strict": true,
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"description": "Whether the login was successful"
},
"username": {
"type": "string",
"description": "The username that was logged in"
},
"dashboardUrl": {
"type": "string",
"description": "The URL of the dashboard after login"
}
},
"additionalProperties": false,
"required": ["success"]
}
}
Example 2 (Form Submission):
{
"name": "form_submission_result",
"description": "Result of submitting a form",
"strict": true,
"schema": {
"type": "object",
"properties": {
"submitted": {
"type": "boolean",
"description": "Whether the form was submitted"
},
"confirmationNumber": {
"type": "string",
"description": "Confirmation or reference number if provided"
},
"errorMessage": {
"type": "string",
"description": "Error message if submission failed"
}
},
"additionalProperties": false,
"required": ["submitted"]
}
}
Example 3 (Data Collection):
{
"name": "collected_data",
"description": "Data collected by the agent from multiple pages",
"strict": true,
"schema": {
"type": "object",
"properties": {
"items": {
"type": "array",
"description": "List of collected items",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Item name"
},
"value": {
"type": "string",
"description": "Item value or content"
},
"sourceUrl": {
"type": "string",
"description": "URL where the item was found"
}
},
"additionalProperties": false,
"required": ["name"]
}
},
"totalCount": {
"type": "number",
"description": "Total number of items collected"
}
},
"additionalProperties": false,
"required": ["items"]
}
}
`,
placeholder: 'Describe what output format you expect from the agent task...',
generationType: 'json-schema',
},
},
// Shared API key field
{
id: 'apiKey',
title: 'API Key',
type: 'short-input',
placeholder: 'Enter your API key for the selected provider',
password: true,
required: true,
},
],
tools: {
access: ['stagehand_extract'],
access: ['stagehand_extract', 'stagehand_agent'],
config: {
tool: () => 'stagehand_extract',
tool: (params) => {
return params.operation === 'agent' ? 'stagehand_agent' : 'stagehand_extract'
},
},
},
inputs: {
url: { type: 'string', description: 'Website URL to extract' },
instruction: { type: 'string', description: 'Extraction instructions' },
schema: { type: 'json', description: 'JSON schema definition' },
apiKey: { type: 'string', description: 'OpenAI API key' },
operation: { type: 'string', description: 'Operation: extract or agent' },
provider: { type: 'string', description: 'AI provider: openai or anthropic' },
apiKey: { type: 'string', description: 'API key for the selected provider' },
// Extract inputs
url: { type: 'string', description: 'Website URL to extract (extract operation)' },
instruction: { type: 'string', description: 'Extraction instructions (extract operation)' },
schema: { type: 'json', description: 'JSON schema definition (extract operation)' },
// Agent inputs
startUrl: { type: 'string', description: 'Starting URL for agent (agent operation)' },
task: { type: 'string', description: 'Task description (agent operation)' },
variables: { type: 'json', description: 'Task variables (agent operation)' },
outputSchema: { type: 'json', description: 'Output schema (agent operation)' },
},
outputs: {
data: { type: 'json', description: 'Extracted data' },
// Extract outputs
data: { type: 'json', description: 'Extracted data (extract operation)' },
// Agent outputs
agentResult: { type: 'json', description: 'Agent execution result (agent operation)' },
structuredOutput: { type: 'json', description: 'Structured output data (agent operation)' },
},
}

View File

@@ -1,183 +0,0 @@
import { StagehandIcon } from '@/components/icons'
import { AuthMode, type BlockConfig } from '@/blocks/types'
import type { StagehandAgentResponse } from '@/tools/stagehand/types'
/**
 * Block configuration for the `stagehand_agent` block ("Stagehand Agent"),
 * described as an autonomous web browsing agent.
 *
 * The block collects a starting URL, a task description, optional key/value
 * variables, an API key and an optional output JSON schema, and always
 * dispatches to the `stagehand_agent` tool. It exposes two outputs:
 * `agentResult` and `structuredOutput`.
 */
export const StagehandAgentBlock: BlockConfig<StagehandAgentResponse> = {
type: 'stagehand_agent',
name: 'Stagehand Agent',
description: 'Autonomous web browsing agent',
authMode: AuthMode.ApiKey,
longDescription:
'Integrate Stagehand Agent into the workflow. Can navigate the web and perform tasks.',
docsLink: 'https://docs.sim.ai/tools/stagehand_agent',
category: 'tools',
bgColor: '#FFC83C',
icon: StagehandIcon,
// UI fields, listed in the order they are declared here.
subBlocks: [
// Required: URL the agent starts from.
{
id: 'startUrl',
title: 'Starting URL',
type: 'short-input',
placeholder: 'Enter the starting URL for the agent',
required: true,
},
// Required: the goal for the agent; the placeholder tells users to reference variables as %key%.
{
id: 'task',
title: 'Task',
type: 'long-input',
placeholder:
'Enter the task or goal for the agent to achieve. Reference variables using %key% syntax.',
required: true,
},
// Optional two-column (Key / Value) table of variables.
{
id: 'variables',
title: 'Variables',
type: 'table',
columns: ['Key', 'Value'],
},
// Required API key, flagged as a password field.
// NOTE(review): the label says Anthropic — confirm it matches the provider the stagehand_agent tool actually expects.
{
id: 'apiKey',
title: 'Anthropic API Key',
type: 'short-input',
placeholder: 'Enter your Anthropic API key',
password: true,
required: true,
},
// Optional JSON code editor for the expected output schema, with wand (AI generation) support enabled.
{
id: 'outputSchema',
title: 'Output Schema',
type: 'code',
placeholder: 'Enter JSON Schema...',
language: 'json',
wandConfig: {
enabled: true,
maintainHistory: true,
// Generation prompt; contains a `{context}` placeholder after "Current schema:"
// (presumably substituted with the current editor content — TODO confirm).
prompt: `You are an expert programmer specializing in creating JSON schemas for web automation agents.
Generate ONLY the JSON schema based on the user's request.
The output MUST be a single, valid JSON object, starting with { and ending with }.
The JSON object MUST have the following top-level properties: 'name' (string), 'description' (string), 'strict' (boolean, usually true), and 'schema' (object).
The 'schema' object must define the structure and MUST contain 'type': 'object', 'properties': {...}, 'additionalProperties': false, and 'required': [...].
Inside 'properties', use standard JSON Schema properties (type, description, enum, items for arrays, etc.).
Current schema: {context}
Do not include any explanations, markdown formatting, or other text outside the JSON object.
Valid Schema Examples:
Example 1 (Login Result):
{
"name": "login_result",
"description": "Result of a login task performed by the agent",
"strict": true,
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"description": "Whether the login was successful"
},
"username": {
"type": "string",
"description": "The username that was logged in"
},
"dashboardUrl": {
"type": "string",
"description": "The URL of the dashboard after login"
}
},
"additionalProperties": false,
"required": ["success"]
}
}
Example 2 (Form Submission):
{
"name": "form_submission_result",
"description": "Result of submitting a form",
"strict": true,
"schema": {
"type": "object",
"properties": {
"submitted": {
"type": "boolean",
"description": "Whether the form was submitted"
},
"confirmationNumber": {
"type": "string",
"description": "Confirmation or reference number if provided"
},
"errorMessage": {
"type": "string",
"description": "Error message if submission failed"
}
},
"additionalProperties": false,
"required": ["submitted"]
}
}
Example 3 (Data Collection):
{
"name": "collected_data",
"description": "Data collected by the agent from multiple pages",
"strict": true,
"schema": {
"type": "object",
"properties": {
"items": {
"type": "array",
"description": "List of collected items",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Item name"
},
"value": {
"type": "string",
"description": "Item value or content"
},
"sourceUrl": {
"type": "string",
"description": "URL where the item was found"
}
},
"additionalProperties": false,
"required": ["name"]
}
},
"totalCount": {
"type": "number",
"description": "Total number of items collected"
}
},
"additionalProperties": false,
"required": ["items"]
}
}
`,
placeholder: 'Describe what output format you expect from the agent task...',
generationType: 'json-schema',
},
},
],
// This block only has access to, and always selects, the `stagehand_agent` tool.
tools: {
access: ['stagehand_agent'],
config: {
tool: () => 'stagehand_agent',
},
},
// Declared inputs; the keys match the sub-block ids above.
inputs: {
startUrl: { type: 'string', description: 'Starting URL for agent' },
task: { type: 'string', description: 'Task description' },
variables: { type: 'json', description: 'Task variables' },
apiKey: { type: 'string', description: 'Anthropic API key' },
outputSchema: { type: 'json', description: 'Output schema' },
},
// Declared outputs of the block.
outputs: {
agentResult: { type: 'json', description: 'Agent execution result' },
structuredOutput: { type: 'json', description: 'Structured output data' },
},
}

View File

@@ -103,7 +103,6 @@ import { SlackBlock } from '@/blocks/blocks/slack'
import { SmtpBlock } from '@/blocks/blocks/smtp'
import { SSHBlock } from '@/blocks/blocks/ssh'
import { StagehandBlock } from '@/blocks/blocks/stagehand'
import { StagehandAgentBlock } from '@/blocks/blocks/stagehand_agent'
import { StartTriggerBlock } from '@/blocks/blocks/start_trigger'
import { StarterBlock } from '@/blocks/blocks/starter'
import { StripeBlock } from '@/blocks/blocks/stripe'
@@ -246,7 +245,6 @@ export const registry: Record<string, BlockConfig> = {
sftp: SftpBlock,
ssh: SSHBlock,
stagehand: StagehandBlock,
stagehand_agent: StagehandAgentBlock,
starter: StarterBlock,
start_trigger: StartTriggerBlock,
stt: SttBlock,

View File

@@ -80,6 +80,8 @@ const nextConfig: NextConfig = {
'pino-pretty',
'thread-stream',
'@browserbasehq/stagehand',
'@anthropic-ai/sdk',
'openai',
],
experimental: {
optimizeCss: true,

View File

@@ -30,11 +30,17 @@ export const agentTool: ToolConfig<StagehandAgentParams, StagehandAgentResponse>
description:
'Optional variables to substitute in the task (format: {key: value}). Reference in task using %key%',
},
provider: {
type: 'string',
required: false,
visibility: 'user-only',
description: 'AI provider to use: openai or anthropic',
},
apiKey: {
type: 'string',
required: true,
visibility: 'user-only',
description: 'OpenAI API key for agent execution (required by Stagehand)',
description: 'API key for the selected provider',
},
outputSchema: {
type: 'json',
@@ -62,6 +68,7 @@ export const agentTool: ToolConfig<StagehandAgentParams, StagehandAgentResponse>
startUrl: startUrl,
outputSchema: params.outputSchema,
variables: params.variables,
provider: params.provider || 'openai',
apiKey: params.apiKey,
}
},

View File

@@ -23,11 +23,17 @@ export const extractTool: ToolConfig<StagehandExtractParams, StagehandExtractRes
visibility: 'user-or-llm',
description: 'Instructions for extraction',
},
provider: {
type: 'string',
required: false,
visibility: 'user-only',
description: 'AI provider to use: openai or anthropic',
},
apiKey: {
type: 'string',
required: true,
visibility: 'user-only',
description: 'OpenAI API key for extraction (required by Stagehand)',
description: 'API key for the selected provider',
},
schema: {
type: 'json',
@@ -46,6 +52,7 @@ export const extractTool: ToolConfig<StagehandExtractParams, StagehandExtractRes
body: (params) => ({
instruction: params.instruction,
schema: params.schema,
provider: params.provider || 'openai',
apiKey: params.apiKey,
url: params.url,
}),

View File

@@ -4,6 +4,7 @@ import type { ToolResponse } from '@/tools/types'
/**
 * Parameters accepted by the Stagehand extract tool.
 */
export interface StagehandExtractParams {
// Instructions describing what to extract.
instruction: string
// JSON schema definition for the extracted data.
schema: Record<string, any>
// AI provider to use; optional — the extract tool's request body falls back to 'openai' when omitted.
provider?: 'openai' | 'anthropic'
// API key for the selected provider.
apiKey: string
// Website URL to extract from.
url: string
}
@@ -17,6 +18,7 @@ export interface StagehandAgentParams {
startUrl: string
outputSchema?: Record<string, any>
variables?: Record<string, string>
provider?: 'openai' | 'anthropic'
apiKey: string
options?: {
useTextExtract?: boolean