mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
v0.6.45: superagent, csp, brightdata integration, gemini response format, logs performance improvements
fix(csp): add missing analytics domains, remove unsafe-eval, fix workspace CSP gap (#4179) fix(landing): return 404 for invalid dynamic route slugs (#4182) improvement(seo): optimize sitemaps, robots.txt, and core web vitals across sim and docs (#4170) fix(gemini): support structured output with tools on Gemini 3 models (#4184) feat(brightdata): add Bright Data integration with 8 tools (#4183) fix(mothership): fix superagent credentials (#4185) fix(logs): close sidebar when selected log disappears from filtered list; cleanup (#4186)
This commit is contained in:
@@ -17,9 +17,10 @@ import { ResponseSection } from '@/components/ui/response-section'
|
||||
import { i18n } from '@/lib/i18n'
|
||||
import { getApiSpecContent, openapi } from '@/lib/openapi'
|
||||
import { type PageData, source } from '@/lib/source'
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
const SUPPORTED_LANGUAGES: Set<string> = new Set(i18n.languages)
|
||||
const BASE_URL = 'https://docs.sim.ai'
|
||||
const BASE_URL = DOCS_BASE_URL
|
||||
|
||||
const OG_LOCALE_MAP: Record<string, string> = {
|
||||
en: 'en_US',
|
||||
|
||||
@@ -3,7 +3,6 @@ import { defineI18nUI } from 'fumadocs-ui/i18n'
|
||||
import { DocsLayout } from 'fumadocs-ui/layouts/docs'
|
||||
import { RootProvider } from 'fumadocs-ui/provider/next'
|
||||
import { Geist_Mono, Inter } from 'next/font/google'
|
||||
import Script from 'next/script'
|
||||
import {
|
||||
SidebarFolder,
|
||||
SidebarItem,
|
||||
@@ -13,6 +12,7 @@ import { Navbar } from '@/components/navbar/navbar'
|
||||
import { SimLogoFull } from '@/components/ui/sim-logo'
|
||||
import { i18n } from '@/lib/i18n'
|
||||
import { source } from '@/lib/source'
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
import '../global.css'
|
||||
|
||||
const inter = Inter({
|
||||
@@ -67,14 +67,14 @@ export default async function Layout({ children, params }: LayoutProps) {
|
||||
name: 'Sim Documentation',
|
||||
description:
|
||||
'Documentation for Sim — the open-source AI workspace where teams build, deploy, and manage AI agents. Connect 1,000+ integrations and every major LLM.',
|
||||
url: 'https://docs.sim.ai',
|
||||
url: DOCS_BASE_URL,
|
||||
publisher: {
|
||||
'@type': 'Organization',
|
||||
name: 'Sim',
|
||||
url: 'https://sim.ai',
|
||||
logo: {
|
||||
'@type': 'ImageObject',
|
||||
url: 'https://docs.sim.ai/static/logo.png',
|
||||
url: `${DOCS_BASE_URL}/static/logo.png`,
|
||||
},
|
||||
},
|
||||
inLanguage: lang,
|
||||
@@ -82,7 +82,7 @@ export default async function Layout({ children, params }: LayoutProps) {
|
||||
'@type': 'SearchAction',
|
||||
target: {
|
||||
'@type': 'EntryPoint',
|
||||
urlTemplate: 'https://docs.sim.ai/api/search?q={search_term_string}',
|
||||
urlTemplate: `${DOCS_BASE_URL}/api/search?q={search_term_string}`,
|
||||
},
|
||||
'query-input': 'required name=search_term_string',
|
||||
},
|
||||
@@ -101,7 +101,6 @@ export default async function Layout({ children, params }: LayoutProps) {
|
||||
/>
|
||||
</head>
|
||||
<body className='flex min-h-screen flex-col font-sans'>
|
||||
<Script src='https://assets.onedollarstats.com/stonks.js' strategy='lazyOnload' />
|
||||
<RootProvider i18n={provider(lang)}>
|
||||
<Navbar />
|
||||
<DocsLayout
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { ReactNode } from 'react'
|
||||
import type { Viewport } from 'next'
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
export default function RootLayout({ children }: { children: ReactNode }) {
|
||||
return children
|
||||
@@ -12,7 +13,7 @@ export const viewport: Viewport = {
|
||||
}
|
||||
|
||||
export const metadata = {
|
||||
metadataBase: new URL('https://docs.sim.ai'),
|
||||
metadataBase: new URL(DOCS_BASE_URL),
|
||||
title: {
|
||||
default: 'Sim Documentation — The AI Workspace for Teams',
|
||||
template: '%s | Sim Docs',
|
||||
@@ -61,14 +62,14 @@ export const metadata = {
|
||||
type: 'website',
|
||||
locale: 'en_US',
|
||||
alternateLocale: ['es_ES', 'fr_FR', 'de_DE', 'ja_JP', 'zh_CN'],
|
||||
url: 'https://docs.sim.ai',
|
||||
url: DOCS_BASE_URL,
|
||||
siteName: 'Sim Documentation',
|
||||
title: 'Sim Documentation — The AI Workspace for Teams',
|
||||
description:
|
||||
'Documentation for Sim — the open-source AI workspace where teams build, deploy, and manage AI agents. Connect 1,000+ integrations and every major LLM.',
|
||||
images: [
|
||||
{
|
||||
url: 'https://docs.sim.ai/api/og?title=Sim%20Documentation',
|
||||
url: `${DOCS_BASE_URL}/api/og?title=Sim%20Documentation`,
|
||||
width: 1200,
|
||||
height: 630,
|
||||
alt: 'Sim Documentation',
|
||||
@@ -82,7 +83,7 @@ export const metadata = {
|
||||
'Documentation for Sim — the open-source AI workspace where teams build, deploy, and manage AI agents. Connect 1,000+ integrations and every major LLM.',
|
||||
creator: '@simdotai',
|
||||
site: '@simdotai',
|
||||
images: ['https://docs.sim.ai/api/og?title=Sim%20Documentation'],
|
||||
images: [`${DOCS_BASE_URL}/api/og?title=Sim%20Documentation`],
|
||||
},
|
||||
robots: {
|
||||
index: true,
|
||||
@@ -96,15 +97,15 @@ export const metadata = {
|
||||
},
|
||||
},
|
||||
alternates: {
|
||||
canonical: 'https://docs.sim.ai',
|
||||
canonical: DOCS_BASE_URL,
|
||||
languages: {
|
||||
'x-default': 'https://docs.sim.ai',
|
||||
en: 'https://docs.sim.ai',
|
||||
es: 'https://docs.sim.ai/es',
|
||||
fr: 'https://docs.sim.ai/fr',
|
||||
de: 'https://docs.sim.ai/de',
|
||||
ja: 'https://docs.sim.ai/ja',
|
||||
zh: 'https://docs.sim.ai/zh',
|
||||
'x-default': DOCS_BASE_URL,
|
||||
en: DOCS_BASE_URL,
|
||||
es: `${DOCS_BASE_URL}/es`,
|
||||
fr: `${DOCS_BASE_URL}/fr`,
|
||||
de: `${DOCS_BASE_URL}/de`,
|
||||
ja: `${DOCS_BASE_URL}/ja`,
|
||||
zh: `${DOCS_BASE_URL}/zh`,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { source } from '@/lib/source'
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
export const revalidate = false
|
||||
|
||||
export async function GET() {
|
||||
const baseUrl = 'https://docs.sim.ai'
|
||||
const baseUrl = DOCS_BASE_URL
|
||||
|
||||
try {
|
||||
const pages = source.getPages().filter((page) => {
|
||||
|
||||
@@ -1,70 +1,18 @@
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
export const revalidate = false
|
||||
|
||||
export async function GET() {
|
||||
const baseUrl = 'https://docs.sim.ai'
|
||||
const baseUrl = DOCS_BASE_URL
|
||||
|
||||
const robotsTxt = `# Robots.txt for Sim Documentation
|
||||
|
||||
User-agent: *
|
||||
Allow: /
|
||||
|
||||
# Search engine crawlers
|
||||
User-agent: Googlebot
|
||||
Allow: /
|
||||
|
||||
User-agent: Bingbot
|
||||
Allow: /
|
||||
|
||||
User-agent: Slurp
|
||||
Allow: /
|
||||
|
||||
User-agent: DuckDuckBot
|
||||
Allow: /
|
||||
|
||||
User-agent: Baiduspider
|
||||
Allow: /
|
||||
|
||||
User-agent: YandexBot
|
||||
Allow: /
|
||||
|
||||
# AI and LLM crawlers - explicitly allowed for documentation indexing
|
||||
User-agent: GPTBot
|
||||
Allow: /
|
||||
|
||||
User-agent: ChatGPT-User
|
||||
Allow: /
|
||||
|
||||
User-agent: CCBot
|
||||
Allow: /
|
||||
|
||||
User-agent: anthropic-ai
|
||||
Allow: /
|
||||
|
||||
User-agent: Claude-Web
|
||||
Allow: /
|
||||
|
||||
User-agent: Applebot
|
||||
Allow: /
|
||||
|
||||
User-agent: PerplexityBot
|
||||
Allow: /
|
||||
|
||||
User-agent: Diffbot
|
||||
Allow: /
|
||||
|
||||
User-agent: FacebookBot
|
||||
Allow: /
|
||||
|
||||
User-agent: cohere-ai
|
||||
Allow: /
|
||||
|
||||
# Disallow admin and internal paths (if any exist)
|
||||
Disallow: /.next/
|
||||
Disallow: /api/internal/
|
||||
Disallow: /_next/static/
|
||||
Disallow: /admin/
|
||||
|
||||
# Allow but don't prioritize these
|
||||
Allow: /
|
||||
Allow: /api/search
|
||||
Allow: /llms.txt
|
||||
Allow: /llms-full.txt
|
||||
@@ -73,23 +21,12 @@ Allow: /llms.mdx/
|
||||
# Sitemaps
|
||||
Sitemap: ${baseUrl}/sitemap.xml
|
||||
|
||||
# Crawl delay for aggressive bots (optional)
|
||||
# Crawl-delay: 1
|
||||
|
||||
# Additional resources for AI indexing
|
||||
# See https://github.com/AnswerDotAI/llms-txt for more info
|
||||
# LLM-friendly content:
|
||||
# Manifest: ${baseUrl}/llms.txt
|
||||
# Full content: ${baseUrl}/llms-full.txt
|
||||
# Individual pages: ${baseUrl}/llms.mdx/[page-path]
|
||||
|
||||
# Multi-language documentation available at:
|
||||
# ${baseUrl}/en - English
|
||||
# ${baseUrl}/es - Español
|
||||
# ${baseUrl}/fr - Français
|
||||
# ${baseUrl}/de - Deutsch
|
||||
# ${baseUrl}/ja - 日本語
|
||||
# ${baseUrl}/zh - 简体中文`
|
||||
# Individual pages: ${baseUrl}/llms.mdx/[page-path]`
|
||||
|
||||
return new Response(robotsTxt, {
|
||||
headers: {
|
||||
|
||||
42
apps/docs/app/sitemap.ts
Normal file
42
apps/docs/app/sitemap.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import type { MetadataRoute } from 'next'
|
||||
import { i18n } from '@/lib/i18n'
|
||||
import { source } from '@/lib/source'
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
export const revalidate = 3600
|
||||
|
||||
export default function sitemap(): MetadataRoute.Sitemap {
|
||||
const baseUrl = DOCS_BASE_URL
|
||||
const languages = source.getLanguages()
|
||||
|
||||
const pagesBySlug = new Map<string, Map<string, string>>()
|
||||
for (const { language, pages } of languages) {
|
||||
for (const page of pages) {
|
||||
const key = page.slugs.join('/')
|
||||
if (!pagesBySlug.has(key)) {
|
||||
pagesBySlug.set(key, new Map())
|
||||
}
|
||||
pagesBySlug.get(key)!.set(language, `${baseUrl}${page.url}`)
|
||||
}
|
||||
}
|
||||
|
||||
const entries: MetadataRoute.Sitemap = []
|
||||
for (const [, localeMap] of pagesBySlug) {
|
||||
const defaultUrl = localeMap.get(i18n.defaultLanguage)
|
||||
if (!defaultUrl) continue
|
||||
|
||||
const langAlternates: Record<string, string> = {}
|
||||
for (const [lang, url] of localeMap) {
|
||||
langAlternates[lang] = url
|
||||
}
|
||||
|
||||
langAlternates['x-default'] = defaultUrl
|
||||
|
||||
entries.push({
|
||||
url: defaultUrl,
|
||||
alternates: { languages: langAlternates },
|
||||
})
|
||||
}
|
||||
|
||||
return entries
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
import { i18n } from '@/lib/i18n'
|
||||
import { source } from '@/lib/source'
|
||||
|
||||
export const revalidate = 3600
|
||||
|
||||
export async function GET() {
|
||||
const baseUrl = 'https://docs.sim.ai'
|
||||
|
||||
const allPages = source.getPages()
|
||||
|
||||
const getPriority = (url: string): string => {
|
||||
if (url === '/introduction' || url === '/') return '1.0'
|
||||
if (url === '/getting-started') return '0.9'
|
||||
if (url.match(/^\/[^/]+$/)) return '0.8'
|
||||
if (url.includes('/sdks/') || url.includes('/tools/')) return '0.7'
|
||||
return '0.6'
|
||||
}
|
||||
|
||||
const urls = allPages
|
||||
.flatMap((page) => {
|
||||
const urlWithoutLang = page.url.replace(/^\/[a-z]{2}\//, '/')
|
||||
|
||||
return i18n.languages.map((lang) => {
|
||||
const url =
|
||||
lang === i18n.defaultLanguage
|
||||
? `${baseUrl}${urlWithoutLang}`
|
||||
: `${baseUrl}/${lang}${urlWithoutLang}`
|
||||
|
||||
return ` <url>
|
||||
<loc>${url}</loc>
|
||||
<priority>${getPriority(urlWithoutLang)}</priority>
|
||||
${i18n.languages.length > 1 ? generateAlternateLinks(baseUrl, urlWithoutLang) : ''}
|
||||
</url>`
|
||||
})
|
||||
})
|
||||
.join('\n')
|
||||
|
||||
const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
${urls}
|
||||
</urlset>`
|
||||
|
||||
return new Response(sitemap, {
|
||||
headers: {
|
||||
'Content-Type': 'application/xml',
|
||||
'Cache-Control': 'public, max-age=3600, s-maxage=3600',
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
function generateAlternateLinks(baseUrl: string, urlWithoutLang: string): string {
|
||||
const langLinks = i18n.languages
|
||||
.map((lang) => {
|
||||
const url =
|
||||
lang === i18n.defaultLanguage
|
||||
? `${baseUrl}${urlWithoutLang}`
|
||||
: `${baseUrl}/${lang}${urlWithoutLang}`
|
||||
return ` <xhtml:link rel="alternate" hreflang="${lang}" href="${url}" />`
|
||||
})
|
||||
.join('\n')
|
||||
return `${langLinks}\n <xhtml:link rel="alternate" hreflang="x-default" href="${baseUrl}${urlWithoutLang}" />`
|
||||
}
|
||||
@@ -2087,6 +2087,21 @@ export function BrandfetchIcon(props: SVGProps<SVGSVGElement>) {
|
||||
)
|
||||
}
|
||||
|
||||
export function BrightDataIcon(props: SVGProps<SVGSVGElement>) {
|
||||
return (
|
||||
<svg {...props} viewBox='54 93 22 52' fill='none' xmlns='http://www.w3.org/2000/svg'>
|
||||
<path
|
||||
d='M62 95.21c.19 2.16 1.85 3.24 2.82 4.74.25.38.48.11.67-.16.21-.31.6-1.21 1.15-1.28-.35 1.38-.04 3.15.16 4.45.49 3.05-1.22 5.64-4.07 6.18-3.38.65-6.22-2.21-5.6-5.62.23-1.24 1.37-2.5.77-3.7-.85-1.7.54-.52.79-.22 1.04 1.2 1.21.09 1.45-.55.24-.63.31-1.31.47-1.97.19-.77.55-1.4 1.39-1.87z'
|
||||
fill='currentColor'
|
||||
/>
|
||||
<path
|
||||
d='M66.70 123.37c0 3.69.04 7.38-.03 11.07-.02 1.04.31 1.48 1.32 1.49.29 0 .59.12.88.13.93.01 1.18.47 1.16 1.37-.05 2.19 0 2.19-2.24 2.19-3.48 0-6.96-.04-10.44.03-1.09.02-1.47-.33-1.3-1.36.02-.12.02-.26 0-.38-.28-1.39.39-1.96 1.7-1.9 1.36.06 1.76-.51 1.74-1.88-.09-5.17-.08-10.35 0-15.53.02-1.22-.32-1.87-1.52-2.17-.57-.14-1.47-.11-1.57-.85-.15-1.04-.05-2.11.01-3.17.02-.34.44-.35.73-.39 2.81-.39 5.63-.77 8.44-1.18.92-.14 1.15.2 1.14 1.09-.04 3.8-.02 7.62-.02 11.44z'
|
||||
fill='currentColor'
|
||||
/>
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
|
||||
export function BrowserUseIcon(props: SVGProps<SVGSVGElement>) {
|
||||
return (
|
||||
<svg
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { DOCS_BASE_URL } from '@/lib/urls'
|
||||
|
||||
interface StructuredDataProps {
|
||||
title: string
|
||||
description: string
|
||||
@@ -15,7 +17,7 @@ export function StructuredData({
|
||||
dateModified,
|
||||
breadcrumb,
|
||||
}: StructuredDataProps) {
|
||||
const baseUrl = 'https://docs.sim.ai'
|
||||
const baseUrl = DOCS_BASE_URL
|
||||
|
||||
const articleStructuredData = {
|
||||
'@context': 'https://schema.org',
|
||||
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
BoxCompanyIcon,
|
||||
BrainIcon,
|
||||
BrandfetchIcon,
|
||||
BrightDataIcon,
|
||||
BrowserUseIcon,
|
||||
CalComIcon,
|
||||
CalendlyIcon,
|
||||
@@ -215,6 +216,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
|
||||
attio: AttioIcon,
|
||||
box: BoxCompanyIcon,
|
||||
brandfetch: BrandfetchIcon,
|
||||
brightdata: BrightDataIcon,
|
||||
browser_use: BrowserUseIcon,
|
||||
calcom: CalComIcon,
|
||||
calendly: CalendlyIcon,
|
||||
|
||||
201
apps/docs/content/docs/en/tools/brightdata.mdx
Normal file
201
apps/docs/content/docs/en/tools/brightdata.mdx
Normal file
@@ -0,0 +1,201 @@
|
||||
---
|
||||
title: Bright Data
|
||||
description: Scrape websites, search engines, and extract structured data
|
||||
---
|
||||
|
||||
import { BlockInfoCard } from "@/components/ui/block-info-card"
|
||||
|
||||
<BlockInfoCard
|
||||
type="brightdata"
|
||||
color="#FFFFFF"
|
||||
/>
|
||||
|
||||
## Usage Instructions
|
||||
|
||||
Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.
|
||||
|
||||
|
||||
|
||||
## Tools
|
||||
|
||||
### `brightdata_scrape_url`
|
||||
|
||||
Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `zone` | string | Yes | Web Unlocker zone name from your Bright Data dashboard \(e.g., "web_unlocker1"\) |
|
||||
| `url` | string | Yes | The URL to scrape \(e.g., "https://example.com/page"\) |
|
||||
| `format` | string | No | Response format: "raw" for HTML or "json" for parsed content. Defaults to "raw" |
|
||||
| `country` | string | No | Two-letter country code for geo-targeting \(e.g., "us", "gb"\) |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `content` | string | The scraped page content \(HTML or JSON depending on format\) |
|
||||
| `url` | string | The URL that was scraped |
|
||||
| `statusCode` | number | HTTP status code of the response |
|
||||
|
||||
### `brightdata_serp_search`
|
||||
|
||||
Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `zone` | string | Yes | SERP API zone name from your Bright Data dashboard \(e.g., "serp_api1"\) |
|
||||
| `query` | string | Yes | The search query \(e.g., "best project management tools"\) |
|
||||
| `searchEngine` | string | No | Search engine to use: "google", "bing", "duckduckgo", or "yandex". Defaults to "google" |
|
||||
| `country` | string | No | Two-letter country code for localized results \(e.g., "us", "gb"\) |
|
||||
| `language` | string | No | Two-letter language code \(e.g., "en", "es"\) |
|
||||
| `numResults` | number | No | Number of results to return \(e.g., 10, 20\). Defaults to 10 |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `results` | array | Array of search results |
|
||||
| ↳ `title` | string | Title of the search result |
|
||||
| ↳ `url` | string | URL of the search result |
|
||||
| ↳ `description` | string | Snippet or description of the result |
|
||||
| ↳ `rank` | number | Position in search results |
|
||||
| `query` | string | The search query that was executed |
|
||||
| `searchEngine` | string | The search engine that was used |
|
||||
|
||||
### `brightdata_discover`
|
||||
|
||||
AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `query` | string | Yes | The search query \(e.g., "competitor pricing changes enterprise plan"\) |
|
||||
| `numResults` | number | No | Number of results to return, up to 1000. Defaults to 10 |
|
||||
| `intent` | string | No | Describes what the agent is trying to accomplish, used to rank results by relevance \(e.g., "find official pricing pages and change notes"\) |
|
||||
| `includeContent` | boolean | No | Whether to include cleaned page content in results |
|
||||
| `format` | string | No | Response format: "json" or "markdown". Defaults to "json" |
|
||||
| `language` | string | No | Search language code \(e.g., "en", "es", "fr"\). Defaults to "en" |
|
||||
| `country` | string | No | Two-letter ISO country code for localized results \(e.g., "us", "gb"\) |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `results` | array | Array of discovered web results ranked by intent relevance |
|
||||
| ↳ `url` | string | URL of the discovered page |
|
||||
| ↳ `title` | string | Page title |
|
||||
| ↳ `description` | string | Page description or snippet |
|
||||
| ↳ `relevanceScore` | number | AI-calculated relevance score for intent-based ranking |
|
||||
| ↳ `content` | string | Cleaned page content in the requested format \(when includeContent is true\) |
|
||||
| `query` | string | The search query that was executed |
|
||||
| `totalResults` | number | Total number of results returned |
|
||||
|
||||
### `brightdata_sync_scrape`
|
||||
|
||||
Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) |
|
||||
| `urls` | string | Yes | JSON array of URL objects to scrape, up to 20 \(e.g., \[\{"url": "https://example.com/product"\}\]\) |
|
||||
| `format` | string | No | Output format: "json", "ndjson", or "csv". Defaults to "json" |
|
||||
| `includeErrors` | boolean | No | Whether to include error reports in results |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `data` | array | Array of scraped result objects with fields specific to the dataset scraper used |
|
||||
| `snapshotId` | string | Snapshot ID returned if the request exceeded the 1-minute timeout and switched to async processing |
|
||||
| `isAsync` | boolean | Whether the request fell back to async mode \(true means use snapshot ID to retrieve results\) |
|
||||
|
||||
### `brightdata_scrape_dataset`
|
||||
|
||||
Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) |
|
||||
| `urls` | string | Yes | JSON array of URL objects to scrape \(e.g., \[\{"url": "https://example.com/product"\}\]\) |
|
||||
| `format` | string | No | Output format: "json" or "csv". Defaults to "json" |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `snapshotId` | string | The snapshot ID to retrieve results later |
|
||||
| `status` | string | Status of the scraping job \(e.g., "triggered", "running"\) |
|
||||
|
||||
### `brightdata_snapshot_status`
|
||||
|
||||
Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `snapshotId` | string | The snapshot ID that was queried |
|
||||
| `datasetId` | string | The dataset ID associated with this snapshot |
|
||||
| `status` | string | Current status of the snapshot: "starting", "running", "ready", or "failed" |
|
||||
|
||||
### `brightdata_download_snapshot`
|
||||
|
||||
Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) |
|
||||
| `format` | string | No | Output format: "json", "ndjson", "jsonl", or "csv". Defaults to "json" |
|
||||
| `compress` | boolean | No | Whether to compress the results |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `data` | array | Array of scraped result records |
|
||||
| `format` | string | The content type of the downloaded data |
|
||||
| `snapshotId` | string | The snapshot ID that was downloaded |
|
||||
|
||||
### `brightdata_cancel_snapshot`
|
||||
|
||||
Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress.
|
||||
|
||||
#### Input
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
| --------- | ---- | -------- | ----------- |
|
||||
| `apiKey` | string | Yes | Bright Data API token |
|
||||
| `snapshotId` | string | Yes | The snapshot ID of the collection to cancel \(e.g., "s_m4x7enmven8djfqak"\) |
|
||||
|
||||
#### Output
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `snapshotId` | string | The snapshot ID that was cancelled |
|
||||
| `cancelled` | boolean | Whether the cancellation was successful |
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
"attio",
|
||||
"box",
|
||||
"brandfetch",
|
||||
"brightdata",
|
||||
"browser_use",
|
||||
"calcom",
|
||||
"calendly",
|
||||
|
||||
1
apps/docs/lib/urls.ts
Normal file
1
apps/docs/lib/urls.ts
Normal file
@@ -0,0 +1 @@
|
||||
export const DOCS_BASE_URL = process.env.NEXT_PUBLIC_DOCS_URL ?? 'https://docs.sim.ai'
|
||||
@@ -9,6 +9,8 @@ import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { BackLink } from '@/app/(landing)/blog/[slug]/back-link'
|
||||
import { ShareButton } from '@/app/(landing)/blog/[slug]/share-button'
|
||||
|
||||
export const dynamicParams = false
|
||||
|
||||
export async function generateStaticParams() {
|
||||
const posts = await getAllPostMeta()
|
||||
return posts.map((p) => ({ slug: p.slug }))
|
||||
|
||||
@@ -20,6 +20,8 @@ const baseUrl = SITE_URL
|
||||
const bySlug = new Map(allIntegrations.map((i) => [i.slug, i]))
|
||||
const byType = new Map(allIntegrations.map((i) => [i.type, i]))
|
||||
|
||||
export const dynamicParams = false
|
||||
|
||||
/**
|
||||
* Returns up to `limit` related integration slugs.
|
||||
*
|
||||
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
BoxCompanyIcon,
|
||||
BrainIcon,
|
||||
BrandfetchIcon,
|
||||
BrightDataIcon,
|
||||
BrowserUseIcon,
|
||||
CalComIcon,
|
||||
CalendlyIcon,
|
||||
@@ -215,6 +216,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
|
||||
attio: AttioIcon,
|
||||
box: BoxCompanyIcon,
|
||||
brandfetch: BrandfetchIcon,
|
||||
brightdata: BrightDataIcon,
|
||||
browser_use: BrowserUseIcon,
|
||||
calcom: CalComIcon,
|
||||
calendly: CalendlyIcon,
|
||||
|
||||
@@ -214,7 +214,7 @@
|
||||
"name": "Agiloft",
|
||||
"description": "Manage records in Agiloft CLM",
|
||||
"longDescription": "Integrate with Agiloft contract lifecycle management to create, read, update, delete, and search records. Supports file attachments, SQL-based selection, saved searches, and record locking across any table in your knowledge base.",
|
||||
"bgColor": "#263A5C",
|
||||
"bgColor": "#FFFFFF",
|
||||
"iconName": "AgiloftIcon",
|
||||
"docsUrl": "https://docs.sim.ai/tools/agiloft",
|
||||
"operations": [
|
||||
@@ -1743,6 +1743,57 @@
|
||||
"integrationTypes": ["sales", "analytics"],
|
||||
"tags": ["enrichment", "marketing"]
|
||||
},
|
||||
{
|
||||
"type": "brightdata",
|
||||
"slug": "bright-data",
|
||||
"name": "Bright Data",
|
||||
"description": "Scrape websites, search engines, and extract structured data",
|
||||
"longDescription": "Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.",
|
||||
"bgColor": "#FFFFFF",
|
||||
"iconName": "BrightDataIcon",
|
||||
"docsUrl": "https://docs.sim.ai/tools/brightdata",
|
||||
"operations": [
|
||||
{
|
||||
"name": "Scrape URL",
|
||||
"description": "Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically."
|
||||
},
|
||||
{
|
||||
"name": "SERP Search",
|
||||
"description": "Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API."
|
||||
},
|
||||
{
|
||||
"name": "Discover",
|
||||
"description": "AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification."
|
||||
},
|
||||
{
|
||||
"name": "Sync Scrape",
|
||||
"description": "Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout."
|
||||
},
|
||||
{
|
||||
"name": "Scrape Dataset",
|
||||
"description": "Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more."
|
||||
},
|
||||
{
|
||||
"name": "Snapshot Status",
|
||||
"description": "Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed."
|
||||
},
|
||||
{
|
||||
"name": "Download Snapshot",
|
||||
"description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status."
|
||||
},
|
||||
{
|
||||
"name": "Cancel Snapshot",
|
||||
"description": "Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress."
|
||||
}
|
||||
],
|
||||
"operationCount": 8,
|
||||
"triggers": [],
|
||||
"triggerCount": 0,
|
||||
"authType": "api-key",
|
||||
"category": "tools",
|
||||
"integrationTypes": ["search", "developer-tools"],
|
||||
"tags": ["web-scraping", "automation"]
|
||||
},
|
||||
{
|
||||
"type": "browser_use",
|
||||
"slug": "browser-use",
|
||||
|
||||
@@ -20,6 +20,8 @@ import {
|
||||
|
||||
const baseUrl = SITE_URL
|
||||
|
||||
export const dynamicParams = false
|
||||
|
||||
export async function generateStaticParams() {
|
||||
return ALL_CATALOG_MODELS.map((model) => ({
|
||||
provider: model.providerSlug,
|
||||
|
||||
@@ -22,6 +22,8 @@ import {
|
||||
|
||||
const baseUrl = SITE_URL
|
||||
|
||||
export const dynamicParams = false
|
||||
|
||||
export async function generateStaticParams() {
|
||||
return MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => ({
|
||||
provider: provider.slug,
|
||||
|
||||
@@ -5,7 +5,6 @@ import { BrandedLayout } from '@/components/branded-layout'
|
||||
import { PostHogProvider } from '@/app/_shell/providers/posthog-provider'
|
||||
import { generateBrandedMetadata, generateThemeCSS } from '@/ee/whitelabeling'
|
||||
import '@/app/_styles/globals.css'
|
||||
import { OneDollarStats } from '@/components/analytics/onedollarstats'
|
||||
import { isHosted, isReactGrabEnabled, isReactScanEnabled } from '@/lib/core/config/feature-flags'
|
||||
import { HydrationErrorHandler } from '@/app/_shell/hydration-error-handler'
|
||||
import { QueryProvider } from '@/app/_shell/providers/query-provider'
|
||||
@@ -207,10 +206,6 @@ export default function RootLayout({ children }: { children: React.ReactNode })
|
||||
<meta name='format-detection' content='telephone=no' />
|
||||
<meta httpEquiv='x-ua-compatible' content='ie=edge' />
|
||||
|
||||
{/* OneDollarStats Analytics */}
|
||||
<link rel='dns-prefetch' href='https://assets.onedollarstats.com' />
|
||||
<script defer src='https://assets.onedollarstats.com/stonks.js' />
|
||||
|
||||
{/* Google Tag Manager — hosted only */}
|
||||
{isHosted && (
|
||||
<Script
|
||||
@@ -260,7 +255,6 @@ j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
|
||||
</noscript>
|
||||
)}
|
||||
<HydrationErrorHandler />
|
||||
<OneDollarStats />
|
||||
<PostHogProvider>
|
||||
<ThemeProvider>
|
||||
<QueryProvider>
|
||||
|
||||
@@ -4,133 +4,27 @@ import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
export default function robots(): MetadataRoute.Robots {
|
||||
const baseUrl = getBaseUrl()
|
||||
|
||||
const disallowedPaths = [
|
||||
'/api/',
|
||||
'/workspace/',
|
||||
'/chat/',
|
||||
'/playground/',
|
||||
'/resume/',
|
||||
'/invite/',
|
||||
'/unsubscribe/',
|
||||
'/w/',
|
||||
'/_next/',
|
||||
'/private/',
|
||||
]
|
||||
|
||||
return {
|
||||
rules: [
|
||||
{
|
||||
userAgent: '*',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Googlebot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Bingbot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'YandexBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Baiduspider',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'GPTBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'ChatGPT-User',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'OAI-SearchBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'ClaudeBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Claude-SearchBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Google-Extended',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'PerplexityBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Meta-ExternalAgent',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'FacebookBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Applebot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Applebot-Extended',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Amazonbot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Bytespider',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'CCBot',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'cohere-ai',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'Grok-web-crawl',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
},
|
||||
{
|
||||
userAgent: 'DeepSeek-AI',
|
||||
allow: '/',
|
||||
disallow: disallowedPaths,
|
||||
disallow: [
|
||||
'/api/',
|
||||
'/workspace/',
|
||||
'/chat/',
|
||||
'/playground/',
|
||||
'/resume/',
|
||||
'/invite/',
|
||||
'/unsubscribe/',
|
||||
'/w/',
|
||||
'/form/',
|
||||
'/credential-account/',
|
||||
'/_next/',
|
||||
'/private/',
|
||||
],
|
||||
},
|
||||
],
|
||||
sitemap: `${baseUrl}/sitemap.xml`,
|
||||
host: baseUrl,
|
||||
sitemap: [`${baseUrl}/sitemap.xml`, `${baseUrl}/blog/sitemap-images.xml`],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { MetadataRoute } from 'next'
|
||||
import { COURSES } from '@/lib/academy/content'
|
||||
import { getAllPostMeta } from '@/lib/blog/registry'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import integrations from '@/app/(landing)/integrations/data/integrations.json'
|
||||
@@ -6,69 +7,44 @@ import { ALL_CATALOG_MODELS, MODEL_PROVIDERS_WITH_CATALOGS } from '@/app/(landin
|
||||
|
||||
export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
|
||||
const baseUrl = getBaseUrl()
|
||||
const posts = await getAllPostMeta()
|
||||
|
||||
const now = new Date()
|
||||
const integrationPages: MetadataRoute.Sitemap = integrations.map((integration) => ({
|
||||
url: `${baseUrl}/integrations/${integration.slug}`,
|
||||
lastModified: now,
|
||||
changeFrequency: 'monthly',
|
||||
priority: 0.6,
|
||||
}))
|
||||
const modelHubPages: MetadataRoute.Sitemap = [
|
||||
{
|
||||
url: `${baseUrl}/integrations`,
|
||||
lastModified: now,
|
||||
changeFrequency: 'weekly',
|
||||
priority: 0.8,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/models`,
|
||||
lastModified: now,
|
||||
changeFrequency: 'weekly',
|
||||
priority: 0.8,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/partners`,
|
||||
lastModified: now,
|
||||
changeFrequency: 'monthly',
|
||||
priority: 0.5,
|
||||
},
|
||||
]
|
||||
const providerPages: MetadataRoute.Sitemap = MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => ({
|
||||
url: `${baseUrl}${provider.href}`,
|
||||
lastModified: new Date(
|
||||
Math.max(...provider.models.map((model) => new Date(model.pricing.updatedAt).getTime()))
|
||||
),
|
||||
changeFrequency: 'weekly',
|
||||
priority: 0.7,
|
||||
}))
|
||||
const modelPages: MetadataRoute.Sitemap = ALL_CATALOG_MODELS.map((model) => ({
|
||||
url: `${baseUrl}${model.href}`,
|
||||
lastModified: new Date(model.pricing.updatedAt),
|
||||
changeFrequency: 'monthly',
|
||||
priority: 0.6,
|
||||
}))
|
||||
const latestPostDate =
|
||||
posts.length > 0
|
||||
? new Date(Math.max(...posts.map((p) => new Date(p.updated ?? p.date).getTime())))
|
||||
: undefined
|
||||
|
||||
const modelTimes = MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) =>
|
||||
provider.models.map((model) => new Date(model.pricing.updatedAt).getTime())
|
||||
)
|
||||
const latestModelDate = modelTimes.length > 0 ? new Date(Math.max(...modelTimes)) : undefined
|
||||
|
||||
const staticPages: MetadataRoute.Sitemap = [
|
||||
{
|
||||
url: baseUrl,
|
||||
lastModified: now,
|
||||
changeFrequency: 'daily',
|
||||
priority: 1.0,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/blog`,
|
||||
lastModified: now,
|
||||
changeFrequency: 'daily',
|
||||
priority: 0.8,
|
||||
lastModified: latestPostDate,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/blog/tags`,
|
||||
lastModified: now,
|
||||
lastModified: latestPostDate,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/changelog`,
|
||||
lastModified: now,
|
||||
lastModified: latestPostDate,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/integrations`,
|
||||
lastModified: latestModelDate,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/models`,
|
||||
lastModified: latestModelDate,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/partners`,
|
||||
},
|
||||
{
|
||||
url: `${baseUrl}/terms`,
|
||||
@@ -80,20 +56,61 @@ export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
|
||||
},
|
||||
]
|
||||
|
||||
const posts = await getAllPostMeta()
|
||||
const blogPages: MetadataRoute.Sitemap = posts.map((p) => ({
|
||||
url: p.canonical,
|
||||
lastModified: new Date(p.updated ?? p.date),
|
||||
changeFrequency: 'weekly',
|
||||
priority: 0.7,
|
||||
}))
|
||||
|
||||
const authorsMap = new Map<string, Date>()
|
||||
for (const p of posts) {
|
||||
for (const author of p.authors ?? [p.author]) {
|
||||
const postDate = new Date(p.updated ?? p.date)
|
||||
const existing = authorsMap.get(author.id)
|
||||
if (!existing || postDate > existing) {
|
||||
authorsMap.set(author.id, postDate)
|
||||
}
|
||||
}
|
||||
}
|
||||
const authorPages: MetadataRoute.Sitemap = [...authorsMap.entries()].map(([id, date]) => ({
|
||||
url: `${baseUrl}/blog/authors/${id}`,
|
||||
lastModified: date,
|
||||
}))
|
||||
|
||||
const integrationPages: MetadataRoute.Sitemap = integrations.map((integration) => ({
|
||||
url: `${baseUrl}/integrations/${integration.slug}`,
|
||||
}))
|
||||
|
||||
const providerPages: MetadataRoute.Sitemap = MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) => {
|
||||
if (provider.models.length === 0) return []
|
||||
return [
|
||||
{
|
||||
url: `${baseUrl}${provider.href}`,
|
||||
lastModified: new Date(
|
||||
Math.max(...provider.models.map((model) => new Date(model.pricing.updatedAt).getTime()))
|
||||
),
|
||||
},
|
||||
]
|
||||
})
|
||||
|
||||
const modelEntries: MetadataRoute.Sitemap = ALL_CATALOG_MODELS.map((model) => ({
|
||||
url: `${baseUrl}${model.href}`,
|
||||
lastModified: new Date(model.pricing.updatedAt),
|
||||
}))
|
||||
|
||||
const academyPages: MetadataRoute.Sitemap = [
|
||||
{ url: `${baseUrl}/academy` },
|
||||
...COURSES.map((course) => ({
|
||||
url: `${baseUrl}/academy/${course.slug}`,
|
||||
})),
|
||||
]
|
||||
|
||||
return [
|
||||
...staticPages,
|
||||
...modelHubPages,
|
||||
...blogPages,
|
||||
...authorPages,
|
||||
...integrationPages,
|
||||
...providerPages,
|
||||
...modelPages,
|
||||
...blogPages,
|
||||
...modelEntries,
|
||||
...academyPages,
|
||||
]
|
||||
}
|
||||
|
||||
@@ -58,12 +58,16 @@ import {
|
||||
WorkspaceFile,
|
||||
WorkspaceFileOperation,
|
||||
} from '@/lib/copilot/generated/tool-catalog-v1'
|
||||
import { parsePersistedStreamEventEnvelopeJson } from '@/lib/copilot/request/session/contract'
|
||||
import {
|
||||
type ParseStreamEventEnvelopeFailure,
|
||||
parsePersistedStreamEventEnvelope,
|
||||
parsePersistedStreamEventEnvelopeJson,
|
||||
} from '@/lib/copilot/request/session/contract'
|
||||
import {
|
||||
type FilePreviewSession,
|
||||
isFilePreviewSession,
|
||||
} from '@/lib/copilot/request/session/file-preview-session-contract'
|
||||
import { isStreamBatchEvent, type StreamBatchEvent } from '@/lib/copilot/request/session/types'
|
||||
import type { StreamBatchEvent } from '@/lib/copilot/request/session/types'
|
||||
import {
|
||||
extractResourcesFromToolResult,
|
||||
isResourceToolName,
|
||||
@@ -509,6 +513,33 @@ function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return Boolean(value) && typeof value === 'object' && !Array.isArray(value)
|
||||
}
|
||||
|
||||
const STREAM_SCHEMA_ENFORCEMENT_PREFIX = 'Client stream schema enforcement failed.'
|
||||
|
||||
class StreamSchemaValidationError extends Error {
|
||||
constructor(message: string) {
|
||||
super(message)
|
||||
this.name = 'StreamSchemaValidationError'
|
||||
}
|
||||
}
|
||||
|
||||
function createStreamSchemaValidationError(
|
||||
failure: ParseStreamEventEnvelopeFailure,
|
||||
context?: string
|
||||
): StreamSchemaValidationError {
|
||||
const details = failure.errors?.filter(Boolean).join('; ')
|
||||
return new StreamSchemaValidationError(
|
||||
[STREAM_SCHEMA_ENFORCEMENT_PREFIX, context, failure.message, details].filter(Boolean).join(' ')
|
||||
)
|
||||
}
|
||||
|
||||
function createBatchSchemaValidationError(message: string): StreamSchemaValidationError {
|
||||
return new StreamSchemaValidationError([STREAM_SCHEMA_ENFORCEMENT_PREFIX, message].join(' '))
|
||||
}
|
||||
|
||||
function isStreamSchemaValidationError(error: unknown): error is StreamSchemaValidationError {
|
||||
return error instanceof StreamSchemaValidationError
|
||||
}
|
||||
|
||||
function parseStreamBatchResponse(value: unknown): StreamBatchResponse {
|
||||
if (!isRecord(value)) {
|
||||
throw new Error('Invalid stream batch response')
|
||||
@@ -516,20 +547,41 @@ function parseStreamBatchResponse(value: unknown): StreamBatchResponse {
|
||||
|
||||
const rawEvents = Array.isArray(value.events) ? value.events : []
|
||||
const events: StreamBatchEvent[] = []
|
||||
for (const entry of rawEvents) {
|
||||
if (!isStreamBatchEvent(entry)) {
|
||||
throw new Error('Invalid stream batch event')
|
||||
for (const [index, entry] of rawEvents.entries()) {
|
||||
if (!isRecord(entry)) {
|
||||
throw createBatchSchemaValidationError(`Reconnect batch event ${index + 1} is not an object.`)
|
||||
}
|
||||
events.push(entry)
|
||||
if (
|
||||
typeof entry.eventId !== 'number' ||
|
||||
!Number.isFinite(entry.eventId) ||
|
||||
typeof entry.streamId !== 'string'
|
||||
) {
|
||||
throw createBatchSchemaValidationError(
|
||||
`Reconnect batch event ${index + 1} is missing required metadata.`
|
||||
)
|
||||
}
|
||||
|
||||
const parsedEvent = parsePersistedStreamEventEnvelope(entry.event)
|
||||
if (!parsedEvent.ok) {
|
||||
throw createStreamSchemaValidationError(parsedEvent, `Reconnect batch event ${index + 1}.`)
|
||||
}
|
||||
|
||||
events.push({
|
||||
eventId: entry.eventId,
|
||||
streamId: entry.streamId,
|
||||
event: parsedEvent.event,
|
||||
})
|
||||
}
|
||||
|
||||
const rawPreviewSessions = Array.isArray(value.previewSessions)
|
||||
? value.previewSessions
|
||||
: undefined
|
||||
const previewSessions =
|
||||
rawPreviewSessions?.map((session) => {
|
||||
rawPreviewSessions?.map((session, index) => {
|
||||
if (!isFilePreviewSession(session)) {
|
||||
throw new Error('Invalid stream preview session')
|
||||
throw createBatchSchemaValidationError(
|
||||
`Reconnect preview session ${index + 1} failed validation.`
|
||||
)
|
||||
}
|
||||
return session
|
||||
}) ?? undefined
|
||||
@@ -1579,12 +1631,14 @@ export function useChat(
|
||||
|
||||
const parsedResult = parsePersistedStreamEventEnvelopeJson(raw)
|
||||
if (!parsedResult.ok) {
|
||||
logger.warn('Failed to parse chat SSE event', {
|
||||
const error = createStreamSchemaValidationError(parsedResult, 'Live SSE event.')
|
||||
logger.error('Rejected chat SSE event due to client-side schema enforcement', {
|
||||
reason: parsedResult.reason,
|
||||
message: parsedResult.message,
|
||||
errors: parsedResult.errors,
|
||||
error: error.message,
|
||||
})
|
||||
continue
|
||||
throw error
|
||||
}
|
||||
const parsed = parsedResult.event
|
||||
|
||||
@@ -2533,6 +2587,17 @@ export function useChat(
|
||||
}
|
||||
return true
|
||||
}
|
||||
if (isStreamSchemaValidationError(err)) {
|
||||
logger.error('Reconnect halted by client-side stream schema enforcement', {
|
||||
streamId,
|
||||
attempt: attempt + 1,
|
||||
error: err.message,
|
||||
})
|
||||
if (streamGenRef.current === gen) {
|
||||
setError(err.message)
|
||||
}
|
||||
return false
|
||||
}
|
||||
logger.warn('Reconnect attempt failed', {
|
||||
streamId,
|
||||
attempt: attempt + 1,
|
||||
@@ -2892,6 +2957,13 @@ export function useChat(
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof Error && err.name === 'AbortError') return consumedByTranscript
|
||||
if (isStreamSchemaValidationError(err)) {
|
||||
setError(err.message)
|
||||
if (streamGenRef.current === gen) {
|
||||
finalize({ error: true })
|
||||
}
|
||||
return consumedByTranscript
|
||||
}
|
||||
|
||||
const activeStreamId = streamIdRef.current
|
||||
if (activeStreamId && streamGenRef.current === gen) {
|
||||
|
||||
@@ -241,7 +241,7 @@ function LineChartComponent({
|
||||
)}
|
||||
style={{ width, height }}
|
||||
>
|
||||
<p className='text-muted-foreground text-sm'>No data</p>
|
||||
<p className='text-[var(--text-muted)] text-sm'>No data</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -256,7 +256,7 @@ function LineChartComponent({
|
||||
>
|
||||
{!hasExternalWrapper && (
|
||||
<div className='mb-3 flex items-center gap-3'>
|
||||
<h4 className='font-medium text-foreground text-sm'>{label}</h4>
|
||||
<h4 className='font-medium text-[var(--text-primary)] text-sm'>{label}</h4>
|
||||
{allSeries.length > 1 && (
|
||||
<div className='flex items-center gap-2'>
|
||||
{scaledSeries.slice(1).map((s) => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import { memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { memo, useCallback, useMemo, useRef, useState } from 'react'
|
||||
import { Loader2 } from 'lucide-react'
|
||||
import { useParams } from 'next/navigation'
|
||||
import { useShallow } from 'zustand/react/shallow'
|
||||
@@ -441,10 +441,13 @@ function DashboardInner({ stats, isLoading, error }: DashboardProps) {
|
||||
[]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
setSelectedSegments((prev) => (Object.keys(prev).length > 0 ? {} : prev))
|
||||
setLastAnchorIndices((prev) => (Object.keys(prev).length > 0 ? {} : prev))
|
||||
}, [stats, timeRange, workflowIds, searchQuery])
|
||||
const resetKey = `${JSON.stringify(stats?.workflows?.map((w) => w.workflowId))}-${timeRange}-${workflowIds.join(',')}-${searchQuery}`
|
||||
const prevResetKeyRef = useRef(resetKey)
|
||||
if (resetKey !== prevResetKeyRef.current) {
|
||||
prevResetKeyRef.current = resetKey
|
||||
if (Object.keys(selectedSegments).length > 0) setSelectedSegments({})
|
||||
if (Object.keys(lastAnchorIndices).length > 0) setLastAnchorIndices({})
|
||||
}
|
||||
|
||||
if (isLoading) {
|
||||
return <DashboardSkeleton />
|
||||
|
||||
@@ -296,13 +296,10 @@ export const LogDetails = memo(function LogDetails({
|
||||
}
|
||||
}, [log?.id])
|
||||
|
||||
const isWorkflowExecutionLog = useMemo(() => {
|
||||
if (!log) return false
|
||||
return (
|
||||
(log.trigger === 'manual' && !!log.duration) ||
|
||||
(log.executionData?.enhanced && log.executionData?.traceSpans)
|
||||
)
|
||||
}, [log])
|
||||
const isWorkflowExecutionLog =
|
||||
!!log &&
|
||||
((log.trigger === 'manual' && !!log.duration) ||
|
||||
!!(log.executionData?.enhanced && log.executionData?.traceSpans))
|
||||
|
||||
const hasCostInfo = isWorkflowExecutionLog && log?.cost
|
||||
|
||||
@@ -337,10 +334,7 @@ export const LogDetails = memo(function LogDetails({
|
||||
return () => window.removeEventListener('keydown', handleKeyDown)
|
||||
}, [isOpen, onClose, hasPrev, hasNext, onNavigatePrev, onNavigateNext])
|
||||
|
||||
const formattedTimestamp = useMemo(
|
||||
() => (log ? formatDate(log.createdAt) : null),
|
||||
[log?.createdAt]
|
||||
)
|
||||
const formattedTimestamp = log ? formatDate(log.createdAt) : null
|
||||
|
||||
const logStatus = getDisplayStatus(log?.status)
|
||||
|
||||
|
||||
@@ -58,19 +58,14 @@ const LogRow = memo(
|
||||
? DELETED_WORKFLOW_COLOR
|
||||
: log.workflow?.color
|
||||
|
||||
const handleClick = useCallback(() => onClick(log), [onClick, log])
|
||||
|
||||
const handleMouseEnter = useCallback(() => onHover?.(log), [onHover, log])
|
||||
|
||||
const handleContextMenu = useCallback(
|
||||
(e: React.MouseEvent) => {
|
||||
if (onContextMenu) {
|
||||
e.preventDefault()
|
||||
onContextMenu(e, log)
|
||||
}
|
||||
},
|
||||
[onContextMenu, log]
|
||||
)
|
||||
const handleClick = () => onClick(log)
|
||||
const handleMouseEnter = () => onHover?.(log)
|
||||
const handleContextMenu = (e: React.MouseEvent) => {
|
||||
if (onContextMenu) {
|
||||
e.preventDefault()
|
||||
onContextMenu(e, log)
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
|
||||
@@ -38,9 +38,7 @@ export function WorkflowSelector({
|
||||
* When allWorkflows is true, pass empty array so the "All" option is selected.
|
||||
* Otherwise, pass the selected workflow IDs.
|
||||
*/
|
||||
const currentValues = useMemo(() => {
|
||||
return allWorkflows ? [] : selectedIds
|
||||
}, [allWorkflows, selectedIds])
|
||||
const currentValues = allWorkflows ? [] : selectedIds
|
||||
|
||||
/**
|
||||
* Handle multi-select changes from Combobox.
|
||||
|
||||
@@ -197,12 +197,9 @@ export const NotificationSettings = memo(function NotificationSettings({
|
||||
// Show form if user explicitly opened it OR if loading is complete with no subscriptions
|
||||
const displayForm = showForm || (!isLoading && !hasSubscriptions && !editingId)
|
||||
|
||||
const getSubscriptionsForTab = useCallback(
|
||||
(tab: NotificationType) => {
|
||||
return subscriptions.filter((s) => s.notificationType === tab)
|
||||
},
|
||||
[subscriptions]
|
||||
)
|
||||
const getSubscriptionsForTab = (tab: NotificationType) => {
|
||||
return subscriptions.filter((s) => s.notificationType === tab)
|
||||
}
|
||||
|
||||
const resetForm = useCallback(() => {
|
||||
setFormData({
|
||||
|
||||
@@ -568,6 +568,8 @@ export default function Logs() {
|
||||
}
|
||||
}, [selectedLogId, selectedLogIndex])
|
||||
|
||||
const effectiveSidebarOpen = isSidebarOpen && selectedLogIndex !== -1
|
||||
|
||||
const handleRefresh = useCallback(() => {
|
||||
setIsVisuallyRefreshing(true)
|
||||
const timerId = window.setTimeout(() => {
|
||||
@@ -777,7 +779,7 @@ export default function Logs() {
|
||||
() => (
|
||||
<LogDetails
|
||||
log={selectedLog}
|
||||
isOpen={isSidebarOpen}
|
||||
isOpen={effectiveSidebarOpen}
|
||||
onClose={handleCloseSidebar}
|
||||
onNavigateNext={handleNavigateNext}
|
||||
onNavigatePrev={handleNavigatePrev}
|
||||
@@ -787,7 +789,7 @@ export default function Logs() {
|
||||
),
|
||||
[
|
||||
selectedLog,
|
||||
isSidebarOpen,
|
||||
effectiveSidebarOpen,
|
||||
handleCloseSidebar,
|
||||
handleNavigateNext,
|
||||
handleNavigatePrev,
|
||||
@@ -1260,20 +1262,10 @@ function LogsFilterPanel({ searchQuery, onSearchQueryChange }: LogsFilterPanelPr
|
||||
const { data: folders = {} } = useFolderMap(workspaceId)
|
||||
const { data: allWorkflowList = [] } = useWorkflows(workspaceId)
|
||||
|
||||
const workflows = useMemo(
|
||||
() => allWorkflowList.map((w) => ({ id: w.id, name: w.name, color: w.color })),
|
||||
[allWorkflowList]
|
||||
)
|
||||
const workflows = allWorkflowList.map((w) => ({ id: w.id, name: w.name, color: w.color }))
|
||||
const folderList = Object.values(folders).filter((f) => f.workspaceId === workspaceId)
|
||||
|
||||
const folderList = useMemo(
|
||||
() => Object.values(folders).filter((f) => f.workspaceId === workspaceId),
|
||||
[folders, workspaceId]
|
||||
)
|
||||
|
||||
const selectedStatuses = useMemo((): string[] => {
|
||||
if (level === 'all' || !level) return []
|
||||
return level.split(',').filter(Boolean)
|
||||
}, [level])
|
||||
const selectedStatuses = level === 'all' || !level ? [] : level.split(',').filter(Boolean)
|
||||
|
||||
const statusOptions: ComboboxOption[] = useMemo(
|
||||
() =>
|
||||
@@ -1285,58 +1277,46 @@ function LogsFilterPanel({ searchQuery, onSearchQueryChange }: LogsFilterPanelPr
|
||||
[]
|
||||
)
|
||||
|
||||
const handleStatusChange = useCallback(
|
||||
(values: string[]) => {
|
||||
setLevel(values.length === 0 ? 'all' : values.join(','))
|
||||
},
|
||||
[setLevel]
|
||||
)
|
||||
const handleStatusChange = (values: string[]) => {
|
||||
setLevel(values.length === 0 ? 'all' : values.join(','))
|
||||
}
|
||||
|
||||
const statusDisplayLabel = useMemo(() => {
|
||||
if (selectedStatuses.length === 0) return 'Status'
|
||||
if (selectedStatuses.length === 1) {
|
||||
const status = statusOptions.find((s) => s.value === selectedStatuses[0])
|
||||
return status?.label || '1 selected'
|
||||
}
|
||||
return `${selectedStatuses.length} selected`
|
||||
}, [selectedStatuses, statusOptions])
|
||||
const statusDisplayLabel =
|
||||
selectedStatuses.length === 0
|
||||
? 'Status'
|
||||
: selectedStatuses.length === 1
|
||||
? statusOptions.find((s) => s.value === selectedStatuses[0])?.label || '1 selected'
|
||||
: `${selectedStatuses.length} selected`
|
||||
|
||||
const selectedStatusColor = useMemo(() => {
|
||||
if (selectedStatuses.length !== 1) return null
|
||||
const status = selectedStatuses[0] as LogStatus
|
||||
return STATUS_CONFIG[status]?.color ?? null
|
||||
}, [selectedStatuses])
|
||||
const selectedStatusColor =
|
||||
selectedStatuses.length === 1
|
||||
? (STATUS_CONFIG[selectedStatuses[0] as LogStatus]?.color ?? null)
|
||||
: null
|
||||
|
||||
const workflowOptions: ComboboxOption[] = useMemo(
|
||||
() => workflows.map((w) => ({ value: w.id, label: w.name, icon: getColorIcon(w.color, true) })),
|
||||
[workflows]
|
||||
)
|
||||
const workflowOptions: ComboboxOption[] = workflows.map((w) => ({
|
||||
value: w.id,
|
||||
label: w.name,
|
||||
icon: getColorIcon(w.color, true),
|
||||
}))
|
||||
|
||||
const workflowDisplayLabel = useMemo(() => {
|
||||
if (workflowIds.length === 0) return 'Workflow'
|
||||
if (workflowIds.length === 1) {
|
||||
const workflow = workflows.find((w) => w.id === workflowIds[0])
|
||||
return workflow?.name || '1 selected'
|
||||
}
|
||||
return `${workflowIds.length} workflows`
|
||||
}, [workflowIds, workflows])
|
||||
const workflowDisplayLabel =
|
||||
workflowIds.length === 0
|
||||
? 'Workflow'
|
||||
: workflowIds.length === 1
|
||||
? workflows.find((w) => w.id === workflowIds[0])?.name || '1 selected'
|
||||
: `${workflowIds.length} workflows`
|
||||
|
||||
const selectedWorkflow =
|
||||
workflowIds.length === 1 ? workflows.find((w) => w.id === workflowIds[0]) : null
|
||||
|
||||
const folderOptions: ComboboxOption[] = useMemo(
|
||||
() => folderList.map((f) => ({ value: f.id, label: f.name })),
|
||||
[folderList]
|
||||
)
|
||||
const folderOptions: ComboboxOption[] = folderList.map((f) => ({ value: f.id, label: f.name }))
|
||||
|
||||
const folderDisplayLabel = useMemo(() => {
|
||||
if (folderIds.length === 0) return 'Folder'
|
||||
if (folderIds.length === 1) {
|
||||
const folder = folderList.find((f) => f.id === folderIds[0])
|
||||
return folder?.name || '1 selected'
|
||||
}
|
||||
return `${folderIds.length} folders`
|
||||
}, [folderIds, folderList])
|
||||
const folderDisplayLabel =
|
||||
folderIds.length === 0
|
||||
? 'Folder'
|
||||
: folderIds.length === 1
|
||||
? folderList.find((f) => f.id === folderIds[0])?.name || '1 selected'
|
||||
: `${folderIds.length} folders`
|
||||
|
||||
const triggerOptions: ComboboxOption[] = useMemo(
|
||||
() =>
|
||||
@@ -1348,69 +1328,57 @@ function LogsFilterPanel({ searchQuery, onSearchQueryChange }: LogsFilterPanelPr
|
||||
[]
|
||||
)
|
||||
|
||||
const triggerDisplayLabel = useMemo(() => {
|
||||
if (triggers.length === 0) return 'Trigger'
|
||||
if (triggers.length === 1) {
|
||||
const trigger = triggerOptions.find((t) => t.value === triggers[0])
|
||||
return trigger?.label || '1 selected'
|
||||
const triggerDisplayLabel =
|
||||
triggers.length === 0
|
||||
? 'Trigger'
|
||||
: triggers.length === 1
|
||||
? triggerOptions.find((t) => t.value === triggers[0])?.label || '1 selected'
|
||||
: `${triggers.length} triggers`
|
||||
|
||||
const timeDisplayLabel =
|
||||
timeRange === 'All time'
|
||||
? 'Time'
|
||||
: timeRange === 'Custom range' && startDate && endDate
|
||||
? `${formatDateShort(startDate)} - ${formatDateShort(endDate)}`
|
||||
: timeRange === 'Custom range'
|
||||
? 'Custom range'
|
||||
: timeRange
|
||||
|
||||
const handleTimeRangeChange = (val: string) => {
|
||||
if (val === 'Custom range') {
|
||||
setPreviousTimeRange(timeRange)
|
||||
setDatePickerOpen(true)
|
||||
} else {
|
||||
clearDateRange()
|
||||
setTimeRange(val as typeof timeRange)
|
||||
}
|
||||
return `${triggers.length} triggers`
|
||||
}, [triggers, triggerOptions])
|
||||
}
|
||||
|
||||
const timeDisplayLabel = useMemo(() => {
|
||||
if (timeRange === 'All time') return 'Time'
|
||||
if (timeRange === 'Custom range' && startDate && endDate) {
|
||||
return `${formatDateShort(startDate)} - ${formatDateShort(endDate)}`
|
||||
}
|
||||
if (timeRange === 'Custom range') return 'Custom range'
|
||||
return timeRange
|
||||
}, [timeRange, startDate, endDate])
|
||||
const handleDateRangeApply = (start: string, end: string) => {
|
||||
setDateRange(start, end)
|
||||
setDatePickerOpen(false)
|
||||
}
|
||||
|
||||
const handleTimeRangeChange = useCallback(
|
||||
(val: string) => {
|
||||
if (val === 'Custom range') {
|
||||
setPreviousTimeRange(timeRange)
|
||||
setDatePickerOpen(true)
|
||||
} else {
|
||||
clearDateRange()
|
||||
setTimeRange(val as typeof timeRange)
|
||||
}
|
||||
},
|
||||
[timeRange, setTimeRange, clearDateRange]
|
||||
)
|
||||
|
||||
const handleDateRangeApply = useCallback(
|
||||
(start: string, end: string) => {
|
||||
setDateRange(start, end)
|
||||
setDatePickerOpen(false)
|
||||
},
|
||||
[setDateRange]
|
||||
)
|
||||
|
||||
const handleDatePickerCancel = useCallback(() => {
|
||||
const handleDatePickerCancel = () => {
|
||||
if (timeRange === 'Custom range' && !startDate) {
|
||||
setTimeRange(previousTimeRange)
|
||||
}
|
||||
setDatePickerOpen(false)
|
||||
}, [timeRange, startDate, previousTimeRange, setTimeRange])
|
||||
}
|
||||
|
||||
const filtersActive = useMemo(
|
||||
() =>
|
||||
hasActiveFilters({
|
||||
timeRange,
|
||||
level,
|
||||
workflowIds,
|
||||
folderIds,
|
||||
triggers,
|
||||
searchQuery,
|
||||
}),
|
||||
[timeRange, level, workflowIds, folderIds, triggers, searchQuery]
|
||||
)
|
||||
const filtersActive = hasActiveFilters({
|
||||
timeRange,
|
||||
level,
|
||||
workflowIds,
|
||||
folderIds,
|
||||
triggers,
|
||||
searchQuery,
|
||||
})
|
||||
|
||||
const handleClearFilters = useCallback(() => {
|
||||
const handleClearFilters = () => {
|
||||
resetFilters()
|
||||
onSearchQueryChange('')
|
||||
}, [resetFilters, onSearchQueryChange])
|
||||
}
|
||||
|
||||
return (
|
||||
<div className='flex w-[240px] flex-col gap-3 p-3'>
|
||||
|
||||
@@ -13,7 +13,7 @@ export const AgiloftBlock: BlockConfig = {
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Productivity,
|
||||
tags: ['automation'],
|
||||
bgColor: '#263A5C',
|
||||
bgColor: '#FFFFFF',
|
||||
icon: AgiloftIcon,
|
||||
authMode: AuthMode.ApiKey,
|
||||
|
||||
|
||||
346
apps/sim/blocks/blocks/brightdata.ts
Normal file
346
apps/sim/blocks/blocks/brightdata.ts
Normal file
@@ -0,0 +1,346 @@
|
||||
import { BrightDataIcon } from '@/components/icons'
|
||||
import type { BlockConfig } from '@/blocks/types'
|
||||
import { AuthMode, IntegrationType } from '@/blocks/types'
|
||||
import type { BrightDataResponse } from '@/tools/brightdata/types'
|
||||
|
||||
export const BrightDataBlock: BlockConfig<BrightDataResponse> = {
|
||||
type: 'brightdata',
|
||||
name: 'Bright Data',
|
||||
description: 'Scrape websites, search engines, and extract structured data',
|
||||
authMode: AuthMode.ApiKey,
|
||||
longDescription:
|
||||
'Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.',
|
||||
docsLink: 'https://docs.sim.ai/tools/brightdata',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Search,
|
||||
tags: ['web-scraping', 'automation'],
|
||||
bgColor: '#FFFFFF',
|
||||
icon: BrightDataIcon,
|
||||
subBlocks: [
|
||||
{
|
||||
id: 'operation',
|
||||
title: 'Operation',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'Scrape URL', id: 'scrape_url' },
|
||||
{ label: 'SERP Search', id: 'serp_search' },
|
||||
{ label: 'Discover', id: 'discover' },
|
||||
{ label: 'Sync Scrape', id: 'sync_scrape' },
|
||||
{ label: 'Scrape Dataset', id: 'scrape_dataset' },
|
||||
{ label: 'Snapshot Status', id: 'snapshot_status' },
|
||||
{ label: 'Download Snapshot', id: 'download_snapshot' },
|
||||
{ label: 'Cancel Snapshot', id: 'cancel_snapshot' },
|
||||
],
|
||||
value: () => 'scrape_url',
|
||||
},
|
||||
{
|
||||
id: 'zone',
|
||||
title: 'Zone',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., web_unlocker1',
|
||||
condition: { field: 'operation', value: ['scrape_url', 'serp_search'] },
|
||||
required: { field: 'operation', value: ['scrape_url', 'serp_search'] },
|
||||
},
|
||||
{
|
||||
id: 'url',
|
||||
title: 'URL',
|
||||
type: 'short-input',
|
||||
placeholder: 'https://example.com/page',
|
||||
condition: { field: 'operation', value: 'scrape_url' },
|
||||
required: { field: 'operation', value: 'scrape_url' },
|
||||
},
|
||||
{
|
||||
id: 'format',
|
||||
title: 'Format',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'Raw HTML', id: 'raw' },
|
||||
{ label: 'JSON', id: 'json' },
|
||||
],
|
||||
value: () => 'raw',
|
||||
condition: { field: 'operation', value: 'scrape_url' },
|
||||
},
|
||||
{
|
||||
id: 'country',
|
||||
title: 'Country',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., us, gb',
|
||||
mode: 'advanced',
|
||||
condition: { field: 'operation', value: ['scrape_url', 'serp_search', 'discover'] },
|
||||
},
|
||||
{
|
||||
id: 'query',
|
||||
title: 'Search Query',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., best project management tools',
|
||||
condition: { field: 'operation', value: 'serp_search' },
|
||||
required: { field: 'operation', value: 'serp_search' },
|
||||
},
|
||||
{
|
||||
id: 'searchEngine',
|
||||
title: 'Search Engine',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'Google', id: 'google' },
|
||||
{ label: 'Bing', id: 'bing' },
|
||||
{ label: 'DuckDuckGo', id: 'duckduckgo' },
|
||||
{ label: 'Yandex', id: 'yandex' },
|
||||
],
|
||||
value: () => 'google',
|
||||
condition: { field: 'operation', value: 'serp_search' },
|
||||
},
|
||||
{
|
||||
id: 'language',
|
||||
title: 'Language',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., en, es',
|
||||
mode: 'advanced',
|
||||
condition: { field: 'operation', value: ['serp_search', 'discover'] },
|
||||
},
|
||||
{
|
||||
id: 'numResults',
|
||||
title: 'Number of Results',
|
||||
type: 'short-input',
|
||||
placeholder: '10',
|
||||
mode: 'advanced',
|
||||
condition: { field: 'operation', value: ['serp_search', 'discover'] },
|
||||
},
|
||||
{
|
||||
id: 'discoverQuery',
|
||||
title: 'Search Query',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., competitor pricing changes',
|
||||
condition: { field: 'operation', value: 'discover' },
|
||||
required: { field: 'operation', value: 'discover' },
|
||||
},
|
||||
{
|
||||
id: 'intent',
|
||||
title: 'Intent',
|
||||
type: 'long-input',
|
||||
placeholder:
|
||||
'Describe what you are looking for (e.g., "find official pricing pages and change notes")',
|
||||
condition: { field: 'operation', value: 'discover' },
|
||||
},
|
||||
{
|
||||
id: 'includeContent',
|
||||
title: 'Include Page Content',
|
||||
type: 'switch',
|
||||
mode: 'advanced',
|
||||
condition: { field: 'operation', value: 'discover' },
|
||||
},
|
||||
{
|
||||
id: 'contentFormat',
|
||||
title: 'Response Format',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'JSON', id: 'json' },
|
||||
{ label: 'Markdown', id: 'markdown' },
|
||||
],
|
||||
value: () => 'json',
|
||||
mode: 'advanced',
|
||||
condition: { field: 'operation', value: 'discover' },
|
||||
},
|
||||
{
|
||||
id: 'syncDatasetId',
|
||||
title: 'Dataset ID',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., gd_l1viktl72bvl7bjuj0',
|
||||
condition: { field: 'operation', value: 'sync_scrape' },
|
||||
required: { field: 'operation', value: 'sync_scrape' },
|
||||
},
|
||||
{
|
||||
id: 'syncUrls',
|
||||
title: 'URLs (max 20)',
|
||||
type: 'long-input',
|
||||
placeholder: '[{"url": "https://example.com/product"}]',
|
||||
condition: { field: 'operation', value: 'sync_scrape' },
|
||||
required: { field: 'operation', value: 'sync_scrape' },
|
||||
},
|
||||
{
|
||||
id: 'syncFormat',
|
||||
title: 'Output Format',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'JSON', id: 'json' },
|
||||
{ label: 'NDJSON', id: 'ndjson' },
|
||||
{ label: 'CSV', id: 'csv' },
|
||||
],
|
||||
value: () => 'json',
|
||||
condition: { field: 'operation', value: 'sync_scrape' },
|
||||
},
|
||||
{
|
||||
id: 'datasetId',
|
||||
title: 'Dataset ID',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., gd_l1viktl72bvl7bjuj0',
|
||||
condition: { field: 'operation', value: 'scrape_dataset' },
|
||||
required: { field: 'operation', value: 'scrape_dataset' },
|
||||
},
|
||||
{
|
||||
id: 'urls',
|
||||
title: 'URLs',
|
||||
type: 'long-input',
|
||||
placeholder: '[{"url": "https://example.com/product"}]',
|
||||
condition: { field: 'operation', value: 'scrape_dataset' },
|
||||
required: { field: 'operation', value: 'scrape_dataset' },
|
||||
},
|
||||
{
|
||||
id: 'datasetFormat',
|
||||
title: 'Output Format',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'JSON', id: 'json' },
|
||||
{ label: 'CSV', id: 'csv' },
|
||||
],
|
||||
value: () => 'json',
|
||||
condition: { field: 'operation', value: 'scrape_dataset' },
|
||||
},
|
||||
{
|
||||
id: 'snapshotId',
|
||||
title: 'Snapshot ID',
|
||||
type: 'short-input',
|
||||
placeholder: 'e.g., s_m4x7enmven8djfqak',
|
||||
condition: {
|
||||
field: 'operation',
|
||||
value: ['snapshot_status', 'download_snapshot', 'cancel_snapshot'],
|
||||
},
|
||||
required: {
|
||||
field: 'operation',
|
||||
value: ['snapshot_status', 'download_snapshot', 'cancel_snapshot'],
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'downloadFormat',
|
||||
title: 'Download Format',
|
||||
type: 'dropdown',
|
||||
options: [
|
||||
{ label: 'JSON', id: 'json' },
|
||||
{ label: 'NDJSON', id: 'ndjson' },
|
||||
{ label: 'CSV', id: 'csv' },
|
||||
],
|
||||
value: () => 'json',
|
||||
condition: { field: 'operation', value: 'download_snapshot' },
|
||||
},
|
||||
{
|
||||
id: 'apiKey',
|
||||
title: 'API Key',
|
||||
type: 'short-input',
|
||||
placeholder: 'Enter your Bright Data API token',
|
||||
password: true,
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
tools: {
|
||||
access: [
|
||||
'brightdata_scrape_url',
|
||||
'brightdata_serp_search',
|
||||
'brightdata_discover',
|
||||
'brightdata_sync_scrape',
|
||||
'brightdata_scrape_dataset',
|
||||
'brightdata_snapshot_status',
|
||||
'brightdata_download_snapshot',
|
||||
'brightdata_cancel_snapshot',
|
||||
],
|
||||
config: {
|
||||
tool: (params) => `brightdata_${params.operation}`,
|
||||
params: (params) => {
|
||||
const result: Record<string, unknown> = { apiKey: params.apiKey }
|
||||
|
||||
switch (params.operation) {
|
||||
case 'scrape_url':
|
||||
result.zone = params.zone
|
||||
result.url = params.url
|
||||
if (params.format) result.format = params.format
|
||||
if (params.country) result.country = params.country
|
||||
break
|
||||
|
||||
case 'serp_search':
|
||||
result.zone = params.zone
|
||||
result.query = params.query
|
||||
if (params.searchEngine) result.searchEngine = params.searchEngine
|
||||
if (params.country) result.country = params.country
|
||||
if (params.language) result.language = params.language
|
||||
if (params.numResults) result.numResults = Number(params.numResults)
|
||||
break
|
||||
|
||||
case 'discover':
|
||||
result.query = params.discoverQuery
|
||||
if (params.numResults) result.numResults = Number(params.numResults)
|
||||
if (params.intent) result.intent = params.intent
|
||||
if (params.includeContent != null) result.includeContent = params.includeContent
|
||||
if (params.contentFormat) result.format = params.contentFormat
|
||||
if (params.language) result.language = params.language
|
||||
if (params.country) result.country = params.country
|
||||
break
|
||||
|
||||
case 'sync_scrape':
|
||||
result.datasetId = params.syncDatasetId
|
||||
result.urls = params.syncUrls
|
||||
if (params.syncFormat) result.format = params.syncFormat
|
||||
break
|
||||
|
||||
case 'scrape_dataset':
|
||||
result.datasetId = params.datasetId
|
||||
result.urls = params.urls
|
||||
if (params.datasetFormat) result.format = params.datasetFormat
|
||||
break
|
||||
|
||||
case 'snapshot_status':
|
||||
result.snapshotId = params.snapshotId
|
||||
break
|
||||
|
||||
case 'download_snapshot':
|
||||
result.snapshotId = params.snapshotId
|
||||
if (params.downloadFormat) result.format = params.downloadFormat
|
||||
break
|
||||
|
||||
case 'cancel_snapshot':
|
||||
result.snapshotId = params.snapshotId
|
||||
break
|
||||
}
|
||||
|
||||
return result
|
||||
},
|
||||
},
|
||||
},
|
||||
inputs: {
|
||||
operation: { type: 'string', description: 'Operation to perform' },
|
||||
apiKey: { type: 'string', description: 'Bright Data API token' },
|
||||
zone: { type: 'string', description: 'Bright Data zone name' },
|
||||
url: { type: 'string', description: 'URL to scrape' },
|
||||
format: { type: 'string', description: 'Response format' },
|
||||
country: { type: 'string', description: 'Country code for geo-targeting' },
|
||||
query: { type: 'string', description: 'Search query' },
|
||||
searchEngine: { type: 'string', description: 'Search engine to use' },
|
||||
language: { type: 'string', description: 'Language code' },
|
||||
numResults: { type: 'number', description: 'Number of results' },
|
||||
discoverQuery: { type: 'string', description: 'Discover search query' },
|
||||
intent: { type: 'string', description: 'Intent for ranking results' },
|
||||
includeContent: { type: 'boolean', description: 'Include page content in discover results' },
|
||||
contentFormat: { type: 'string', description: 'Content format for discover results' },
|
||||
syncDatasetId: { type: 'string', description: 'Dataset scraper ID for sync scrape' },
|
||||
syncUrls: { type: 'string', description: 'JSON array of URL objects for sync scrape' },
|
||||
syncFormat: { type: 'string', description: 'Output format for sync scrape' },
|
||||
datasetId: { type: 'string', description: 'Dataset scraper ID' },
|
||||
urls: { type: 'string', description: 'JSON array of URL objects to scrape' },
|
||||
datasetFormat: { type: 'string', description: 'Dataset output format' },
|
||||
snapshotId: { type: 'string', description: 'Snapshot ID for status/download/cancel' },
|
||||
downloadFormat: { type: 'string', description: 'Download output format' },
|
||||
},
|
||||
outputs: {
|
||||
content: { type: 'string', description: 'Scraped page content' },
|
||||
url: { type: 'string', description: 'URL that was scraped' },
|
||||
statusCode: { type: 'number', description: 'HTTP status code' },
|
||||
results: { type: 'json', description: 'Search or discover results array' },
|
||||
query: { type: 'string', description: 'Search query executed' },
|
||||
searchEngine: { type: 'string', description: 'Search engine used' },
|
||||
totalResults: { type: 'number', description: 'Total number of discover results' },
|
||||
data: { type: 'json', description: 'Scraped data records' },
|
||||
snapshotId: { type: 'string', description: 'Snapshot ID' },
|
||||
isAsync: { type: 'boolean', description: 'Whether sync scrape fell back to async' },
|
||||
status: { type: 'string', description: 'Job status' },
|
||||
datasetId: { type: 'string', description: 'Dataset ID of the snapshot' },
|
||||
format: { type: 'string', description: 'Content type of downloaded data' },
|
||||
cancelled: { type: 'boolean', description: 'Whether cancellation was successful' },
|
||||
},
|
||||
}
|
||||
@@ -18,6 +18,7 @@ import { AthenaBlock } from '@/blocks/blocks/athena'
|
||||
import { AttioBlock } from '@/blocks/blocks/attio'
|
||||
import { BoxBlock } from '@/blocks/blocks/box'
|
||||
import { BrandfetchBlock } from '@/blocks/blocks/brandfetch'
|
||||
import { BrightDataBlock } from '@/blocks/blocks/brightdata'
|
||||
import { BrowserUseBlock } from '@/blocks/blocks/browser_use'
|
||||
import { CalComBlock } from '@/blocks/blocks/calcom'
|
||||
import { CalendlyBlock } from '@/blocks/blocks/calendly'
|
||||
@@ -245,6 +246,7 @@ export const registry: Record<string, BlockConfig> = {
|
||||
athena: AthenaBlock,
|
||||
attio: AttioBlock,
|
||||
brandfetch: BrandfetchBlock,
|
||||
brightdata: BrightDataBlock,
|
||||
box: BoxBlock,
|
||||
browser_use: BrowserUseBlock,
|
||||
calcom: CalComBlock,
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect } from 'react'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
|
||||
export function OneDollarStats() {
|
||||
useEffect(() => {
|
||||
const shouldInitialize = !!env.DRIZZLE_ODS_API_KEY
|
||||
|
||||
if (!shouldInitialize) {
|
||||
return
|
||||
}
|
||||
|
||||
import('onedollarstats')
|
||||
.then(({ configure }) => {
|
||||
configure({
|
||||
collectorUrl: 'https://collector.onedollarstats.com/events',
|
||||
autocollect: true,
|
||||
hashRouting: true,
|
||||
})
|
||||
})
|
||||
.catch(() => {})
|
||||
}, [])
|
||||
|
||||
return null
|
||||
}
|
||||
@@ -2087,6 +2087,21 @@ export function BrandfetchIcon(props: SVGProps<SVGSVGElement>) {
|
||||
)
|
||||
}
|
||||
|
||||
export function BrightDataIcon(props: SVGProps<SVGSVGElement>) {
|
||||
return (
|
||||
<svg {...props} viewBox='54 93 22 52' fill='none' xmlns='http://www.w3.org/2000/svg'>
|
||||
<path
|
||||
d='M62 95.21c.19 2.16 1.85 3.24 2.82 4.74.25.38.48.11.67-.16.21-.31.6-1.21 1.15-1.28-.35 1.38-.04 3.15.16 4.45.49 3.05-1.22 5.64-4.07 6.18-3.38.65-6.22-2.21-5.6-5.62.23-1.24 1.37-2.5.77-3.7-.85-1.7.54-.52.79-.22 1.04 1.2 1.21.09 1.45-.55.24-.63.31-1.31.47-1.97.19-.77.55-1.4 1.39-1.87z'
|
||||
fill='currentColor'
|
||||
/>
|
||||
<path
|
||||
d='M66.70 123.37c0 3.69.04 7.38-.03 11.07-.02 1.04.31 1.48 1.32 1.49.29 0 .59.12.88.13.93.01 1.18.47 1.16 1.37-.05 2.19 0 2.19-2.24 2.19-3.48 0-6.96-.04-10.44.03-1.09.02-1.47-.33-1.3-1.36.02-.12.02-.26 0-.38-.28-1.39.39-1.96 1.7-1.9 1.36.06 1.76-.51 1.74-1.88-.09-5.17-.08-10.35 0-15.53.02-1.22-.32-1.87-1.52-2.17-.57-.14-1.47-.11-1.57-.85-.15-1.04-.05-2.11.01-3.17.02-.34.44-.35.73-.39 2.81-.39 5.63-.77 8.44-1.18.92-.14 1.15.2 1.14 1.09-.04 3.8-.02 7.62-.02 11.44z'
|
||||
fill='currentColor'
|
||||
/>
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
|
||||
export function BrowserUseIcon(props: SVGProps<SVGSVGElement>) {
|
||||
return (
|
||||
<svg
|
||||
|
||||
@@ -25,8 +25,9 @@ export const logKeys = {
|
||||
[...logKeys.lists(), workspaceId ?? '', filters] as const,
|
||||
details: () => [...logKeys.all, 'detail'] as const,
|
||||
detail: (logId: string | undefined) => [...logKeys.details(), logId ?? ''] as const,
|
||||
statsAll: () => [...logKeys.all, 'stats'] as const,
|
||||
stats: (workspaceId: string | undefined, filters: object) =>
|
||||
[...logKeys.all, 'stats', workspaceId ?? '', filters] as const,
|
||||
[...logKeys.statsAll(), workspaceId ?? '', filters] as const,
|
||||
executionSnapshots: () => [...logKeys.all, 'executionSnapshot'] as const,
|
||||
executionSnapshot: (executionId: string | undefined) =>
|
||||
[...logKeys.executionSnapshots(), executionId ?? ''] as const,
|
||||
@@ -327,7 +328,7 @@ export function useCancelExecution() {
|
||||
onSettled: () => {
|
||||
queryClient.invalidateQueries({ queryKey: logKeys.lists() })
|
||||
queryClient.invalidateQueries({ queryKey: logKeys.details() })
|
||||
queryClient.invalidateQueries({ queryKey: [...logKeys.all, 'stats'] })
|
||||
queryClient.invalidateQueries({ queryKey: logKeys.statsAll() })
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -43,6 +43,92 @@ describe('stream session contract parser', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('accepts contract session chat events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'session' as const,
|
||||
payload: { kind: 'chat' as const, chatId: 'chat-1' },
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract complete events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'complete' as const,
|
||||
payload: { status: 'complete' as const },
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract error events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'error' as const,
|
||||
payload: { message: 'something went wrong' },
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract tool call events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'tool' as const,
|
||||
payload: {
|
||||
toolCallId: 'tc-1',
|
||||
toolName: 'read',
|
||||
phase: 'call' as const,
|
||||
executor: 'sim' as const,
|
||||
mode: 'sync' as const,
|
||||
},
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract span events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'span' as const,
|
||||
payload: { kind: 'subagent' as const, event: 'start' as const, agent: 'file' },
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract resource events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'resource' as const,
|
||||
payload: {
|
||||
op: 'upsert' as const,
|
||||
resource: { id: 'r-1', type: 'file', title: 'test.md' },
|
||||
},
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts contract run events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'run' as const,
|
||||
payload: { kind: 'compaction_start' as const },
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
expect(parsePersistedStreamEventEnvelope(event).ok).toBe(true)
|
||||
})
|
||||
|
||||
it('accepts synthetic file preview events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
@@ -82,7 +168,32 @@ describe('stream session contract parser', () => {
|
||||
throw new Error('expected invalid result')
|
||||
}
|
||||
expect(parsed.reason).toBe('invalid_stream_event')
|
||||
expect(parsed.errors?.length).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it('rejects unknown event types', () => {
|
||||
const parsed = parsePersistedStreamEventEnvelope({
|
||||
...BASE_ENVELOPE,
|
||||
type: 'unknown_type',
|
||||
payload: {},
|
||||
})
|
||||
|
||||
expect(parsed.ok).toBe(false)
|
||||
if (parsed.ok) {
|
||||
throw new Error('expected invalid result')
|
||||
}
|
||||
expect(parsed.reason).toBe('invalid_stream_event')
|
||||
expect(parsed.errors).toContain('unknown type="unknown_type"')
|
||||
})
|
||||
|
||||
it('rejects non-object values', () => {
|
||||
const parsed = parsePersistedStreamEventEnvelope('not an object')
|
||||
|
||||
expect(parsed.ok).toBe(false)
|
||||
if (parsed.ok) {
|
||||
throw new Error('expected invalid result')
|
||||
}
|
||||
expect(parsed.reason).toBe('invalid_stream_event')
|
||||
expect(parsed.errors).toContain('value is not an object')
|
||||
})
|
||||
|
||||
it('reports invalid JSON separately from schema failures', () => {
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
import type { ErrorObject, ValidateFunction } from 'ajv'
|
||||
import Ajv2020 from 'ajv/dist/2020.js'
|
||||
import type {
|
||||
MothershipStreamV1EventEnvelope,
|
||||
MothershipStreamV1StreamRef,
|
||||
MothershipStreamV1StreamScope,
|
||||
MothershipStreamV1Trace,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { MOTHERSHIP_STREAM_V1_SCHEMA } from '@/lib/copilot/generated/mothership-stream-v1-schema'
|
||||
import {
|
||||
MothershipStreamV1EventType,
|
||||
MothershipStreamV1ResourceOp,
|
||||
MothershipStreamV1RunKind,
|
||||
MothershipStreamV1SessionKind,
|
||||
MothershipStreamV1SpanPayloadKind,
|
||||
MothershipStreamV1TextChannel,
|
||||
MothershipStreamV1ToolPhase,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import type { FilePreviewTargetKind } from './file-preview-session-contract'
|
||||
|
||||
type JsonRecord = Record<string, unknown>
|
||||
|
||||
const ajv = new Ajv2020({
|
||||
allErrors: true,
|
||||
strict: false,
|
||||
})
|
||||
|
||||
const FILE_PREVIEW_PHASE = {
|
||||
start: 'file_preview_start',
|
||||
target: 'file_preview_target',
|
||||
@@ -144,26 +145,9 @@ export type ParseStreamEventEnvelopeResult =
|
||||
| ParseStreamEventEnvelopeSuccess
|
||||
| ParseStreamEventEnvelopeFailure
|
||||
|
||||
let validator: ValidateFunction<MothershipStreamV1EventEnvelope> | null = null
|
||||
|
||||
function getValidator(): ValidateFunction<MothershipStreamV1EventEnvelope> {
|
||||
if (validator) {
|
||||
return validator
|
||||
}
|
||||
|
||||
validator = ajv.compile<MothershipStreamV1EventEnvelope>(MOTHERSHIP_STREAM_V1_SCHEMA as object)
|
||||
return validator
|
||||
}
|
||||
|
||||
function formatValidationErrors(errors: ErrorObject[] | null | undefined): string[] | undefined {
|
||||
if (!errors || errors.length === 0) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
return errors
|
||||
.slice(0, 5)
|
||||
.map((error) => `${error.instancePath || '/'} ${error.message || 'is invalid'}`.trim())
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
// Structural helpers (CSP-safe – no codegen / eval / new Function)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function isRecord(value: unknown): value is JsonRecord {
|
||||
return Boolean(value) && typeof value === 'object' && !Array.isArray(value)
|
||||
@@ -199,6 +183,140 @@ function isStreamScope(value: unknown): value is MothershipStreamV1StreamScope {
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Contract envelope validator (replaces Ajv runtime compilation)
|
||||
//
|
||||
// Validates the envelope shell (v, seq, ts, stream, trace?, scope?) and that
|
||||
// `type` is one of the known event types with a non-null payload object.
|
||||
// Per-payload-variant validation is intentionally lightweight: the server
|
||||
// already performs strict schema validation; the client only needs enough
|
||||
// structural checking to safely dispatch inside the switch statement.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const KNOWN_EVENT_TYPES: ReadonlySet<string> = new Set(Object.values(MothershipStreamV1EventType))
|
||||
|
||||
function isValidEnvelopeShell(value: unknown): value is JsonRecord & {
|
||||
v: 1
|
||||
seq: number
|
||||
ts: string
|
||||
stream: MothershipStreamV1StreamRef
|
||||
type: string
|
||||
payload: JsonRecord
|
||||
} {
|
||||
if (!isRecord(value)) return false
|
||||
if (value.v !== 1) return false
|
||||
if (typeof value.seq !== 'number' || !Number.isFinite(value.seq)) return false
|
||||
if (typeof value.ts !== 'string') return false
|
||||
if (!isStreamRef(value.stream)) return false
|
||||
if (value.trace !== undefined && !isTrace(value.trace)) return false
|
||||
if (value.scope !== undefined && !isStreamScope(value.scope)) return false
|
||||
if (typeof value.type !== 'string' || !KNOWN_EVENT_TYPES.has(value.type)) return false
|
||||
if (!isRecord(value.payload)) return false
|
||||
return true
|
||||
}
|
||||
|
||||
function isValidSessionPayload(payload: JsonRecord): boolean {
|
||||
const kind = payload.kind
|
||||
if (typeof kind !== 'string') return false
|
||||
switch (kind) {
|
||||
case MothershipStreamV1SessionKind.start:
|
||||
return true
|
||||
case MothershipStreamV1SessionKind.chat:
|
||||
return typeof payload.chatId === 'string'
|
||||
case MothershipStreamV1SessionKind.title:
|
||||
return typeof payload.title === 'string'
|
||||
case MothershipStreamV1SessionKind.trace:
|
||||
return typeof payload.requestId === 'string'
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
function isValidTextPayload(payload: JsonRecord): boolean {
|
||||
return (
|
||||
(payload.channel === MothershipStreamV1TextChannel.assistant ||
|
||||
payload.channel === MothershipStreamV1TextChannel.thinking) &&
|
||||
typeof payload.text === 'string'
|
||||
)
|
||||
}
|
||||
|
||||
function isValidToolPayload(payload: JsonRecord): boolean {
|
||||
if (typeof payload.toolCallId !== 'string') return false
|
||||
if (typeof payload.toolName !== 'string') return false
|
||||
const phase = payload.phase
|
||||
return (
|
||||
phase === MothershipStreamV1ToolPhase.call ||
|
||||
phase === MothershipStreamV1ToolPhase.args_delta ||
|
||||
phase === MothershipStreamV1ToolPhase.result
|
||||
)
|
||||
}
|
||||
|
||||
function isValidSpanPayload(payload: JsonRecord): boolean {
|
||||
const kind = payload.kind
|
||||
return (
|
||||
kind === MothershipStreamV1SpanPayloadKind.subagent ||
|
||||
kind === MothershipStreamV1SpanPayloadKind.structured_result ||
|
||||
kind === MothershipStreamV1SpanPayloadKind.subagent_result
|
||||
)
|
||||
}
|
||||
|
||||
function isValidResourcePayload(payload: JsonRecord): boolean {
|
||||
return (
|
||||
(payload.op === MothershipStreamV1ResourceOp.upsert ||
|
||||
payload.op === MothershipStreamV1ResourceOp.remove) &&
|
||||
isRecord(payload.resource) &&
|
||||
typeof (payload.resource as JsonRecord).id === 'string' &&
|
||||
typeof (payload.resource as JsonRecord).type === 'string'
|
||||
)
|
||||
}
|
||||
|
||||
function isValidRunPayload(payload: JsonRecord): boolean {
|
||||
const kind = payload.kind
|
||||
return (
|
||||
kind === MothershipStreamV1RunKind.checkpoint_pause ||
|
||||
kind === MothershipStreamV1RunKind.resumed ||
|
||||
kind === MothershipStreamV1RunKind.compaction_start ||
|
||||
kind === MothershipStreamV1RunKind.compaction_done
|
||||
)
|
||||
}
|
||||
|
||||
function isValidErrorPayload(payload: JsonRecord): boolean {
|
||||
return typeof payload.message === 'string' || typeof payload.error === 'string'
|
||||
}
|
||||
|
||||
function isValidCompletePayload(payload: JsonRecord): boolean {
|
||||
return typeof payload.status === 'string'
|
||||
}
|
||||
|
||||
function isContractEnvelope(value: unknown): value is MothershipStreamV1EventEnvelope {
|
||||
if (!isValidEnvelopeShell(value)) return false
|
||||
const payload = value.payload as JsonRecord
|
||||
switch (value.type) {
|
||||
case MothershipStreamV1EventType.session:
|
||||
return isValidSessionPayload(payload)
|
||||
case MothershipStreamV1EventType.text:
|
||||
return isValidTextPayload(payload)
|
||||
case MothershipStreamV1EventType.tool:
|
||||
return isValidToolPayload(payload)
|
||||
case MothershipStreamV1EventType.span:
|
||||
return isValidSpanPayload(payload)
|
||||
case MothershipStreamV1EventType.resource:
|
||||
return isValidResourcePayload(payload)
|
||||
case MothershipStreamV1EventType.run:
|
||||
return isValidRunPayload(payload)
|
||||
case MothershipStreamV1EventType.error:
|
||||
return isValidErrorPayload(payload)
|
||||
case MothershipStreamV1EventType.complete:
|
||||
return isValidCompletePayload(payload)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Synthetic file-preview envelope validators
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function isSyntheticEnvelopeBase(
|
||||
value: unknown
|
||||
): value is Omit<SyntheticFilePreviewEventEnvelope, 'payload'> & { payload?: unknown } {
|
||||
@@ -269,6 +387,10 @@ export function isSyntheticFilePreviewEventEnvelope(
|
||||
return isSyntheticEnvelopeBase(value) && isSyntheticFilePreviewPayload(value.payload)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Stream event type guards
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function isToolCallStreamEvent(event: SessionStreamEvent): event is ToolCallStreamEvent {
|
||||
return event.type === 'tool' && isRecord(event.payload) && event.payload.phase === 'call'
|
||||
}
|
||||
@@ -289,33 +411,40 @@ export function isSubagentSpanStreamEvent(
|
||||
return event.type === 'span' && isRecord(event.payload) && event.payload.kind === 'subagent'
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public contract validators & parsers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function isContractStreamEventEnvelope(
|
||||
value: unknown
|
||||
): value is MothershipStreamV1EventEnvelope {
|
||||
return getValidator()(value)
|
||||
return isContractEnvelope(value)
|
||||
}
|
||||
|
||||
export function parsePersistedStreamEventEnvelope(value: unknown): ParseStreamEventEnvelopeResult {
|
||||
const envelopeValidator = getValidator()
|
||||
if (envelopeValidator(value)) {
|
||||
return {
|
||||
ok: true,
|
||||
event: value,
|
||||
}
|
||||
if (isContractEnvelope(value)) {
|
||||
return { ok: true, event: value }
|
||||
}
|
||||
|
||||
if (isSyntheticFilePreviewEventEnvelope(value)) {
|
||||
return {
|
||||
ok: true,
|
||||
event: value,
|
||||
}
|
||||
return { ok: true, event: value }
|
||||
}
|
||||
|
||||
const hints: string[] = []
|
||||
if (!isRecord(value)) {
|
||||
hints.push('value is not an object')
|
||||
} else {
|
||||
if (value.v !== 1) hints.push(`unexpected v=${JSON.stringify(value.v)}`)
|
||||
if (typeof value.type !== 'string') hints.push('missing type')
|
||||
else if (!KNOWN_EVENT_TYPES.has(value.type)) hints.push(`unknown type="${value.type}"`)
|
||||
if (!isRecord(value.payload)) hints.push('missing or invalid payload')
|
||||
}
|
||||
|
||||
return {
|
||||
ok: false,
|
||||
reason: 'invalid_stream_event',
|
||||
message: 'Stream event failed validation',
|
||||
errors: formatValidationErrors(envelopeValidator.errors),
|
||||
message: 'A stream event failed validation.',
|
||||
...(hints.length > 0 ? { errors: hints } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,10 +453,12 @@ export function parsePersistedStreamEventEnvelopeJson(raw: string): ParseStreamE
|
||||
try {
|
||||
parsed = JSON.parse(raw)
|
||||
} catch (error) {
|
||||
const rawMessage = error instanceof Error ? error.message : 'Invalid JSON'
|
||||
return {
|
||||
ok: false,
|
||||
reason: 'invalid_json',
|
||||
message: error instanceof Error ? error.message : 'Invalid JSON',
|
||||
message: 'Received invalid JSON while parsing a stream event.',
|
||||
...(rawMessage ? { errors: [rawMessage] } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -752,7 +752,7 @@ export function serializeIntegrationSchema(tool: ToolConfig): string {
|
||||
type: 'string',
|
||||
required: false,
|
||||
description:
|
||||
'Optional credential ID to use when multiple accounts are connected for this provider. Get IDs from environment/credentials.json. If omitted, auto-selects the first available credential.',
|
||||
'Credential ID to use for this OAuth tool call. For Copilot/Superagent execution, pass this explicitly. Get valid IDs from environment/credentials.json.',
|
||||
},
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -132,7 +132,6 @@ export const env = createEnv({
|
||||
TELEMETRY_ENDPOINT: z.string().url().optional(), // Custom telemetry/analytics endpoint
|
||||
COST_MULTIPLIER: z.number().optional(), // Multiplier for cost calculations
|
||||
LOG_LEVEL: z.enum(['DEBUG', 'INFO', 'WARN', 'ERROR']).optional(), // Minimum log level to display (defaults to ERROR in production, DEBUG in development)
|
||||
DRIZZLE_ODS_API_KEY: z.string().min(1).optional(), // OneDollarStats API key for analytics tracking
|
||||
PROFOUND_API_KEY: z.string().min(1).optional(), // Profound analytics API key
|
||||
PROFOUND_ENDPOINT: z.string().url().optional(), // Profound analytics endpoint
|
||||
|
||||
|
||||
@@ -29,38 +29,109 @@ export interface CSPDirectives {
|
||||
'object-src'?: string[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Static CSP sources shared between build-time and runtime.
|
||||
* Add new domains here — both paths pick them up automatically.
|
||||
*/
|
||||
const STATIC_SCRIPT_SRC = [
|
||||
"'self'",
|
||||
"'unsafe-inline'",
|
||||
'https://*.google.com',
|
||||
'https://apis.google.com',
|
||||
'https://challenges.cloudflare.com',
|
||||
...(isReactGrabEnabled ? ['https://unpkg.com'] : []),
|
||||
...(isHosted
|
||||
? [
|
||||
'https://www.googletagmanager.com',
|
||||
'https://www.google-analytics.com',
|
||||
'https://analytics.ahrefs.com',
|
||||
]
|
||||
: []),
|
||||
] as const
|
||||
|
||||
const STATIC_IMG_SRC = [
|
||||
"'self'",
|
||||
'data:',
|
||||
'blob:',
|
||||
'https://*.googleusercontent.com',
|
||||
'https://*.google.com',
|
||||
'https://*.atlassian.com',
|
||||
'https://cdn.discordapp.com',
|
||||
'https://*.githubusercontent.com',
|
||||
'https://*.s3.amazonaws.com',
|
||||
'https://s3.amazonaws.com',
|
||||
'https://*.amazonaws.com',
|
||||
'https://*.blob.core.windows.net',
|
||||
'https://github.com/*',
|
||||
'https://cursor.com',
|
||||
...(isHosted ? ['https://www.googletagmanager.com', 'https://www.google-analytics.com'] : []),
|
||||
] as const
|
||||
|
||||
const STATIC_CONNECT_SRC = [
|
||||
"'self'",
|
||||
'https://api.browser-use.com',
|
||||
'https://api.elevenlabs.io',
|
||||
'wss://api.elevenlabs.io',
|
||||
'https://api.exa.ai',
|
||||
'https://api.firecrawl.dev',
|
||||
'https://*.googleapis.com',
|
||||
'https://*.amazonaws.com',
|
||||
'https://*.s3.amazonaws.com',
|
||||
'https://*.blob.core.windows.net',
|
||||
'https://*.atlassian.com',
|
||||
'https://*.supabase.co',
|
||||
'https://api.github.com',
|
||||
'https://github.com/*',
|
||||
'https://challenges.cloudflare.com',
|
||||
...(isHosted
|
||||
? [
|
||||
'https://www.googletagmanager.com',
|
||||
'https://*.google-analytics.com',
|
||||
'https://*.analytics.google.com',
|
||||
'https://analytics.google.com',
|
||||
'https://www.google.com',
|
||||
]
|
||||
: []),
|
||||
] as const
|
||||
|
||||
const STATIC_FRAME_SRC = [
|
||||
"'self'",
|
||||
'https://challenges.cloudflare.com',
|
||||
'https://drive.google.com',
|
||||
'https://docs.google.com',
|
||||
'https://*.google.com',
|
||||
'https://www.youtube.com',
|
||||
'https://player.vimeo.com',
|
||||
'https://www.dailymotion.com',
|
||||
'https://player.twitch.tv',
|
||||
'https://clips.twitch.tv',
|
||||
'https://streamable.com',
|
||||
'https://fast.wistia.net',
|
||||
'https://www.tiktok.com',
|
||||
'https://w.soundcloud.com',
|
||||
'https://open.spotify.com',
|
||||
'https://embed.music.apple.com',
|
||||
'https://www.loom.com',
|
||||
'https://www.facebook.com',
|
||||
'https://www.instagram.com',
|
||||
'https://platform.twitter.com',
|
||||
'https://rumble.com',
|
||||
'https://play.vidyard.com',
|
||||
'https://iframe.cloudflarestream.com',
|
||||
'https://www.mixcloud.com',
|
||||
'https://tenor.com',
|
||||
'https://giphy.com',
|
||||
...(isHosted ? ['https://www.googletagmanager.com'] : []),
|
||||
] as const
|
||||
|
||||
// Build-time CSP directives (for next.config.ts)
|
||||
export const buildTimeCSPDirectives: CSPDirectives = {
|
||||
'default-src': ["'self'"],
|
||||
|
||||
'script-src': [
|
||||
"'self'",
|
||||
"'unsafe-inline'",
|
||||
"'unsafe-eval'",
|
||||
'https://*.google.com',
|
||||
'https://apis.google.com',
|
||||
'https://assets.onedollarstats.com',
|
||||
'https://challenges.cloudflare.com',
|
||||
...(isReactGrabEnabled ? ['https://unpkg.com'] : []),
|
||||
...(isHosted ? ['https://www.googletagmanager.com', 'https://www.google-analytics.com'] : []),
|
||||
],
|
||||
|
||||
'script-src': [...STATIC_SCRIPT_SRC],
|
||||
'style-src': ["'self'", "'unsafe-inline'", 'https://fonts.googleapis.com'],
|
||||
|
||||
'img-src': [
|
||||
"'self'",
|
||||
'data:',
|
||||
'blob:',
|
||||
'https://*.googleusercontent.com',
|
||||
'https://*.google.com',
|
||||
'https://*.atlassian.com',
|
||||
'https://cdn.discordapp.com',
|
||||
'https://*.githubusercontent.com',
|
||||
'https://*.s3.amazonaws.com',
|
||||
'https://s3.amazonaws.com',
|
||||
'https://github.com/*',
|
||||
'https://collector.onedollarstats.com',
|
||||
...(isHosted ? ['https://www.googletagmanager.com', 'https://www.google-analytics.com'] : []),
|
||||
...STATIC_IMG_SRC,
|
||||
...(env.S3_BUCKET_NAME && env.AWS_REGION
|
||||
? [`https://${env.S3_BUCKET_NAME}.s3.${env.AWS_REGION}.amazonaws.com`]
|
||||
: []),
|
||||
@@ -70,21 +141,16 @@ export const buildTimeCSPDirectives: CSPDirectives = {
|
||||
...(env.S3_CHAT_BUCKET_NAME && env.AWS_REGION
|
||||
? [`https://${env.S3_CHAT_BUCKET_NAME}.s3.${env.AWS_REGION}.amazonaws.com`]
|
||||
: []),
|
||||
'https://*.amazonaws.com',
|
||||
'https://*.blob.core.windows.net',
|
||||
'https://github.com/*',
|
||||
...getHostnameFromUrl(env.NEXT_PUBLIC_BRAND_LOGO_URL),
|
||||
...getHostnameFromUrl(env.NEXT_PUBLIC_BRAND_FAVICON_URL),
|
||||
],
|
||||
|
||||
'media-src': ["'self'", 'blob:'],
|
||||
|
||||
'font-src': ["'self'", 'https://fonts.gstatic.com'],
|
||||
|
||||
'connect-src': [
|
||||
"'self'",
|
||||
...STATIC_CONNECT_SRC,
|
||||
env.NEXT_PUBLIC_APP_URL || '',
|
||||
// Only include localhost fallbacks in development mode
|
||||
...(env.OLLAMA_URL ? [env.OLLAMA_URL] : isDev ? ['http://localhost:11434'] : []),
|
||||
...(env.NEXT_PUBLIC_SOCKET_URL
|
||||
? [
|
||||
@@ -94,42 +160,12 @@ export const buildTimeCSPDirectives: CSPDirectives = {
|
||||
: isDev
|
||||
? ['http://localhost:3002', 'ws://localhost:3002']
|
||||
: []),
|
||||
'https://api.browser-use.com',
|
||||
'https://api.elevenlabs.io',
|
||||
'wss://api.elevenlabs.io',
|
||||
'https://api.exa.ai',
|
||||
'https://api.firecrawl.dev',
|
||||
'https://*.googleapis.com',
|
||||
'https://*.amazonaws.com',
|
||||
'https://*.s3.amazonaws.com',
|
||||
'https://*.blob.core.windows.net',
|
||||
'https://*.atlassian.com',
|
||||
'https://*.supabase.co',
|
||||
'https://api.github.com',
|
||||
'https://github.com/*',
|
||||
'https://challenges.cloudflare.com',
|
||||
'https://collector.onedollarstats.com',
|
||||
...(isHosted
|
||||
? [
|
||||
'https://www.googletagmanager.com',
|
||||
'https://*.google-analytics.com',
|
||||
'https://*.analytics.google.com',
|
||||
]
|
||||
: []),
|
||||
...getHostnameFromUrl(env.NEXT_PUBLIC_BRAND_LOGO_URL),
|
||||
...getHostnameFromUrl(env.NEXT_PUBLIC_PRIVACY_URL),
|
||||
...getHostnameFromUrl(env.NEXT_PUBLIC_TERMS_URL),
|
||||
],
|
||||
|
||||
'frame-src': [
|
||||
"'self'",
|
||||
'https://challenges.cloudflare.com',
|
||||
'https://drive.google.com',
|
||||
'https://docs.google.com',
|
||||
'https://*.google.com',
|
||||
...(isHosted ? ['https://www.googletagmanager.com'] : []),
|
||||
],
|
||||
|
||||
'frame-src': [...STATIC_FRAME_SRC],
|
||||
'frame-ancestors': ["'self'"],
|
||||
'form-action': ["'self'"],
|
||||
'base-uri': ["'self'"],
|
||||
@@ -152,13 +188,14 @@ export function buildCSPString(directives: CSPDirectives): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate runtime CSP header with dynamic environment variables (safer approach)
|
||||
* This maintains compatibility with existing inline scripts while fixing Docker env var issues
|
||||
* Generate runtime CSP header with dynamic environment variables.
|
||||
* Composes from the same STATIC_* constants as buildTimeCSPDirectives,
|
||||
* but resolves env vars at request time via getEnv() to fix Docker
|
||||
* deployments where build-time values may be stale placeholders.
|
||||
*/
|
||||
export function generateRuntimeCSP(): string {
|
||||
const appUrl = getEnv('NEXT_PUBLIC_APP_URL') || ''
|
||||
|
||||
// Only include localhost URLs in development or when explicitly configured
|
||||
const socketUrl = getEnv('NEXT_PUBLIC_SOCKET_URL') || (isDev ? 'http://localhost:3002' : '')
|
||||
const socketWsUrl = socketUrl
|
||||
? socketUrl.replace('http://', 'ws://').replace('https://', 'wss://')
|
||||
@@ -172,42 +209,24 @@ export function generateRuntimeCSP(): string {
|
||||
const privacyDomains = getHostnameFromUrl(getEnv('NEXT_PUBLIC_PRIVACY_URL'))
|
||||
const termsDomains = getHostnameFromUrl(getEnv('NEXT_PUBLIC_TERMS_URL'))
|
||||
|
||||
const allDynamicDomains = [
|
||||
...brandLogoDomains,
|
||||
...brandFaviconDomains,
|
||||
...privacyDomains,
|
||||
...termsDomains,
|
||||
]
|
||||
const uniqueDynamicDomains = Array.from(new Set(allDynamicDomains))
|
||||
const dynamicDomainsStr = uniqueDynamicDomains.join(' ')
|
||||
const brandLogoDomain = brandLogoDomains[0] || ''
|
||||
const brandFaviconDomain = brandFaviconDomains[0] || ''
|
||||
const reactGrabScript = isReactGrabEnabled ? 'https://unpkg.com' : ''
|
||||
const gtmScript = isHosted
|
||||
? 'https://www.googletagmanager.com https://www.google-analytics.com'
|
||||
: ''
|
||||
const gtmConnect = isHosted
|
||||
? 'https://www.googletagmanager.com https://*.google-analytics.com https://*.analytics.google.com'
|
||||
: ''
|
||||
const gtmImg = isHosted ? 'https://www.googletagmanager.com https://www.google-analytics.com' : ''
|
||||
const gtmFrame = isHosted ? 'https://www.googletagmanager.com' : ''
|
||||
const runtimeDirectives: CSPDirectives = {
|
||||
...buildTimeCSPDirectives,
|
||||
|
||||
return `
|
||||
default-src 'self';
|
||||
script-src 'self' 'unsafe-inline' 'unsafe-eval' https://*.google.com https://apis.google.com https://assets.onedollarstats.com https://challenges.cloudflare.com ${reactGrabScript} ${gtmScript};
|
||||
style-src 'self' 'unsafe-inline' https://fonts.googleapis.com;
|
||||
img-src 'self' data: blob: https://*.googleusercontent.com https://*.google.com https://*.atlassian.com https://cdn.discordapp.com https://*.githubusercontent.com https://*.s3.amazonaws.com https://s3.amazonaws.com https://*.amazonaws.com https://*.blob.core.windows.net https://github.com/* https://collector.onedollarstats.com ${gtmImg} ${brandLogoDomain} ${brandFaviconDomain};
|
||||
media-src 'self' blob:;
|
||||
font-src 'self' https://fonts.gstatic.com;
|
||||
connect-src 'self' ${appUrl} ${ollamaUrl} ${socketUrl} ${socketWsUrl} https://api.browser-use.com https://api.elevenlabs.io wss://api.elevenlabs.io https://api.exa.ai https://api.firecrawl.dev https://*.googleapis.com https://*.amazonaws.com https://*.s3.amazonaws.com https://*.blob.core.windows.net https://api.github.com https://github.com/* https://*.atlassian.com https://*.supabase.co https://challenges.cloudflare.com https://collector.onedollarstats.com ${gtmConnect} ${dynamicDomainsStr};
|
||||
frame-src 'self' https://challenges.cloudflare.com https://drive.google.com https://docs.google.com https://*.google.com ${gtmFrame};
|
||||
frame-ancestors 'self';
|
||||
form-action 'self';
|
||||
base-uri 'self';
|
||||
object-src 'none';
|
||||
`
|
||||
.replace(/\s{2,}/g, ' ')
|
||||
.trim()
|
||||
'img-src': [...STATIC_IMG_SRC, ...brandLogoDomains, ...brandFaviconDomains],
|
||||
|
||||
'connect-src': [
|
||||
...STATIC_CONNECT_SRC,
|
||||
appUrl,
|
||||
ollamaUrl,
|
||||
socketUrl,
|
||||
socketWsUrl,
|
||||
...brandLogoDomains,
|
||||
...privacyDomains,
|
||||
...termsDomains,
|
||||
],
|
||||
}
|
||||
|
||||
return buildCSPString(runtimeDirectives)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -338,10 +338,10 @@ const nextConfig: NextConfig = {
|
||||
],
|
||||
},
|
||||
// Apply security headers to routes not handled by middleware runtime CSP
|
||||
// Middleware handles: /, /workspace/*
|
||||
// Middleware handles: /, /login, /signup, /workspace/*
|
||||
// Exclude chat and form routes which have their own permissive embed headers
|
||||
{
|
||||
source: '/((?!workspace|chat|form).*)',
|
||||
source: '/((?!workspace|chat|form|login|signup|$).*)',
|
||||
headers: [
|
||||
{
|
||||
key: 'X-Content-Type-Options',
|
||||
|
||||
@@ -155,7 +155,6 @@
|
||||
"next-themes": "^0.4.6",
|
||||
"nodemailer": "7.0.11",
|
||||
"officeparser": "^5.2.0",
|
||||
"onedollarstats": "0.0.10",
|
||||
"openai": "^4.91.1",
|
||||
"papaparse": "5.5.3",
|
||||
"pdf-lib": "1.17.1",
|
||||
|
||||
@@ -29,6 +29,7 @@ import type { FunctionCallResponse, ProviderRequest, ProviderResponse } from '@/
|
||||
import {
|
||||
calculateCost,
|
||||
isDeepResearchModel,
|
||||
isGemini3Model,
|
||||
prepareToolExecution,
|
||||
prepareToolsWithUsageControl,
|
||||
sumToolCosts,
|
||||
@@ -295,7 +296,8 @@ function buildNextConfig(
|
||||
state: ExecutionState,
|
||||
forcedTools: string[],
|
||||
request: ProviderRequest,
|
||||
logger: ReturnType<typeof createLogger>
|
||||
logger: ReturnType<typeof createLogger>,
|
||||
model: string
|
||||
): GenerateContentConfig {
|
||||
const nextConfig = { ...baseConfig }
|
||||
const allForcedToolsUsed =
|
||||
@@ -304,9 +306,13 @@ function buildNextConfig(
|
||||
if (allForcedToolsUsed && request.responseFormat) {
|
||||
nextConfig.tools = undefined
|
||||
nextConfig.toolConfig = undefined
|
||||
nextConfig.responseMimeType = 'application/json'
|
||||
nextConfig.responseSchema = cleanSchemaForGemini(request.responseFormat.schema) as Schema
|
||||
logger.info('Using structured output for final response after tool execution')
|
||||
if (isGemini3Model(model)) {
|
||||
logger.info('Gemini 3: Stripping tools after forced tool execution, schema already set')
|
||||
} else {
|
||||
nextConfig.responseMimeType = 'application/json'
|
||||
nextConfig.responseSchema = cleanSchemaForGemini(request.responseFormat.schema) as Schema
|
||||
logger.info('Using structured output for final response after tool execution')
|
||||
}
|
||||
} else if (state.currentToolConfig) {
|
||||
nextConfig.toolConfig = state.currentToolConfig
|
||||
} else {
|
||||
@@ -921,13 +927,19 @@ export async function executeGeminiRequest(
|
||||
geminiConfig.systemInstruction = systemInstruction
|
||||
}
|
||||
|
||||
// Handle response format (only when no tools)
|
||||
// Handle response format
|
||||
if (request.responseFormat && !tools?.length) {
|
||||
geminiConfig.responseMimeType = 'application/json'
|
||||
geminiConfig.responseSchema = cleanSchemaForGemini(request.responseFormat.schema) as Schema
|
||||
logger.info('Using Gemini native structured output format')
|
||||
} else if (request.responseFormat && tools?.length && isGemini3Model(model)) {
|
||||
geminiConfig.responseMimeType = 'application/json'
|
||||
geminiConfig.responseJsonSchema = request.responseFormat.schema
|
||||
logger.info('Using Gemini 3 structured output with tools (responseJsonSchema)')
|
||||
} else if (request.responseFormat && tools?.length) {
|
||||
logger.warn('Gemini does not support responseFormat with tools. Structured output ignored.')
|
||||
logger.warn(
|
||||
'Gemini 2 does not support responseFormat with tools. Structured output will be applied after tool execution.'
|
||||
)
|
||||
}
|
||||
|
||||
// Configure thinking only when the user explicitly selects a thinking level
|
||||
@@ -1099,7 +1111,7 @@ export async function executeGeminiRequest(
|
||||
}
|
||||
|
||||
state = { ...updatedState, iterationCount: updatedState.iterationCount + 1 }
|
||||
const nextConfig = buildNextConfig(geminiConfig, state, forcedTools, request, logger)
|
||||
const nextConfig = buildNextConfig(geminiConfig, state, forcedTools, request, logger, model)
|
||||
|
||||
// Stream final response if requested
|
||||
if (request.stream) {
|
||||
@@ -1120,10 +1132,12 @@ export async function executeGeminiRequest(
|
||||
if (request.responseFormat) {
|
||||
nextConfig.tools = undefined
|
||||
nextConfig.toolConfig = undefined
|
||||
nextConfig.responseMimeType = 'application/json'
|
||||
nextConfig.responseSchema = cleanSchemaForGemini(
|
||||
request.responseFormat.schema
|
||||
) as Schema
|
||||
if (!isGemini3Model(model)) {
|
||||
nextConfig.responseMimeType = 'application/json'
|
||||
nextConfig.responseSchema = cleanSchemaForGemini(
|
||||
request.responseFormat.schema
|
||||
) as Schema
|
||||
}
|
||||
}
|
||||
|
||||
// Capture accumulated cost before streaming
|
||||
|
||||
@@ -1064,6 +1064,11 @@ export function isDeepResearchModel(model: string): boolean {
|
||||
return MODELS_WITH_DEEP_RESEARCH.includes(model.toLowerCase())
|
||||
}
|
||||
|
||||
export function isGemini3Model(model: string): boolean {
|
||||
const normalized = model.toLowerCase().replace(/^vertex\//, '')
|
||||
return normalized.startsWith('gemini-3')
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the maximum temperature value for a model
|
||||
* @returns Maximum temperature value (1 or 2) or undefined if temperature not supported
|
||||
|
||||
@@ -154,6 +154,8 @@ export async function proxy(request: NextRequest) {
|
||||
}
|
||||
const response = NextResponse.next()
|
||||
response.headers.set('Content-Security-Policy', generateRuntimeCSP())
|
||||
response.headers.set('X-Content-Type-Options', 'nosniff')
|
||||
response.headers.set('X-Frame-Options', 'SAMEORIGIN')
|
||||
return track(request, response)
|
||||
}
|
||||
|
||||
@@ -176,7 +178,11 @@ export async function proxy(request: NextRequest) {
|
||||
if (!hasActiveSession) {
|
||||
return track(request, NextResponse.redirect(new URL('/login', request.url)))
|
||||
}
|
||||
return track(request, NextResponse.next())
|
||||
const response = NextResponse.next()
|
||||
response.headers.set('Content-Security-Policy', generateRuntimeCSP())
|
||||
response.headers.set('X-Content-Type-Options', 'nosniff')
|
||||
response.headers.set('X-Frame-Options', 'SAMEORIGIN')
|
||||
return track(request, response)
|
||||
}
|
||||
|
||||
const invitationRedirect = handleInvitationRedirects(request, hasActiveSession)
|
||||
@@ -191,8 +197,10 @@ export async function proxy(request: NextRequest) {
|
||||
const response = NextResponse.next()
|
||||
response.headers.set('Vary', 'User-Agent')
|
||||
|
||||
if (url.pathname.startsWith('/workspace') || url.pathname === '/') {
|
||||
if (url.pathname === '/') {
|
||||
response.headers.set('Content-Security-Policy', generateRuntimeCSP())
|
||||
response.headers.set('X-Content-Type-Options', 'nosniff')
|
||||
response.headers.set('X-Frame-Options', 'SAMEORIGIN')
|
||||
}
|
||||
|
||||
return track(request, response)
|
||||
|
||||
@@ -25,6 +25,7 @@ export const useLogDetailsUIStore = create<LogDetailsUIState>()(
|
||||
}),
|
||||
{
|
||||
name: 'log-details-ui-state',
|
||||
partialize: (state) => ({ panelWidth: state.panelWidth }),
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
68
apps/sim/tools/brightdata/cancel_snapshot.ts
Normal file
68
apps/sim/tools/brightdata/cancel_snapshot.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import type {
|
||||
BrightDataCancelSnapshotParams,
|
||||
BrightDataCancelSnapshotResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataCancelSnapshotTool: ToolConfig<
|
||||
BrightDataCancelSnapshotParams,
|
||||
BrightDataCancelSnapshotResponse
|
||||
> = {
|
||||
id: 'brightdata_cancel_snapshot',
|
||||
name: 'Bright Data Cancel Snapshot',
|
||||
description:
|
||||
'Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'The snapshot ID of the collection to cancel (e.g., "s_m4x7enmven8djfqak")',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'POST',
|
||||
url: (params) =>
|
||||
`https://api.brightdata.com/datasets/v3/snapshot/${params.snapshotId?.trim()}/cancel`,
|
||||
headers: (params) => ({
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Cancel snapshot failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const data = (await response.json().catch(() => null)) as Record<string, unknown> | null
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
snapshotId: (data?.snapshot_id as string) ?? null,
|
||||
cancelled: true,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
description: 'The snapshot ID that was cancelled',
|
||||
optional: true,
|
||||
},
|
||||
cancelled: {
|
||||
type: 'boolean',
|
||||
description: 'Whether the cancellation was successful',
|
||||
},
|
||||
},
|
||||
}
|
||||
158
apps/sim/tools/brightdata/discover.ts
Normal file
158
apps/sim/tools/brightdata/discover.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataDiscoverTool: ToolConfig<
|
||||
BrightDataDiscoverParams,
|
||||
BrightDataDiscoverResponse
|
||||
> = {
|
||||
id: 'brightdata_discover',
|
||||
name: 'Bright Data Discover',
|
||||
description:
|
||||
'AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
query: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'The search query (e.g., "competitor pricing changes enterprise plan")',
|
||||
},
|
||||
numResults: {
|
||||
type: 'number',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Number of results to return, up to 1000. Defaults to 10',
|
||||
},
|
||||
intent: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'Describes what the agent is trying to accomplish, used to rank results by relevance (e.g., "find official pricing pages and change notes")',
|
||||
},
|
||||
includeContent: {
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Whether to include cleaned page content in results',
|
||||
},
|
||||
format: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Response format: "json" or "markdown". Defaults to "json"',
|
||||
},
|
||||
language: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Search language code (e.g., "en", "es", "fr"). Defaults to "en"',
|
||||
},
|
||||
country: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Two-letter ISO country code for localized results (e.g., "us", "gb")',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'POST',
|
||||
url: 'https://api.brightdata.com/discover',
|
||||
headers: (params) => ({
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
body: (params) => {
|
||||
const body: Record<string, unknown> = {
|
||||
query: params.query,
|
||||
}
|
||||
if (params.numResults) body.num_results = params.numResults
|
||||
if (params.intent) body.intent = params.intent
|
||||
if (params.includeContent != null) body.include_content = params.includeContent
|
||||
if (params.format) body.format = params.format
|
||||
if (params.language) body.language = params.language
|
||||
if (params.country) body.country = params.country
|
||||
return body
|
||||
},
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Discover request failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
let results: Array<{
|
||||
url: string | null
|
||||
title: string | null
|
||||
description: string | null
|
||||
relevanceScore: number | null
|
||||
content: string | null
|
||||
}> = []
|
||||
|
||||
const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? [])
|
||||
|
||||
if (Array.isArray(items)) {
|
||||
results = items.map((item: Record<string, unknown>) => ({
|
||||
url: (item.link as string) ?? (item.url as string) ?? null,
|
||||
title: (item.title as string) ?? null,
|
||||
description: (item.description as string) ?? (item.snippet as string) ?? null,
|
||||
relevanceScore: (item.relevance_score as number) ?? null,
|
||||
content:
|
||||
(item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? null,
|
||||
}))
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
results,
|
||||
query: null,
|
||||
totalResults: results.length,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
results: {
|
||||
type: 'array',
|
||||
description: 'Array of discovered web results ranked by intent relevance',
|
||||
items: {
|
||||
type: 'object',
|
||||
description: 'A discovered result',
|
||||
properties: {
|
||||
url: { type: 'string', description: 'URL of the discovered page', optional: true },
|
||||
title: { type: 'string', description: 'Page title', optional: true },
|
||||
description: {
|
||||
type: 'string',
|
||||
description: 'Page description or snippet',
|
||||
optional: true,
|
||||
},
|
||||
relevanceScore: {
|
||||
type: 'number',
|
||||
description: 'AI-calculated relevance score for intent-based ranking',
|
||||
optional: true,
|
||||
},
|
||||
content: {
|
||||
type: 'string',
|
||||
description:
|
||||
'Cleaned page content in the requested format (when includeContent is true)',
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
query: { type: 'string', description: 'The search query that was executed', optional: true },
|
||||
totalResults: { type: 'number', description: 'Total number of results returned' },
|
||||
},
|
||||
}
|
||||
116
apps/sim/tools/brightdata/download_snapshot.ts
Normal file
116
apps/sim/tools/brightdata/download_snapshot.ts
Normal file
@@ -0,0 +1,116 @@
|
||||
import type {
|
||||
BrightDataDownloadSnapshotParams,
|
||||
BrightDataDownloadSnapshotResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataDownloadSnapshotTool: ToolConfig<
|
||||
BrightDataDownloadSnapshotParams,
|
||||
BrightDataDownloadSnapshotResponse
|
||||
> = {
|
||||
id: 'brightdata_download_snapshot',
|
||||
name: 'Bright Data Download Snapshot',
|
||||
description:
|
||||
'Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'The snapshot ID returned when the collection was triggered (e.g., "s_m4x7enmven8djfqak")',
|
||||
},
|
||||
format: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Output format: "json", "ndjson", "jsonl", or "csv". Defaults to "json"',
|
||||
},
|
||||
compress: {
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Whether to compress the results',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'GET',
|
||||
url: (params) => {
|
||||
const queryParams = new URLSearchParams()
|
||||
if (params.format) queryParams.set('format', params.format)
|
||||
if (params.compress) queryParams.set('compress', 'true')
|
||||
const qs = queryParams.toString()
|
||||
return `https://api.brightdata.com/datasets/v3/snapshot/${params.snapshotId?.trim()}${qs ? `?${qs}` : ''}`
|
||||
},
|
||||
headers: (params) => ({
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
if (response.status === 409) {
|
||||
throw new Error(
|
||||
'Snapshot is not ready for download. Check the snapshot status first and wait until it is "ready".'
|
||||
)
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Snapshot download failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const contentType = response.headers.get('content-type') || ''
|
||||
let data: Array<Record<string, unknown>>
|
||||
|
||||
if (contentType.includes('application/json')) {
|
||||
const parsed = await response.json()
|
||||
data = Array.isArray(parsed) ? parsed : [parsed]
|
||||
} else {
|
||||
const text = await response.text()
|
||||
try {
|
||||
const parsed = JSON.parse(text)
|
||||
data = Array.isArray(parsed) ? parsed : [parsed]
|
||||
} catch {
|
||||
data = [{ raw: text }]
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
data,
|
||||
format: contentType,
|
||||
snapshotId: (data[0]?.snapshot_id as string) ?? null,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
data: {
|
||||
type: 'array',
|
||||
description: 'Array of scraped result records',
|
||||
items: {
|
||||
type: 'json',
|
||||
description: 'A scraped record with dataset-specific fields',
|
||||
},
|
||||
},
|
||||
format: {
|
||||
type: 'string',
|
||||
description: 'The content type of the downloaded data',
|
||||
},
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
description: 'The snapshot ID that was downloaded',
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
19
apps/sim/tools/brightdata/index.ts
Normal file
19
apps/sim/tools/brightdata/index.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
 * Barrel module for the Bright Data tool integrations.
 *
 * Re-exports every Bright Data ToolConfig from a single path
 * ('@/tools/brightdata') so the central tool registry can import them
 * without referencing individual files.
 */
import { brightDataCancelSnapshotTool } from '@/tools/brightdata/cancel_snapshot'
import { brightDataDiscoverTool } from '@/tools/brightdata/discover'
import { brightDataDownloadSnapshotTool } from '@/tools/brightdata/download_snapshot'
import { brightDataScrapeDatasetTool } from '@/tools/brightdata/scrape_dataset'
import { brightDataScrapeUrlTool } from '@/tools/brightdata/scrape_url'
import { brightDataSerpSearchTool } from '@/tools/brightdata/serp_search'
import { brightDataSnapshotStatusTool } from '@/tools/brightdata/snapshot_status'
import { brightDataSyncScrapeTool } from '@/tools/brightdata/sync_scrape'

export {
  brightDataCancelSnapshotTool,
  brightDataDiscoverTool,
  brightDataDownloadSnapshotTool,
  brightDataScrapeDatasetTool,
  brightDataScrapeUrlTool,
  brightDataSerpSearchTool,
  brightDataSnapshotStatusTool,
  brightDataSyncScrapeTool,
}
|
||||
97
apps/sim/tools/brightdata/scrape_dataset.ts
Normal file
97
apps/sim/tools/brightdata/scrape_dataset.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import type {
|
||||
BrightDataScrapeDatasetParams,
|
||||
BrightDataScrapeDatasetResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataScrapeDatasetTool: ToolConfig<
|
||||
BrightDataScrapeDatasetParams,
|
||||
BrightDataScrapeDatasetResponse
|
||||
> = {
|
||||
id: 'brightdata_scrape_dataset',
|
||||
name: 'Bright Data Scrape Dataset',
|
||||
description:
|
||||
'Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
datasetId: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'Dataset scraper ID from your Bright Data dashboard (e.g., "gd_l1viktl72bvl7bjuj0")',
|
||||
},
|
||||
urls: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'JSON array of URL objects to scrape (e.g., [{"url": "https://example.com/product"}])',
|
||||
},
|
||||
format: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Output format: "json" or "csv". Defaults to "json"',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'POST',
|
||||
url: (params) => {
|
||||
const queryParams = new URLSearchParams()
|
||||
queryParams.set('dataset_id', params.datasetId)
|
||||
queryParams.set('format', params.format || 'json')
|
||||
return `https://api.brightdata.com/datasets/v3/trigger?${queryParams.toString()}`
|
||||
},
|
||||
headers: (params) => ({
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
body: (params) => {
|
||||
if (typeof params.urls === 'string') {
|
||||
try {
|
||||
return JSON.parse(params.urls)
|
||||
} catch {
|
||||
return [{ url: params.urls }]
|
||||
}
|
||||
}
|
||||
return params.urls
|
||||
},
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Dataset trigger failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
snapshotId: data.snapshot_id ?? data.snapshotId ?? '',
|
||||
status: data.status ?? 'triggered',
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
description: 'The snapshot ID to retrieve results later',
|
||||
},
|
||||
status: {
|
||||
type: 'string',
|
||||
description: 'Status of the scraping job (e.g., "triggered", "running")',
|
||||
},
|
||||
},
|
||||
}
|
||||
103
apps/sim/tools/brightdata/scrape_url.ts
Normal file
103
apps/sim/tools/brightdata/scrape_url.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import type {
|
||||
BrightDataScrapeUrlParams,
|
||||
BrightDataScrapeUrlResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataScrapeUrlTool: ToolConfig<
|
||||
BrightDataScrapeUrlParams,
|
||||
BrightDataScrapeUrlResponse
|
||||
> = {
|
||||
id: 'brightdata_scrape_url',
|
||||
name: 'Bright Data Scrape URL',
|
||||
description:
|
||||
'Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
zone: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Web Unlocker zone name from your Bright Data dashboard (e.g., "web_unlocker1")',
|
||||
},
|
||||
url: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'The URL to scrape (e.g., "https://example.com/page")',
|
||||
},
|
||||
format: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'Response format: "raw" for HTML or "json" for parsed content. Defaults to "raw"',
|
||||
},
|
||||
country: {
|
||||
type: 'string',
|
||||
required: false,
|
||||
visibility: 'user-or-llm',
|
||||
description: 'Two-letter country code for geo-targeting (e.g., "us", "gb")',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'POST',
|
||||
url: 'https://api.brightdata.com/request',
|
||||
headers: (params) => ({
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
body: (params) => {
|
||||
const body: Record<string, unknown> = {
|
||||
zone: params.zone,
|
||||
url: params.url,
|
||||
format: params.format || 'raw',
|
||||
}
|
||||
if (params.country) body.country = params.country
|
||||
return body
|
||||
},
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
const contentType = response.headers.get('content-type') || ''
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Request failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
let content: string
|
||||
if (contentType.includes('application/json')) {
|
||||
const data = await response.json()
|
||||
content = typeof data === 'string' ? data : JSON.stringify(data)
|
||||
} else {
|
||||
content = await response.text()
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
content,
|
||||
url: null,
|
||||
statusCode: response.status,
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
content: {
|
||||
type: 'string',
|
||||
description: 'The scraped page content (HTML or JSON depending on format)',
|
||||
},
|
||||
url: { type: 'string', description: 'The URL that was scraped', optional: true },
|
||||
statusCode: { type: 'number', description: 'HTTP status code of the response', optional: true },
|
||||
},
|
||||
}
|
||||
214
apps/sim/tools/brightdata/serp_search.ts
Normal file
214
apps/sim/tools/brightdata/serp_search.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
import type {
|
||||
BrightDataSerpSearchParams,
|
||||
BrightDataSerpSearchResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
const SEARCH_ENGINE_CONFIG: Record<
|
||||
string,
|
||||
{ url: string; queryKey: string; numKey: string; langKey: string; countryKey: string }
|
||||
> = {
|
||||
google: {
|
||||
url: 'https://www.google.com/search',
|
||||
queryKey: 'q',
|
||||
numKey: 'num',
|
||||
langKey: 'hl',
|
||||
countryKey: 'gl',
|
||||
},
|
||||
bing: {
|
||||
url: 'https://www.bing.com/search',
|
||||
queryKey: 'q',
|
||||
numKey: 'count',
|
||||
langKey: 'setLang',
|
||||
countryKey: 'cc',
|
||||
},
|
||||
duckduckgo: {
|
||||
url: 'https://duckduckgo.com/',
|
||||
queryKey: 'q',
|
||||
numKey: '',
|
||||
langKey: '',
|
||||
countryKey: '',
|
||||
},
|
||||
yandex: {
|
||||
url: 'https://yandex.com/search/',
|
||||
queryKey: 'text',
|
||||
numKey: 'numdoc',
|
||||
langKey: 'lang',
|
||||
countryKey: '',
|
||||
},
|
||||
} as const
|
||||
|
||||
/**
 * Runs a search-engine query through Bright Data's SERP API.
 *
 * Builds the target engine's search URL from SEARCH_ENGINE_CONFIG, asks
 * Bright Data to fetch it with `brd_json=1` (structured parsing), and maps
 * the parsed organic results into a uniform {title, url, description, rank}
 * shape.
 */
export const brightDataSerpSearchTool: ToolConfig<
  BrightDataSerpSearchParams,
  BrightDataSerpSearchResponse
> = {
  id: 'brightdata_serp_search',
  name: 'Bright Data SERP Search',
  description:
    'Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API.',
  version: '1.0.0',

  params: {
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
    zone: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'SERP API zone name from your Bright Data dashboard (e.g., "serp_api1")',
    },
    query: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'The search query (e.g., "best project management tools")',
    },
    searchEngine: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description:
        'Search engine to use: "google", "bing", "duckduckgo", or "yandex". Defaults to "google"',
    },
    country: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Two-letter country code for localized results (e.g., "us", "gb")',
    },
    language: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Two-letter language code (e.g., "en", "es")',
    },
    numResults: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Number of results to return (e.g., 10, 20). Defaults to 10',
    },
  },

  request: {
    method: 'POST',
    url: 'https://api.brightdata.com/request',
    headers: (params) => ({
      'Content-Type': 'application/json',
      Authorization: `Bearer ${params.apiKey}`,
    }),
    body: (params) => {
      // Unknown engine names fall back to Google's configuration.
      const engine = params.searchEngine || 'google'
      const config = SEARCH_ENGINE_CONFIG[engine] || SEARCH_ENGINE_CONFIG.google

      // Only set engine-specific keys the engine actually supports
      // (empty-string keys mean "no equivalent parameter").
      const searchParams = new URLSearchParams()
      searchParams.set(config.queryKey, params.query)
      if (params.numResults && config.numKey) {
        searchParams.set(config.numKey, String(params.numResults))
      }
      if (params.language && config.langKey) {
        searchParams.set(config.langKey, params.language)
      }
      if (params.country && config.countryKey) {
        searchParams.set(config.countryKey, params.country)
      }

      // Ask Bright Data to return a parsed/structured SERP instead of raw HTML.
      searchParams.set('brd_json', '1')

      const body: Record<string, unknown> = {
        zone: params.zone,
        url: `${config.url}?${searchParams.toString()}`,
        format: 'raw',
      }
      // Geo-target the proxy request itself, in addition to the engine param.
      if (params.country) body.country = params.country
      return body
    },
  },

  transformResponse: async (response: Response) => {
    if (!response.ok) {
      const errorText = await response.text()
      throw new Error(errorText || `SERP request failed with status ${response.status}`)
    }

    const contentType = response.headers.get('content-type') || ''
    let results: Array<{
      title: string | null
      url: string | null
      description: string | null
      rank: number | null
    }> = []
    let data: Record<string, unknown> | null = null

    if (contentType.includes('application/json')) {
      data = await response.json()

      // Preferred shape: { organic: [...] }. Some responses are a bare array.
      if (Array.isArray(data?.organic)) {
        results = data.organic.map((item: Record<string, unknown>, index: number) => ({
          title: (item.title as string) ?? null,
          url: (item.link as string) ?? (item.url as string) ?? null,
          description: (item.description as string) ?? (item.snippet as string) ?? null,
          rank: index + 1,
        }))
      } else if (Array.isArray(data)) {
        results = data.map((item: Record<string, unknown>, index: number) => ({
          title: (item.title as string) ?? null,
          url: (item.link as string) ?? (item.url as string) ?? null,
          description: (item.description as string) ?? (item.snippet as string) ?? null,
          rank: index + 1,
        }))
      }
    } else {
      // Non-JSON reply (parsing unavailable): surface a truncated raw preview
      // as a single pseudo-result rather than failing.
      const text = await response.text()
      results = [
        {
          title: 'Raw SERP Response',
          url: null,
          description: text.slice(0, 500),
          rank: 1,
        },
      ]
    }

    return {
      success: true,
      output: {
        results,
        // Echo back what the API reports it actually searched (may be null
        // when the response was not JSON).
        query: ((data?.general as Record<string, unknown> | undefined)?.query as string) ?? null,
        searchEngine:
          ((data?.general as Record<string, unknown> | undefined)?.search_engine as string) ?? null,
      },
    }
  },

  outputs: {
    results: {
      type: 'array',
      description: 'Array of search results',
      items: {
        type: 'object',
        description: 'A search result entry',
        properties: {
          title: { type: 'string', description: 'Title of the search result', optional: true },
          url: { type: 'string', description: 'URL of the search result', optional: true },
          description: {
            type: 'string',
            description: 'Snippet or description of the result',
            optional: true,
          },
          rank: { type: 'number', description: 'Position in search results', optional: true },
        },
      },
    },
    query: { type: 'string', description: 'The search query that was executed', optional: true },
    searchEngine: {
      type: 'string',
      description: 'The search engine that was used',
      optional: true,
    },
  },
}
|
||||
74
apps/sim/tools/brightdata/snapshot_status.ts
Normal file
74
apps/sim/tools/brightdata/snapshot_status.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import type {
|
||||
BrightDataSnapshotStatusParams,
|
||||
BrightDataSnapshotStatusResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
export const brightDataSnapshotStatusTool: ToolConfig<
|
||||
BrightDataSnapshotStatusParams,
|
||||
BrightDataSnapshotStatusResponse
|
||||
> = {
|
||||
id: 'brightdata_snapshot_status',
|
||||
name: 'Bright Data Snapshot Status',
|
||||
description:
|
||||
'Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed.',
|
||||
version: '1.0.0',
|
||||
|
||||
params: {
|
||||
apiKey: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-only',
|
||||
description: 'Bright Data API token',
|
||||
},
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
required: true,
|
||||
visibility: 'user-or-llm',
|
||||
description:
|
||||
'The snapshot ID returned when the collection was triggered (e.g., "s_m4x7enmven8djfqak")',
|
||||
},
|
||||
},
|
||||
|
||||
request: {
|
||||
method: 'GET',
|
||||
url: (params) => `https://api.brightdata.com/datasets/v3/progress/${params.snapshotId?.trim()}`,
|
||||
headers: (params) => ({
|
||||
Authorization: `Bearer ${params.apiKey}`,
|
||||
}),
|
||||
},
|
||||
|
||||
transformResponse: async (response: Response) => {
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(errorText || `Snapshot status check failed with status ${response.status}`)
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
snapshotId: data.snapshot_id ?? null,
|
||||
datasetId: data.dataset_id ?? null,
|
||||
status: data.status ?? 'unknown',
|
||||
},
|
||||
}
|
||||
},
|
||||
|
||||
outputs: {
|
||||
snapshotId: {
|
||||
type: 'string',
|
||||
description: 'The snapshot ID that was queried',
|
||||
},
|
||||
datasetId: {
|
||||
type: 'string',
|
||||
description: 'The dataset ID associated with this snapshot',
|
||||
optional: true,
|
||||
},
|
||||
status: {
|
||||
type: 'string',
|
||||
description: 'Current status of the snapshot: "starting", "running", "ready", or "failed"',
|
||||
},
|
||||
},
|
||||
}
|
||||
131
apps/sim/tools/brightdata/sync_scrape.ts
Normal file
131
apps/sim/tools/brightdata/sync_scrape.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
import type {
|
||||
BrightDataSyncScrapeParams,
|
||||
BrightDataSyncScrapeResponse,
|
||||
} from '@/tools/brightdata/types'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
|
||||
/**
 * Scrapes up to 20 URLs synchronously via a Bright Data pre-built scraper.
 *
 * POSTs to the datasets v3 `scrape` endpoint. If the job finishes within the
 * synchronous time budget (~1 minute), results are returned directly; if the
 * server answers 202 Accepted, the job has fallen back to async processing
 * and the caller must poll the returned snapshot ID instead.
 */
export const brightDataSyncScrapeTool: ToolConfig<
  BrightDataSyncScrapeParams,
  BrightDataSyncScrapeResponse
> = {
  id: 'brightdata_sync_scrape',
  name: 'Bright Data Sync Scrape',
  description:
    'Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout.',
  version: '1.0.0',

  params: {
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
    datasetId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description:
        'Dataset scraper ID from your Bright Data dashboard (e.g., "gd_l1viktl72bvl7bjuj0")',
    },
    urls: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description:
        'JSON array of URL objects to scrape, up to 20 (e.g., [{"url": "https://example.com/product"}])',
    },
    format: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Output format: "json", "ndjson", or "csv". Defaults to "json"',
    },
    includeErrors: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Whether to include error reports in results',
    },
  },

  request: {
    method: 'POST',
    // dataset_id / format / include_errors travel as query parameters;
    // the URL list goes in the JSON body.
    url: (params) => {
      const queryParams = new URLSearchParams()
      queryParams.set('dataset_id', params.datasetId)
      queryParams.set('format', params.format || 'json')
      if (params.includeErrors) queryParams.set('include_errors', 'true')
      return `https://api.brightdata.com/datasets/v3/scrape?${queryParams.toString()}`
    },
    headers: (params) => ({
      'Content-Type': 'application/json',
      Authorization: `Bearer ${params.apiKey}`,
    }),
    // The endpoint expects { input: [...] }. Accept a JSON string (array or
    // single object) or a plain URL string and normalize accordingly.
    body: (params) => {
      if (typeof params.urls === 'string') {
        try {
          const parsed = JSON.parse(params.urls)
          return { input: Array.isArray(parsed) ? parsed : [parsed] }
        } catch {
          // Not valid JSON — treat the raw string as a single URL.
          return { input: [{ url: params.urls }] }
        }
      }
      return { input: params.urls }
    },
  },

  transformResponse: async (response: Response) => {
    if (!response.ok) {
      const errorText = await response.text()
      throw new Error(errorText || `Sync scrape failed with status ${response.status}`)
    }

    // 202 Accepted: the job exceeded the synchronous window and switched to
    // async mode. Return the snapshot ID so callers can poll and download.
    if (response.status === 202) {
      const data = await response.json()
      return {
        success: true,
        output: {
          data: [],
          snapshotId: data.snapshot_id ?? null,
          isAsync: true,
        },
      }
    }

    // Synchronous completion: normalize a single-record reply into an array.
    const data = await response.json()
    const results = Array.isArray(data) ? data : [data]

    return {
      success: true,
      output: {
        data: results,
        snapshotId: null,
        isAsync: false,
      },
    }
  },

  outputs: {
    data: {
      type: 'array',
      description:
        'Array of scraped result objects with fields specific to the dataset scraper used',
      items: {
        type: 'json',
        description: 'A scraped record with dataset-specific fields',
      },
    },
    snapshotId: {
      type: 'string',
      description:
        'Snapshot ID returned if the request exceeded the 1-minute timeout and switched to async processing',
      optional: true,
    },
    isAsync: {
      type: 'boolean',
      description:
        'Whether the request fell back to async mode (true means use snapshot ID to retrieve results)',
    },
  },
}
|
||||
145
apps/sim/tools/brightdata/types.ts
Normal file
145
apps/sim/tools/brightdata/types.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
import type { ToolResponse } from '@/tools/types'
|
||||
|
||||
/** Parameters for the Web Unlocker single-URL scrape tool. */
export interface BrightDataScrapeUrlParams {
  apiKey: string
  zone: string
  url: string
  format?: string
  country?: string
}

/** Result of a Web Unlocker scrape: page content plus HTTP status. */
export interface BrightDataScrapeUrlResponse extends ToolResponse {
  output: {
    content: string
    url: string | null
    statusCode: number | null
  }
}

/** Parameters for the SERP search tool. */
export interface BrightDataSerpSearchParams {
  apiKey: string
  zone: string
  query: string
  searchEngine?: string
  country?: string
  language?: string
  numResults?: number
}

/** Structured SERP results normalized across search engines. */
export interface BrightDataSerpSearchResponse extends ToolResponse {
  output: {
    results: Array<{
      title: string | null
      url: string | null
      description: string | null
      rank: number | null
    }>
    query: string | null
    searchEngine: string | null
  }
}

/** Parameters for triggering an async dataset scrape. */
export interface BrightDataScrapeDatasetParams {
  apiKey: string
  datasetId: string
  // JSON-encoded array of {url} objects (or a single URL string).
  urls: string
  format?: string
}

/** Trigger acknowledgment: the snapshot ID to poll plus initial status. */
export interface BrightDataScrapeDatasetResponse extends ToolResponse {
  output: {
    snapshotId: string
    status: string
  }
}

/** Parameters for the synchronous (<= 20 URLs) dataset scrape. */
export interface BrightDataSyncScrapeParams {
  apiKey: string
  datasetId: string
  // JSON-encoded array of {url} objects (or a single URL string).
  urls: string
  format?: string
  includeErrors?: boolean
}

/**
 * Sync-scrape result. When the job exceeds the synchronous window, isAsync is
 * true, data is empty, and snapshotId identifies the job to poll instead.
 */
export interface BrightDataSyncScrapeResponse extends ToolResponse {
  output: {
    data: Array<Record<string, unknown>>
    snapshotId: string | null
    isAsync: boolean
  }
}

/** Parameters for polling an async scrape job's progress. */
export interface BrightDataSnapshotStatusParams {
  apiKey: string
  snapshotId: string
}

/** Progress report for a snapshot: "starting", "running", "ready", or "failed". */
export interface BrightDataSnapshotStatusResponse extends ToolResponse {
  output: {
    snapshotId: string | null
    datasetId: string | null
    status: string
  }
}

/** Parameters for downloading a finished snapshot's records. */
export interface BrightDataDownloadSnapshotParams {
  apiKey: string
  snapshotId: string
  format?: string
  compress?: boolean
}

/** Downloaded snapshot records plus the content type they arrived in. */
export interface BrightDataDownloadSnapshotResponse extends ToolResponse {
  output: {
    data: Array<Record<string, unknown>>
    format: string
    snapshotId: string | null
  }
}

/** Parameters for cancelling a running snapshot. */
export interface BrightDataCancelSnapshotParams {
  apiKey: string
  snapshotId: string
}

/** Cancellation acknowledgment. */
export interface BrightDataCancelSnapshotResponse extends ToolResponse {
  output: {
    snapshotId: string | null
    cancelled: boolean
  }
}

/** Parameters for the discovery/search tool. */
export interface BrightDataDiscoverParams {
  apiKey: string
  query: string
  numResults?: number
  intent?: string
  includeContent?: boolean
  format?: string
  language?: string
  country?: string
}

/** Discovery results: ranked URLs with optional fetched content. */
export interface BrightDataDiscoverResponse extends ToolResponse {
  output: {
    results: Array<{
      url: string | null
      title: string | null
      description: string | null
      relevanceScore: number | null
      content: string | null
    }>
    query: string | null
    totalResults: number
  }
}

/** Union of every Bright Data tool response shape. */
export type BrightDataResponse =
  | BrightDataScrapeUrlResponse
  | BrightDataSerpSearchResponse
  | BrightDataScrapeDatasetResponse
  | BrightDataSyncScrapeResponse
  | BrightDataSnapshotStatusResponse
  | BrightDataDownloadSnapshotResponse
  | BrightDataCancelSnapshotResponse
  | BrightDataDiscoverResponse
|
||||
@@ -183,6 +183,7 @@ vi.mock('@/tools/registry', () => {
|
||||
name: 'Gmail Read',
|
||||
description: 'Read Gmail messages',
|
||||
version: '1.0.0',
|
||||
oauth: { required: true, provider: 'google-email' },
|
||||
params: {},
|
||||
request: { url: '/api/tools/gmail/read', method: 'GET' },
|
||||
},
|
||||
@@ -191,6 +192,7 @@ vi.mock('@/tools/registry', () => {
|
||||
name: 'Gmail Send',
|
||||
description: 'Send Gmail messages',
|
||||
version: '1.0.0',
|
||||
oauth: { required: true, provider: 'google-email' },
|
||||
params: {},
|
||||
request: { url: '/api/tools/gmail/send', method: 'POST' },
|
||||
},
|
||||
@@ -982,6 +984,37 @@ describe('Copilot File Parameter Normalization', () => {
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Verifies that copilot-initiated executions of OAuth-required tools fail fast
 * when no explicit credential selector is provided, rather than auto-selecting
 * a credential or issuing any network request.
 */
describe('Copilot OAuth Credential Enforcement', () => {
  let cleanupEnvVars: () => void

  beforeEach(() => {
    // NOTE(review): NEXT_PUBLIC_APP_URL is set both directly and via
    // setupEnvVars; presumably setupEnvVars registers the restore hook —
    // confirm whether the direct assignment is still needed.
    process.env.NEXT_PUBLIC_APP_URL = 'http://localhost:3000'
    cleanupEnvVars = setupEnvVars({ NEXT_PUBLIC_APP_URL: 'http://localhost:3000' })
  })

  afterEach(() => {
    vi.resetAllMocks()
    cleanupEnvVars()
  })

  it('fails fast when copilot executes an oauth tool without an explicit credential selector', async () => {
    // Stub fetch so any outgoing request would be observable (and asserted against).
    const fetchMock = vi.fn()
    global.fetch = Object.assign(fetchMock, { preconnect: vi.fn() }) as typeof fetch

    const context = createToolExecutionContext({
      workspaceId: 'workspace-456',
      copilotToolExecution: true,
    } as any)

    // gmail_read is registered (in the mocked registry) with oauth.required=true.
    const result = await executeTool('gmail_read', { maxResults: 5 }, false, context)

    // Expect a failure that instructs the copilot where to find credential IDs,
    // and confirm no HTTP call was ever attempted.
    expect(result.success).toBe(false)
    expect(result.error).toContain('credentialId')
    expect(result.error).toContain('environment/credentials.json')
    expect(fetchMock).not.toHaveBeenCalled()
  })
})
|
||||
|
||||
describe('Centralized Error Handling', () => {
|
||||
let cleanupEnvVars: () => void
|
||||
|
||||
|
||||
@@ -165,6 +165,43 @@ async function normalizeCopilotFileParams(
|
||||
}
|
||||
}
|
||||
|
||||
function readExplicitCredentialSelector(params: Record<string, unknown>): string | undefined {
|
||||
for (const key of ['credentialId', 'oauthCredential', 'credential'] as const) {
|
||||
const value = params[key]
|
||||
if (typeof value === 'string' && value.trim().length > 0) {
|
||||
return value.trim()
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
function normalizeCopilotCredentialParams(params: Record<string, unknown>): void {
|
||||
const credentialId = typeof params.credentialId === 'string' ? params.credentialId.trim() : ''
|
||||
if (credentialId && !params.credential && !params.oauthCredential) {
|
||||
params.credential = credentialId
|
||||
}
|
||||
}
|
||||
|
||||
function enforceCopilotCredentialSelection(
|
||||
toolId: string,
|
||||
tool: ToolConfig,
|
||||
params: Record<string, unknown>,
|
||||
scope: ToolExecutionScope
|
||||
): void {
|
||||
if (!scope.copilotToolExecution || !tool.oauth?.required) {
|
||||
return
|
||||
}
|
||||
|
||||
if (readExplicitCredentialSelector(params)) {
|
||||
return
|
||||
}
|
||||
|
||||
const toolLabel = tool.name || toolId
|
||||
throw new Error(
|
||||
`Copilot must pass credentialId for ${toolLabel}. Read environment/credentials.json and pass the exact credentialId for provider "${tool.oauth.provider}".`
|
||||
)
|
||||
}
|
||||
|
||||
/** Result from hosted key injection */
|
||||
interface HostedKeyInjectionResult {
|
||||
isUsingHostedKey: boolean
|
||||
@@ -789,6 +826,8 @@ export async function executeTool(
|
||||
}
|
||||
|
||||
await normalizeCopilotFileParams(tool, contextParams, scope)
|
||||
normalizeCopilotCredentialParams(contextParams)
|
||||
enforceCopilotCredentialSelection(toolId, tool, contextParams, scope)
|
||||
|
||||
// Inject hosted API key if tool supports it and user didn't provide one
|
||||
const hostedKeyInfo = await injectHostedKeyIfNeeded(
|
||||
|
||||
@@ -142,6 +142,41 @@ describe('Tool Parameters Utils', () => {
|
||||
expect(schema.properties).toHaveProperty('message')
|
||||
})
|
||||
|
||||
// Verifies that createUserToolSchema exposes a required `credentialId`
// property only when generating the copilot-facing schema variant; the
// default schema for the same OAuth tool must not include it.
it.concurrent('adds credentialId only for copilot-facing oauth schemas', () => {
  // Minimal OAuth-required tool: one LLM-visible param plus a hidden token.
  const oauthTool = {
    ...mockToolConfig,
    id: 'oauth_schema_tool',
    oauth: {
      required: true,
      provider: 'google-email',
    },
    params: {
      message: {
        type: 'string',
        required: true,
        visibility: 'user-or-llm' as ParameterVisibility,
        description: 'Message to send',
      },
      accessToken: {
        type: 'string',
        required: true,
        visibility: 'hidden' as ParameterVisibility,
        description: 'OAuth access token',
      },
    },
  }

  // Same tool, two surfaces: default vs copilot.
  const defaultSchema = createUserToolSchema(oauthTool)
  const copilotSchema = createUserToolSchema(oauthTool, { surface: 'copilot' })

  expect(defaultSchema.properties).not.toHaveProperty('credentialId')
  expect(copilotSchema.properties).toHaveProperty('credentialId')
  expect(copilotSchema.properties.credentialId).toMatchObject({
    type: 'string',
  })
  // credentialId must be mandatory on the copilot surface.
  expect(copilotSchema.required).toContain('credentialId')
})
|
||||
|
||||
it.concurrent('keeps shared file params unchanged by default', () => {
|
||||
const toolWithFileParam = {
|
||||
...mockToolConfig,
|
||||
|
||||
@@ -471,6 +471,7 @@ export function createUserToolSchema(
|
||||
toolConfig: ToolConfig,
|
||||
options: UserToolSchemaOptions = {}
|
||||
): ToolSchema {
|
||||
const surface = options.surface ?? 'default'
|
||||
const schema: ToolSchema = {
|
||||
type: 'object',
|
||||
properties: {},
|
||||
@@ -492,12 +493,13 @@ export function createUserToolSchema(
|
||||
}
|
||||
}
|
||||
|
||||
if (toolConfig.oauth?.required) {
|
||||
if (toolConfig.oauth?.required && surface === 'copilot') {
|
||||
schema.properties.credentialId = {
|
||||
type: 'string',
|
||||
description:
|
||||
'Optional credential ID to use when multiple accounts are connected for this provider. Get IDs from environment/credentials.json. If omitted, auto-selects the first available credential.',
|
||||
'Credential ID to use for this OAuth tool call. Required for Copilot/Superagent execution. Get valid IDs from environment/credentials.json.',
|
||||
}
|
||||
schema.required.push('credentialId')
|
||||
}
|
||||
|
||||
return schema
|
||||
|
||||
@@ -236,6 +236,16 @@ import {
|
||||
boxSignResendRequestTool,
|
||||
} from '@/tools/box_sign'
|
||||
import { brandfetchGetBrandTool, brandfetchSearchTool } from '@/tools/brandfetch'
|
||||
import {
|
||||
brightDataCancelSnapshotTool,
|
||||
brightDataDiscoverTool,
|
||||
brightDataDownloadSnapshotTool,
|
||||
brightDataScrapeDatasetTool,
|
||||
brightDataScrapeUrlTool,
|
||||
brightDataSerpSearchTool,
|
||||
brightDataSnapshotStatusTool,
|
||||
brightDataSyncScrapeTool,
|
||||
} from '@/tools/brightdata'
|
||||
import { browserUseRunTaskTool } from '@/tools/browser_use'
|
||||
import {
|
||||
calcomCancelBookingTool,
|
||||
@@ -2921,6 +2931,14 @@ export const tools: Record<string, ToolConfig> = {
|
||||
athena_stop_query: athenaStopQueryTool,
|
||||
brandfetch_get_brand: brandfetchGetBrandTool,
|
||||
brandfetch_search: brandfetchSearchTool,
|
||||
brightdata_cancel_snapshot: brightDataCancelSnapshotTool,
|
||||
brightdata_discover: brightDataDiscoverTool,
|
||||
brightdata_download_snapshot: brightDataDownloadSnapshotTool,
|
||||
brightdata_scrape_dataset: brightDataScrapeDatasetTool,
|
||||
brightdata_scrape_url: brightDataScrapeUrlTool,
|
||||
brightdata_serp_search: brightDataSerpSearchTool,
|
||||
brightdata_snapshot_status: brightDataSnapshotStatusTool,
|
||||
brightdata_sync_scrape: brightDataSyncScrapeTool,
|
||||
box_copy_file: boxCopyFileTool,
|
||||
box_create_folder: boxCreateFolderTool,
|
||||
box_delete_file: boxDeleteFileTool,
|
||||
|
||||
4
bun.lock
4
bun.lock
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 0,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "simstudio",
|
||||
@@ -179,7 +178,6 @@
|
||||
"next-themes": "^0.4.6",
|
||||
"nodemailer": "7.0.11",
|
||||
"officeparser": "^5.2.0",
|
||||
"onedollarstats": "0.0.10",
|
||||
"openai": "^4.91.1",
|
||||
"papaparse": "5.5.3",
|
||||
"pdf-lib": "1.17.1",
|
||||
@@ -3123,8 +3121,6 @@
|
||||
|
||||
"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
|
||||
|
||||
"onedollarstats": ["onedollarstats@0.0.10", "", {}, "sha512-+s2o5qBuKej2BrbJDqVRZr9U7F0ERBsNjXIJs1DSy2yK4yNk8z5iM0nHuwhelbNgqyVEwckCV7BJ9MsP/c8kQw=="],
|
||||
|
||||
"onetime": ["onetime@6.0.0", "", { "dependencies": { "mimic-fn": "^4.0.0" } }, "sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ=="],
|
||||
|
||||
"oniguruma-parser": ["oniguruma-parser@0.12.1", "", {}, "sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w=="],
|
||||
|
||||
Reference in New Issue
Block a user